Falcon source files (reference implementation)


common.c

    1 /*
    2  * Support functions for signatures (hash-to-point, norm).
    3  *
    4  * ==========================(LICENSE BEGIN)============================
    5  *
    6  * Copyright (c) 2017-2019  Falcon Project
    7  *
    8  * Permission is hereby granted, free of charge, to any person obtaining
    9  * a copy of this software and associated documentation files (the
   10  * "Software"), to deal in the Software without restriction, including
   11  * without limitation the rights to use, copy, modify, merge, publish,
   12  * distribute, sublicense, and/or sell copies of the Software, and to
   13  * permit persons to whom the Software is furnished to do so, subject to
   14  * the following conditions:
   15  *
   16  * The above copyright notice and this permission notice shall be
   17  * included in all copies or substantial portions of the Software.
   18  *
   19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   22  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   26  *
   27  * ===========================(LICENSE END)=============================
   28  *
   29  * @author   Thomas Pornin <thomas.pornin@nccgroup.com>
   30  */
   31 
   32 #include "inner.h"
   33 
   34 /* see inner.h */
   35 void
   36 Zf(hash_to_point_vartime)(
   37         inner_shake256_context *sc,
   38         uint16_t *x, unsigned logn)
   39 {
   40         /*
   41          * This is the straightforward per-the-spec implementation. It
   42          * is not constant-time, thus it might reveal information on the
   43          * plaintext (at least, enough to check the plaintext against a
   44          * list of potential plaintexts) in a scenario where the
   45          * attacker does not have access to the signature value or to
   46          * the public key, but knows the nonce (without knowledge of the
   47          * nonce, the hashed output cannot be matched against potential
   48          * plaintexts).
   49          */
   50         size_t n;
   51 
   52         n = (size_t)1 << logn;
   53         while (n > 0) {
   54                 uint8_t buf[2];
   55                 uint32_t w;
   56 
   57                 inner_shake256_extract(sc, (void *)buf, sizeof buf);
   58                 w = ((unsigned)buf[0] << 8) | (unsigned)buf[1];
   59                 if (w < 61445) {
   60                         while (w >= 12289) {
   61                                 w -= 12289;
   62                         }
   63                         *x ++ = (uint16_t)w;
   64                         n --;
   65                 }
   66         }
   67 }
   68 
   69 /* see inner.h */
   70 void
   71 Zf(hash_to_point_ct)(
   72         inner_shake256_context *sc,
   73         uint16_t *x, unsigned logn, uint8_t *tmp)
   74 {
   75         /*
   76          * Each 16-bit sample is a value in 0..65535. The value is
   77          * kept if it falls in 0..61444 (because 61445 = 5*12289)
   78          * and rejected otherwise; thus, each sample has probability
   79          * about 0.93758 of being selected.
   80          *
   81          * We want to oversample enough to be sure that we will
   82          * have enough values with probability at least 1 - 2^(-256).
   83          * Depending on degree N, this leads to the following
   84          * required oversampling:
   85          *
   86          *   logn     n  oversampling
   87          *     1      2     65
   88          *     2      4     67
   89          *     3      8     71
   90          *     4     16     77
   91          *     5     32     86
   92          *     6     64    100
   93          *     7    128    122
   94          *     8    256    154
   95          *     9    512    205
   96          *    10   1024    287
   97          *
   98          * If logn >= 7, then the provided temporary buffer is large
   99          * enough. Otherwise, we use a stack buffer of 63 entries
  100          * (i.e. 126 bytes) for the values that do not fit in tmp[].
  101          */
  102 
  103         static const uint16_t overtab[] = {
  104                 0, /* unused */
  105                 65,
  106                 67,
  107                 71,
  108                 77,
  109                 86,
  110                 100,
  111                 122,
  112                 154,
  113                 205,
  114                 287
  115         };
  116 
  117         unsigned n, n2, u, m, p, over;
  118         uint16_t *tt1, tt2[63];
  119 
  120         /*
  121          * We first generate m 16-bit value. Values 0..n-1 go to x[].
  122          * Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[].
  123          * We also reduce modulo q the values; rejected values are set
  124          * to 0xFFFF.
  125          */
  126         n = 1U << logn;
  127         n2 = n << 1;
  128         over = overtab[logn];
  129         m = n + over;
  130         tt1 = (uint16_t *)tmp;
  131         for (u = 0; u < m; u ++) {
  132                 uint8_t buf[2];
  133                 uint32_t w, wr;
  134 
  135                 inner_shake256_extract(sc, buf, sizeof buf);
  136                 w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1];
  137                 wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1));
  138                 wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1));
  139                 wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1));
  140                 wr |= ((w - 61445) >> 31) - 1;
  141                 if (u < n) {
  142                         x[u] = (uint16_t)wr;
  143                 } else if (u < n2) {
  144                         tt1[u - n] = (uint16_t)wr;
  145                 } else {
  146                         tt2[u - n2] = (uint16_t)wr;
  147                 }
  148         }
  149 
  150         /*
  151          * Now we must "squeeze out" the invalid values. We do this in
  152          * a logarithmic sequence of passes; each pass computes where a
  153          * value should go, and moves it down by 'p' slots if necessary,
  154          * where 'p' uses an increasing powers-of-two scale. It can be
  155          * shown that in all cases where the loop decides that a value
  156          * has to be moved down by p slots, the destination slot is
  157          * "free" (i.e. contains an invalid value).
  158          */
  159         for (p = 1; p <= over; p <<= 1) {
  160                 unsigned v;
  161 
  162                 /*
  163                  * In the loop below:
  164                  *
  165                  *   - v contains the index of the final destination of
  166                  *     the value; it is recomputed dynamically based on
  167                  *     whether values are valid or not.
  168                  *
  169                  *   - u is the index of the value we consider ("source");
  170                  *     its address is s.
  171                  *
  172                  *   - The loop may swap the value with the one at index
  173                  *     u-p. The address of the swap destination is d.
  174                  */
  175                 v = 0;
  176                 for (u = 0; u < m; u ++) {
  177                         uint16_t *s, *d;
  178                         unsigned j, sv, dv, mk;
  179 
  180                         if (u < n) {
  181                                 s = &x[u];
  182                         } else if (u < n2) {
  183                                 s = &tt1[u - n];
  184                         } else {
  185                                 s = &tt2[u - n2];
  186                         }
  187                         sv = *s;
  188 
  189                         /*
  190                          * The value in sv should ultimately go to
  191                          * address v, i.e. jump back by u-v slots.
  192                          */
  193                         j = u - v;
  194 
  195                         /*
  196                          * We increment v for the next iteration, but
  197                          * only if the source value is valid. The mask
  198                          * 'mk' is -1 if the value is valid, 0 otherwise,
  199                          * so we _subtract_ mk.
  200                          */
  201                         mk = (sv >> 15) - 1U;
  202                         v -= mk;
  203 
  204                         /*
  205                          * In this loop we consider jumps by p slots; if
  206                          * u < p then there is nothing more to do.
  207                          */
  208                         if (u < p) {
  209                                 continue;
  210                         }
  211 
  212                         /*
  213                          * Destination for the swap: value at address u-p.
  214                          */
  215                         if ((u - p) < n) {
  216                                 d = &x[u - p];
  217                         } else if ((u - p) < n2) {
  218                                 d = &tt1[(u - p) - n];
  219                         } else {
  220                                 d = &tt2[(u - p) - n2];
  221                         }
  222                         dv = *d;
  223 
  224                         /*
  225                          * The swap should be performed only if the source
  226                          * is valid AND the jump j has its 'p' bit set.
  227                          */
  228                         mk &= -(((j & p) + 0x1FF) >> 9);
  229 
  230                         *s = (uint16_t)(sv ^ (mk & (sv ^ dv)));
  231                         *d = (uint16_t)(dv ^ (mk & (sv ^ dv)));
  232                 }
  233         }
  234 }
  235 
  236 /* see inner.h */
  237 int
  238 Zf(is_short)(
  239         const int16_t *s1, const int16_t *s2, unsigned logn)
  240 {
  241         /*
  242          * We use the l2-norm. Code below uses only 32-bit operations to
  243          * compute the square of the norm with saturation to 2^32-1 if
  244          * the value exceeds 2^31-1.
  245          */
  246         size_t n, u;
  247         uint32_t s, ng;
  248 
  249         n = (size_t)1 << logn;
  250         s = 0;
  251         ng = 0;
  252         for (u = 0; u < n; u ++) {
  253                 int32_t z;
  254 
  255                 z = s1[u];
  256                 s += (uint32_t)(z * z);
  257                 ng |= s;
  258                 z = s2[u];
  259                 s += (uint32_t)(z * z);
  260                 ng |= s;
  261         }
  262         s |= -(ng >> 31);
  263 
  264         /*
  265          * Acceptance bound on the l2-norm is:
  266          *   1.2*1.55*sqrt(q)*sqrt(2*N)
  267          * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
  268          */
  269         return s < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
  270 }
  271 
  272 /* see inner.h */
  273 int
  274 Zf(is_short_half)(
  275         uint32_t sqn, const int16_t *s2, unsigned logn)
  276 {
  277         size_t n, u;
  278         uint32_t ng;
  279 
  280         n = (size_t)1 << logn;
  281         ng = -(sqn >> 31);
  282         for (u = 0; u < n; u ++) {
  283                 int32_t z;
  284 
  285                 z = s2[u];
  286                 sqn += (uint32_t)(z * z);
  287                 ng |= sqn;
  288         }
  289         sqn |= -(ng >> 31);
  290 
  291         /*
  292          * Acceptance bound on the l2-norm is:
  293          *   1.2*1.55*sqrt(q)*sqrt(2*N)
  294          * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
  295          */
  296         return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
  297 }