Falcon source files (reference implementation)


vrfy.c

    1 /*
    2  * Falcon signature verification.
    3  *
    4  * ==========================(LICENSE BEGIN)============================
    5  *
    6  * Copyright (c) 2017-2019  Falcon Project
    7  *
    8  * Permission is hereby granted, free of charge, to any person obtaining
    9  * a copy of this software and associated documentation files (the
   10  * "Software"), to deal in the Software without restriction, including
   11  * without limitation the rights to use, copy, modify, merge, publish,
   12  * distribute, sublicense, and/or sell copies of the Software, and to
   13  * permit persons to whom the Software is furnished to do so, subject to
   14  * the following conditions:
   15  *
   16  * The above copyright notice and this permission notice shall be
   17  * included in all copies or substantial portions of the Software.
   18  *
   19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   22  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   26  *
   27  * ===========================(LICENSE END)=============================
   28  *
   29  * @author   Thomas Pornin <thomas.pornin@nccgroup.com>
   30  */
   31 
   32 #include "inner.h"
   33 
   34 /* ===================================================================== */
   35 /*
   36  * Constants for NTT.
   37  *
   38  *   n = 2^logn  (2 <= n <= 1024)
   39  *   phi = X^n + 1
   40  *   q = 12289
   41  *   q0i = -1/q mod 2^16
   42  *   R = 2^16 mod q
   43  *   R2 = 2^32 mod q
   44  */
   45 
   46 #define Q     12289
   47 #define Q0I   12287
   48 #define R      4091
   49 #define R2    10952
   50 
   51 /*
   52  * Table for NTT, binary case:
   53  *   GMb[x] = R*(g^rev(x)) mod q
   54  * where g = 7 (it is a 2048-th primitive root of 1 modulo q)
   55  * and rev() is the bit-reversal function over 10 bits.
   56  */
   57 static const uint16_t GMb[] = {
   58          4091,  7888, 11060, 11208,  6960,  4342,  6275,  9759,
   59          1591,  6399,  9477,  5266,   586,  5825,  7538,  9710,
   60          1134,  6407,  1711,   965,  7099,  7674,  3743,  6442,
   61         10414,  8100,  1885,  1688,  1364, 10329, 10164,  9180,
   62         12210,  6240,   997,   117,  4783,  4407,  1549,  7072,
   63          2829,  6458,  4431,  8877,  7144,  2564,  5664,  4042,
   64         12189,   432, 10751,  1237,  7610,  1534,  3983,  7863,
   65          2181,  6308,  8720,  6570,  4843,  1690,    14,  3872,
   66          5569,  9368, 12163,  2019,  7543,  2315,  4673,  7340,
   67          1553,  1156,  8401, 11389,  1020,  2967, 10772,  7045,
   68          3316, 11236,  5285, 11578, 10637, 10086,  9493,  6180,
   69          9277,  6130,  3323,   883, 10469,   489,  1502,  2851,
   70         11061,  9729,  2742, 12241,  4970, 10481, 10078,  1195,
   71           730,  1762,  3854,  2030,  5892, 10922,  9020,  5274,
   72          9179,  3604,  3782, 10206,  3180,  3467,  4668,  2446,
   73          7613,  9386,   834,  7703,  6836,  3403,  5351, 12276,
   74          3580,  1739, 10820,  9787, 10209,  4070, 12250,  8525,
   75         10401,  2749,  7338, 10574,  6040,   943,  9330,  1477,
   76          6865,  9668,  3585,  6633, 12145,  4063,  3684,  7680,
   77          8188,  6902,  3533,  9807,  6090,   727, 10099,  7003,
   78          6945,  1949,  9731, 10559,  6057,   378,  7871,  8763,
   79          8901,  9229,  8846,  4551,  9589, 11664,  7630,  8821,
   80          5680,  4956,  6251,  8388, 10156,  8723,  2341,  3159,
   81          1467,  5460,  8553,  7783,  2649,  2320,  9036,  6188,
   82           737,  3698,  4699,  5753,  9046,  3687,    16,   914,
   83          5186, 10531,  4552,  1964,  3509,  8436,  7516,  5381,
   84         10733,  3281,  7037,  1060,  2895,  7156,  8887,  5357,
   85          6409,  8197,  2962,  6375,  5064,  6634,  5625,   278,
   86           932, 10229,  8927,  7642,   351,  9298,   237,  5858,
   87          7692,  3146, 12126,  7586,  2053, 11285,  3802,  5204,
   88          4602,  1748, 11300,   340,  3711,  4614,   300, 10993,
   89          5070, 10049, 11616, 12247,  7421, 10707,  5746,  5654,
   90          3835,  5553,  1224,  8476,  9237,  3845,   250, 11209,
   91          4225,  6326,  9680, 12254,  4136,  2778,   692,  8808,
   92          6410,  6718, 10105, 10418,  3759,  7356, 11361,  8433,
   93          6437,  3652,  6342,  8978,  5391,  2272,  6476,  7416,
   94          8418, 10824, 11986,  5733,   876,  7030,  2167,  2436,
   95          3442,  9217,  8206,  4858,  5964,  2746,  7178,  1434,
   96          7389,  8879, 10661, 11457,  4220,  1432, 10832,  4328,
   97          8557,  1867,  9454,  2416,  3816,  9076,   686,  5393,
   98          2523,  4339,  6115,   619,   937,  2834,  7775,  3279,
   99          2363,  7488,  6112,  5056,   824, 10204, 11690,  1113,
  100          2727,  9848,   896,  2028,  5075,  2654, 10464,  7884,
  101         12169,  5434,  3070,  6400,  9132, 11672, 12153,  4520,
  102          1273,  9739, 11468,  9937, 10039,  9720,  2262,  9399,
  103         11192,   315,  4511,  1158,  6061,  6751, 11865,   357,
  104          7367,  4550,   983,  8534,  8352, 10126,  7530,  9253,
  105          4367,  5221,  3999,  8777,  3161,  6990,  4130, 11652,
  106          3374, 11477,  1753,   292,  8681,  2806, 10378, 12188,
  107          5800, 11811,  3181,  1988,  1024,  9340,  2477, 10928,
  108          4582,  6750,  3619,  5503,  5233,  2463,  8470,  7650,
  109          7964,  6395,  1071,  1272,  3474, 11045,  3291, 11344,
  110          8502,  9478,  9837,  1253,  1857,  6233,  4720, 11561,
  111          6034,  9817,  3339,  1797,  2879,  6242,  5200,  2114,
  112          7962,  9353, 11363,  5475,  6084,  9601,  4108,  7323,
  113         10438,  9471,  1271,   408,  6911,  3079,   360,  8276,
  114         11535,  9156,  9049, 11539,   850,  8617,   784,  7919,
  115          8334, 12170,  1846, 10213, 12184,  7827, 11903,  5600,
  116          9779,  1012,   721,  2784,  6676,  6552,  5348,  4424,
  117          6816,  8405,  9959,  5150,  2356,  5552,  5267,  1333,
  118          8801,  9661,  7308,  5788,  4910,   909, 11613,  4395,
  119          8238,  6686,  4302,  3044,  2285, 12249,  1963,  9216,
  120          4296, 11918,   695,  4371,  9793,  4884,  2411, 10230,
  121          2650,   841,  3890, 10231,  7248,  8505, 11196,  6688,
  122          4059,  6060,  3686,  4722, 11853,  5816,  7058,  6868,
  123         11137,  7926,  4894, 12284,  4102,  3908,  3610,  6525,
  124          7938,  7982, 11977,  6755,   537,  4562,  1623,  8227,
  125         11453,  7544,   906, 11816,  9548, 10858,  9703,  2815,
  126         11736,  6813,  6979,   819,  8903,  6271, 10843,   348,
  127          7514,  8339,  6439,   694,   852,  5659,  2781,  3716,
  128         11589,  3024,  1523,  8659,  4114, 10738,  3303,  5885,
  129          2978,  7289, 11884,  9123,  9323, 11830,    98,  2526,
  130          2116,  4131, 11407,  1844,  3645,  3916,  8133,  2224,
  131         10871,  8092,  9651,  5989,  7140,  8480,  1670,   159,
  132         10923,  4918,   128,  7312,   725,  9157,  5006,  6393,
  133          3494,  6043, 10972,  6181, 11838,  3423, 10514,  7668,
  134          3693,  6658,  6905, 11953, 10212, 11922,  9101,  8365,
  135          5110,    45,  2400,  1921,  4377,  2720,  1695,    51,
  136          2808,   650,  1896,  9997,  9971, 11980,  8098,  4833,
  137          4135,  4257,  5838,  4765, 10985, 11532,   590, 12198,
  138           482, 12173,  2006,  7064, 10018,  3912, 12016, 10519,
  139         11362,  6954,  2210,   284,  5413,  6601,  3865, 10339,
  140         11188,  6231,   517,  9564, 11281,  3863,  1210,  4604,
  141          8160, 11447,   153,  7204,  5763,  5089,  9248, 12154,
  142         11748,  1354,  6672,   179,  5532,  2646,  5941, 12185,
  143           862,  3158,   477,  7279,  5678,  7914,  4254,   302,
  144          2893, 10114,  6890,  9560,  9647, 11905,  4098,  9824,
  145         10269,  1353, 10715,  5325,  6254,  3951,  1807,  6449,
  146          5159,  1308,  8315,  3404,  1877,  1231,   112,  6398,
  147         11724, 12272,  7286,  1459, 12274,  9896,  3456,   800,
  148          1397, 10678,   103,  7420,  7976,   936,   764,   632,
  149          7996,  8223,  8445,  7758, 10870,  9571,  2508,  1946,
  150          6524, 10158,  1044,  4338,  2457,  3641,  1659,  4139,
  151          4688,  9733, 11148,  3946,  2082,  5261,  2036, 11850,
  152          7636, 12236,  5366,  2380,  1399,  7720,  2100,  3217,
  153         10912,  8898,  7578, 11995,  2791,  1215,  3355,  2711,
  154          2267,  2004,  8568, 10176,  3214,  2337,  1750,  4729,
  155          4997,  7415,  6315, 12044,  4374,  7157,  4844,   211,
  156          8003, 10159,  9290, 11481,  1735,  2336,  5793,  9875,
  157          8192,   986,  7527,  1401,   870,  3615,  8465,  2756,
  158          9770,  2034, 10168,  3264,  6132,    54,  2880,  4763,
  159         11805,  3074,  8286,  9428,  4881,  6933,  1090, 10038,
  160          2567,   708,   893,  6465,  4962, 10024,  2090,  5718,
  161         10743,   780,  4733,  4623,  2134,  2087,  4802,   884,
  162          5372,  5795,  5938,  4333,  6559,  7549,  5269, 10664,
  163          4252,  3260,  5917, 10814,  5768,  9983,  8096,  7791,
  164          6800,  7491,  6272,  1907, 10947,  6289, 11803,  6032,
  165         11449,  1171,  9201,  7933,  2479,  7970, 11337,  7062,
  166          8911,  6728,  6542,  8114,  8828,  6595,  3545,  4348,
  167          4610,  2205,  6999,  8106,  5560, 10390,  9321,  2499,
  168          2413,  7272,  6881, 10582,  9308,  9437,  3554,  3326,
  169          5991, 11969,  3415, 12283,  9838, 12063,  4332,  7830,
  170         11329,  6605, 12271,  2044, 11611,  7353, 11201, 11582,
  171          3733,  8943,  9978,  1627,  7168,  3935,  5050,  2762,
  172          7496, 10383,   755,  1654, 12053,  4952, 10134,  4394,
  173          6592,  7898,  7497,  8904, 12029,  3581, 10748,  5674,
  174         10358,  4901,  7414,  8771,   710,  6764,  8462,  7193,
  175          5371,  7274, 11084,   290,  7864,  6827, 11822,  2509,
  176          6578,  4026,  5807,  1458,  5721,  5762,  4178,  2105,
  177         11621,  4852,  8897,  2856, 11510,  9264,  2520,  8776,
  178          7011,  2647,  1898,  7039,  5950, 11163,  5488,  6277,
  179          9182, 11456,   633, 10046, 11554,  5633,  9587,  2333,
  180          7008,  7084,  5047,  7199,  9865,  8997,   569,  6390,
  181         10845,  9679,  8268, 11472,  4203,  1997,     2,  9331,
  182           162,  6182,  2000,  3649,  9792,  6363,  7557,  6187,
  183          8510,  9935,  5536,  9019,  3706, 12009,  1452,  3067,
  184          5494,  9692,  4865,  6019,  7106,  9610,  4588, 10165,
  185          6261,  5887,  2652, 10172,  1580, 10379,  4638,  9949
  186 };
  187 
  188 /*
  189  * Table for inverse NTT, binary case:
  190  *   iGMb[x] = R*((1/g)^rev(x)) mod q
  191  * Since g = 7, 1/g = 8778 mod 12289.
  192  */
  193 static const uint16_t iGMb[] = {
  194          4091,  4401,  1081,  1229,  2530,  6014,  7947,  5329,
  195          2579,  4751,  6464, 11703,  7023,  2812,  5890, 10698,
  196          3109,  2125,  1960, 10925, 10601, 10404,  4189,  1875,
  197          5847,  8546,  4615,  5190, 11324, 10578,  5882, 11155,
  198          8417, 12275, 10599,  7446,  5719,  3569,  5981, 10108,
  199          4426,  8306, 10755,  4679, 11052,  1538, 11857,   100,
  200          8247,  6625,  9725,  5145,  3412,  7858,  5831,  9460,
  201          5217, 10740,  7882,  7506, 12172, 11292,  6049,    79,
  202            13,  6938,  8886,  5453,  4586, 11455,  2903,  4676,
  203          9843,  7621,  8822,  9109,  2083,  8507,  8685,  3110,
  204          7015,  3269,  1367,  6397, 10259,  8435, 10527, 11559,
  205         11094,  2211,  1808,  7319,    48,  9547,  2560,  1228,
  206          9438, 10787, 11800,  1820, 11406,  8966,  6159,  3012,
  207          6109,  2796,  2203,  1652,   711,  7004,  1053,  8973,
  208          5244,  1517,  9322, 11269,   900,  3888, 11133, 10736,
  209          4949,  7616,  9974,  4746, 10270,   126,  2921,  6720,
  210          6635,  6543,  1582,  4868,    42,   673,  2240,  7219,
  211          1296, 11989,  7675,  8578, 11949,   989, 10541,  7687,
  212          7085,  8487,  1004, 10236,  4703,   163,  9143,  4597,
  213          6431, 12052,  2991, 11938,  4647,  3362,  2060, 11357,
  214         12011,  6664,  5655,  7225,  5914,  9327,  4092,  5880,
  215          6932,  3402,  5133,  9394, 11229,  5252,  9008,  1556,
  216          6908,  4773,  3853,  8780, 10325,  7737,  1758,  7103,
  217         11375, 12273,  8602,  3243,  6536,  7590,  8591, 11552,
  218          6101,  3253,  9969,  9640,  4506,  3736,  6829, 10822,
  219          9130,  9948,  3566,  2133,  3901,  6038,  7333,  6609,
  220          3468,  4659,   625,  2700,  7738,  3443,  3060,  3388,
  221          3526,  4418, 11911,  6232,  1730,  2558, 10340,  5344,
  222          5286,  2190, 11562,  6199,  2482,  8756,  5387,  4101,
  223          4609,  8605,  8226,   144,  5656,  8704,  2621,  5424,
  224         10812,  2959, 11346,  6249,  1715,  4951,  9540,  1888,
  225          3764,    39,  8219,  2080,  2502,  1469, 10550,  8709,
  226          5601,  1093,  3784,  5041,  2058,  8399, 11448,  9639,
  227          2059,  9878,  7405,  2496,  7918, 11594,   371,  7993,
  228          3073, 10326,    40, 10004,  9245,  7987,  5603,  4051,
  229          7894,   676, 11380,  7379,  6501,  4981,  2628,  3488,
  230         10956,  7022,  6737,  9933,  7139,  2330,  3884,  5473,
  231          7865,  6941,  5737,  5613,  9505, 11568, 11277,  2510,
  232          6689,   386,  4462,   105,  2076, 10443,   119,  3955,
  233          4370, 11505,  3672, 11439,   750,  3240,  3133,   754,
  234          4013, 11929,  9210,  5378, 11881, 11018,  2818,  1851,
  235          4966,  8181,  2688,  6205,  6814,   926,  2936,  4327,
  236         10175,  7089,  6047,  9410, 10492,  8950,  2472,  6255,
  237           728,  7569,  6056, 10432, 11036,  2452,  2811,  3787,
  238           945,  8998,  1244,  8815, 11017, 11218,  5894,  4325,
  239          4639,  3819,  9826,  7056,  6786,  8670,  5539,  7707,
  240          1361,  9812,  2949, 11265, 10301,  9108,   478,  6489,
  241           101,  1911,  9483,  3608, 11997, 10536,   812,  8915,
  242           637,  8159,  5299,  9128,  3512,  8290,  7068,  7922,
  243          3036,  4759,  2163,  3937,  3755, 11306,  7739,  4922,
  244         11932,   424,  5538,  6228, 11131,  7778, 11974,  1097,
  245          2890, 10027,  2569,  2250,  2352,   821,  2550, 11016,
  246          7769,   136,   617,  3157,  5889,  9219,  6855,   120,
  247          4405,  1825,  9635,  7214, 10261, 11393,  2441,  9562,
  248         11176,   599,  2085, 11465,  7233,  6177,  4801,  9926,
  249          9010,  4514,  9455, 11352, 11670,  6174,  7950,  9766,
  250          6896, 11603,  3213,  8473,  9873,  2835, 10422,  3732,
  251          7961,  1457, 10857,  8069,   832,  1628,  3410,  4900,
  252         10855,  5111,  9543,  6325,  7431,  4083,  3072,  8847,
  253          9853, 10122,  5259, 11413,  6556,   303,  1465,  3871,
  254          4873,  5813, 10017,  6898,  3311,  5947,  8637,  5852,
  255          3856,   928,  4933,  8530,  1871,  2184,  5571,  5879,
  256          3481, 11597,  9511,  8153,    35,  2609,  5963,  8064,
  257          1080, 12039,  8444,  3052,  3813, 11065,  6736,  8454,
  258          2340,  7651,  1910, 10709,  2117,  9637,  6402,  6028,
  259          2124,  7701,  2679,  5183,  6270,  7424,  2597,  6795,
  260          9222, 10837,   280,  8583,  3270,  6753,  2354,  3779,
  261          6102,  4732,  5926,  2497,  8640, 10289,  6107, 12127,
  262          2958, 12287, 10292,  8086,   817,  4021,  2610,  1444,
  263          5899, 11720,  3292,  2424,  5090,  7242,  5205,  5281,
  264          9956,  2702,  6656,   735,  2243, 11656,   833,  3107,
  265          6012,  6801,  1126,  6339,  5250, 10391,  9642,  5278,
  266          3513,  9769,  3025,   779,  9433,  3392,  7437,   668,
  267         10184,  8111,  6527,  6568, 10831,  6482,  8263,  5711,
  268          9780,   467,  5462,  4425, 11999,  1205,  5015,  6918,
  269          5096,  3827,  5525, 11579,  3518,  4875,  7388,  1931,
  270          6615,  1541,  8708,   260,  3385,  4792,  4391,  5697,
  271          7895,  2155,  7337,   236, 10635, 11534,  1906,  4793,
  272          9527,  7239,  8354,  5121, 10662,  2311,  3346,  8556,
  273           707,  1088,  4936,   678, 10245,    18,  5684,   960,
  274          4459,  7957,   226,  2451,     6,  8874,   320,  6298,
  275          8963,  8735,  2852,  2981,  1707,  5408,  5017,  9876,
  276          9790,  2968,  1899,  6729,  4183,  5290, 10084,  7679,
  277          7941,  8744,  5694,  3461,  4175,  5747,  5561,  3378,
  278          5227,   952,  4319,  9810,  4356,  3088, 11118,   840,
  279          6257,   486,  6000,  1342, 10382,  6017,  4798,  5489,
  280          4498,  4193,  2306,  6521,  1475,  6372,  9029,  8037,
  281          1625,  7020,  4740,  5730,  7956,  6351,  6494,  6917,
  282         11405,  7487, 10202, 10155,  7666,  7556, 11509,  1546,
  283          6571, 10199,  2265,  7327,  5824, 11396, 11581,  9722,
  284          2251, 11199,  5356,  7408,  2861,  4003,  9215,   484,
  285          7526,  9409, 12235,  6157,  9025,  2121, 10255,  2519,
  286          9533,  3824,  8674, 11419, 10888,  4762, 11303,  4097,
  287          2414,  6496,  9953, 10554,   808,  2999,  2130,  4286,
  288         12078,  7445,  5132,  7915,   245,  5974,  4874,  7292,
  289          7560, 10539,  9952,  9075,  2113,  3721, 10285, 10022,
  290          9578,  8934, 11074,  9498,   294,  4711,  3391,  1377,
  291          9072, 10189,  4569, 10890,  9909,  6923,    53,  4653,
  292           439, 10253,  7028, 10207,  8343,  1141,  2556,  7601,
  293          8150, 10630,  8648,  9832,  7951, 11245,  2131,  5765,
  294         10343,  9781,  2718,  1419,  4531,  3844,  4066,  4293,
  295         11657, 11525, 11353,  4313,  4869, 12186,  1611, 10892,
  296         11489,  8833,  2393,    15, 10830,  5003,    17,   565,
  297          5891, 12177, 11058, 10412,  8885,  3974, 10981,  7130,
  298          5840, 10482,  8338,  6035,  6964,  1574, 10936,  2020,
  299          2465,  8191,   384,  2642,  2729,  5399,  2175,  9396,
  300         11987,  8035,  4375,  6611,  5010, 11812,  9131, 11427,
  301           104,  6348,  9643,  6757, 12110,  5617, 10935,   541,
  302           135,  3041,  7200,  6526,  5085, 12136,   842,  4129,
  303          7685, 11079,  8426,  1008,  2725, 11772,  6058,  1101,
  304          1950,  8424,  5688,  6876, 12005, 10079,  5335,   927,
  305          1770,   273,  8377,  2271,  5225, 10283,   116, 11807,
  306            91, 11699,   757,  1304,  7524,  6451,  8032,  8154,
  307          7456,  4191,   309,  2318,  2292, 10393, 11639,  9481,
  308         12238, 10594,  9569,  7912, 10368,  9889, 12244,  7179,
  309          3924,  3188,   367,  2077,   336,  5384,  5631,  8596,
  310          4621,  1775,  8866,   451,  6108,  1317,  6246,  8795,
  311          5896,  7283,  3132, 11564,  4977, 12161,  7371,  1366,
  312         12130, 10619,  3809,  5149,  6300,  2638,  4197,  1418,
  313         10065,  4156,  8373,  8644, 10445,   882,  8158, 10173,
  314          9763, 12191,   459,  2966,  3166,   405,  5000,  9311,
  315          6404,  8986,  1551,  8175,  3630, 10766,  9265,   700,
  316          8573,  9508,  6630, 11437, 11595,  5850,  3950,  4775,
  317         11941,  1446,  6018,  3386, 11470,  5310,  5476,   553,
  318          9474,  2586,  1431,  2741,   473, 11383,  4745,   836,
  319          4062, 10666,  7727, 11752,  5534,   312,  4307,  4351,
  320          5764,  8679,  8381,  8187,     5,  7395,  4363,  1152,
  321          5421,  5231,  6473,   436,  7567,  8603,  6229,  8230
  322 };
  323 
  324 /*
  325  * Reduce a small signed integer modulo q. The source integer MUST
  326  * be between -q/2 and +q/2.
  327  */
  328 static inline uint32_t
  329 mq_conv_small(int x)
  330 {
  331         /*
  332          * If x < 0, the cast to uint32_t will set the high bit to 1.
  333          */
  334         uint32_t y;
  335 
  336         y = (uint32_t)x;
  337         y += Q & -(y >> 31);
  338         return y;
  339 }
  340 
  341 /*
  342  * Addition modulo q. Operands must be in the 0..q-1 range.
  343  */
  344 static inline uint32_t
  345 mq_add(uint32_t x, uint32_t y)
  346 {
  347         /*
  348          * We compute x + y - q. If the result is negative, then the
  349          * high bit will be set, and 'd >> 31' will be equal to 1;
  350          * thus '-(d >> 31)' will be an all-one pattern. Otherwise,
  351          * it will be an all-zero pattern. In other words, this
  352          * implements a conditional addition of q.
  353          */
  354         uint32_t d;
  355 
  356         d = x + y - Q;
  357         d += Q & -(d >> 31);
  358         return d;
  359 }
  360 
  361 /*
  362  * Subtraction modulo q. Operands must be in the 0..q-1 range.
  363  */
  364 static inline uint32_t
  365 mq_sub(uint32_t x, uint32_t y)
  366 {
  367         /*
  368          * As in mq_add(), we use a conditional addition to ensure the
  369          * result is in the 0..q-1 range.
  370          */
  371         uint32_t d;
  372 
  373         d = x - y;
  374         d += Q & -(d >> 31);
  375         return d;
  376 }
  377 
  378 /*
  379  * Division by 2 modulo q. Operand must be in the 0..q-1 range.
  380  */
  381 static inline uint32_t
  382 mq_rshift1(uint32_t x)
  383 {
  384         x += Q & -(x & 1);
  385         return (x >> 1);
  386 }
  387 
  388 /*
  389  * Montgomery multiplication modulo q. If we set R = 2^16 mod q, then
  390  * this function computes: x * y / R mod q
  391  * Operands must be in the 0..q-1 range.
  392  */
  393 static inline uint32_t
  394 mq_montymul(uint32_t x, uint32_t y)
  395 {
  396         uint32_t z, w;
  397 
  398         /*
  399          * We compute x*y + k*q with a value of k chosen so that the 16
  400          * low bits of the result are 0. We can then shift the value.
  401          * After the shift, result may still be larger than q, but it
  402          * will be lower than 2*q, so a conditional subtraction works.
  403          */
  404 
  405         z = x * y;
  406         w = ((z * Q0I) & 0xFFFF) * Q;
  407 
  408         /*
  409          * When adding z and w, the result will have its low 16 bits
  410          * equal to 0. Since x, y and z are lower than q, the sum will
  411          * be no more than (2^15 - 1) * q + (q - 1)^2, which will
  412          * fit on 29 bits.
  413          */
  414         z = (z + w) >> 16;
  415 
  416         /*
  417          * After the shift, analysis shows that the value will be less
  418          * than 2q. We do a subtraction then conditional subtraction to
  419          * ensure the result is in the expected range.
  420          */
  421         z -= Q;
  422         z += Q & -(z >> 31);
  423         return z;
  424 }
  425 
  426 /*
  427  * Montgomery squaring (computes (x^2)/R).
  428  */
  429 static inline uint32_t
  430 mq_montysqr(uint32_t x)
  431 {
  432         return mq_montymul(x, x);
  433 }
  434 
  435 /*
  436  * Divide x by y modulo q = 12289.
  437  */
  438 static inline uint32_t
  439 mq_div_12289(uint32_t x, uint32_t y)
  440 {
  441         /*
  442          * We invert y by computing y^(q-2) mod q.
  443          *
  444          * We use the following addition chain for exponent e = 12287:
  445          *
  446          *   e0 = 1
  447          *   e1 = 2 * e0 = 2
  448          *   e2 = e1 + e0 = 3
  449          *   e3 = e2 + e1 = 5
  450          *   e4 = 2 * e3 = 10
  451          *   e5 = 2 * e4 = 20
  452          *   e6 = 2 * e5 = 40
  453          *   e7 = 2 * e6 = 80
  454          *   e8 = 2 * e7 = 160
  455          *   e9 = e8 + e2 = 163
  456          *   e10 = e9 + e8 = 323
  457          *   e11 = 2 * e10 = 646
  458          *   e12 = 2 * e11 = 1292
  459          *   e13 = e12 + e9 = 1455
  460          *   e14 = 2 * e13 = 2910
  461          *   e15 = 2 * e14 = 5820
  462          *   e16 = e15 + e10 = 6143
  463          *   e17 = 2 * e16 = 12286
  464          *   e18 = e17 + e0 = 12287
  465          *
  466          * Additions on exponents are converted to Montgomery
  467          * multiplications. We define all intermediate results as so
  468          * many local variables, and let the C compiler work out which
  469          * must be kept around.
  470          */
  471         uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
  472         uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18;
  473 
  474         y0 = mq_montymul(y, R2);
  475         y1 = mq_montysqr(y0);
  476         y2 = mq_montymul(y1, y0);
  477         y3 = mq_montymul(y2, y1);
  478         y4 = mq_montysqr(y3);
  479         y5 = mq_montysqr(y4);
  480         y6 = mq_montysqr(y5);
  481         y7 = mq_montysqr(y6);
  482         y8 = mq_montysqr(y7);
  483         y9 = mq_montymul(y8, y2);
  484         y10 = mq_montymul(y9, y8);
  485         y11 = mq_montysqr(y10);
  486         y12 = mq_montysqr(y11);
  487         y13 = mq_montymul(y12, y9);
  488         y14 = mq_montysqr(y13);
  489         y15 = mq_montysqr(y14);
  490         y16 = mq_montymul(y15, y10);
  491         y17 = mq_montysqr(y16);
  492         y18 = mq_montymul(y17, y0);
  493 
  494         /*
  495          * Final multiplication with x, which is not in Montgomery
  496          * representation, computes the correct division result.
  497          */
  498         return mq_montymul(y18, x);
  499 }
  500 
  501 /*
  502  * Compute NTT on a ring element.
  503  */
  504 static void
  505 mq_NTT(uint16_t *a, unsigned logn)
  506 {
  507         size_t n, t, m;
  508 
  509         n = (size_t)1 << logn;
  510         t = n;
  511         for (m = 1; m < n; m <<= 1) {
  512                 size_t ht, i, j1;
  513 
  514                 ht = t >> 1;
  515                 for (i = 0, j1 = 0; i < m; i ++, j1 += t) {
  516                         size_t j, j2;
  517                         uint32_t s;
  518 
  519                         s = GMb[m + i];
  520                         j2 = j1 + ht;
  521                         for (j = j1; j < j2; j ++) {
  522                                 uint32_t u, v;
  523 
  524                                 u = a[j];
  525                                 v = mq_montymul(a[j + ht], s);
  526                                 a[j] = (uint16_t)mq_add(u, v);
  527                                 a[j + ht] = (uint16_t)mq_sub(u, v);
  528                         }
  529                 }
  530                 t = ht;
  531         }
  532 }
  533 
  534 /*
  535  * Compute the inverse NTT on a ring element, binary case.
  536  */
  537 static void
  538 mq_iNTT(uint16_t *a, unsigned logn)
  539 {
  540         size_t n, t, m;
  541         uint32_t ni;
  542 
  543         n = (size_t)1 << logn;
  544         t = 1;
  545         m = n;
  546         while (m > 1) {
  547                 size_t hm, dt, i, j1;
  548 
  549                 hm = m >> 1;
  550                 dt = t << 1;
  551                 for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) {
  552                         size_t j, j2;
  553                         uint32_t s;
  554 
  555                         j2 = j1 + t;
  556                         s = iGMb[hm + i];
  557                         for (j = j1; j < j2; j ++) {
  558                                 uint32_t u, v, w;
  559 
  560                                 u = a[j];
  561                                 v = a[j + t];
  562                                 a[j] = (uint16_t)mq_add(u, v);
  563                                 w = mq_sub(u, v);
  564                                 a[j + t] = (uint16_t)
  565                                         mq_montymul(w, s);
  566                         }
  567                 }
  568                 t = dt;
  569                 m = hm;
  570         }
  571 
  572         /*
  573          * To complete the inverse NTT, we must now divide all values by
  574          * n (the vector size). We thus need the inverse of n, i.e. we
  575          * need to divide 1 by 2 logn times. But we also want it in
  576          * Montgomery representation, i.e. we also want to multiply it
  577          * by R = 2^16. In the common case, this should be a simple right
  578          * shift. The loop below is generic and works also in corner cases;
  579          * its computation time is negligible.
  580          */
  581         ni = R;
  582         for (m = n; m > 1; m >>= 1) {
  583                 ni = mq_rshift1(ni);
  584         }
  585         for (m = 0; m < n; m ++) {
  586                 a[m] = (uint16_t)mq_montymul(a[m], ni);
  587         }
  588 }
  589 
  590 /*
  591  * Convert a polynomial (mod q) to Montgomery representation.
  592  */
  593 static void
  594 mq_poly_tomonty(uint16_t *f, unsigned logn)
  595 {
  596         size_t u, n;
  597 
  598         n = (size_t)1 << logn;
  599         for (u = 0; u < n; u ++) {
  600                 f[u] = (uint16_t)mq_montymul(f[u], R2);
  601         }
  602 }
  603 
  604 /*
  605  * Multiply two polynomials together (NTT representation, and using
  606  * a Montgomery multiplication). Result f*g is written over f.
  607  */
  608 static void
  609 mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn)
  610 {
  611         size_t u, n;
  612 
  613         n = (size_t)1 << logn;
  614         for (u = 0; u < n; u ++) {
  615                 f[u] = (uint16_t)mq_montymul(f[u], g[u]);
  616         }
  617 }
  618 
  619 /*
  620  * Subtract polynomial g from polynomial f.
  621  */
  622 static void
  623 mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn)
  624 {
  625         size_t u, n;
  626 
  627         n = (size_t)1 << logn;
  628         for (u = 0; u < n; u ++) {
  629                 f[u] = (uint16_t)mq_sub(f[u], g[u]);
  630         }
  631 }
  632 
  633 /* ===================================================================== */
  634 
  635 /* see inner.h */
  636 void
  637 Zf(to_ntt_monty)(uint16_t *h, unsigned logn)
  638 {
  639         mq_NTT(h, logn);
  640         mq_poly_tomonty(h, logn);
  641 }
  642 
  643 /* see inner.h */
  644 int
  645 Zf(verify_raw)(const uint16_t *c0, const int16_t *s2,
  646         const uint16_t *h, unsigned logn, uint8_t *tmp)
  647 {
  648         size_t u, n;
  649         uint16_t *tt;
  650 
  651         n = (size_t)1 << logn;
  652         tt = (uint16_t *)tmp;
  653 
  654         /*
  655          * Reduce s2 elements modulo q ([0..q-1] range).
  656          */
  657         for (u = 0; u < n; u ++) {
  658                 uint32_t w;
  659 
  660                 w = (uint32_t)s2[u];
  661                 w += Q & -(w >> 31);
  662                 tt[u] = (uint16_t)w;
  663         }
  664 
  665         /*
  666          * Compute -s1 = s2*h - c0 mod phi mod q (in tt[]).
  667          */
  668         mq_NTT(tt, logn);
  669         mq_poly_montymul_ntt(tt, h, logn);
  670         mq_iNTT(tt, logn);
  671         mq_poly_sub(tt, c0, logn);
  672 
  673         /*
  674          * Normalize -s1 elements into the [-q/2..q/2] range.
  675          */
  676         for (u = 0; u < n; u ++) {
  677                 int32_t w;
  678 
  679                 w = (int32_t)tt[u];
  680                 w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31));
  681                 ((int16_t *)tt)[u] = (int16_t)w;
  682         }
  683 
  684         /*
  685          * Signature is valid if and only if the aggregate (-s1,s2) vector
  686          * is short enough.
  687          */
  688         return Zf(is_short)((int16_t *)tt, s2, logn);
  689 }
  690 
  691 /* see inner.h */
  692 int
  693 Zf(compute_public)(uint16_t *h,
  694         const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp)
  695 {
  696         size_t u, n;
  697         uint16_t *tt;
  698 
  699         n = (size_t)1 << logn;
  700         tt = (uint16_t *)tmp;
  701         for (u = 0; u < n; u ++) {
  702                 tt[u] = (uint16_t)mq_conv_small(f[u]);
  703                 h[u] = (uint16_t)mq_conv_small(g[u]);
  704         }
  705         mq_NTT(h, logn);
  706         mq_NTT(tt, logn);
  707         for (u = 0; u < n; u ++) {
  708                 if (tt[u] == 0) {
  709                         return 0;
  710                 }
  711                 h[u] = (uint16_t)mq_div_12289(h[u], tt[u]);
  712         }
  713         mq_iNTT(h, logn);
  714         return 1;
  715 }
  716 
  717 /* see inner.h */
  718 int
  719 Zf(complete_private)(int8_t *G,
  720         const int8_t *f, const int8_t *g, const int8_t *F,
  721         unsigned logn, uint8_t *tmp)
  722 {
  723         size_t u, n;
  724         uint16_t *t1, *t2;
  725 
  726         n = (size_t)1 << logn;
  727         t1 = (uint16_t *)tmp;
  728         t2 = t1 + n;
  729         for (u = 0; u < n; u ++) {
  730                 t1[u] = (uint16_t)mq_conv_small(g[u]);
  731                 t2[u] = (uint16_t)mq_conv_small(F[u]);
  732         }
  733         mq_NTT(t1, logn);
  734         mq_NTT(t2, logn);
  735         mq_poly_tomonty(t1, logn);
  736         mq_poly_montymul_ntt(t1, t2, logn);
  737         for (u = 0; u < n; u ++) {
  738                 t2[u] = (uint16_t)mq_conv_small(f[u]);
  739         }
  740         mq_NTT(t2, logn);
  741         for (u = 0; u < n; u ++) {
  742                 if (t2[u] == 0) {
  743                         return 0;
  744                 }
  745                 t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]);
  746         }
  747         mq_iNTT(t1, logn);
  748         for (u = 0; u < n; u ++) {
  749                 uint32_t w;
  750                 int32_t gi;
  751 
  752                 w = t1[u];
  753                 w -= (Q & ~-((w - (Q >> 1)) >> 31));
  754                 gi = *(int32_t *)&w;
  755                 if (gi < -127 || gi > +127) {
  756                         return 0;
  757                 }
  758                 G[u] = (int8_t)gi;
  759         }
  760         return 1;
  761 }
  762 
  763 /* see inner.h */
  764 int
  765 Zf(is_invertible)(
  766         const int16_t *s2, unsigned logn, uint8_t *tmp)
  767 {
  768         size_t u, n;
  769         uint16_t *tt;
  770         uint32_t r;
  771 
  772         n = (size_t)1 << logn;
  773         tt = (uint16_t *)tmp;
  774         for (u = 0; u < n; u ++) {
  775                 uint32_t w;
  776 
  777                 w = (uint32_t)s2[u];
  778                 w += Q & -(w >> 31);
  779                 tt[u] = (uint16_t)w;
  780         }
  781         mq_NTT(tt, logn);
  782         r = 0;
  783         for (u = 0; u < n; u ++) {
  784                 r |= (uint32_t)(tt[u] - 1);
  785         }
  786         return (int)(1u - (r >> 31));
  787 }
  788 
  789 /* see inner.h */
  790 int
  791 Zf(verify_recover)(uint16_t *h,
  792         const uint16_t *c0, const int16_t *s1, const int16_t *s2,
  793         unsigned logn, uint8_t *tmp)
  794 {
  795         size_t u, n;
  796         uint16_t *tt;
  797         uint32_t r;
  798 
  799         n = (size_t)1 << logn;
  800 
  801         /*
  802          * Reduce elements of s1 and s2 modulo q; then write s2 into tt[]
  803          * and c0 - s1 into h[].
  804          */
  805         tt = (uint16_t *)tmp;
  806         for (u = 0; u < n; u ++) {
  807                 uint32_t w;
  808 
  809                 w = (uint32_t)s2[u];
  810                 w += Q & -(w >> 31);
  811                 tt[u] = (uint16_t)w;
  812 
  813                 w = (uint32_t)s1[u];
  814                 w += Q & -(w >> 31);
  815                 w = mq_sub(c0[u], w);
  816                 h[u] = (uint16_t)w;
  817         }
  818 
  819         /*
  820          * Compute h = (c0 - s1) / s2. If one of the coefficients of s2
  821          * is zero (in NTT representation) then the operation fails. We
  822          * keep that information into a flag so that we do not deviate
  823          * from strict constant-time processing; if all coefficients of
  824          * s2 are non-zero, then the high bit of r will be zero.
  825          */
  826         mq_NTT(tt, logn);
  827         mq_NTT(h, logn);
  828         r = 0;
  829         for (u = 0; u < n; u ++) {
  830                 r |= (uint32_t)(tt[u] - 1);
  831                 h[u] = mq_div_12289(h[u], tt[u]);
  832         }
  833         mq_iNTT(h, logn);
  834 
  835         /*
  836          * Signature is acceptable if and only if it is short enough,
  837          * and s2 was invertible mod phi mod q. The caller must still
  838          * check that the rebuilt public key matches the expected
  839          * value (e.g. through a hash).
  840          */
  841         r = ~r & (uint32_t)-Zf(is_short)(s1, s2, logn);
  842         return (int)(r >> 31);
  843 }
  844 
  845 /* see inner.h */
  846 int
  847 Zf(count_nttzero)(int16_t *sig, unsigned logn, uint8_t *tmp)
  848 {
  849         uint16_t *s2;
  850         size_t u, n;
  851         uint32_t r;
  852 
  853         n = (size_t)1 << logn;
  854         s2 = (uint16_t *)tmp;
  855         for (u = 0; u < n; u ++) {
  856                 uint32_t w;
  857 
  858                 w = (uint32_t)sig[u];
  859                 w += Q & -(w >> 31);
  860                 s2[u] = (uint16_t)w;
  861         }
  862         mq_NTT(s2, logn);
  863         r = 0;
  864         for (u = 0; u < n; u ++) {
  865                 uint32_t w;
  866 
  867                 w = (uint32_t)s2[u] - 1u;
  868                 r += (w >> 31);
  869         }
  870         return (int)r;
  871 }