Falcon source files (reference implementation)


internal.h

    1 /*
    2  * Internal functions for Falcon. These function are not meant to be
    3  * called by applications directly.
    4  *
    5  * ==========================(LICENSE BEGIN)============================
    6  *
    7  * Copyright (c) 2017  Falcon Project
    8  *
    9  * Permission is hereby granted, free of charge, to any person obtaining
   10  * a copy of this software and associated documentation files (the
   11  * "Software"), to deal in the Software without restriction, including
   12  * without limitation the rights to use, copy, modify, merge, publish,
   13  * distribute, sublicense, and/or sell copies of the Software, and to
   14  * permit persons to whom the Software is furnished to do so, subject to
   15  * the following conditions:
   16  *
   17  * The above copyright notice and this permission notice shall be
   18  * included in all copies or substantial portions of the Software.
   19  *
   20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   23  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   24  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   25  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   26  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   27  *
   28  * ===========================(LICENSE END)=============================
   29  *
   30  * @author   Thomas Pornin <thomas.pornin@nccgroup.trust>
   31  */
   32 
   33 #ifndef FALCON_INTERNAL_H__
   34 #define FALCON_INTERNAL_H__
   35 
   36 #include <stdlib.h>
   37 #include <string.h>
   38 
   39 #include "shake.h"
   40 #include "falcon.h"
   41 
   42 #ifdef __cplusplus
   43 extern "C" {
   44 #endif
   45 
   46 /*
   47  * On MSVC, disable warning about applying unary minus on an unsigned
   48  * type: this is perfectly defined standard behaviour and we do it
   49  * quite often.
   50  */
   51 #if _MSC_VER
   52 #pragma warning( disable : 4146 )
   53 #endif
   54 
   55 /*
   56  * This macro is defined to a non-zero value on systems which are known
   57  * to use little-endian encoding and to tolerate unaligned accesses.
   58  */
   59 #ifndef FALCON_LE_U
   60 #if __i386__ || _M_IX86 \
   61         || __x86_64__ || _M_X64 || \
   62         (_ARCH_PWR8 && (__LITTLE_ENDIAN || __LITTLE_ENDIAN__))
   63 #define FALCON_LE_U   1
   64 #else
   65 #define FALCON_LE_U   0
   66 #endif
   67 #endif
   68 
   69 /* ==================================================================== */
   70 /*
   71  * Encoding/decoding functions (falcon-enc.c).
   72  */
   73 
   74 /*
   75  * Encode a ring element into bytes. This function is used when q = 12289;
   76  * each coordinate is encoded over 14 bits.
   77  *
   78  * Returned value is the encoded length, in bytes. If 'out' is NULL, then
   79  * the encoded length is still computed and returned, but no output is
   80  * produced; in that case, 'max_out_len' is ignored. Otherwise, bytes
   81  * are written into the buffer pointed to by 'out', up to 'max_out_len'
   82  * bytes; if the output buffer is too small (total encoded length would
   83  * exceed the value of 'max_out_len') then 0 is returned.
   84  */
   85 size_t falcon_encode_12289(void *out, size_t max_out_len,
   86         const uint16_t *x, unsigned logn);
   87 
   88 /*
   89  * Decode a ring element, using q = 12289. The encoded length (in bytes)
   90  * is returned. If the source value is incorrect (too short or otherwise
   91  * invalid) then this function returns 0.
   92  */
   93 size_t falcon_decode_12289(uint16_t *x, unsigned logn,
   94         const void *data, size_t len);
   95 
   96 /*
   97  * Encode a ring element into bytes. This function is used when q = 18433;
   98  * each coordinate is encoded over 15 bits.
   99  *
  100  * Note that length of x[] is 1.5*2^logn.
  101  *
  102  * Returned value is the encoded length, in bytes. If 'out' is NULL, then
  103  * the encoded length is still computed and returned, but no output is
  104  * produced; in that case, 'max_out_len' is ignored. Otherwise, bytes
  105  * are written into the buffer pointed to by 'out', up to 'max_out_len'
  106  * bytes; if the output buffer is too small (total encoded length would
  107  * exceed the value of 'max_out_len') then 0 is returned.
  108  */
  109 size_t falcon_encode_18433(void *out, size_t max_out_len,
  110         const uint16_t *x, unsigned logn);
  111 
  112 /*
  113  * Decode a ring element, using q = 18433. The encoded length (in bytes)
  114  * is returned. If the source value is incorrect (too short or otherwise
  115  * invalid) then this function returns 0.
  116  *
  117  * Note that length of x[] is 1.5*2^logn.
  118  */
  119 size_t falcon_decode_18433(uint16_t *x, unsigned logn,
  120         const void *data, size_t len);
  121 
  122 /*
  123  * Encode a "small vector" into bytes.
  124  *
  125  * 'comp' is the compression type:
  126  *    0   uncompressed
  127  *    1   compressed with sort-of Huffman codes
  128  * Other values are reserved and trigger an error.
  129  *
  130  * Returned value is the encoded length, in bytes. If 'out' is NULL, then
  131  * the encoded length is still computed and returned, but no output is
  132  * produced; in that case, 'max_out_len' is ignored.
  133  *
  134  * Returned value is 0 on error. Possible error conditions are:
  135  *
  136  *  - At least one of the source values cannot be encoded (not in the
  137  *    supported range for the specified encoding type).
  138  *
  139  *  - Output buffer is too small (this may happen only if 'out' is not NULL).
  140  */
  141 size_t falcon_encode_small(void *out, size_t max_out_len,
  142         int comp, unsigned q, const int16_t *x, unsigned logn);
  143 
  144 /*
  145  * Decode a "small vector". The encoded length (in bytes) is returned. If
  146  * the source value is incorrect (too short or otherwise invalid), then
  147  * this function returns 0.
  148  */
  149 size_t falcon_decode_small(int16_t *x, unsigned logn,
  150         int comp, unsigned q, const void *data, size_t len);
  151 
  152 /*
  153  * From a SHAKE context (must be already flipped), produce a new point.
  154  */
  155 void falcon_hash_to_point(shake_context *sc, unsigned q,
  156         uint16_t *x, unsigned logn);
  157 
  158 /*
  159  * Tell whether a given vector (2N coordinates, in two halves) is
  160  * acceptable as a signature. This compares the appropriate norm of the
  161  * vector with the acceptance bound. Returned value is 1 on success
  162  * (vector is short enough to be acceptable), 0 otherwise.
  163  */
  164 int falcon_is_short(const int16_t *s1, const int16_t *s2,
  165         unsigned logn, unsigned ter);
  166 
  167 /* ==================================================================== */
  168 /*
  169  * Signature verification functions (falcon-vrfy.c).
  170  */
  171 
  172 /*
  173  * Internal signature verification code:
  174  *   c0[]      contains the hashed message
  175  *   s2[]      is the decoded signature
  176  *   h[]       contains the public key, in NTT + Montgomery format
  177  *   logn      is the degree log
  178  *   ternary   1 for the ternary case, 0 for binary
  179  * Returned value is 1 on success, 0 on error.
  180  */
  181 int falcon_vrfy_verify_raw(const uint16_t *c0, const int16_t *s2,
  182         const uint16_t *h, unsigned logn, int ternary);
  183 
  184 /*
  185  * Compute the public key h[], given the private key elements f[] and
  186  * g[]. This computes h = g/f mod phi mod q, where phi is the polynomial
  187  * modulus. This function returns 1 on success, 0 on error (an error is
  188  * reported if f is not invertible mod phi mod q).
  189  */
  190 int falcon_compute_public(uint16_t *h,
  191         const int16_t *f, const int16_t *g, unsigned logn, int ternary);
  192 
  193 /*
  194  * Recompute the fourth private key element. Private key consists in
  195  * four polynomials with small coefficients f, g, F and G, which are
  196  * such that fG - gF = q mod phi; further more, f is invertible modulo
  197  * phi and modulo q. This function recomputes G from f, g and F.
  198  *
  199  * Returned value is 1 in success, 0 on error (f not invertible).
  200  */
  201 int falcon_complete_private(int16_t *G,
  202         const int16_t *f, const int16_t *g, const int16_t *F,
  203         unsigned logn, int ternary);
  204 
  205 /* ==================================================================== */
  206 /*
  207  * Implementation of floating-point real numbers.
  208  */
  209 
  210 /*
  211  * Real numbers are implemented by an extra header file, included below.
  212  * This is meant to support pluggable implementations. The default
  213  * implementation relies on the C type 'double'.
  214  *
  215  * The included file must define the following types and inline functions:
  216  *
  217  *   fpr
  218  *         type for a real number
  219  *
  220  *   fpr fpr_of(int64_t i)
  221  *         cast an integer into a real number
  222  *
  223  *   fpr fpr_scaled(int64_t i, int sc)
  224  *         compute i*2^sc as a real number
  225  *
  226  *   fpr fpr_inverse_of(long i)
  227  *         compute 1/i, as a real number
  228  *
  229  *   fpr fpr_log2
  230  *         constant value, equal to the logarithm of 2
  231  *
  232  *   fpr fpr_p55
  233  *         constant value, equal to 2 to the power 55
  234  *
  235  *   int64_t fpr_rint(fpr x)
  236  *         round x to the nearest integer
  237  *
  238  *   long fpr_floor(fpr x)
  239  *         round down to the previous integer
  240  *
  241  *   fpr fpr_add(fpr x, fpr y)
  242  *         compute x + y
  243  *
  244  *   fpr fpr_sub(fpr x, fpr y)
  245  *         compute x - y
  246  *
  247  *   fpr fpr_neg(fpr x)
  248  *         compute -x
  249  *
  250  *   fpr fpr_half(fpr x)
  251  *         compute x/2
  252  *
  253  *   fpr fpr_double(fpr x)
  254  *         compute x*2
  255  *
  256  *   fpr fpr_mul(fpr x, fpr y)
  257  *         compute x * y
  258  *
  259  *   fpr fpr_sqr(fpr x)
  260  *         compute x * x
  261  *
  262  *   fpr fpr_inv(fpr x)
  263  *         compute 1/x
  264  *
  265  *   fpr fpr_div(fpr x, fpr y)
  266  *         compute x/y
  267  *
  268  *   fpr fpr_sqrt(fpr x)
  269  *         compute the square root of x
  270  *
  271  *   fpr fpr_max(fpr x, fpr y)
  272  *         compute the greater of x and y
  273  *
  274  *   int fpr_lt(fpr x, fpr y)
  275  *         return 1 if x < y, 0 otherwise
  276  *
  277  *   fpr fpr_exp_small(fpr x)
  278  *         return exp(x), assuming that |x| <= log(2). This function must
  279  *         have a precision of at least 50 bits.
  280  *
  281  *   void fpr_gauss(fpr *re, fpr *im, fpr sigma, uint32_t a, uint32_t b)
  282  *         compute re and im such that:
  283  *            re+i*im = sigma*sqrt(-2*ln(x)) * exp(i*2*pi*y)
  284  *         where x = (a + 1) / 2^32 and y = (b + 1) / 2^32.
  285  */
  286 #ifndef FPR_IMPL
  287 #define FPR_IMPL   "fpr-double.h"
  288 #endif
  289 #include FPR_IMPL
  290 
  291 /* ==================================================================== */
  292 /*
  293  * FFT (falcon-fft.c).
  294  *
  295  * A real polynomial is represented as an array of N 'fpr' elements.
  296  * The FFT representation of a real polynomial contains N/2 complex
  297  * elements; each is stored as two real numbers, for the real and
  298  * imaginary parts, respectively. See falcon-fft.c for details on the
  299  * internal representation.
  300  */
  301 
  302 /*
  303  * Compute FFT in-place: the source array should contain a real
  304  * polynomial (N coefficients); its storage area is reused to store
  305  * the FFT representation of that polynomial (N/2 complex numbers).
  306  *
  307  * 'logn' MUST lie between 1 and 10 (inclusive).
  308  */
  309 void falcon_FFT(fpr *f, unsigned logn);
  310 
  311 /*
  312  * Compute the inverse FFT in-place: the source array should contain the
  313  * FFT representation of a real polynomial (N/2 elements); the resulting
  314  * real polynomial (N coefficients of type 'fpr') is written over the
  315  * array.
  316  *
  317  * 'logn' MUST lie between 1 and 10 (inclusive).
  318  */
  319 void falcon_iFFT(fpr *f, unsigned logn);
  320 
  321 /*
  322  * Add polynomial b to polynomial a. a and b MUST NOT overlap. This
  323  * function works in normal representation.
  324  */
  325 void falcon_poly_add(fpr *restrict a, const fpr *restrict b, unsigned logn);
  326 
  327 /*
  328  * Add polynomial b to polynomial a. a and b MUST NOT overlap. This
  329  * function works in FFT representation.
  330  */
  331 #define falcon_poly_add_fft   falcon_poly_add
  332 
  333 /*
  334  * Add a constant value to a polynomial. This function works in normal
  335  * representation.
  336  */
  337 void falcon_poly_addconst(fpr *a, fpr x, unsigned logn);
  338 
  339 /*
  340  * Add a constant value to a polynomial. This function works in FFT
  341  * representation.
  342  */
  343 void falcon_poly_addconst_fft(fpr *a, fpr x, unsigned logn);
  344 
  345 /*
  346  * Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This
  347  * function works in normal representation.
  348  */
  349 void falcon_poly_sub(fpr *restrict a, const fpr *restrict b, unsigned logn);
  350 
  351 /*
  352  * Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This
  353  * function works in FFT representation.
  354  */
  355 #define falcon_poly_sub_fft   falcon_poly_sub
  356 
  357 /*
  358  * Negate polynomial a. This function works in normal representation.
  359  */
  360 void falcon_poly_neg(fpr *a, unsigned logn);
  361 
  362 /*
  363  * Negate polynomial a. This function works in FFT representation.
  364  */
  365 #define falcon_poly_neg_fft   falcon_poly_neg
  366 
  367 /*
  368  * Compute adjoint of polynomial a. This function works in normal
  369  * representation.
  370  */
  371 void falcon_poly_adj(fpr *a, unsigned logn);
  372 
  373 /*
  374  * Compute adjoint of polynomial a. This function works in FFT representation.
  375  */
  376 void falcon_poly_adj_fft(fpr *a, unsigned logn);
  377 
  378 /*
  379  * We do not define falcon_poly_mul() because implementation must use
  380  * extra temporary storage.
  381  *
  382 void falcon_poly_mul(fpr *restrict a, const fpr *restrict b, unsigned logn);
  383  */
  384 
  385 /*
  386  * Multiply polynomial a with polynomial b. a and b MUST NOT overlap.
  387  * This function works in FFT representation.
  388  */
  389 void falcon_poly_mul_fft(fpr *restrict a, const fpr *restrict b, unsigned logn);
  390 
  391 /*
  392  * We do not define falcon_poly_sqr() because implementation must use
  393  * extra temporary storage.
  394  *
  395 void falcon_poly_sqr(fpr *restrict a, const fpr *restrict b, unsigned logn);
  396  */
  397 
  398 /*
  399  * Square a polynomial. This function works in FFT representation.
  400  */
  401 void falcon_poly_sqr_fft(fpr *a, unsigned logn);
  402 
  403 /*
  404  * Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT
  405  * overlap. This function works in FFT representation.
  406  */
  407 void falcon_poly_muladj_fft(fpr *restrict a,
  408         const fpr *restrict b, unsigned logn);
  409 
  410 /*
  411  * Multiply polynomial with its own adjoint. This function works in FFT
  412  * representation.
  413  */
  414 void falcon_poly_mulselfadj_fft(fpr *a, unsigned logn);
  415 
  416 /*
  417  * Multiply polynomial with a real constant. This function works in normal
  418  * representation.
  419  */
  420 void falcon_poly_mulconst(fpr *a, fpr x, unsigned logn);
  421 
  422 /*
  423  * Multiply polynomial with a real constant. This function works in FFT
  424  * representation.
  425  */
  426 #define falcon_poly_mulconst_fft   falcon_poly_mulconst
  427 
  428 /*
  429  * Invert a polynomial modulo X^N+1 (FFT representation).
  430  */
  431 void falcon_poly_inv_fft(fpr *a, unsigned logn);
  432 
  433 /*
  434  * Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation).
  435  * a and b MUST NOT overlap.
  436  */
  437 void falcon_poly_div_fft(fpr *restrict a, const fpr *restrict b, unsigned logn);
  438 
  439 /*
  440  * Divide polynomial a by polynomial adj(b), modulo X^N+1 (FFT
  441  * representation). a and b MUST NOT overlap.
  442  */
  443 void falcon_poly_divadj_fft(fpr *restrict a,
  444         const fpr *restrict b, unsigned logn);
  445 
  446 /*
  447  * Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g))
  448  * (also in FFT representation). Since the result is auto-adjoint, all its
  449  * coordinates in FFT representation are real; as such, only the first N/2
  450  * values of d[] are filled (the imaginary parts are skipped).
  451  *
  452  * Array d MUST NOT overlap with either a or b.
  453  */
  454 void falcon_poly_invnorm2_fft(fpr *restrict d,
  455         const fpr *restrict a, const fpr *restrict b, unsigned logn);
  456 
  457 /*
  458  * Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g)
  459  * (also in FFT representation). Destination d MUST NOT overlap with
  460  * any of the source arrays.
  461  */
  462 void falcon_poly_add_muladj_fft(fpr *restrict d,
  463         const fpr *restrict F, const fpr *restrict G,
  464         const fpr *restrict f, const fpr *restrict g, unsigned logn);
  465 
  466 /*
  467  * Multiply polynomial a by polynomial b, where b is autoadjoint. Both
  468  * a and b are in FFT representation. Since b is autoadjoint, all its
  469  * FFT coefficients are real, and the array b contains only N/2 elements.
  470  * a and b MUST NOT overlap.
  471  */
  472 void falcon_poly_mul_autoadj_fft(fpr *restrict a,
  473         const fpr *restrict b, unsigned logn);
  474 
  475 /*
  476  * Divide polynomial a by polynomial b, where b is autoadjoint. Both
  477  * a and b are in FFT representation. Since b is autoadjoint, all its
  478  * FFT coefficients are real, and the array b contains only N/2 elements.
  479  * a and b MUST NOT overlap.
  480  */
  481 void falcon_poly_div_autoadj_fft(fpr *restrict a,
  482         const fpr *restrict b, unsigned logn);
  483 
  484 /*
  485  * Apply "split" operation on a polynomial in FFT representation:
  486  * f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1
  487  * (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap.
  488  */
  489 void falcon_poly_split_fft(fpr *restrict t0, fpr *restrict t1,
  490         const fpr *restrict f, unsigned logn);
  491 
  492 /*
  493  * Apply "merge" operation on two polynomials in FFT representation:
  494  * given f0 and f1, polynomials moduo X^(N/2)+1, this function computes
  495  * f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1.
  496  * f MUST NOT overlap with either f0 or f1.
  497  */
  498 void falcon_poly_merge_fft(fpr *restrict f,
  499         const fpr *restrict f0, const fpr *restrict f1, unsigned logn);
  500 
  501 /*
  502  * FFT3 (falcon-fft.c).
  503  *
  504  * The 'FFT3' functions are for polynomials modulo X^N - X^(N/2) + 1.
  505  * The degree N is provided as two parameters: 'logn' and 'full'. If
  506  * 'full' is 0, then the degree is 2^logn. If 'full' is 1, then
  507  * the degree is 1.5*2^logn. The 'full' value MUST be 0 or 1 (other
  508  * non-zero values are not tolerated).
  509  *
  510  * When full = 0, logn must be between 1 and 8 (degree 2 to 256).
  511  * When full = 1, logn must be between 2 and 9 (degree 6 to 768).
  512  */
  513 
  514 /*
  515  * Compute FFT3 in-place: the source array should contain a real
  516  * polynomial (N coefficients); its storage area is reused to store
  517  * the FFT3 representation of that polynomial (N/2 complex numbers).
  518  *
  519  * 'logn' MUST lie between 2 and 9 (inclusive).
  520  */
  521 void falcon_FFT3(fpr *f, unsigned logn, unsigned full);
  522 
  523 /*
  524  * Compute the inverse FFT3 in-place: the source array should contain the
  525  * FFT3 representation of a real polynomial (N/2 elements); the resulting
  526  * real polynomial (N coefficients of type 'fpr') is written over the
  527  * array.
  528  *
  529  * 'logn' MUST lie between 2 and 9 (inclusive).
  530  */
  531 void falcon_iFFT3(fpr *f, unsigned logn, unsigned full);
  532 
  533 /*
  534  * Add polynomial b to polynomial a. a and b MUST NOT overlap. This
  535  * function works in normal representation.
  536  */
  537 void falcon_poly_add3(fpr *restrict a, const fpr *restrict b,
  538         unsigned logn, unsigned full);
  539 
  540 /*
  541  * Add polynomial b to polynomial a. a and b MUST NOT overlap. This
  542  * function works in FFT3 representation.
  543  */
  544 #define falcon_poly_add_fft3   falcon_poly_add3
  545 
  546 /*
  547  * Add a constant value to a polynomial. This function works in normal
  548  * representation.
  549  */
  550 void falcon_poly_addconst3(fpr *a, fpr x, unsigned logn, unsigned full);
  551 
  552 /*
  553  * Add a constant value to a polynomial. This function works in FFT3
  554  * representation.
  555  */
  556 void falcon_poly_addconst_fft3(fpr *a, fpr x, unsigned logn, unsigned full);
  557 
  558 /*
  559  * Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This
  560  * function works in normal representation.
  561  */
  562 void falcon_poly_sub3(fpr *restrict a, const fpr *restrict b,
  563         unsigned logn, unsigned full);
  564 
  565 /*
  566  * Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This
  567  * function works in FFT3 representation.
  568  */
  569 #define falcon_poly_sub_fft3   falcon_poly_sub3
  570 
  571 /*
  572  * Negate polynomial a. This function works in normal representation.
  573  */
  574 void falcon_poly_neg3(fpr *a, unsigned logn, unsigned full);
  575 
  576 /*
  577  * Negate polynomial a. This function works in FFT3 representation.
  578  */
  579 #define falcon_poly_neg_fft3   falcon_poly_neg3
  580 
  581 /*
  582  * Compute adjoint of polynomial a. This function works in FFT3 representation.
  583  */
  584 void falcon_poly_adj_fft3(fpr *a, unsigned logn, unsigned full);
  585 
  586 /*
  587  * We do not define falcon_poly_mul3() because implementation must use
  588  * extra temporary storage.
  589  *
  590 void falcon_poly_mul3(fpr *restrict a, const fpr *restrict b,
  591         unsigned logn, unsigned full);
  592  */
  593 
  594 /*
  595  * Multiply polynomial a with polynomial b. a and b MUST NOT overlap.
  596  * This function works in FFT3 representation.
  597  */
  598 void falcon_poly_mul_fft3(fpr *restrict a,
  599         const fpr *restrict b, unsigned logn, unsigned full);
  600 
  601 /*
  602  * We do not define falcon_poly_sqr() because implementation must use
  603  * extra temporary storage.
  604  *
  605 void falcon_poly_sqr3(fpr *restrict a, const fpr *restrict b,
  606         unsigned logn, unsigned full);
  607  */
  608 
  609 /*
  610  * Square a polynomial. This function works in FFT3 representation.
  611  */
  612 void falcon_poly_sqr_fft3(fpr *a, unsigned logn, unsigned full);
  613 
  614 /*
  615  * Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT
  616  * overlap. This function works in FFT3 representation.
  617  */
  618 void falcon_poly_muladj_fft3(fpr *restrict a,
  619         const fpr *restrict b, unsigned logn, unsigned full);
  620 
  621 /*
  622  * Multiply polynomial with its own adjoint. This function works in FFT3
  623  * representation.
  624  */
  625 void falcon_poly_mulselfadj_fft3(fpr *a, unsigned logn, unsigned full);
  626 
  627 /*
  628  * Multiply polynomial with a real constant. This function works in normal
  629  * representation.
  630  */
  631 void falcon_poly_mulconst3(fpr *a, fpr x, unsigned logn, unsigned full);
  632 
  633 /*
  634  * Multiply polynomial with a real constant. This function works in FFT3
  635  * representation.
  636  */
  637 #define falcon_poly_mulconst_fft3   falcon_poly_mulconst3
  638 
  639 /*
  640  * Invert a polynomial modulo X^N-X^(N/2)+1 (FFT3 representation).
  641  */
  642 void falcon_poly_inv_fft3(fpr *a, unsigned logn, unsigned full);
  643 
  644 /*
  645  * Divide polynomial a by polynomial b, modulo X^N-X^(N/2)+1 (FFT3
  646  * representation). a and b MUST NOT overlap.
  647  */
  648 void falcon_poly_div_fft3(fpr *restrict a,
  649         const fpr *restrict b, unsigned logn, unsigned full);
  650 
  651 /*
  652  * Divide polynomial a by polynomial adj(b), modulo X^N-X^(N/2)+1 (FFT3
  653  * representation). a and b MUST NOT overlap.
  654  */
  655 void falcon_poly_divadj_fft3(fpr *restrict a,
  656         const fpr *restrict b, unsigned logn, unsigned full);
  657 
  658 /*
  659  * Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g))
  660  * (also in FFT3 representation). Since the result is auto-adjoint, all its
  661  * coordinates in FFT3 representation are real; as such, only the first N/2
  662  * values of d[] are filled (the imaginary parts are skipped).
  663  *
  664  * Array d MUST NOT overlap with either a or b.
  665  */
  666 void falcon_poly_invnorm2_fft3(fpr *restrict d,
  667         const fpr *restrict a, const fpr *restrict b,
  668         unsigned logn, unsigned full);
  669 
  670 /*
  671  * Given F, G, f and g (in FFT3 representation), compute F*adj(f)+G*adj(g)
  672  * (also in FFT3 representation). Destination d MUST NOT overlap with
  673  * any of the source arrays.
  674  */
  675 void falcon_poly_add_muladj_fft3(fpr *restrict d,
  676         const fpr *restrict F, const fpr *restrict G,
  677         const fpr *restrict f, const fpr *restrict g,
  678         unsigned logn, unsigned full);
  679 
  680 /*
  681  * Multiply polynomial a by polynomial b, where b is autoadjoint. Both
  682  * a and b are in FFT3 representation. Since b is autoadjoint, all its
  683  * FFT3 coefficients are real, and the array b contains only N/2 elements.
  684  * a and b MUST NOT overlap.
  685  */
  686 void falcon_poly_mul_autoadj_fft3(fpr *restrict a,
  687         const fpr *restrict b, unsigned logn, unsigned full);
  688 
  689 /*
  690  * Divide polynomial a by polynomial b, where b is autoadjoint. Both
  691  * a and b are in FFT3 representation. Since b is autoadjoint, all its
  692  * FFT3 coefficients are real, and the array b contains only N/2 elements.
  693  * a and b MUST NOT overlap.
  694  */
  695 void falcon_poly_div_autoadj_fft3(fpr *restrict a,
  696         const fpr *restrict b, unsigned logn, unsigned full);
  697 
  698 /*
  699  * Apply "split" operation on a polynomial in FFT3 representation:
  700  * f = f0(x^3) + x*f1(x^3) + x^2*f2(x^3), for half-size polynomials f0,
  701  * f1 and f2. This function is for a f of degree N = 1.5*2^logn,
  702  * modulo X^N-X^(N/2)+1. f0, f1, f2 and f MUST NOT overlap. This function
  703  * requires that logn >= 2.
  704  */
  705 void falcon_poly_split_top_fft3(
  706         fpr *restrict f0, fpr *restrict f1, fpr *restrict f2,
  707         const fpr *restrict f, unsigned logn);
  708 
  709 /*
  710  * Apply "split" operation on a polynomial in FFT3 representation:
  711  * f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1.
  712  * This function is for a f of degree N = 2^logn, modulo X^N-X^(N/2)+1.
  713  * f0, f1 and f MUST NOT overlap. This function requires that logn >= 1.
  714  * If logn == 1, then *f0 and *f1 are filled with a single real value
  715  * each.
  716  */
  717 void falcon_poly_split_deep_fft3(fpr *restrict f0, fpr *restrict f1,
  718         const fpr *restrict f, unsigned logn);
  719 
  720 /*
  721  * Apply "merge" operation on three polynomials in FFT3 representation.
  722  * This is the opposite of falcon_poly_split_top_fft3().
  723  */
  724 void falcon_poly_merge_top_fft3(fpr *restrict f,
  725         const fpr *restrict f0, const fpr *restrict f1, const fpr *restrict f2,
  726         unsigned logn);
  727 
  728 /*
  729  * Apply "merge" operation on two polynomials in FFT3 representation.
  730  * This is the opposite of falcon_poly_split_deep_fft3().
  731  */
  732 void falcon_poly_merge_deep_fft3(fpr *restrict f,
  733         const fpr *restrict f0, const fpr *restrict f1, unsigned logn);
  734 
  735 /* ==================================================================== */
  736 /*
  737  * RNG stuff. This is a generic API to get a random seed from the
  738  * underlying hardware and/or operating system, and to use it to power
  739  * an efficient PRNG. The PRNG algorithm itself will be selected based
  740  * on what the current platform can do.
  741  */
  742 
  743 /*
  744  * Obtain a random seed from the system RNG.
  745  *
  746  * Returned value is 1 on success, 0 on error.
  747  */
  748 int falcon_get_seed(void *seed, size_t seed_len);
  749 
  750 /*
  751  * Structure for a PRNG. This includes a large buffer so that values
  752  * get generated in advance. The 'state' is used to keep the current
  753  * PRNG algorithm state (contents depend on the selected algorithm).
  754  *
  755  * The unions with 'dummy_u64' are there to ensure proper alignment for
  756  * 64-bit direct access.
  757  */
  758 typedef struct {
  759         union {
  760                 unsigned char d[4096];
  761                 uint64_t dummy_u64;
  762         } buf;
  763         size_t ptr;
  764         union {
  765                 unsigned char d[256];
  766                 uint64_t dummy_u64;
  767         } state;
  768         int type;
  769 } prng;
  770 
  771 /*
  772  * PRNG types. Only PRNG_CHACHA20 is currently implemented.
  773  */
  774 #define PRNG_CHACHA20        1
  775 #define PRNG_CHACHA20_SSE2   2
  776 #define PRNG_AES_X86NI       3
  777 
  778 /*
  779  * Instantiate a PRNG. That PRNG will feed over the provided SHAKE
  780  * context (in "flipped" state) to obtain its initial state.
  781  * The 'type' parameter is the requested PRNG type; if set to 0, then
  782  * a default type (adapted to the current platform) will be used.
  783  *
  784  * Returned value is the used RNG type (non-zero) on success, or 0 on
  785  * error. An error occurs only if the requested PRNG type is not
  786  * available; this cannot happen if 'type' is 0.
  787  */
  788 int falcon_prng_init(prng *p, shake_context *src, int type);
  789 
  790 /*
  791  * Refill the PRNG buffer. This is normally invoked automatically, and
  792  * is declared here only so that falcon_prng_get_u64() may be inlined.
  793  */
  794 void falcon_prng_refill(prng *p);
  795 
  796 /*
  797  * Get some bytes from a PRNG.
  798  */
  799 void falcon_prng_get_bytes(prng *p, void *dst, size_t len);
  800 
  801 /*
  802  * Get a 64-bit random value from a PRNG.
  803  */
  804 static inline uint64_t
  805 falcon_prng_get_u64(prng *p)
  806 {
  807         size_t u;
  808 
  809         /*
  810          * If there are less than 9 bytes in the buffer, we refill it.
  811          * This means that we may drop the last few bytes, but this allows
  812          * for faster extraction code. Also, it means that we never leave
  813          * an empty buffer.
  814          */
  815         u = p->ptr;
  816         if (u >= (sizeof p->buf.d) - 9) {
  817                 falcon_prng_refill(p);
  818                 u = 0;
  819         }
  820         p->ptr = u + 8;
  821 
  822         /*
  823          * On systems that use little-endian encoding and allow
  824          * unaligned accesses, we can simply read the data where it is.
  825          */
  826 #if FALCON_LE_U
  827         return *(uint64_t *)(p->buf.d + u);
  828 #else
  829         return (uint64_t)p->buf.d[u + 0]
  830                 | ((uint64_t)p->buf.d[u + 1] << 8)
  831                 | ((uint64_t)p->buf.d[u + 2] << 16)
  832                 | ((uint64_t)p->buf.d[u + 3] << 24)
  833                 | ((uint64_t)p->buf.d[u + 4] << 32)
  834                 | ((uint64_t)p->buf.d[u + 5] << 40)
  835                 | ((uint64_t)p->buf.d[u + 6] << 48)
  836                 | ((uint64_t)p->buf.d[u + 7] << 56);
  837 #endif
  838 }
  839 
  840 /*
  841  * Get an 8-bit random value from a PRNG.
  842  */
  843 static inline unsigned
  844 falcon_prng_get_u8(prng *p)
  845 {
  846         unsigned v;
  847 
  848         v = p->buf.d[p->ptr ++];
  849         if (p->ptr == sizeof p->buf.d) {
  850                 falcon_prng_refill(p);
  851         }
  852         return v;
  853 }
  854 
  855 /* ==================================================================== */
  856 
  857 #ifdef __cplusplus
  858 }
  859 #endif
  860 
  861 #endif