Falcon source files (reference implementation)


shake.c

    1 /*
    2  * SHAKE implementation.
    3  *
    4  * ==========================(LICENSE BEGIN)============================
    5  *
    6  * Copyright (c) 2017  Falcon Project
    7  *
    8  * Permission is hereby granted, free of charge, to any person obtaining
    9  * a copy of this software and associated documentation files (the
   10  * "Software"), to deal in the Software without restriction, including
   11  * without limitation the rights to use, copy, modify, merge, publish,
   12  * distribute, sublicense, and/or sell copies of the Software, and to
   13  * permit persons to whom the Software is furnished to do so, subject to
   14  * the following conditions:
   15  *
   16  * The above copyright notice and this permission notice shall be
   17  * included in all copies or substantial portions of the Software.
   18  *
   19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   22  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
   23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
   24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
   25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
   26  *
   27  * ===========================(LICENSE END)=============================
   28  *
   29  * @author   Thomas Pornin <thomas.pornin@nccgroup.trust>
   30  */
   31 
   32 #include <string.h>
   33 
   34 #include "shake.h"
   35 
   36 /*
   37  * Round constants.
   38  */
   39 static const uint64_t RC[] = {
   40         0x0000000000000001, 0x0000000000008082,
   41         0x800000000000808A, 0x8000000080008000,
   42         0x000000000000808B, 0x0000000080000001,
   43         0x8000000080008081, 0x8000000000008009,
   44         0x000000000000008A, 0x0000000000000088,
   45         0x0000000080008009, 0x000000008000000A,
   46         0x000000008000808B, 0x800000000000008B,
   47         0x8000000000008089, 0x8000000000008003,
   48         0x8000000000008002, 0x8000000000000080,
   49         0x000000000000800A, 0x800000008000000A,
   50         0x8000000080008081, 0x8000000000008080,
   51         0x0000000080000001, 0x8000000080008008
   52 };
   53 
   54 /*
   55  * Decode a 64-bit word, little-endian encoding.
   56  */
   57 static inline uint64_t
   58 dec64le(const void *data)
   59 {
   60         const unsigned char *buf;
   61 
   62         buf = data;
   63         return (uint64_t)buf[0]
   64                 | ((uint64_t)buf[1] << 8)
   65                 | ((uint64_t)buf[2] << 16)
   66                 | ((uint64_t)buf[3] << 24)
   67                 | ((uint64_t)buf[4] << 32)
   68                 | ((uint64_t)buf[5] << 40)
   69                 | ((uint64_t)buf[6] << 48)
   70                 | ((uint64_t)buf[7] << 56);
   71 }
   72 
   73 /*
   74  * Encode a 64-bit word, little-endian encoding.
   75  */
   76 static inline void
   77 enc64le(void *out, uint64_t x)
   78 {
   79         unsigned char *buf;
   80 
   81         buf = out;
   82         buf[0] = (unsigned char)x;
   83         buf[1] = (unsigned char)(x >> 8);
   84         buf[2] = (unsigned char)(x >> 16);
   85         buf[3] = (unsigned char)(x >> 24);
   86         buf[4] = (unsigned char)(x >> 32);
   87         buf[5] = (unsigned char)(x >> 40);
   88         buf[6] = (unsigned char)(x >> 48);
   89         buf[7] = (unsigned char)(x >> 56);
   90 }
   91 
   92 /*
   93  * XOR a block of data into the provided state. This supports only
   94  * blocks whose length is a multiple of 64 bits.
   95  */
   96 static void
   97 xor_block(uint64_t *A, const void *data, size_t rate)
   98 {
   99         size_t u;
  100 
  101         for (u = 0; u < rate; u += 8) {
  102                 A[u >> 3] ^= dec64le((const unsigned char *)data + u);
  103         }
  104 }
  105 
  106 /*
  107  * Apply the 24-round Keccak-f permutation, in place, to the 25-word
  108  * state A[]. No input data is read here: callers XOR their data into
       * A[] beforehand (see xor_block()).
       *
       * Words 1, 2, 8, 12, 17 and 20 of the state are kept complemented
       * between calls: shake_init() sets them to all-ones and
       * shake_extract() inverts the same words when encoding output. The
       * mix of AND, OR and NOT in the nonlinear step below presumably
       * relies on that representation.
  109  */
  110 static void
  111 process_block(uint64_t *A)
  112 {
  113         uint64_t t0, t1, t2, t3, t4;
  114         uint64_t tt0, tt1, tt2, tt3;
  115         uint64_t t, kt;
  116         uint64_t c0, c1, c2, c3, c4, bnn;
  117         int j;
  118
  119         /*
  120          * Compute the 24 rounds. This loop is partially unrolled (each
  121          * iteration computes two rounds).
               *
               * Neither round moves words around as it goes. The first round
               * of a pair groups the words by (index mod 5); the second one
               * groups them by runs of five consecutive indices. The chain
               * of assignments at the end of the loop body then moves the
               * words, apparently so that the next iteration can use the
               * first round's index pattern again.
  122          */
  123         for (j = 0; j < 24; j += 2) {
  124
                      /*
                       * Round j, linear mixing values. With P[m] the XOR of
                       * the five words A[i] such that i mod 5 == m:
                       *   t_k = rotl(P[(k+1) mod 5], 1) ^ P[(k+4) mod 5]
                       */
  125                 tt0 = A[ 1] ^ A[ 6];
  126                 tt1 = A[11] ^ A[16];
  127                 tt0 ^= A[21] ^ tt1;
  128                 tt0 = (tt0 << 1) | (tt0 >> 63);
  129                 tt2 = A[ 4] ^ A[ 9];
  130                 tt3 = A[14] ^ A[19];
  131                 tt0 ^= A[24];
  132                 tt2 ^= tt3;
  133                 t0 = tt0 ^ tt2;
  134
  135                 tt0 = A[ 2] ^ A[ 7];
  136                 tt1 = A[12] ^ A[17];
  137                 tt0 ^= A[22] ^ tt1;
  138                 tt0 = (tt0 << 1) | (tt0 >> 63);
  139                 tt2 = A[ 0] ^ A[ 5];
  140                 tt3 = A[10] ^ A[15];
  141                 tt0 ^= A[20];
  142                 tt2 ^= tt3;
  143                 t1 = tt0 ^ tt2;
  144
  145                 tt0 = A[ 3] ^ A[ 8];
  146                 tt1 = A[13] ^ A[18];
  147                 tt0 ^= A[23] ^ tt1;
  148                 tt0 = (tt0 << 1) | (tt0 >> 63);
  149                 tt2 = A[ 1] ^ A[ 6];
  150                 tt3 = A[11] ^ A[16];
  151                 tt0 ^= A[21];
  152                 tt2 ^= tt3;
  153                 t2 = tt0 ^ tt2;
  154
  155                 tt0 = A[ 4] ^ A[ 9];
  156                 tt1 = A[14] ^ A[19];
  157                 tt0 ^= A[24] ^ tt1;
  158                 tt0 = (tt0 << 1) | (tt0 >> 63);
  159                 tt2 = A[ 2] ^ A[ 7];
  160                 tt3 = A[12] ^ A[17];
  161                 tt0 ^= A[22];
  162                 tt2 ^= tt3;
  163                 t3 = tt0 ^ tt2;
  164
  165                 tt0 = A[ 0] ^ A[ 5];
  166                 tt1 = A[10] ^ A[15];
  167                 tt0 ^= A[20] ^ tt1;
  168                 tt0 = (tt0 << 1) | (tt0 >> 63);
  169                 tt2 = A[ 3] ^ A[ 8];
  170                 tt3 = A[13] ^ A[18];
  171                 tt0 ^= A[23];
  172                 tt2 ^= tt3;
  173                 t4 = tt0 ^ tt2;
  174
                      /* XOR t_k into the five words whose index is k mod 5. */
  175                 A[ 0] = A[ 0] ^ t0;
  176                 A[ 5] = A[ 5] ^ t0;
  177                 A[10] = A[10] ^ t0;
  178                 A[15] = A[15] ^ t0;
  179                 A[20] = A[20] ^ t0;
  180                 A[ 1] = A[ 1] ^ t1;
  181                 A[ 6] = A[ 6] ^ t1;
  182                 A[11] = A[11] ^ t1;
  183                 A[16] = A[16] ^ t1;
  184                 A[21] = A[21] ^ t1;
  185                 A[ 2] = A[ 2] ^ t2;
  186                 A[ 7] = A[ 7] ^ t2;
  187                 A[12] = A[12] ^ t2;
  188                 A[17] = A[17] ^ t2;
  189                 A[22] = A[22] ^ t2;
  190                 A[ 3] = A[ 3] ^ t3;
  191                 A[ 8] = A[ 8] ^ t3;
  192                 A[13] = A[13] ^ t3;
  193                 A[18] = A[18] ^ t3;
  194                 A[23] = A[23] ^ t3;
  195                 A[ 4] = A[ 4] ^ t4;
  196                 A[ 9] = A[ 9] ^ t4;
  197                 A[14] = A[14] ^ t4;
  198                 A[19] = A[19] ^ t4;
  199                 A[24] = A[24] ^ t4;
                      /*
                       * Rotate every word except A[0] left by its own fixed
                       * bit count.
                       */
  200                 A[ 5] = (A[ 5] << 36) | (A[ 5] >> (64 - 36));
  201                 A[10] = (A[10] <<  3) | (A[10] >> (64 -  3));
  202                 A[15] = (A[15] << 41) | (A[15] >> (64 - 41));
  203                 A[20] = (A[20] << 18) | (A[20] >> (64 - 18));
  204                 A[ 1] = (A[ 1] <<  1) | (A[ 1] >> (64 -  1));
  205                 A[ 6] = (A[ 6] << 44) | (A[ 6] >> (64 - 44));
  206                 A[11] = (A[11] << 10) | (A[11] >> (64 - 10));
  207                 A[16] = (A[16] << 45) | (A[16] >> (64 - 45));
  208                 A[21] = (A[21] <<  2) | (A[21] >> (64 - 2));
  209                 A[ 2] = (A[ 2] << 62) | (A[ 2] >> (64 - 62));
  210                 A[ 7] = (A[ 7] <<  6) | (A[ 7] >> (64 -  6));
  211                 A[12] = (A[12] << 43) | (A[12] >> (64 - 43));
  212                 A[17] = (A[17] << 15) | (A[17] >> (64 - 15));
  213                 A[22] = (A[22] << 61) | (A[22] >> (64 - 61));
  214                 A[ 3] = (A[ 3] << 28) | (A[ 3] >> (64 - 28));
  215                 A[ 8] = (A[ 8] << 55) | (A[ 8] >> (64 - 55));
  216                 A[13] = (A[13] << 25) | (A[13] >> (64 - 25));
  217                 A[18] = (A[18] << 21) | (A[18] >> (64 - 21));
  218                 A[23] = (A[23] << 56) | (A[23] >> (64 - 56));
  219                 A[ 4] = (A[ 4] << 27) | (A[ 4] >> (64 - 27));
  220                 A[ 9] = (A[ 9] << 20) | (A[ 9] >> (64 - 20));
  221                 A[14] = (A[14] << 39) | (A[14] >> (64 - 39));
  222                 A[19] = (A[19] <<  8) | (A[19] >> (64 -  8));
  223                 A[24] = (A[24] << 14) | (A[24] >> (64 - 14));
                      /*
                       * Nonlinear step, done on five groups of five words:
                       * (0,6,12,18,24), (3,9,10,16,22), (1,7,13,19,20),
                       * (4,5,11,17,23) and (2,8,14,15,21). For each group the
                       * five new values are first computed into c0..c4 from
                       * the old words, and only then stored, so that every
                       * expression sees the pre-update values. bnn holds the
                       * complement of one word of the group.
                       */
  224                 bnn = ~A[12];
  225                 kt = A[ 6] | A[12];
  226                 c0 = A[ 0] ^ kt;
  227                 kt = bnn | A[18];
  228                 c1 = A[ 6] ^ kt;
  229                 kt = A[18] & A[24];
  230                 c2 = A[12] ^ kt;
  231                 kt = A[24] | A[ 0];
  232                 c3 = A[18] ^ kt;
  233                 kt = A[ 0] & A[ 6];
  234                 c4 = A[24] ^ kt;
  235                 A[ 0] = c0;
  236                 A[ 6] = c1;
  237                 A[12] = c2;
  238                 A[18] = c3;
  239                 A[24] = c4;
  240                 bnn = ~A[22];
  241                 kt = A[ 9] | A[10];
  242                 c0 = A[ 3] ^ kt;
  243                 kt = A[10] & A[16];
  244                 c1 = A[ 9] ^ kt;
  245                 kt = A[16] | bnn;
  246                 c2 = A[10] ^ kt;
  247                 kt = A[22] | A[ 3];
  248                 c3 = A[16] ^ kt;
  249                 kt = A[ 3] & A[ 9];
  250                 c4 = A[22] ^ kt;
  251                 A[ 3] = c0;
  252                 A[ 9] = c1;
  253                 A[10] = c2;
  254                 A[16] = c3;
  255                 A[22] = c4;
  256                 bnn = ~A[19];
  257                 kt = A[ 7] | A[13];
  258                 c0 = A[ 1] ^ kt;
  259                 kt = A[13] & A[19];
  260                 c1 = A[ 7] ^ kt;
  261                 kt = bnn & A[20];
  262                 c2 = A[13] ^ kt;
  263                 kt = A[20] | A[ 1];
  264                 c3 = bnn ^ kt;
  265                 kt = A[ 1] & A[ 7];
  266                 c4 = A[20] ^ kt;
  267                 A[ 1] = c0;
  268                 A[ 7] = c1;
  269                 A[13] = c2;
  270                 A[19] = c3;
  271                 A[20] = c4;
  272                 bnn = ~A[17];
  273                 kt = A[ 5] & A[11];
  274                 c0 = A[ 4] ^ kt;
  275                 kt = A[11] | A[17];
  276                 c1 = A[ 5] ^ kt;
  277                 kt = bnn | A[23];
  278                 c2 = A[11] ^ kt;
  279                 kt = A[23] & A[ 4];
  280                 c3 = bnn ^ kt;
  281                 kt = A[ 4] | A[ 5];
  282                 c4 = A[23] ^ kt;
  283                 A[ 4] = c0;
  284                 A[ 5] = c1;
  285                 A[11] = c2;
  286                 A[17] = c3;
  287                 A[23] = c4;
  288                 bnn = ~A[ 8];
  289                 kt = bnn & A[14];
  290                 c0 = A[ 2] ^ kt;
  291                 kt = A[14] | A[15];
  292                 c1 = bnn ^ kt;
  293                 kt = A[15] & A[21];
  294                 c2 = A[14] ^ kt;
  295                 kt = A[21] | A[ 2];
  296                 c3 = A[15] ^ kt;
  297                 kt = A[ 2] & A[ 8];
  298                 c4 = A[21] ^ kt;
  299                 A[ 2] = c0;
  300                 A[ 8] = c1;
  301                 A[14] = c2;
  302                 A[15] = c3;
  303                 A[21] = c4;
                      /* Round constant for round j goes into word 0. */
  304                 A[ 0] = A[ 0] ^ RC[j + 0];
  305
                      /*
                       * Round j + 1, same sequence of steps with different
                       * indices. Here the mixing values are built from Q[m],
                       * the XOR of the five words A[5*m] .. A[5*m + 4]:
                       *   t_k = rotl(Q[(k+1) mod 5], 1) ^ Q[(k+4) mod 5]
                       */
  306                 tt0 = A[ 6] ^ A[ 9];
  307                 tt1 = A[ 7] ^ A[ 5];
  308                 tt0 ^= A[ 8] ^ tt1;
  309                 tt0 = (tt0 << 1) | (tt0 >> 63);
  310                 tt2 = A[24] ^ A[22];
  311                 tt3 = A[20] ^ A[23];
  312                 tt0 ^= A[21];
  313                 tt2 ^= tt3;
  314                 t0 = tt0 ^ tt2;
  315
  316                 tt0 = A[12] ^ A[10];
  317                 tt1 = A[13] ^ A[11];
  318                 tt0 ^= A[14] ^ tt1;
  319                 tt0 = (tt0 << 1) | (tt0 >> 63);
  320                 tt2 = A[ 0] ^ A[ 3];
  321                 tt3 = A[ 1] ^ A[ 4];
  322                 tt0 ^= A[ 2];
  323                 tt2 ^= tt3;
  324                 t1 = tt0 ^ tt2;
  325
  326                 tt0 = A[18] ^ A[16];
  327                 tt1 = A[19] ^ A[17];
  328                 tt0 ^= A[15] ^ tt1;
  329                 tt0 = (tt0 << 1) | (tt0 >> 63);
  330                 tt2 = A[ 6] ^ A[ 9];
  331                 tt3 = A[ 7] ^ A[ 5];
  332                 tt0 ^= A[ 8];
  333                 tt2 ^= tt3;
  334                 t2 = tt0 ^ tt2;
  335
  336                 tt0 = A[24] ^ A[22];
  337                 tt1 = A[20] ^ A[23];
  338                 tt0 ^= A[21] ^ tt1;
  339                 tt0 = (tt0 << 1) | (tt0 >> 63);
  340                 tt2 = A[12] ^ A[10];
  341                 tt3 = A[13] ^ A[11];
  342                 tt0 ^= A[14];
  343                 tt2 ^= tt3;
  344                 t3 = tt0 ^ tt2;
  345
  346                 tt0 = A[ 0] ^ A[ 3];
  347                 tt1 = A[ 1] ^ A[ 4];
  348                 tt0 ^= A[ 2] ^ tt1;
  349                 tt0 = (tt0 << 1) | (tt0 >> 63);
  350                 tt2 = A[18] ^ A[16];
  351                 tt3 = A[19] ^ A[17];
  352                 tt0 ^= A[15];
  353                 tt2 ^= tt3;
  354                 t4 = tt0 ^ tt2;
  355
                      /* XOR t_k into the five words A[5*k] .. A[5*k + 4]. */
  356                 A[ 0] = A[ 0] ^ t0;
  357                 A[ 3] = A[ 3] ^ t0;
  358                 A[ 1] = A[ 1] ^ t0;
  359                 A[ 4] = A[ 4] ^ t0;
  360                 A[ 2] = A[ 2] ^ t0;
  361                 A[ 6] = A[ 6] ^ t1;
  362                 A[ 9] = A[ 9] ^ t1;
  363                 A[ 7] = A[ 7] ^ t1;
  364                 A[ 5] = A[ 5] ^ t1;
  365                 A[ 8] = A[ 8] ^ t1;
  366                 A[12] = A[12] ^ t2;
  367                 A[10] = A[10] ^ t2;
  368                 A[13] = A[13] ^ t2;
  369                 A[11] = A[11] ^ t2;
  370                 A[14] = A[14] ^ t2;
  371                 A[18] = A[18] ^ t3;
  372                 A[16] = A[16] ^ t3;
  373                 A[19] = A[19] ^ t3;
  374                 A[17] = A[17] ^ t3;
  375                 A[15] = A[15] ^ t3;
  376                 A[24] = A[24] ^ t4;
  377                 A[22] = A[22] ^ t4;
  378                 A[20] = A[20] ^ t4;
  379                 A[23] = A[23] ^ t4;
  380                 A[21] = A[21] ^ t4;
                      /*
                       * Rotations: the same list of bit counts, in the same
                       * order, as in the first round, applied to the permuted
                       * indices. A[0] is again left alone.
                       */
  381                 A[ 3] = (A[ 3] << 36) | (A[ 3] >> (64 - 36));
  382                 A[ 1] = (A[ 1] <<  3) | (A[ 1] >> (64 -  3));
  383                 A[ 4] = (A[ 4] << 41) | (A[ 4] >> (64 - 41));
  384                 A[ 2] = (A[ 2] << 18) | (A[ 2] >> (64 - 18));
  385                 A[ 6] = (A[ 6] <<  1) | (A[ 6] >> (64 -  1));
  386                 A[ 9] = (A[ 9] << 44) | (A[ 9] >> (64 - 44));
  387                 A[ 7] = (A[ 7] << 10) | (A[ 7] >> (64 - 10));
  388                 A[ 5] = (A[ 5] << 45) | (A[ 5] >> (64 - 45));
  389                 A[ 8] = (A[ 8] <<  2) | (A[ 8] >> (64 - 2));
  390                 A[12] = (A[12] << 62) | (A[12] >> (64 - 62));
  391                 A[10] = (A[10] <<  6) | (A[10] >> (64 -  6));
  392                 A[13] = (A[13] << 43) | (A[13] >> (64 - 43));
  393                 A[11] = (A[11] << 15) | (A[11] >> (64 - 15));
  394                 A[14] = (A[14] << 61) | (A[14] >> (64 - 61));
  395                 A[18] = (A[18] << 28) | (A[18] >> (64 - 28));
  396                 A[16] = (A[16] << 55) | (A[16] >> (64 - 55));
  397                 A[19] = (A[19] << 25) | (A[19] >> (64 - 25));
  398                 A[17] = (A[17] << 21) | (A[17] >> (64 - 21));
  399                 A[15] = (A[15] << 56) | (A[15] >> (64 - 56));
  400                 A[24] = (A[24] << 27) | (A[24] >> (64 - 27));
  401                 A[22] = (A[22] << 20) | (A[22] >> (64 - 20));
  402                 A[20] = (A[20] << 39) | (A[20] >> (64 - 39));
  403                 A[23] = (A[23] <<  8) | (A[23] >> (64 -  8));
  404                 A[21] = (A[21] << 14) | (A[21] >> (64 - 14));
                      /*
                       * Nonlinear step on the groups (0,9,13,17,21),
                       * (18,22,1,5,14), (6,10,19,23,2), (24,3,7,11,15) and
                       * (12,16,20,4,8); again computed into c0..c4 before
                       * being stored.
                       */
  405                 bnn = ~A[13];
  406                 kt = A[ 9] | A[13];
  407                 c0 = A[ 0] ^ kt;
  408                 kt = bnn | A[17];
  409                 c1 = A[ 9] ^ kt;
  410                 kt = A[17] & A[21];
  411                 c2 = A[13] ^ kt;
  412                 kt = A[21] | A[ 0];
  413                 c3 = A[17] ^ kt;
  414                 kt = A[ 0] & A[ 9];
  415                 c4 = A[21] ^ kt;
  416                 A[ 0] = c0;
  417                 A[ 9] = c1;
  418                 A[13] = c2;
  419                 A[17] = c3;
  420                 A[21] = c4;
  421                 bnn = ~A[14];
  422                 kt = A[22] | A[ 1];
  423                 c0 = A[18] ^ kt;
  424                 kt = A[ 1] & A[ 5];
  425                 c1 = A[22] ^ kt;
  426                 kt = A[ 5] | bnn;
  427                 c2 = A[ 1] ^ kt;
  428                 kt = A[14] | A[18];
  429                 c3 = A[ 5] ^ kt;
  430                 kt = A[18] & A[22];
  431                 c4 = A[14] ^ kt;
  432                 A[18] = c0;
  433                 A[22] = c1;
  434                 A[ 1] = c2;
  435                 A[ 5] = c3;
  436                 A[14] = c4;
  437                 bnn = ~A[23];
  438                 kt = A[10] | A[19];
  439                 c0 = A[ 6] ^ kt;
  440                 kt = A[19] & A[23];
  441                 c1 = A[10] ^ kt;
  442                 kt = bnn & A[ 2];
  443                 c2 = A[19] ^ kt;
  444                 kt = A[ 2] | A[ 6];
  445                 c3 = bnn ^ kt;
  446                 kt = A[ 6] & A[10];
  447                 c4 = A[ 2] ^ kt;
  448                 A[ 6] = c0;
  449                 A[10] = c1;
  450                 A[19] = c2;
  451                 A[23] = c3;
  452                 A[ 2] = c4;
  453                 bnn = ~A[11];
  454                 kt = A[ 3] & A[ 7];
  455                 c0 = A[24] ^ kt;
  456                 kt = A[ 7] | A[11];
  457                 c1 = A[ 3] ^ kt;
  458                 kt = bnn | A[15];
  459                 c2 = A[ 7] ^ kt;
  460                 kt = A[15] & A[24];
  461                 c3 = bnn ^ kt;
  462                 kt = A[24] | A[ 3];
  463                 c4 = A[15] ^ kt;
  464                 A[24] = c0;
  465                 A[ 3] = c1;
  466                 A[ 7] = c2;
  467                 A[11] = c3;
  468                 A[15] = c4;
  469                 bnn = ~A[16];
  470                 kt = bnn & A[20];
  471                 c0 = A[12] ^ kt;
  472                 kt = A[20] | A[ 4];
  473                 c1 = bnn ^ kt;
  474                 kt = A[ 4] & A[ 8];
  475                 c2 = A[20] ^ kt;
  476                 kt = A[ 8] | A[12];
  477                 c3 = A[ 4] ^ kt;
  478                 kt = A[12] & A[16];
  479                 c4 = A[ 8] ^ kt;
  480                 A[12] = c0;
  481                 A[16] = c1;
  482                 A[20] = c2;
  483                 A[ 4] = c3;
  484                 A[ 8] = c4;
                      /* Round constant for round j + 1 goes into word 0. */
  485                 A[ 0] = A[ 0] ^ RC[j + 1];
                      /*
                       * Move 24 of the 25 words along two cycles of twelve
                       * (5,18,11,10,6,22,20,12,19,15,24,8 and
                       * 1,9,14,2,13,23,4,21,16,3,17,7); A[0] stays in place.
                       * 't' saves the first word of each cycle until the end.
                       */
  486                 t = A[ 5];
  487                 A[ 5] = A[18];
  488                 A[18] = A[11];
  489                 A[11] = A[10];
  490                 A[10] = A[ 6];
  491                 A[ 6] = A[22];
  492                 A[22] = A[20];
  493                 A[20] = A[12];
  494                 A[12] = A[19];
  495                 A[19] = A[15];
  496                 A[15] = A[24];
  497                 A[24] = A[ 8];
  498                 A[ 8] = t;
  499                 t = A[ 1];
  500                 A[ 1] = A[ 9];
  501                 A[ 9] = A[14];
  502                 A[14] = A[ 2];
  503                 A[ 2] = A[13];
  504                 A[13] = A[23];
  505                 A[23] = A[ 4];
  506                 A[ 4] = A[21];
  507                 A[21] = A[16];
  508                 A[16] = A[ 3];
  509                 A[ 3] = A[17];
  510                 A[17] = A[ 7];
  511                 A[ 7] = t;
  512         }
  513 }
  514 
  515 /* see falcon.h */
  516 void
  517 shake_init(shake_context *sc, int capacity)
  518 {
  519         sc->rate = 200 - (size_t)(capacity >> 3);
  520         sc->dptr = 0;
  521         memset(sc->A, 0, sizeof sc->A);
  522         sc->A[ 1] = ~(uint64_t)0;
  523         sc->A[ 2] = ~(uint64_t)0;
  524         sc->A[ 8] = ~(uint64_t)0;
  525         sc->A[12] = ~(uint64_t)0;
  526         sc->A[17] = ~(uint64_t)0;
  527         sc->A[20] = ~(uint64_t)0;
  528 }
  529 
  530 /* see falcon.h */
  531 void
  532 shake_inject(shake_context *sc, const void *data, size_t len)
  533 {
  534         const unsigned char *buf;
  535         size_t rate, dptr;
  536 
  537         buf = data;
  538         rate = sc->rate;
  539         dptr = sc->dptr;
  540         while (len > 0) {
  541                 size_t clen;
  542 
  543                 clen = rate - dptr;
  544                 if (clen > len) {
  545                         clen = len;
  546                 }
  547                 memcpy(sc->dbuf + dptr, buf, clen);
  548                 dptr += clen;
  549                 buf += clen;
  550                 len -= clen;
  551                 if (dptr == rate) {
  552                         xor_block(sc->A, sc->dbuf, rate);
  553                         process_block(sc->A);
  554                         dptr = 0;
  555                 }
  556         }
  557         sc->dptr = dptr;
  558 }
  559 
  560 /* see falcon.h */
  561 void
  562 shake_flip(shake_context *sc)
  563 {
  564         /*
  565          * We apply padding and pre-XOR the value into the state. We
  566          * set dptr to the end of the buffer, so that first call to
  567          * shake_extract() will process the block.
  568          */
  569         if ((sc->dptr + 1) == sc->rate) {
  570                 sc->dbuf[sc->dptr ++] = 0x9F;
  571         } else {
  572                 sc->dbuf[sc->dptr ++] = 0x1F;
  573                 memset(sc->dbuf + sc->dptr, 0x00, sc->rate - sc->dptr - 1);
  574                 sc->dbuf[sc->rate - 1] = 0x80;
  575                 sc->dptr = sc->rate;
  576         }
  577         xor_block(sc->A, sc->dbuf, sc->rate);
  578 }
  579 
  580 /* see falcon.h */
  581 void
  582 shake_extract(shake_context *sc, void *out, size_t len)
  583 {
  584         unsigned char *buf;
  585         size_t dptr, rate;
  586 
  587         buf = out;
  588         dptr = sc->dptr;
  589         rate = sc->rate;
  590         while (len > 0) {
  591                 size_t clen;
  592 
  593                 if (dptr == rate) {
  594                         unsigned char *dbuf;
  595                         uint64_t *A;
  596 
  597                         A = sc->A;
  598                         dbuf = sc->dbuf;
  599                         process_block(A);
  600                         enc64le(dbuf +   0,  A[ 0]);
  601                         enc64le(dbuf +   8, ~A[ 1]);
  602                         enc64le(dbuf +  16, ~A[ 2]);
  603                         enc64le(dbuf +  24,  A[ 3]);
  604                         enc64le(dbuf +  32,  A[ 4]);
  605                         enc64le(dbuf +  40,  A[ 5]);
  606                         enc64le(dbuf +  48,  A[ 6]);
  607                         enc64le(dbuf +  56,  A[ 7]);
  608                         enc64le(dbuf +  64, ~A[ 8]);
  609                         enc64le(dbuf +  72,  A[ 9]);
  610                         enc64le(dbuf +  80,  A[10]);
  611                         enc64le(dbuf +  88,  A[11]);
  612                         enc64le(dbuf +  96, ~A[12]);
  613                         enc64le(dbuf + 104,  A[13]);
  614                         enc64le(dbuf + 112,  A[14]);
  615                         enc64le(dbuf + 120,  A[15]);
  616                         enc64le(dbuf + 128,  A[16]);
  617                         enc64le(dbuf + 136, ~A[17]);
  618                         enc64le(dbuf + 144,  A[18]);
  619                         enc64le(dbuf + 152,  A[19]);
  620                         enc64le(dbuf + 160, ~A[20]);
  621                         enc64le(dbuf + 168,  A[21]);
  622                         enc64le(dbuf + 176,  A[22]);
  623                         enc64le(dbuf + 184,  A[23]);
  624                         enc64le(dbuf + 192,  A[24]);
  625                         dptr = 0;
  626                 }
  627                 clen = rate - dptr;
  628                 if (clen > len) {
  629                         clen = len;
  630                 }
  631                 memcpy(buf, sc->dbuf + dptr, clen);
  632                 dptr += clen;
  633                 buf += clen;
  634                 len -= clen;
  635         }
  636         sc->dptr = dptr;
  637 }