LCOV - code coverage report
Current view: top level - ballet/aes - fd_aes_base_ref.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 474 482 98.3 %
Date: 2026-06-09 08:01:34 Functions: 18 18 100.0 %

          Line data    Source code
       1             : /* fd_aes_ref.c was imported from the OpenSSL project circa 2023-Aug.
       2             :    Original source file:  crypto/aes/aes_core.c */
       3             : 
       4             : /*
       5             :  * Copyright 2002-2022 The OpenSSL Project Authors. All Rights Reserved.
       6             :  *
       7             :  * Licensed under the Apache License 2.0 (the "License").  You may not use
       8             :  * this file except in compliance with the License.  You can obtain a copy
       9             :  * in the file LICENSE in the source distribution or at
      10             :  * https://www.openssl.org/source/license.html
      11             :  */
      12             : 
      13             : /**
      14             :  * rijndael-alg-fst.c
      15             :  *
      16             :  * @version 3.0 (December 2000)
      17             :  *
      18             :  * Optimised ANSI C code for the Rijndael cipher (now AES)
      19             :  *
      20             :  * @author Vincent Rijmen
      21             :  * @author Antoon Bosselaers
      22             :  * @author Paulo Barreto
      23             :  *
      24             :  * This code is hereby placed in the public domain.
      25             :  *
      26             :  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
      27             :  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
      28             :  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      29             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
      30             :  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      31             :  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
      32             :  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
      33             :  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      34             :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
      35             :  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
      36             :  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      37             :  */
      38             : 
      39             : /* Note: rewritten a little bit to provide error control and an OpenSSL-
      40             :    compatible API */
      41             : 
      42             : #include <stdlib.h>
      43             : #include "fd_aes_gcm_ref.h"
      44             : 
      45             : typedef union {
      46             :   uchar b[8];
      47             :   uint w[2];
      48             :   ulong d;
      49             : } uni;
      50             : 
      51             : /*
      52             :  * Compute w := (w * x) mod (x^8 + x^4 + x^3 + x^1 + 1)
      53             :  * Therefore the name "xtime".
      54             :  */
      55             : static void
      56    46591110 : XtimeWord( uint * w ) {
      57    46591110 :   uint a, b;
      58             : 
      59    46591110 :   a = *w;
      60    46591110 :   b = a & 0x80808080u;
      61    46591110 :   a ^= b;
      62    46591110 :   b -= b >> 7;
      63    46591110 :   b &= 0x1B1B1B1Bu;
      64    46591110 :   b ^= a << 1;
      65    46591110 :   *w = b;
      66    46591110 : }
      67             : 
      68             : static void
      69   474406110 : XtimeLong( ulong * w ) {
      70   474406110 :   ulong a, b;
      71             : 
      72   474406110 :   a = *w;
      73   474406110 :   b = a & (ulong)(0x8080808080808080);
      74   474406110 :   a ^= b;
      75   474406110 :   b -= b >> 7;
      76   474406110 :   b &= (ulong)(0x1B1B1B1B1B1B1B1B);
      77   474406110 :   b ^= a << 1;
      78   474406110 :   *w = b;
      79   474406110 : }
      80             : 
      81             : /*
      82             :  * This computes w := S * w ^ -1 + c, where c = {01100011}.
      83             :  * Instead of using GF(2^8) mod (x^8+x^4+x^3+x+1} we do the inversion
      84             :  * in GF(GF(GF(2^2)^2)^2) mod (X^2+X+8)
      85             :  * and GF(GF(2^2)^2) mod (X^2+X+2)
      86             :  * and GF(2^2) mod (X^2+X+1)
      87             :  * The first part of the algorithm below transfers the coordinates
      88             :  * {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80} =>
      89             :  * {1,Y,Y^2,Y^3,Y^4,Y^5,Y^6,Y^7} with Y=0x41:
      90             :  * {0x01,0x41,0x66,0x6c,0x56,0x9a,0x58,0xc4}
      91             :  * The last part undoes the coordinate transfer and the final affine
      92             :  * transformation S:
      93             :  * b[i] = b[i] + b[(i+4)%8] + b[(i+5)%8] + b[(i+6)%8] + b[(i+7)%8] + c[i]
      94             :  * in one step.
      95             :  * The multiplication in GF(2^2^2^2) is done in ordinary coords:
      96             :  * A = (a0*1 + a1*x^4)
      97             :  * B = (b0*1 + b1*x^4)
      98             :  * AB = ((a0*b0 + 8*a1*b1)*1 + (a1*b0 + (a0+a1)*b1)*x^4)
      99             :  * When A = (a0,a1) is given we want to solve AB = 1:
     100             :  * (a) 1 = a0*b0 + 8*a1*b1
     101             :  * (b) 0 = a1*b0 + (a0+a1)*b1
     102             :  * => multiply (a) by a1 and (b) by a0
     103             :  * (c) a1 = a1*a0*b0 + (8*a1*a1)*b1
     104             :  * (d) 0 = a1*a0*b0 + (a0*a0+a1*a0)*b1
     105             :  * => add (c) + (d)
     106             :  * (e) a1 = (a0*a0 + a1*a0 + 8*a1*a1)*b1
     107             :  * => therefore
     108             :  * b1 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * a1
     109             :  * => and adding (a1*b0) to (b) we get
     110             :  * (f) a1*b0 = (a0+a1)*b1
     111             :  * => therefore
     112             :  * b0 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * (a0+a1)
     113             :  * Note this formula also works for the case
     114             :  * (a0+a1)*a0 + 8*a1*a1 = 0
     115             :  * if the inverse element for 0^-1 is mapped to 0.
     116             :  * Repeat the same for GF(2^2^2) and GF(2^2).
     117             :  * We get the following algorithm:
     118             :  * inv8(a0,a1):
     119             :  *   x0 = a0^a1
     120             :  *   [y0,y1] = mul4([x0,a1],[a0,a1]); (*)
     121             :  *   y1 = mul4(8,y1);
     122             :  *   t = inv4(y0^y1);
     123             :  *   [b0,b1] = mul4([x0,a1],[t,t]); (*)
     124             :  *   return [b0,b1];
     125             :  * The non-linear multiplies (*) can be done in parallel at no extra cost.
     126             :  */
     127             : static void
     128    46591110 : SubWord( uint * w ) {
     129    46591110 :   uint x, y, a1, a2, a3, a4, a5, a6;
     130             : 
     131    46591110 :   x = *w;
     132    46591110 :   y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
     133    46591110 :   x &= 0xDDDDDDDDu;
     134    46591110 :   x ^= y & 0x57575757u;
     135    46591110 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     136    46591110 :   x ^= y & 0x1C1C1C1Cu;
     137    46591110 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     138    46591110 :   x ^= y & 0x4A4A4A4Au;
     139    46591110 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     140    46591110 :   x ^= y & 0x42424242u;
     141    46591110 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     142    46591110 :   x ^= y & 0x64646464u;
     143    46591110 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     144    46591110 :   x ^= y & 0xE0E0E0E0u;
     145    46591110 :   a1 = x;
     146    46591110 :   a1 ^= (x & 0xF0F0F0F0u) >> 4;
     147    46591110 :   a2 = ((x & 0xCCCCCCCCu) >> 2) | ((x & 0x33333333u) << 2);
     148    46591110 :   a3 = x & a1;
     149    46591110 :   a3 ^= (a3 & 0xAAAAAAAAu) >> 1;
     150    46591110 :   a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAu;
     151    46591110 :   a4 = a2 & a1;
     152    46591110 :   a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
     153    46591110 :   a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAu;
     154    46591110 :   a5 = (a3 & 0xCCCCCCCCu) >> 2;
     155    46591110 :   a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
     156    46591110 :   a4 = a5 & 0x22222222u;
     157    46591110 :   a4 |= a4 >> 1;
     158    46591110 :   a4 ^= (a5 << 1) & 0x22222222u;
     159    46591110 :   a3 ^= a4;
     160    46591110 :   a5 = a3 & 0xA0A0A0A0u;
     161    46591110 :   a5 |= a5 >> 1;
     162    46591110 :   a5 ^= (a3 << 1) & 0xA0A0A0A0u;
     163    46591110 :   a4 = a5 & 0xC0C0C0C0u;
     164    46591110 :   a6 = a4 >> 2;
     165    46591110 :   a4 ^= (a5 << 2) & 0xC0C0C0C0u;
     166    46591110 :   a5 = a6 & 0x20202020u;
     167    46591110 :   a5 |= a5 >> 1;
     168    46591110 :   a5 ^= (a6 << 1) & 0x20202020u;
     169    46591110 :   a4 |= a5;
     170    46591110 :   a3 ^= a4 >> 4;
     171    46591110 :   a3 &= 0x0F0F0F0Fu;
     172    46591110 :   a2 = a3;
     173    46591110 :   a2 ^= (a3 & 0x0C0C0C0Cu) >> 2;
     174    46591110 :   a4 = a3 & a2;
     175    46591110 :   a4 ^= (uint)(a4 & 0x0A0A0A0A0Au) >> 1u;
     176    46591110 :   a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0Au;
     177    46591110 :   a5 = a4 & 0x08080808u;
     178    46591110 :   a5 |= a5 >> 1;
     179    46591110 :   a5 ^= (a4 << 1) & 0x08080808u;
     180    46591110 :   a4 ^= a5 >> 2;
     181    46591110 :   a4 &= 0x03030303u;
     182    46591110 :   a4 ^= (a4 & 0x02020202u) >> 1;
     183    46591110 :   a4 |= a4 << 2;
     184    46591110 :   a3 = a2 & a4;
     185    46591110 :   a3 ^= (a3 & 0x0A0A0A0Au) >> 1;
     186    46591110 :   a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0Au;
     187    46591110 :   a3 |= a3 << 4;
     188    46591110 :   a2 = ((a1 & 0xCCCCCCCCu) >> 2) | ((a1 & 0x33333333u) << 2);
     189    46591110 :   x = a1 & a3;
     190    46591110 :   x ^= (x & 0xAAAAAAAAu) >> 1;
     191    46591110 :   x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAu;
     192    46591110 :   a4 = a2 & a3;
     193    46591110 :   a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
     194    46591110 :   a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAu;
     195    46591110 :   a5 = (x & 0xCCCCCCCCu) >> 2;
     196    46591110 :   x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
     197    46591110 :   a4 = a5 & 0x22222222u;
     198    46591110 :   a4 |= a4 >> 1;
     199    46591110 :   a4 ^= (a5 << 1) & 0x22222222u;
     200    46591110 :   x ^= a4;
     201    46591110 :   y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
     202    46591110 :   x &= 0x39393939u;
     203    46591110 :   x ^= y & 0x3F3F3F3Fu;
     204    46591110 :   y = ((y & 0xFCFCFCFCu) >> 2) | ((y & 0x03030303u) << 6);
     205    46591110 :   x ^= y & 0x97979797u;
     206    46591110 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     207    46591110 :   x ^= y & 0x9B9B9B9Bu;
     208    46591110 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     209    46591110 :   x ^= y & 0x3C3C3C3Cu;
     210    46591110 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     211    46591110 :   x ^= y & 0xDDDDDDDDu;
     212    46591110 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     213    46591110 :   x ^= y & 0x72727272u;
     214    46591110 :   x ^= 0x63636363u;
     215    46591110 :   *w = x;
     216    46591110 : }
     217             : 
     218             : static void
     219   527102540 : SubLong( ulong * w ) {
     220   527102540 :   ulong x, y, a1, a2, a3, a4, a5, a6;
     221             : 
     222   527102540 :   x = *w;
     223   527102540 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     224   527102540 :   x &= (0xDDDDDDDDDDDDDDDDUL);
     225   527102540 :   x ^= y & (0x5757575757575757UL);
     226   527102540 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     227   527102540 :   x ^= y & (0x1C1C1C1C1C1C1C1CUL);
     228   527102540 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     229   527102540 :   x ^= y & (0x4A4A4A4A4A4A4A4AUL);
     230   527102540 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     231   527102540 :   x ^= y & (0x4242424242424242UL);
     232   527102540 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     233   527102540 :   x ^= y & (0x6464646464646464UL);
     234   527102540 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     235   527102540 :   x ^= y & (0xE0E0E0E0E0E0E0E0UL);
     236   527102540 :   a1 = x;
     237   527102540 :   a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
     238   527102540 :   a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
     239   527102540 :   a3 = x & a1;
     240   527102540 :   a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     241   527102540 :   a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
     242   527102540 :   a4 = a2 & a1;
     243   527102540 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     244   527102540 :   a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     245   527102540 :   a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     246   527102540 :   a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     247   527102540 :   a4 = a5 & (0x2222222222222222UL);
     248   527102540 :   a4 |= a4 >> 1;
     249   527102540 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     250   527102540 :   a3 ^= a4;
     251   527102540 :   a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
     252   527102540 :   a5 |= a5 >> 1;
     253   527102540 :   a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
     254   527102540 :   a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
     255   527102540 :   a6 = a4 >> 2;
     256   527102540 :   a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
     257   527102540 :   a5 = a6 & (0x2020202020202020UL);
     258   527102540 :   a5 |= a5 >> 1;
     259   527102540 :   a5 ^= (a6 << 1) & (0x2020202020202020UL);
     260   527102540 :   a4 |= a5;
     261   527102540 :   a3 ^= a4 >> 4;
     262   527102540 :   a3 &= (0x0F0F0F0F0F0F0F0FUL);
     263   527102540 :   a2 = a3;
     264   527102540 :   a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
     265   527102540 :   a4 = a3 & a2;
     266   527102540 :   a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     267   527102540 :   a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
     268   527102540 :   a5 = a4 & (0x0808080808080808UL);
     269   527102540 :   a5 |= a5 >> 1;
     270   527102540 :   a5 ^= (a4 << 1) & (0x0808080808080808UL);
     271   527102540 :   a4 ^= a5 >> 2;
     272   527102540 :   a4 &= (0x0303030303030303UL);
     273   527102540 :   a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
     274   527102540 :   a4 |= a4 << 2;
     275   527102540 :   a3 = a2 & a4;
     276   527102540 :   a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     277   527102540 :   a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
     278   527102540 :   a3 |= a3 << 4;
     279   527102540 :   a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
     280   527102540 :   x = a1 & a3;
     281   527102540 :   x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     282   527102540 :   x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
     283   527102540 :   a4 = a2 & a3;
     284   527102540 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     285   527102540 :   a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     286   527102540 :   a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     287   527102540 :   x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     288   527102540 :   a4 = a5 & (0x2222222222222222UL);
     289   527102540 :   a4 |= a4 >> 1;
     290   527102540 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     291   527102540 :   x ^= a4;
     292   527102540 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     293   527102540 :   x &= (0x3939393939393939UL);
     294   527102540 :   x ^= y & (0x3F3F3F3F3F3F3F3FUL);
     295   527102540 :   y = ((y & (0xFCFCFCFCFCFCFCFCUL)) >> 2) | ((y & (0x0303030303030303UL)) << 6);
     296   527102540 :   x ^= y & (0x9797979797979797UL);
     297   527102540 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     298   527102540 :   x ^= y & (0x9B9B9B9B9B9B9B9BUL);
     299   527102540 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     300   527102540 :   x ^= y & (0x3C3C3C3C3C3C3C3CUL);
     301   527102540 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     302   527102540 :   x ^= y & (0xDDDDDDDDDDDDDDDDUL);
     303   527102540 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     304   527102540 :   x ^= y & (0x7272727272727272UL);
     305   527102540 :   x ^= (0x6363636363636363UL);
     306   527102540 :   *w = x;
     307   527102540 : }
     308             : 
     309             : /*
     310             :  * This computes w := (S^-1 * (w + c))^-1
     311             :  */
     312             : static void
     313        5120 : InvSubLong( ulong * w ) {
     314        5120 :   ulong x, y, a1, a2, a3, a4, a5, a6;
     315             : 
     316        5120 :   x = *w;
     317        5120 :   x ^= (0x6363636363636363UL);
     318        5120 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     319        5120 :   x &= (0xFDFDFDFDFDFDFDFDUL);
     320        5120 :   x ^= y & (0x5E5E5E5E5E5E5E5EUL);
     321        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     322        5120 :   x ^= y & (0xF3F3F3F3F3F3F3F3UL);
     323        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     324        5120 :   x ^= y & (0xF5F5F5F5F5F5F5F5UL);
     325        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     326        5120 :   x ^= y & (0x7878787878787878UL);
     327        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     328        5120 :   x ^= y & (0x7777777777777777UL);
     329        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     330        5120 :   x ^= y & (0x1515151515151515UL);
     331        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     332        5120 :   x ^= y & (0xA5A5A5A5A5A5A5A5UL);
     333        5120 :   a1 = x;
     334        5120 :   a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
     335        5120 :   a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
     336        5120 :   a3 = x & a1;
     337        5120 :   a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     338        5120 :   a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
     339        5120 :   a4 = a2 & a1;
     340        5120 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     341        5120 :   a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     342        5120 :   a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     343        5120 :   a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     344        5120 :   a4 = a5 & (0x2222222222222222UL);
     345        5120 :   a4 |= a4 >> 1;
     346        5120 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     347        5120 :   a3 ^= a4;
     348        5120 :   a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
     349        5120 :   a5 |= a5 >> 1;
     350        5120 :   a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
     351        5120 :   a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
     352        5120 :   a6 = a4 >> 2;
     353        5120 :   a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
     354        5120 :   a5 = a6 & (0x2020202020202020UL);
     355        5120 :   a5 |= a5 >> 1;
     356        5120 :   a5 ^= (a6 << 1) & (0x2020202020202020UL);
     357        5120 :   a4 |= a5;
     358        5120 :   a3 ^= a4 >> 4;
     359        5120 :   a3 &= (0x0F0F0F0F0F0F0F0FUL);
     360        5120 :   a2 = a3;
     361        5120 :   a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
     362        5120 :   a4 = a3 & a2;
     363        5120 :   a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     364        5120 :   a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
     365        5120 :   a5 = a4 & (0x0808080808080808UL);
     366        5120 :   a5 |= a5 >> 1;
     367        5120 :   a5 ^= (a4 << 1) & (0x0808080808080808UL);
     368        5120 :   a4 ^= a5 >> 2;
     369        5120 :   a4 &= (0x0303030303030303UL);
     370        5120 :   a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
     371        5120 :   a4 |= a4 << 2;
     372        5120 :   a3 = a2 & a4;
     373        5120 :   a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     374        5120 :   a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
     375        5120 :   a3 |= a3 << 4;
     376        5120 :   a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
     377        5120 :   x = a1 & a3;
     378        5120 :   x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     379        5120 :   x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
     380        5120 :   a4 = a2 & a3;
     381        5120 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     382        5120 :   a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     383        5120 :   a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     384        5120 :   x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     385        5120 :   a4 = a5 & (0x2222222222222222UL);
     386        5120 :   a4 |= a4 >> 1;
     387        5120 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     388        5120 :   x ^= a4;
     389        5120 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     390        5120 :   x &= (0xB5B5B5B5B5B5B5B5UL);
     391        5120 :   x ^= y & (0x4040404040404040UL);
     392        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     393        5120 :   x ^= y & (0x8080808080808080UL);
     394        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     395        5120 :   x ^= y & (0x1616161616161616UL);
     396        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     397        5120 :   x ^= y & (0xEBEBEBEBEBEBEBEBUL);
     398        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     399        5120 :   x ^= y & (0x9797979797979797UL);
     400        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     401        5120 :   x ^= y & (0xFBFBFBFBFBFBFBFBUL);
     402        5120 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     403        5120 :   x ^= y & (0x7D7D7D7D7D7D7D7DUL);
     404        5120 :   *w = x;
     405        5120 : }
     406             : 
     407             : static void
     408   263551270 : ShiftRows( ulong * state ) {
     409   263551270 :   uchar s[4];
     410   263551270 :   uchar *s0;
     411   263551270 :   int r;
     412             : 
     413   263551270 :   s0 = (uchar *)state;
     414  1317756350 :   for (r = 0; r < 4; r++) {
     415  1054205080 :     s[0] = s0[0*4 + r];
     416  1054205080 :     s[1] = s0[1*4 + r];
     417  1054205080 :     s[2] = s0[2*4 + r];
     418  1054205080 :     s[3] = s0[3*4 + r];
     419  1054205080 :     s0[0*4 + r] = s[(r+0) % 4];
     420  1054205080 :     s0[1*4 + r] = s[(r+1) % 4];
     421  1054205080 :     s0[2*4 + r] = s[(r+2) % 4];
     422  1054205080 :     s0[3*4 + r] = s[(r+3) % 4];
     423  1054205080 :   }
     424   263551270 : }
     425             : 
     426             : static void
     427        2560 : InvShiftRows( ulong * state ) {
     428        2560 :   uchar s[4];
     429        2560 :   uchar *s0;
     430        2560 :   int r;
     431             : 
     432        2560 :   s0 = (uchar *)state;
     433       12800 :   for (r = 0; r < 4; r++) {
     434       10240 :     s[0] = s0[0*4 + r];
     435       10240 :     s[1] = s0[1*4 + r];
     436       10240 :     s[2] = s0[2*4 + r];
     437       10240 :     s[3] = s0[3*4 + r];
     438       10240 :     s0[0*4 + r] = s[(4-r) % 4];
     439       10240 :     s0[1*4 + r] = s[(5-r) % 4];
     440       10240 :     s0[2*4 + r] = s[(6-r) % 4];
     441       10240 :     s0[3*4 + r] = s[(7-r) % 4];
     442       10240 :   }
     443        2560 : }
     444             : 
     445             : static void
     446   237196143 : MixColumns( ulong * state ) {
     447   237196143 :   uni s1;
     448   237196143 :   uni s;
     449   237196143 :   int c;
     450             : 
     451   711588429 :   for (c = 0; c < 2; c++) {
     452   474392286 :     s1.d = state[c];
     453   474392286 :     s.d = s1.d;
     454   474392286 :     s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
     455   474392286 :             | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
     456   474392286 :     s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
     457   474392286 :             | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
     458   474392286 :     s.d ^= s1.d;
     459   474392286 :     XtimeLong(&s1.d);
     460   474392286 :     s.d ^= s1.d;
     461   474392286 :     s.b[0] ^= s1.b[1];
     462   474392286 :     s.b[1] ^= s1.b[2];
     463   474392286 :     s.b[2] ^= s1.b[3];
     464   474392286 :     s.b[3] ^= s1.b[0];
     465   474392286 :     s.b[4] ^= s1.b[5];
     466   474392286 :     s.b[5] ^= s1.b[6];
     467   474392286 :     s.b[6] ^= s1.b[7];
     468   474392286 :     s.b[7] ^= s1.b[4];
     469   474392286 :     state[c] = s.d;
     470   474392286 :   }
     471   237196143 : }
     472             : 
     473             : static void InvMixColumns(ulong * state)
     474        2304 : {
     475        2304 :   uni s1;
     476        2304 :   uni s;
     477        2304 :   int c;
     478             : 
     479        6912 :   for (c = 0; c < 2; c++) {
     480        4608 :     s1.d = state[c];
     481        4608 :     s.d = s1.d;
     482        4608 :     s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
     483        4608 :             | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
     484        4608 :     s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
     485        4608 :             | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
     486        4608 :     s.d ^= s1.d;
     487        4608 :     XtimeLong(&s1.d);
     488        4608 :     s.d ^= s1.d;
     489        4608 :     s.b[0] ^= s1.b[1];
     490        4608 :     s.b[1] ^= s1.b[2];
     491        4608 :     s.b[2] ^= s1.b[3];
     492        4608 :     s.b[3] ^= s1.b[0];
     493        4608 :     s.b[4] ^= s1.b[5];
     494        4608 :     s.b[5] ^= s1.b[6];
     495        4608 :     s.b[6] ^= s1.b[7];
     496        4608 :     s.b[7] ^= s1.b[4];
     497        4608 :     XtimeLong(&s1.d);
     498        4608 :     s1.d ^= ((s1.d & (0xFFFF0000FFFF0000UL)) >> 16)
     499        4608 :             | ((s1.d & (0x0000FFFF0000FFFFUL)) << 16);
     500        4608 :     s.d ^= s1.d;
     501        4608 :     XtimeLong(&s1.d);
     502        4608 :     s1.d ^= ((s1.d & (0xFF00FF00FF00FF00UL)) >> 8)
     503        4608 :             | ((s1.d & (0x00FF00FF00FF00FFUL)) << 8);
     504        4608 :     s.d ^= s1.d;
     505        4608 :     state[c] = s.d;
     506        4608 :   }
     507        2304 : }
     508             : 
     509             : static void
     510             : AddRoundKey( ulong *       state,
     511   289909213 :              ulong const * w ) {
     512   289909213 :   state[0] ^= w[0];
     513   289909213 :   state[1] ^= w[1];
     514   289909213 : }
     515             : 
     516             : static void
     517             : Cipher( uchar const * in,
     518             :         uchar *       out,
     519             :         ulong const * w,
     520    26355127 :         int           nr ) {
     521    26355127 :   ulong state[2];
     522    26355127 :   int i;
     523             : 
     524    26355127 :   memcpy(state, in, 16);
     525             : 
     526    26355127 :   AddRoundKey(state, w);
     527             : 
     528   263551270 :   for (i = 1; i < nr; i++) {
     529   237196143 :     SubLong(&state[0]);
     530   237196143 :     SubLong(&state[1]);
     531   237196143 :     ShiftRows(state);
     532   237196143 :     MixColumns(state);
     533   237196143 :     AddRoundKey(state, w + i*2);
     534   237196143 :   }
     535             : 
     536    26355127 :   SubLong(&state[0]);
     537    26355127 :   SubLong(&state[1]);
     538    26355127 :   ShiftRows(state);
     539    26355127 :   AddRoundKey(state, w + nr*2);
     540             : 
     541    26355127 :   memcpy(out, state, 16);
     542    26355127 : }
     543             : 
     544             : static void
     545             : InvCipher( uchar const * in,
     546             :            uchar *       out,
     547             :            ulong const * w,
     548         256 :            int           nr ) {
     549         256 :   ulong state[2];
     550         256 :   int i;
     551             : 
     552         256 :   memcpy(state, in, 16);
     553             : 
     554         256 :   AddRoundKey(state, w + nr*2);
     555             : 
     556        2560 :   for (i = nr - 1; i > 0; i--) {
     557        2304 :     InvShiftRows(state);
     558        2304 :     InvSubLong(&state[0]);
     559        2304 :     InvSubLong(&state[1]);
     560        2304 :     AddRoundKey(state, w + i*2);
     561        2304 :     InvMixColumns(state);
     562        2304 :   }
     563             : 
     564         256 :   InvShiftRows(state);
     565         256 :   InvSubLong(&state[0]);
     566         256 :   InvSubLong(&state[1]);
     567         256 :   AddRoundKey(state, w);
     568             : 
     569         256 :   memcpy(out, state, 16);
     570         256 : }
     571             : 
     572             : static void
     573    46591110 : RotWord( uint * x ) {
     574    46591110 :   uchar *w0;
     575    46591110 :   uchar tmp;
     576             : 
     577    46591110 :   w0 = (uchar *)x;
     578    46591110 :   tmp = w0[0];
     579    46591110 :   w0[0] = w0[1];
     580    46591110 :   w0[1] = w0[2];
     581    46591110 :   w0[2] = w0[3];
     582    46591110 :   w0[3] = tmp;
     583    46591110 : }
     584             : 
     585             : static void
     586             : KeyExpansion( uchar const * key,
     587             :               ulong *       w,
     588             :               int           nr,
     589     4659111 :               int           nk ) {
     590     4659111 :   uint rcon;
     591     4659111 :   uni prev;
     592     4659111 :   uint temp;
     593     4659111 :   int i, n;
     594             : 
     595     4659111 :   memcpy( w, key, (ulong)nk*4UL );
     596     4659111 :   memcpy( &rcon, "\1\0\0\0", 4  );
     597     4659111 :   n = nk/2;
     598     4659111 :   prev.d = w[n-1];
     599    97841331 :   for (i = n; i < (nr+1)*2; i++) {
     600    93182220 :     temp = prev.w[1];
     601    93182220 :     if (i % n == 0) {
     602    46591110 :       RotWord(&temp);
     603    46591110 :       SubWord(&temp);
     604    46591110 :       temp ^= rcon;
     605    46591110 :       XtimeWord(&rcon);
     606    46591110 :     } else if (nk > 6 && i % n == 2) {
     607           0 :       SubWord(&temp);
     608           0 :     }
     609    93182220 :     prev.d = w[i-n];
     610    93182220 :     prev.w[0] ^= temp;
     611    93182220 :     prev.w[1] ^= prev.w[0];
     612    93182220 :     w[i] = prev.d;
     613    93182220 :   }
     614     4659111 : }
     615             : 
     616             : /**
     617             :  * Expand the cipher key into the encryption key schedule.
     618             :  */
     619             : int
     620             : fd_aes_ref_set_encrypt_key( uchar const *      userKey,
     621             :                             ulong const        bits,
     622     4659111 :                             fd_aes_key_ref_t * key ) {
     623     4659111 :   ulong *rk;
     624             : 
     625     4659111 :   if (!userKey || !key)
     626           0 :       return -1;
     627     4659111 :   if (bits != 128 && bits != 192 && bits != 256)
     628           0 :       return -2;
     629             : 
     630     4659111 :   rk = (ulong *)fd_type_pun( key->rd_key );  /* strict aliasing violation */
     631             : 
     632     4659111 :   if (bits == 128)
     633     4659111 :       key->rounds = 10;
     634           0 :   else if (bits == 192)
     635           0 :       key->rounds = 12;
     636           0 :   else
     637           0 :       key->rounds = 14;
     638             : 
     639     4659111 :   KeyExpansion(userKey, rk, key->rounds, (int)(bits/32UL) );
     640     4659111 :   return 0;
     641     4659111 : }
     642             : 
     643             : /**
     644             :  * Expand the cipher key into the decryption key schedule.
     645             :  */
     646             : int
     647             : fd_aes_ref_set_decrypt_key( uchar const *      userKey,
     648             :                             ulong const        bits,
     649         256 :                             fd_aes_key_ref_t * key ) {
     650         256 :   return fd_aes_ref_set_encrypt_key(userKey, bits, key);
     651         256 : }
     652             : 
     653             : /*
     654             :  * Encrypt a single block
     655             :  * in and out can overlap
     656             :  */
     657             : void
     658             : fd_aes_ref_encrypt_core( uchar const *            in,
     659             :                          uchar *                  out,
     660    26355127 :                          fd_aes_key_ref_t const * key ) {
     661             : 
     662    26355127 :   FD_DCHECK_CRIT( in && out && key, "invalid params" );
     663    26355127 :   ulong const * rk = (ulong *)fd_type_pun_const( key->rd_key );
     664             : 
     665    26355127 :   Cipher(in, out, rk, key->rounds);
     666    26355127 : }
     667             : 
     668             : /*
     669             :  * Decrypt a single block
     670             :  * in and out can overlap
     671             :  */
     672             : void
     673             : fd_aes_ref_decrypt_core( uchar const *            in,
     674             :                          uchar *                  out,
     675         256 :                          fd_aes_key_ref_t const * key ) {
     676             : 
     677         256 :   FD_DCHECK_CRIT( in && out && key, "invalid params" );
     678         256 :   ulong const * rk = (ulong const *)fd_type_pun_const( key->rd_key );
     679             : 
     680         256 :   InvCipher(in, out, rk, key->rounds );
     681         256 : }

Generated by: LCOV version 1.14