LCOV - code coverage report
Current view: top level - ballet/aes - fd_aes_base_ref.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 307 482 63.7 %
Date: 2024-11-13 11:58:15 Functions: 12 18 66.7 %

          Line data    Source code
       1             : /* fd_aes_base_ref.c was imported from the OpenSSL project circa 2023-Aug.
       2             :    Original source file:  crypto/aes/aes_core.c */
       3             : 
       4             : /*
       5             :  * Copyright 2002-2022 The OpenSSL Project Authors. All Rights Reserved.
       6             :  *
       7             :  * Licensed under the Apache License 2.0 (the "License").  You may not use
       8             :  * this file except in compliance with the License.  You can obtain a copy
       9             :  * in the file LICENSE in the source distribution or at
      10             :  * https://www.openssl.org/source/license.html
      11             :  */
      12             : 
      13             : /**
      14             :  * rijndael-alg-fst.c
      15             :  *
      16             :  * @version 3.0 (December 2000)
      17             :  *
      18             :  * Optimised ANSI C code for the Rijndael cipher (now AES)
      19             :  *
      20             :  * @author Vincent Rijmen
      21             :  * @author Antoon Bosselaers
      22             :  * @author Paulo Barreto
      23             :  *
      24             :  * This code is hereby placed in the public domain.
      25             :  *
      26             :  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
      27             :  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
      28             :  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      29             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
      30             :  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      31             :  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
      32             :  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
      33             :  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      34             :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
      35             :  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
      36             :  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      37             :  */
      38             : 
      39             : /* Note: rewritten a little bit to provide error control and an OpenSSL-
      40             :    compatible API */
      41             : 
      42             : #include <assert.h>
      43             : #include <stdlib.h>
      44             : #include "fd_aes_gcm_ref.h"
      45             : 
      46             : typedef union {
      47             :   uchar b[8];
      48             :   uint w[2];
      49             :   ulong d;
      50             : } uni;
      51             : 
      52             : /*
      53             :  * Compute w := (w * x) mod (x^8 + x^4 + x^3 + x^1 + 1)
      54             :  * Therefore the name "xtime".
      55             :  */
      56             : static void
      57   131410530 : XtimeWord( uint * w ) {
      58   131410530 :   uint a, b;
      59             : 
      60   131410530 :   a = *w;
      61   131410530 :   b = a & 0x80808080u;
      62   131410530 :   a ^= b;
      63   131410530 :   b -= b >> 7;
      64   131410530 :   b &= 0x1B1B1B1Bu;
      65   131410530 :   b ^= a << 1;
      66   131410530 :   *w = b;
      67   131410530 : }
      68             : 
      69             : static void
      70  3934194930 : XtimeLong( ulong * w ) {
      71  3934194930 :   ulong a, b;
      72             : 
      73  3934194930 :   a = *w;
      74  3934194930 :   b = a & (ulong)(0x8080808080808080);
      75  3934194930 :   a ^= b;
      76  3934194930 :   b -= b >> 7;
      77  3934194930 :   b &= (ulong)(0x1B1B1B1B1B1B1B1B);
      78  3934194930 :   b ^= a << 1;
      79  3934194930 :   *w = b;
      80  3934194930 : }
      81             : 
      82             : /*
      83             :  * This computes w := S * w ^ -1 + c, where c = {01100011}.
      84             :  * Instead of using GF(2^8) mod (x^8+x^4+x^3+x+1) we do the inversion
      85             :  * in GF(GF(GF(2^2)^2)^2) mod (X^2+X+8)
      86             :  * and GF(GF(2^2)^2) mod (X^2+X+2)
      87             :  * and GF(2^2) mod (X^2+X+1)
      88             :  * The first part of the algorithm below transfers the coordinates
      89             :  * {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80} =>
      90             :  * {1,Y,Y^2,Y^3,Y^4,Y^5,Y^6,Y^7} with Y=0x41:
      91             :  * {0x01,0x41,0x66,0x6c,0x56,0x9a,0x58,0xc4}
      92             :  * The last part undoes the coordinate transfer and the final affine
      93             :  * transformation S:
      94             :  * b[i] = b[i] + b[(i+4)%8] + b[(i+5)%8] + b[(i+6)%8] + b[(i+7)%8] + c[i]
      95             :  * in one step.
      96             :  * The multiplication in GF(2^2^2^2) is done in ordinary coords:
      97             :  * A = (a0*1 + a1*x^4)
      98             :  * B = (b0*1 + b1*x^4)
      99             :  * AB = ((a0*b0 + 8*a1*b1)*1 + (a1*b0 + (a0+a1)*b1)*x^4)
     100             :  * When A = (a0,a1) is given we want to solve AB = 1:
     101             :  * (a) 1 = a0*b0 + 8*a1*b1
     102             :  * (b) 0 = a1*b0 + (a0+a1)*b1
     103             :  * => multiply (a) by a1 and (b) by a0
     104             :  * (c) a1 = a1*a0*b0 + (8*a1*a1)*b1
     105             :  * (d) 0 = a1*a0*b0 + (a0*a0+a1*a0)*b1
     106             :  * => add (c) + (d)
     107             :  * (e) a1 = (a0*a0 + a1*a0 + 8*a1*a1)*b1
     108             :  * => therefore
     109             :  * b1 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * a1
     110             :  * => and adding (a1*b0) to (b) we get
     111             :  * (f) a1*b0 = (a0+a1)*b1
     112             :  * => therefore
     113             :  * b0 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * (a0+a1)
     114             :  * Note this formula also works for the case
     115             :  * (a0+a1)*a0 + 8*a1*a1 = 0
     116             :  * if the inverse element for 0^-1 is mapped to 0.
     117             :  * Repeat the same for GF(2^2^2) and GF(2^2).
     118             :  * We get the following algorithm:
     119             :  * inv8(a0,a1):
     120             :  *   x0 = a0^a1
     121             :  *   [y0,y1] = mul4([x0,a1],[a0,a1]); (*)
     122             :  *   y1 = mul4(8,y1);
     123             :  *   t = inv4(y0^y1);
     124             :  *   [b0,b1] = mul4([x0,a1],[t,t]); (*)
     125             :  *   return [b0,b1];
     126             :  * The non-linear multiplies (*) can be done in parallel at no extra cost.
     127             :  */
     128             : static void
     129   131410530 : SubWord( uint * w ) {
     130   131410530 :   uint x, y, a1, a2, a3, a4, a5, a6;
     131             : 
     132   131410530 :   x = *w;
     133   131410530 :   y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
     134   131410530 :   x &= 0xDDDDDDDDu;
     135   131410530 :   x ^= y & 0x57575757u;
     136   131410530 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     137   131410530 :   x ^= y & 0x1C1C1C1Cu;
     138   131410530 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     139   131410530 :   x ^= y & 0x4A4A4A4Au;
     140   131410530 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     141   131410530 :   x ^= y & 0x42424242u;
     142   131410530 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     143   131410530 :   x ^= y & 0x64646464u;
     144   131410530 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     145   131410530 :   x ^= y & 0xE0E0E0E0u;
     146   131410530 :   a1 = x;
     147   131410530 :   a1 ^= (x & 0xF0F0F0F0u) >> 4;
     148   131410530 :   a2 = ((x & 0xCCCCCCCCu) >> 2) | ((x & 0x33333333u) << 2);
     149   131410530 :   a3 = x & a1;
     150   131410530 :   a3 ^= (a3 & 0xAAAAAAAAu) >> 1;
     151   131410530 :   a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAu;
     152   131410530 :   a4 = a2 & a1;
     153   131410530 :   a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
     154   131410530 :   a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAu;
     155   131410530 :   a5 = (a3 & 0xCCCCCCCCu) >> 2;
     156   131410530 :   a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
     157   131410530 :   a4 = a5 & 0x22222222u;
     158   131410530 :   a4 |= a4 >> 1;
     159   131410530 :   a4 ^= (a5 << 1) & 0x22222222u;
     160   131410530 :   a3 ^= a4;
     161   131410530 :   a5 = a3 & 0xA0A0A0A0u;
     162   131410530 :   a5 |= a5 >> 1;
     163   131410530 :   a5 ^= (a3 << 1) & 0xA0A0A0A0u;
     164   131410530 :   a4 = a5 & 0xC0C0C0C0u;
     165   131410530 :   a6 = a4 >> 2;
     166   131410530 :   a4 ^= (a5 << 2) & 0xC0C0C0C0u;
     167   131410530 :   a5 = a6 & 0x20202020u;
     168   131410530 :   a5 |= a5 >> 1;
     169   131410530 :   a5 ^= (a6 << 1) & 0x20202020u;
     170   131410530 :   a4 |= a5;
     171   131410530 :   a3 ^= a4 >> 4;
     172   131410530 :   a3 &= 0x0F0F0F0Fu;
     173   131410530 :   a2 = a3;
     174   131410530 :   a2 ^= (a3 & 0x0C0C0C0Cu) >> 2;
     175   131410530 :   a4 = a3 & a2;
     176   131410530 :   a4 ^= (uint)(a4 & 0x0A0A0A0A0Au) >> 1u;
     177   131410530 :   a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0Au;
     178   131410530 :   a5 = a4 & 0x08080808u;
     179   131410530 :   a5 |= a5 >> 1;
     180   131410530 :   a5 ^= (a4 << 1) & 0x08080808u;
     181   131410530 :   a4 ^= a5 >> 2;
     182   131410530 :   a4 &= 0x03030303u;
     183   131410530 :   a4 ^= (a4 & 0x02020202u) >> 1;
     184   131410530 :   a4 |= a4 << 2;
     185   131410530 :   a3 = a2 & a4;
     186   131410530 :   a3 ^= (a3 & 0x0A0A0A0Au) >> 1;
     187   131410530 :   a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0Au;
     188   131410530 :   a3 |= a3 << 4;
     189   131410530 :   a2 = ((a1 & 0xCCCCCCCCu) >> 2) | ((a1 & 0x33333333u) << 2);
     190   131410530 :   x = a1 & a3;
     191   131410530 :   x ^= (x & 0xAAAAAAAAu) >> 1;
     192   131410530 :   x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAu;
     193   131410530 :   a4 = a2 & a3;
     194   131410530 :   a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
     195   131410530 :   a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAu;
     196   131410530 :   a5 = (x & 0xCCCCCCCCu) >> 2;
     197   131410530 :   x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
     198   131410530 :   a4 = a5 & 0x22222222u;
     199   131410530 :   a4 |= a4 >> 1;
     200   131410530 :   a4 ^= (a5 << 1) & 0x22222222u;
     201   131410530 :   x ^= a4;
     202   131410530 :   y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
     203   131410530 :   x &= 0x39393939u;
     204   131410530 :   x ^= y & 0x3F3F3F3Fu;
     205   131410530 :   y = ((y & 0xFCFCFCFCu) >> 2) | ((y & 0x03030303u) << 6);
     206   131410530 :   x ^= y & 0x97979797u;
     207   131410530 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     208   131410530 :   x ^= y & 0x9B9B9B9Bu;
     209   131410530 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     210   131410530 :   x ^= y & 0x3C3C3C3Cu;
     211   131410530 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     212   131410530 :   x ^= y & 0xDDDDDDDDu;
     213   131410530 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     214   131410530 :   x ^= y & 0x72727272u;
     215   131410530 :   x ^= 0x63636363u;
     216   131410530 :   *w = x;
     217   131410530 : }
     218             : 
     219             : static void
     220  4371327700 : SubLong( ulong * w ) {
     221  4371327700 :   ulong x, y, a1, a2, a3, a4, a5, a6;
     222             : 
     223  4371327700 :   x = *w;
     224  4371327700 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     225  4371327700 :   x &= (0xDDDDDDDDDDDDDDDDUL);
     226  4371327700 :   x ^= y & (0x5757575757575757UL);
     227  4371327700 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     228  4371327700 :   x ^= y & (0x1C1C1C1C1C1C1C1CUL);
     229  4371327700 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     230  4371327700 :   x ^= y & (0x4A4A4A4A4A4A4A4AUL);
     231  4371327700 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     232  4371327700 :   x ^= y & (0x4242424242424242UL);
     233  4371327700 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     234  4371327700 :   x ^= y & (0x6464646464646464UL);
     235  4371327700 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     236  4371327700 :   x ^= y & (0xE0E0E0E0E0E0E0E0UL);
     237  4371327700 :   a1 = x;
     238  4371327700 :   a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
     239  4371327700 :   a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
     240  4371327700 :   a3 = x & a1;
     241  4371327700 :   a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     242  4371327700 :   a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
     243  4371327700 :   a4 = a2 & a1;
     244  4371327700 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     245  4371327700 :   a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     246  4371327700 :   a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     247  4371327700 :   a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     248  4371327700 :   a4 = a5 & (0x2222222222222222UL);
     249  4371327700 :   a4 |= a4 >> 1;
     250  4371327700 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     251  4371327700 :   a3 ^= a4;
     252  4371327700 :   a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
     253  4371327700 :   a5 |= a5 >> 1;
     254  4371327700 :   a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
     255  4371327700 :   a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
     256  4371327700 :   a6 = a4 >> 2;
     257  4371327700 :   a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
     258  4371327700 :   a5 = a6 & (0x2020202020202020UL);
     259  4371327700 :   a5 |= a5 >> 1;
     260  4371327700 :   a5 ^= (a6 << 1) & (0x2020202020202020UL);
     261  4371327700 :   a4 |= a5;
     262  4371327700 :   a3 ^= a4 >> 4;
     263  4371327700 :   a3 &= (0x0F0F0F0F0F0F0F0FUL);
     264  4371327700 :   a2 = a3;
     265  4371327700 :   a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
     266  4371327700 :   a4 = a3 & a2;
     267  4371327700 :   a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     268  4371327700 :   a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
     269  4371327700 :   a5 = a4 & (0x0808080808080808UL);
     270  4371327700 :   a5 |= a5 >> 1;
     271  4371327700 :   a5 ^= (a4 << 1) & (0x0808080808080808UL);
     272  4371327700 :   a4 ^= a5 >> 2;
     273  4371327700 :   a4 &= (0x0303030303030303UL);
     274  4371327700 :   a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
     275  4371327700 :   a4 |= a4 << 2;
     276  4371327700 :   a3 = a2 & a4;
     277  4371327700 :   a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     278  4371327700 :   a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
     279  4371327700 :   a3 |= a3 << 4;
     280  4371327700 :   a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
     281  4371327700 :   x = a1 & a3;
     282  4371327700 :   x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     283  4371327700 :   x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
     284  4371327700 :   a4 = a2 & a3;
     285  4371327700 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     286  4371327700 :   a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     287  4371327700 :   a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     288  4371327700 :   x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     289  4371327700 :   a4 = a5 & (0x2222222222222222UL);
     290  4371327700 :   a4 |= a4 >> 1;
     291  4371327700 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     292  4371327700 :   x ^= a4;
     293  4371327700 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     294  4371327700 :   x &= (0x3939393939393939UL);
     295  4371327700 :   x ^= y & (0x3F3F3F3F3F3F3F3FUL);
     296  4371327700 :   y = ((y & (0xFCFCFCFCFCFCFCFCUL)) >> 2) | ((y & (0x0303030303030303UL)) << 6);
     297  4371327700 :   x ^= y & (0x9797979797979797UL);
     298  4371327700 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     299  4371327700 :   x ^= y & (0x9B9B9B9B9B9B9B9BUL);
     300  4371327700 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     301  4371327700 :   x ^= y & (0x3C3C3C3C3C3C3C3CUL);
     302  4371327700 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     303  4371327700 :   x ^= y & (0xDDDDDDDDDDDDDDDDUL);
     304  4371327700 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     305  4371327700 :   x ^= y & (0x7272727272727272UL);
     306  4371327700 :   x ^= (0x6363636363636363UL);
     307  4371327700 :   *w = x;
     308  4371327700 : }
     309             : 
     310             : /*
     311             :  * This computes w := (S^-1 * (w + c))^-1
     312             :  */
     313             : static void
     314           0 : InvSubLong( ulong * w ) {
     315           0 :   ulong x, y, a1, a2, a3, a4, a5, a6;
     316             : 
     317           0 :   x = *w;
     318           0 :   x ^= (0x6363636363636363UL);
     319           0 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     320           0 :   x &= (0xFDFDFDFDFDFDFDFDUL);
     321           0 :   x ^= y & (0x5E5E5E5E5E5E5E5EUL);
     322           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     323           0 :   x ^= y & (0xF3F3F3F3F3F3F3F3UL);
     324           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     325           0 :   x ^= y & (0xF5F5F5F5F5F5F5F5UL);
     326           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     327           0 :   x ^= y & (0x7878787878787878UL);
     328           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     329           0 :   x ^= y & (0x7777777777777777UL);
     330           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     331           0 :   x ^= y & (0x1515151515151515UL);
     332           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     333           0 :   x ^= y & (0xA5A5A5A5A5A5A5A5UL);
     334           0 :   a1 = x;
     335           0 :   a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
     336           0 :   a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
     337           0 :   a3 = x & a1;
     338           0 :   a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     339           0 :   a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
     340           0 :   a4 = a2 & a1;
     341           0 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     342           0 :   a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     343           0 :   a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     344           0 :   a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     345           0 :   a4 = a5 & (0x2222222222222222UL);
     346           0 :   a4 |= a4 >> 1;
     347           0 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     348           0 :   a3 ^= a4;
     349           0 :   a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
     350           0 :   a5 |= a5 >> 1;
     351           0 :   a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
     352           0 :   a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
     353           0 :   a6 = a4 >> 2;
     354           0 :   a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
     355           0 :   a5 = a6 & (0x2020202020202020UL);
     356           0 :   a5 |= a5 >> 1;
     357           0 :   a5 ^= (a6 << 1) & (0x2020202020202020UL);
     358           0 :   a4 |= a5;
     359           0 :   a3 ^= a4 >> 4;
     360           0 :   a3 &= (0x0F0F0F0F0F0F0F0FUL);
     361           0 :   a2 = a3;
     362           0 :   a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
     363           0 :   a4 = a3 & a2;
     364           0 :   a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     365           0 :   a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
     366           0 :   a5 = a4 & (0x0808080808080808UL);
     367           0 :   a5 |= a5 >> 1;
     368           0 :   a5 ^= (a4 << 1) & (0x0808080808080808UL);
     369           0 :   a4 ^= a5 >> 2;
     370           0 :   a4 &= (0x0303030303030303UL);
     371           0 :   a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
     372           0 :   a4 |= a4 << 2;
     373           0 :   a3 = a2 & a4;
     374           0 :   a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     375           0 :   a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
     376           0 :   a3 |= a3 << 4;
     377           0 :   a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
     378           0 :   x = a1 & a3;
     379           0 :   x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     380           0 :   x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
     381           0 :   a4 = a2 & a3;
     382           0 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     383           0 :   a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     384           0 :   a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     385           0 :   x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     386           0 :   a4 = a5 & (0x2222222222222222UL);
     387           0 :   a4 |= a4 >> 1;
     388           0 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     389           0 :   x ^= a4;
     390           0 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     391           0 :   x &= (0xB5B5B5B5B5B5B5B5UL);
     392           0 :   x ^= y & (0x4040404040404040UL);
     393           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     394           0 :   x ^= y & (0x8080808080808080UL);
     395           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     396           0 :   x ^= y & (0x1616161616161616UL);
     397           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     398           0 :   x ^= y & (0xEBEBEBEBEBEBEBEBUL);
     399           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     400           0 :   x ^= y & (0x9797979797979797UL);
     401           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     402           0 :   x ^= y & (0xFBFBFBFBFBFBFBFBUL);
     403           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     404           0 :   x ^= y & (0x7D7D7D7D7D7D7D7DUL);
     405           0 :   *w = x;
     406           0 : }
     407             : 
     408             : static void
     409  2185663850 : ShiftRows( ulong * state ) {
     410  2185663850 :   uchar s[4];
     411  2185663850 :   uchar *s0;
     412  2185663850 :   int r;
     413             : 
     414  2185663850 :   s0 = (uchar *)state;
     415 10928319250 :   for (r = 0; r < 4; r++) {
     416  8742655400 :     s[0] = s0[0*4 + r];
     417  8742655400 :     s[1] = s0[1*4 + r];
     418  8742655400 :     s[2] = s0[2*4 + r];
     419  8742655400 :     s[3] = s0[3*4 + r];
     420  8742655400 :     s0[0*4 + r] = s[(r+0) % 4];
     421  8742655400 :     s0[1*4 + r] = s[(r+1) % 4];
     422  8742655400 :     s0[2*4 + r] = s[(r+2) % 4];
     423  8742655400 :     s0[3*4 + r] = s[(r+3) % 4];
     424  8742655400 :   }
     425  2185663850 : }
     426             : 
     427             : static void
     428           0 : InvShiftRows( ulong * state ) {
     429           0 :   uchar s[4];
     430           0 :   uchar *s0;
     431           0 :   int r;
     432             : 
     433           0 :   s0 = (uchar *)state;
     434           0 :   for (r = 0; r < 4; r++) {
     435           0 :     s[0] = s0[0*4 + r];
     436           0 :     s[1] = s0[1*4 + r];
     437           0 :     s[2] = s0[2*4 + r];
     438           0 :     s[3] = s0[3*4 + r];
     439           0 :     s0[0*4 + r] = s[(4-r) % 4];
     440           0 :     s0[1*4 + r] = s[(5-r) % 4];
     441           0 :     s0[2*4 + r] = s[(6-r) % 4];
     442           0 :     s0[3*4 + r] = s[(7-r) % 4];
     443           0 :   }
     444           0 : }
     445             : 
     446             : static void
     447  1967097465 : MixColumns( ulong * state ) {
     448  1967097465 :   uni s1;
     449  1967097465 :   uni s;
     450  1967097465 :   int c;
     451             : 
     452  5901292395 :   for (c = 0; c < 2; c++) {
     453  3934194930 :     s1.d = state[c];
     454  3934194930 :     s.d = s1.d;
     455  3934194930 :     s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
     456  3934194930 :             | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
     457  3934194930 :     s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
     458  3934194930 :             | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
     459  3934194930 :     s.d ^= s1.d;
     460  3934194930 :     XtimeLong(&s1.d);
     461  3934194930 :     s.d ^= s1.d;
     462  3934194930 :     s.b[0] ^= s1.b[1];
     463  3934194930 :     s.b[1] ^= s1.b[2];
     464  3934194930 :     s.b[2] ^= s1.b[3];
     465  3934194930 :     s.b[3] ^= s1.b[0];
     466  3934194930 :     s.b[4] ^= s1.b[5];
     467  3934194930 :     s.b[5] ^= s1.b[6];
     468  3934194930 :     s.b[6] ^= s1.b[7];
     469  3934194930 :     s.b[7] ^= s1.b[4];
     470  3934194930 :     state[c] = s.d;
     471  3934194930 :   }
     472  1967097465 : }
     473             : 
     474             : static void InvMixColumns(ulong * state)
     475           0 : {
     476           0 :   uni s1;
     477           0 :   uni s;
     478           0 :   int c;
     479             : 
     480           0 :   for (c = 0; c < 2; c++) {
     481           0 :     s1.d = state[c];
     482           0 :     s.d = s1.d;
     483           0 :     s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
     484           0 :             | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
     485           0 :     s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
     486           0 :             | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
     487           0 :     s.d ^= s1.d;
     488           0 :     XtimeLong(&s1.d);
     489           0 :     s.d ^= s1.d;
     490           0 :     s.b[0] ^= s1.b[1];
     491           0 :     s.b[1] ^= s1.b[2];
     492           0 :     s.b[2] ^= s1.b[3];
     493           0 :     s.b[3] ^= s1.b[0];
     494           0 :     s.b[4] ^= s1.b[5];
     495           0 :     s.b[5] ^= s1.b[6];
     496           0 :     s.b[6] ^= s1.b[7];
     497           0 :     s.b[7] ^= s1.b[4];
     498           0 :     XtimeLong(&s1.d);
     499           0 :     s1.d ^= ((s1.d & (0xFFFF0000FFFF0000UL)) >> 16)
     500           0 :             | ((s1.d & (0x0000FFFF0000FFFFUL)) << 16);
     501           0 :     s.d ^= s1.d;
     502           0 :     XtimeLong(&s1.d);
     503           0 :     s1.d ^= ((s1.d & (0xFF00FF00FF00FF00UL)) >> 8)
     504           0 :             | ((s1.d & (0x00FF00FF00FF00FFUL)) << 8);
     505           0 :     s.d ^= s1.d;
     506           0 :     state[c] = s.d;
     507           0 :   }
     508           0 : }
     509             : 
     510             : static void
     511             : AddRoundKey( ulong *       state,
     512  2404230235 :              ulong const * w ) {
     513  2404230235 :   state[0] ^= w[0];
     514  2404230235 :   state[1] ^= w[1];
     515  2404230235 : }
     516             : 
     517             : static void
     518             : Cipher( uchar const * in,
     519             :         uchar *       out,
     520             :         ulong const * w,
     521   218566385 :         int           nr ) {
     522   218566385 :   ulong state[2];
     523   218566385 :   int i;
     524             : 
     525   218566385 :   memcpy(state, in, 16);
     526             : 
     527   218566385 :   AddRoundKey(state, w);
     528             : 
     529  2185663850 :   for (i = 1; i < nr; i++) {
     530  1967097465 :     SubLong(&state[0]);
     531  1967097465 :     SubLong(&state[1]);
     532  1967097465 :     ShiftRows(state);
     533  1967097465 :     MixColumns(state);
     534  1967097465 :     AddRoundKey(state, w + i*2);
     535  1967097465 :   }
     536             : 
     537   218566385 :   SubLong(&state[0]);
     538   218566385 :   SubLong(&state[1]);
     539   218566385 :   ShiftRows(state);
     540   218566385 :   AddRoundKey(state, w + nr*2);
     541             : 
     542   218566385 :   memcpy(out, state, 16);
     543   218566385 : }
     544             : 
     545             : static void
     546             : InvCipher( uchar const * in,
     547             :            uchar *       out,
     548             :            ulong const * w,
     549           0 :            int           nr ) {
     550           0 :   ulong state[2];
     551           0 :   int i;
     552             : 
     553           0 :   memcpy(state, in, 16);
     554             : 
     555           0 :   AddRoundKey(state, w + nr*2);
     556             : 
     557           0 :   for (i = nr - 1; i > 0; i--) {
     558           0 :     InvShiftRows(state);
     559           0 :     InvSubLong(&state[0]);
     560           0 :     InvSubLong(&state[1]);
     561           0 :     AddRoundKey(state, w + i*2);
     562           0 :     InvMixColumns(state);
     563           0 :   }
     564             : 
     565           0 :   InvShiftRows(state);
     566           0 :   InvSubLong(&state[0]);
     567           0 :   InvSubLong(&state[1]);
     568           0 :   AddRoundKey(state, w);
     569             : 
     570           0 :   memcpy(out, state, 16);
     571           0 : }
     572             : 
     573             : static void
     574   131410530 : RotWord( uint * x ) {
     575   131410530 :   uchar *w0;
     576   131410530 :   uchar tmp;
     577             : 
     578   131410530 :   w0 = (uchar *)x;
     579   131410530 :   tmp = w0[0];
     580   131410530 :   w0[0] = w0[1];
     581   131410530 :   w0[1] = w0[2];
     582   131410530 :   w0[2] = w0[3];
     583   131410530 :   w0[3] = tmp;
     584   131410530 : }
     585             : 
     586             : static void
     587             : KeyExpansion( uchar const * key,
     588             :               ulong *       w,
     589             :               int           nr,
     590    13141053 :               int           nk ) {
     591    13141053 :   uint rcon;
     592    13141053 :   uni prev;
     593    13141053 :   uint temp;
     594    13141053 :   int i, n;
     595             : 
     596    13141053 :   memcpy( w, key, (ulong)nk*4UL );
     597    13141053 :   memcpy( &rcon, "\1\0\0\0", 4  );
     598    13141053 :   n = nk/2;
     599    13141053 :   prev.d = w[n-1];
     600   275962113 :   for (i = n; i < (nr+1)*2; i++) {
     601   262821060 :     temp = prev.w[1];
     602   262821060 :     if (i % n == 0) {
     603   131410530 :       RotWord(&temp);
     604   131410530 :       SubWord(&temp);
     605   131410530 :       temp ^= rcon;
     606   131410530 :       XtimeWord(&rcon);
     607   131410530 :     } else if (nk > 6 && i % n == 2) {
     608           0 :       SubWord(&temp);
     609           0 :     }
     610   262821060 :     prev.d = w[i-n];
     611   262821060 :     prev.w[0] ^= temp;
     612   262821060 :     prev.w[1] ^= prev.w[0];
     613   262821060 :     w[i] = prev.d;
     614   262821060 :   }
     615    13141053 : }
     616             : 
     617             : /**
     618             :  * Expand the cipher key into the encryption key schedule.
     619             :  */
     620             : int
     621             : fd_aes_ref_set_encrypt_key( uchar const *      userKey,
     622             :                             ulong const        bits,
     623    13141053 :                             fd_aes_key_ref_t * key ) {
     624    13141053 :   ulong *rk;
     625             : 
     626    13141053 :   if (!userKey || !key)
     627           0 :       return -1;
     628    13141053 :   if (bits != 128 && bits != 192 && bits != 256)
     629           0 :       return -2;
     630             : 
     631    13141053 :   rk = (ulong *)fd_type_pun( key->rd_key );  /* strict aliasing violation */
     632             : 
     633    13141053 :   if (bits == 128)
     634    13141053 :       key->rounds = 10;
     635           0 :   else if (bits == 192)
     636           0 :       key->rounds = 12;
     637           0 :   else
     638           0 :       key->rounds = 14;
     639             : 
     640    13141053 :   KeyExpansion(userKey, rk, key->rounds, (int)(bits/32UL) );
     641    13141053 :   return 0;
     642    13141053 : }
     643             : 
     644             : /**
     645             :  * Expand the cipher key into the decryption key schedule.
     646             :  */
     647             : int
     648             : fd_aes_ref_set_decrypt_key( uchar const *      userKey,
     649             :                             ulong const        bits,
     650           0 :                             fd_aes_key_ref_t * key ) {
     651           0 :   return fd_aes_ref_set_encrypt_key(userKey, bits, key);
     652           0 : }
     653             : 
     654             : /*
     655             :  * Encrypt a single block
     656             :  * in and out can overlap
     657             :  */
     658             : void
     659             : fd_aes_ref_encrypt_core( uchar const *            in,
     660             :                          uchar *                  out,
     661   218566385 :                          fd_aes_key_ref_t const * key ) {
     662             : 
     663   218566385 :   assert(in && out && key);
     664   218566385 :   ulong const * rk = (ulong *)fd_type_pun_const( key->rd_key );
     665             : 
     666   218566385 :   Cipher(in, out, rk, key->rounds);
     667   218566385 : }
     668             : 
     669             : /*
     670             :  * Decrypt a single block
     671             :  * in and out can overlap
     672             :  */
     673             : void
     674             : fd_aes_ref_decrypt_core( uchar const *            in,
     675             :                          uchar *                  out,
     676           0 :                          fd_aes_key_ref_t const * key ) {
     677             : 
     678           0 :   assert(in && out && key);
     679           0 :   ulong const * rk = (ulong const *)fd_type_pun_const( key->rd_key );
     680             : 
     681           0 :   InvCipher(in, out, rk, key->rounds );
     682           0 : }

Generated by: LCOV version 1.14