LCOV - code coverage report
Current view: top level - ballet/aes - fd_aes_base_ref.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 306 482 63.5 %
Date: 2025-01-08 12:08:44 Functions: 12 18 66.7 %

          Line data    Source code
       1             : /* fd_aes_base_ref.c was imported from the OpenSSL project circa 2023-Aug.
       2             :    Original source file:  crypto/aes/aes_core.c */
       3             : 
       4             : /*
       5             :  * Copyright 2002-2022 The OpenSSL Project Authors. All Rights Reserved.
       6             :  *
       7             :  * Licensed under the Apache License 2.0 (the "License").  You may not use
       8             :  * this file except in compliance with the License.  You can obtain a copy
       9             :  * in the file LICENSE in the source distribution or at
      10             :  * https://www.openssl.org/source/license.html
      11             :  */
      12             : 
      13             : /**
      14             :  * rijndael-alg-fst.c
      15             :  *
      16             :  * @version 3.0 (December 2000)
      17             :  *
      18             :  * Optimised ANSI C code for the Rijndael cipher (now AES)
      19             :  *
      20             :  * @author Vincent Rijmen
      21             :  * @author Antoon Bosselaers
      22             :  * @author Paulo Barreto
      23             :  *
      24             :  * This code is hereby placed in the public domain.
      25             :  *
      26             :  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
      27             :  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
      28             :  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
      29             :  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
      30             :  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
      31             :  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
      32             :  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
      33             :  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
      34             :  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
      35             :  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
      36             :  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      37             :  */
      38             : 
      39             : /* Note: rewritten a little bit to provide error control and an OpenSSL-
      40             :    compatible API */
      41             : 
      42             : #include <assert.h>
      43             : #include <stdlib.h>
      44             : #include "fd_aes_gcm_ref.h"
      45             : 
      46             : typedef union {
      47             :   uchar b[8];
      48             :   uint w[2];
      49             :   ulong d;
      50             : } uni;
      51             : 
      52             : /*
      53             :  * Compute w := (w * x) mod (x^8 + x^4 + x^3 + x^1 + 1)
      54             :  * Therefore the name "xtime".
      55             :  */
      56             : static void
      57   124927350 : XtimeWord( uint * w ) {
      58   124927350 :   uint a, b;
      59             : 
      60   124927350 :   a = *w;
      61   124927350 :   b = a & 0x80808080u;
      62   124927350 :   a ^= b;
      63   124927350 :   b -= b >> 7;
      64   124927350 :   b &= 0x1B1B1B1Bu;
      65   124927350 :   b ^= a << 1;
      66   124927350 :   *w = b;
      67   124927350 : }
      68             : 
      69             : static void
      70  3941700714 : XtimeLong( ulong * w ) {
      71  3941700714 :   ulong a, b;
      72             : 
      73  3941700714 :   a = *w;
      74  3941700714 :   b = a & (ulong)(0x8080808080808080);
      75  3941700714 :   a ^= b;
      76  3941700714 :   b -= b >> 7;
      77  3941700714 :   b &= (ulong)(0x1B1B1B1B1B1B1B1B);
      78  3941700714 :   b ^= a << 1;
      79  3941700714 :   *w = b;
      80  3941700714 : }
      81             : 
      82             : /*
      83             :  * This computes w := S * w ^ -1 + c, where c = {01100011}.
      84             :  * Instead of using GF(2^8) mod (x^8+x^4+x^3+x+1) we do the inversion
      85             :  * in GF(GF(GF(2^2)^2)^2) mod (X^2+X+8)
      86             :  * and GF(GF(2^2)^2) mod (X^2+X+2)
      87             :  * and GF(2^2) mod (X^2+X+1)
      88             :  * The first part of the algorithm below transfers the coordinates
      89             :  * {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80} =>
      90             :  * {1,Y,Y^2,Y^3,Y^4,Y^5,Y^6,Y^7} with Y=0x41:
      91             :  * {0x01,0x41,0x66,0x6c,0x56,0x9a,0x58,0xc4}
      92             :  * The last part undoes the coordinate transfer and the final affine
      93             :  * transformation S:
      94             :  * b[i] = b[i] + b[(i+4)%8] + b[(i+5)%8] + b[(i+6)%8] + b[(i+7)%8] + c[i]
      95             :  * in one step.
      96             :  * The multiplication in GF(2^2^2^2) is done in ordinary coords:
      97             :  * A = (a0*1 + a1*x^4)
      98             :  * B = (b0*1 + b1*x^4)
      99             :  * AB = ((a0*b0 + 8*a1*b1)*1 + (a1*b0 + (a0+a1)*b1)*x^4)
     100             :  * When A = (a0,a1) is given we want to solve AB = 1:
     101             :  * (a) 1 = a0*b0 + 8*a1*b1
     102             :  * (b) 0 = a1*b0 + (a0+a1)*b1
     103             :  * => multiply (a) by a1 and (b) by a0
     104             :  * (c) a1 = a1*a0*b0 + (8*a1*a1)*b1
     105             :  * (d) 0 = a1*a0*b0 + (a0*a0+a1*a0)*b1
     106             :  * => add (c) + (d)
     107             :  * (e) a1 = (a0*a0 + a1*a0 + 8*a1*a1)*b1
     108             :  * => therefore
     109             :  * b1 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * a1
     110             :  * => and adding (a1*b0) to (b) we get
     111             :  * (f) a1*b0 = (a0+a1)*b1
     112             :  * => therefore
     113             :  * b0 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * (a0+a1)
     114             :  * Note this formula also works for the case
     115             :  * (a0+a1)*a0 + 8*a1*a1 = 0
     116             :  * if the inverse element for 0^-1 is mapped to 0.
     117             :  * Repeat the same for GF(2^2^2) and GF(2^2).
     118             :  * We get the following algorithm:
     119             :  * inv8(a0,a1):
     120             :  *   x0 = a0^a1
     121             :  *   [y0,y1] = mul4([x0,a1],[a0,a1]); (*)
     122             :  *   y1 = mul4(8,y1);
     123             :  *   t = inv4(y0^y1);
     124             :  *   [b0,b1] = mul4([x0,a1],[t,t]); (*)
     125             :  *   return [b0,b1];
     126             :  * The non-linear multiplies (*) can be done in parallel at no extra cost.
     127             :  */
     128             : static void
     129   124927350 : SubWord( uint * w ) {
     130   124927350 :   uint x, y, a1, a2, a3, a4, a5, a6;
     131             : 
     132   124927350 :   x = *w;
     133   124927350 :   y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
     134   124927350 :   x &= 0xDDDDDDDDu;
     135   124927350 :   x ^= y & 0x57575757u;
     136   124927350 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     137   124927350 :   x ^= y & 0x1C1C1C1Cu;
     138   124927350 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     139   124927350 :   x ^= y & 0x4A4A4A4Au;
     140   124927350 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     141   124927350 :   x ^= y & 0x42424242u;
     142   124927350 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     143   124927350 :   x ^= y & 0x64646464u;
     144   124927350 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     145   124927350 :   x ^= y & 0xE0E0E0E0u;
     146   124927350 :   a1 = x;
     147   124927350 :   a1 ^= (x & 0xF0F0F0F0u) >> 4;
     148   124927350 :   a2 = ((x & 0xCCCCCCCCu) >> 2) | ((x & 0x33333333u) << 2);
     149   124927350 :   a3 = x & a1;
     150   124927350 :   a3 ^= (a3 & 0xAAAAAAAAu) >> 1;
     151   124927350 :   a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAu;
     152   124927350 :   a4 = a2 & a1;
     153   124927350 :   a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
     154   124927350 :   a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAu;
     155   124927350 :   a5 = (a3 & 0xCCCCCCCCu) >> 2;
     156   124927350 :   a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
     157   124927350 :   a4 = a5 & 0x22222222u;
     158   124927350 :   a4 |= a4 >> 1;
     159   124927350 :   a4 ^= (a5 << 1) & 0x22222222u;
     160   124927350 :   a3 ^= a4;
     161   124927350 :   a5 = a3 & 0xA0A0A0A0u;
     162   124927350 :   a5 |= a5 >> 1;
     163   124927350 :   a5 ^= (a3 << 1) & 0xA0A0A0A0u;
     164   124927350 :   a4 = a5 & 0xC0C0C0C0u;
     165   124927350 :   a6 = a4 >> 2;
     166   124927350 :   a4 ^= (a5 << 2) & 0xC0C0C0C0u;
     167   124927350 :   a5 = a6 & 0x20202020u;
     168   124927350 :   a5 |= a5 >> 1;
     169   124927350 :   a5 ^= (a6 << 1) & 0x20202020u;
     170   124927350 :   a4 |= a5;
     171   124927350 :   a3 ^= a4 >> 4;
     172   124927350 :   a3 &= 0x0F0F0F0Fu;
     173   124927350 :   a2 = a3;
     174   124927350 :   a2 ^= (a3 & 0x0C0C0C0Cu) >> 2;
     175   124927350 :   a4 = a3 & a2;
     176   124927350 :   a4 ^= (uint)(a4 & 0x0A0A0A0A0Au) >> 1u;
     177   124927350 :   a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0Au;
     178   124927350 :   a5 = a4 & 0x08080808u;
     179   124927350 :   a5 |= a5 >> 1;
     180   124927350 :   a5 ^= (a4 << 1) & 0x08080808u;
     181   124927350 :   a4 ^= a5 >> 2;
     182   124927350 :   a4 &= 0x03030303u;
     183   124927350 :   a4 ^= (a4 & 0x02020202u) >> 1;
     184   124927350 :   a4 |= a4 << 2;
     185   124927350 :   a3 = a2 & a4;
     186   124927350 :   a3 ^= (a3 & 0x0A0A0A0Au) >> 1;
     187   124927350 :   a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0Au;
     188   124927350 :   a3 |= a3 << 4;
     189   124927350 :   a2 = ((a1 & 0xCCCCCCCCu) >> 2) | ((a1 & 0x33333333u) << 2);
     190   124927350 :   x = a1 & a3;
     191   124927350 :   x ^= (x & 0xAAAAAAAAu) >> 1;
     192   124927350 :   x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAu;
     193   124927350 :   a4 = a2 & a3;
     194   124927350 :   a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
     195   124927350 :   a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAu;
     196   124927350 :   a5 = (x & 0xCCCCCCCCu) >> 2;
     197   124927350 :   x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
     198   124927350 :   a4 = a5 & 0x22222222u;
     199   124927350 :   a4 |= a4 >> 1;
     200   124927350 :   a4 ^= (a5 << 1) & 0x22222222u;
     201   124927350 :   x ^= a4;
     202   124927350 :   y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
     203   124927350 :   x &= 0x39393939u;
     204   124927350 :   x ^= y & 0x3F3F3F3Fu;
     205   124927350 :   y = ((y & 0xFCFCFCFCu) >> 2) | ((y & 0x03030303u) << 6);
     206   124927350 :   x ^= y & 0x97979797u;
     207   124927350 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     208   124927350 :   x ^= y & 0x9B9B9B9Bu;
     209   124927350 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     210   124927350 :   x ^= y & 0x3C3C3C3Cu;
     211   124927350 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     212   124927350 :   x ^= y & 0xDDDDDDDDu;
     213   124927350 :   y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
     214   124927350 :   x ^= y & 0x72727272u;
     215   124927350 :   x ^= 0x63636363u;
     216   124927350 :   *w = x;
     217   124927350 : }
     218             : 
     219             : static void
     220  4379667460 : SubLong( ulong * w ) {
     221  4379667460 :   ulong x, y, a1, a2, a3, a4, a5, a6;
     222             : 
     223  4379667460 :   x = *w;
     224  4379667460 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     225  4379667460 :   x &= (0xDDDDDDDDDDDDDDDDUL);
     226  4379667460 :   x ^= y & (0x5757575757575757UL);
     227  4379667460 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     228  4379667460 :   x ^= y & (0x1C1C1C1C1C1C1C1CUL);
     229  4379667460 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     230  4379667460 :   x ^= y & (0x4A4A4A4A4A4A4A4AUL);
     231  4379667460 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     232  4379667460 :   x ^= y & (0x4242424242424242UL);
     233  4379667460 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     234  4379667460 :   x ^= y & (0x6464646464646464UL);
     235  4379667460 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     236  4379667460 :   x ^= y & (0xE0E0E0E0E0E0E0E0UL);
     237  4379667460 :   a1 = x;
     238  4379667460 :   a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
     239  4379667460 :   a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
     240  4379667460 :   a3 = x & a1;
     241  4379667460 :   a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     242  4379667460 :   a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
     243  4379667460 :   a4 = a2 & a1;
     244  4379667460 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     245  4379667460 :   a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     246  4379667460 :   a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     247  4379667460 :   a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     248  4379667460 :   a4 = a5 & (0x2222222222222222UL);
     249  4379667460 :   a4 |= a4 >> 1;
     250  4379667460 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     251  4379667460 :   a3 ^= a4;
     252  4379667460 :   a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
     253  4379667460 :   a5 |= a5 >> 1;
     254  4379667460 :   a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
     255  4379667460 :   a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
     256  4379667460 :   a6 = a4 >> 2;
     257  4379667460 :   a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
     258  4379667460 :   a5 = a6 & (0x2020202020202020UL);
     259  4379667460 :   a5 |= a5 >> 1;
     260  4379667460 :   a5 ^= (a6 << 1) & (0x2020202020202020UL);
     261  4379667460 :   a4 |= a5;
     262  4379667460 :   a3 ^= a4 >> 4;
     263  4379667460 :   a3 &= (0x0F0F0F0F0F0F0F0FUL);
     264  4379667460 :   a2 = a3;
     265  4379667460 :   a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
     266  4379667460 :   a4 = a3 & a2;
     267  4379667460 :   a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     268  4379667460 :   a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
     269  4379667460 :   a5 = a4 & (0x0808080808080808UL);
     270  4379667460 :   a5 |= a5 >> 1;
     271  4379667460 :   a5 ^= (a4 << 1) & (0x0808080808080808UL);
     272  4379667460 :   a4 ^= a5 >> 2;
     273  4379667460 :   a4 &= (0x0303030303030303UL);
     274  4379667460 :   a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
     275  4379667460 :   a4 |= a4 << 2;
     276  4379667460 :   a3 = a2 & a4;
     277  4379667460 :   a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     278  4379667460 :   a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
     279  4379667460 :   a3 |= a3 << 4;
     280  4379667460 :   a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
     281  4379667460 :   x = a1 & a3;
     282  4379667460 :   x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     283  4379667460 :   x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
     284  4379667460 :   a4 = a2 & a3;
     285  4379667460 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     286  4379667460 :   a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     287  4379667460 :   a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     288  4379667460 :   x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     289  4379667460 :   a4 = a5 & (0x2222222222222222UL);
     290  4379667460 :   a4 |= a4 >> 1;
     291  4379667460 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     292  4379667460 :   x ^= a4;
     293  4379667460 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     294  4379667460 :   x &= (0x3939393939393939UL);
     295  4379667460 :   x ^= y & (0x3F3F3F3F3F3F3F3FUL);
     296  4379667460 :   y = ((y & (0xFCFCFCFCFCFCFCFCUL)) >> 2) | ((y & (0x0303030303030303UL)) << 6);
     297  4379667460 :   x ^= y & (0x9797979797979797UL);
     298  4379667460 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     299  4379667460 :   x ^= y & (0x9B9B9B9B9B9B9B9BUL);
     300  4379667460 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     301  4379667460 :   x ^= y & (0x3C3C3C3C3C3C3C3CUL);
     302  4379667460 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     303  4379667460 :   x ^= y & (0xDDDDDDDDDDDDDDDDUL);
     304  4379667460 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     305  4379667460 :   x ^= y & (0x7272727272727272UL);
     306  4379667460 :   x ^= (0x6363636363636363UL);
     307  4379667460 :   *w = x;
     308  4379667460 : }
     309             : 
     310             : /*
     311             :  * This computes w := (S^-1 * (w + c))^-1
     312             :  */
     313             : static void
     314           0 : InvSubLong( ulong * w ) {
     315           0 :   ulong x, y, a1, a2, a3, a4, a5, a6;
     316             : 
     317           0 :   x = *w;
     318           0 :   x ^= (0x6363636363636363UL);
     319           0 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     320           0 :   x &= (0xFDFDFDFDFDFDFDFDUL);
     321           0 :   x ^= y & (0x5E5E5E5E5E5E5E5EUL);
     322           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     323           0 :   x ^= y & (0xF3F3F3F3F3F3F3F3UL);
     324           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     325           0 :   x ^= y & (0xF5F5F5F5F5F5F5F5UL);
     326           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     327           0 :   x ^= y & (0x7878787878787878UL);
     328           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     329           0 :   x ^= y & (0x7777777777777777UL);
     330           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     331           0 :   x ^= y & (0x1515151515151515UL);
     332           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     333           0 :   x ^= y & (0xA5A5A5A5A5A5A5A5UL);
     334           0 :   a1 = x;
     335           0 :   a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
     336           0 :   a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
     337           0 :   a3 = x & a1;
     338           0 :   a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     339           0 :   a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
     340           0 :   a4 = a2 & a1;
     341           0 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     342           0 :   a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     343           0 :   a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     344           0 :   a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     345           0 :   a4 = a5 & (0x2222222222222222UL);
     346           0 :   a4 |= a4 >> 1;
     347           0 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     348           0 :   a3 ^= a4;
     349           0 :   a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
     350           0 :   a5 |= a5 >> 1;
     351           0 :   a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
     352           0 :   a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
     353           0 :   a6 = a4 >> 2;
     354           0 :   a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
     355           0 :   a5 = a6 & (0x2020202020202020UL);
     356           0 :   a5 |= a5 >> 1;
     357           0 :   a5 ^= (a6 << 1) & (0x2020202020202020UL);
     358           0 :   a4 |= a5;
     359           0 :   a3 ^= a4 >> 4;
     360           0 :   a3 &= (0x0F0F0F0F0F0F0F0FUL);
     361           0 :   a2 = a3;
     362           0 :   a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
     363           0 :   a4 = a3 & a2;
     364           0 :   a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     365           0 :   a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
     366           0 :   a5 = a4 & (0x0808080808080808UL);
     367           0 :   a5 |= a5 >> 1;
     368           0 :   a5 ^= (a4 << 1) & (0x0808080808080808UL);
     369           0 :   a4 ^= a5 >> 2;
     370           0 :   a4 &= (0x0303030303030303UL);
     371           0 :   a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
     372           0 :   a4 |= a4 << 2;
     373           0 :   a3 = a2 & a4;
     374           0 :   a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
     375           0 :   a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
     376           0 :   a3 |= a3 << 4;
     377           0 :   a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
     378           0 :   x = a1 & a3;
     379           0 :   x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     380           0 :   x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
     381           0 :   a4 = a2 & a3;
     382           0 :   a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
     383           0 :   a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
     384           0 :   a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
     385           0 :   x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
     386           0 :   a4 = a5 & (0x2222222222222222UL);
     387           0 :   a4 |= a4 >> 1;
     388           0 :   a4 ^= (a5 << 1) & (0x2222222222222222UL);
     389           0 :   x ^= a4;
     390           0 :   y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
     391           0 :   x &= (0xB5B5B5B5B5B5B5B5UL);
     392           0 :   x ^= y & (0x4040404040404040UL);
     393           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     394           0 :   x ^= y & (0x8080808080808080UL);
     395           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     396           0 :   x ^= y & (0x1616161616161616UL);
     397           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     398           0 :   x ^= y & (0xEBEBEBEBEBEBEBEBUL);
     399           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     400           0 :   x ^= y & (0x9797979797979797UL);
     401           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     402           0 :   x ^= y & (0xFBFBFBFBFBFBFBFBUL);
     403           0 :   y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
     404           0 :   x ^= y & (0x7D7D7D7D7D7D7D7DUL);
     405           0 :   *w = x;
     406           0 : }
     407             : 
     408             : static void
     409  2189833730 : ShiftRows( ulong * state ) {
     410  2189833730 :   uchar s[4];
     411  2189833730 :   uchar *s0;
     412  2189833730 :   int r;
     413             : 
     414  2189833730 :   s0 = (uchar *)state;
     415 10949168650 :   for (r = 0; r < 4; r++) {
     416  8759334920 :     s[0] = s0[0*4 + r];
     417  8759334920 :     s[1] = s0[1*4 + r];
     418  8759334920 :     s[2] = s0[2*4 + r];
     419  8759334920 :     s[3] = s0[3*4 + r];
     420  8759334920 :     s0[0*4 + r] = s[(r+0) % 4];
     421  8759334920 :     s0[1*4 + r] = s[(r+1) % 4];
     422  8759334920 :     s0[2*4 + r] = s[(r+2) % 4];
     423  8759334920 :     s0[3*4 + r] = s[(r+3) % 4];
     424  8759334920 :   }
     425  2189833730 : }
     426             : 
     427             : static void
     428           0 : InvShiftRows( ulong * state ) {
     429           0 :   uchar s[4];
     430           0 :   uchar *s0;
     431           0 :   int r;
     432             : 
     433           0 :   s0 = (uchar *)state;
     434           0 :   for (r = 0; r < 4; r++) {
     435           0 :     s[0] = s0[0*4 + r];
     436           0 :     s[1] = s0[1*4 + r];
     437           0 :     s[2] = s0[2*4 + r];
     438           0 :     s[3] = s0[3*4 + r];
     439           0 :     s0[0*4 + r] = s[(4-r) % 4];
     440           0 :     s0[1*4 + r] = s[(5-r) % 4];
     441           0 :     s0[2*4 + r] = s[(6-r) % 4];
     442           0 :     s0[3*4 + r] = s[(7-r) % 4];
     443           0 :   }
     444           0 : }
     445             : 
     446             : static void
     447  1970850357 : MixColumns( ulong * state ) {
     448  1970850357 :   uni s1;
     449  1970850357 :   uni s;
     450  1970850357 :   int c;
     451             : 
     452  5912551071 :   for (c = 0; c < 2; c++) {
     453  3941700714 :     s1.d = state[c];
     454  3941700714 :     s.d = s1.d;
     455  3941700714 :     s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
     456  3941700714 :             | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
     457  3941700714 :     s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
     458  3941700714 :             | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
     459  3941700714 :     s.d ^= s1.d;
     460  3941700714 :     XtimeLong(&s1.d);
     461  3941700714 :     s.d ^= s1.d;
     462  3941700714 :     s.b[0] ^= s1.b[1];
     463  3941700714 :     s.b[1] ^= s1.b[2];
     464  3941700714 :     s.b[2] ^= s1.b[3];
     465  3941700714 :     s.b[3] ^= s1.b[0];
     466  3941700714 :     s.b[4] ^= s1.b[5];
     467  3941700714 :     s.b[5] ^= s1.b[6];
     468  3941700714 :     s.b[6] ^= s1.b[7];
     469  3941700714 :     s.b[7] ^= s1.b[4];
     470  3941700714 :     state[c] = s.d;
     471  3941700714 :   }
     472  1970850357 : }
     473             : 
     474             : static void InvMixColumns(ulong * state)
     475           0 : {
     476           0 :   uni s1;
     477           0 :   uni s;
     478           0 :   int c;
     479             : 
     480           0 :   for (c = 0; c < 2; c++) {
     481           0 :     s1.d = state[c];
     482           0 :     s.d = s1.d;
     483           0 :     s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
     484           0 :             | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
     485           0 :     s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
     486           0 :             | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
     487           0 :     s.d ^= s1.d;
     488           0 :     XtimeLong(&s1.d);
     489           0 :     s.d ^= s1.d;
     490           0 :     s.b[0] ^= s1.b[1];
     491           0 :     s.b[1] ^= s1.b[2];
     492           0 :     s.b[2] ^= s1.b[3];
     493           0 :     s.b[3] ^= s1.b[0];
     494           0 :     s.b[4] ^= s1.b[5];
     495           0 :     s.b[5] ^= s1.b[6];
     496           0 :     s.b[6] ^= s1.b[7];
     497           0 :     s.b[7] ^= s1.b[4];
     498           0 :     XtimeLong(&s1.d);
     499           0 :     s1.d ^= ((s1.d & (0xFFFF0000FFFF0000UL)) >> 16)
     500           0 :             | ((s1.d & (0x0000FFFF0000FFFFUL)) << 16);
     501           0 :     s.d ^= s1.d;
     502           0 :     XtimeLong(&s1.d);
     503           0 :     s1.d ^= ((s1.d & (0xFF00FF00FF00FF00UL)) >> 8)
     504           0 :             | ((s1.d & (0x00FF00FF00FF00FFUL)) << 8);
     505           0 :     s.d ^= s1.d;
     506           0 :     state[c] = s.d;
     507           0 :   }
     508           0 : }
     509             : 
     510             : static void
     511             : AddRoundKey( ulong *       state,
     512  2408817103 :              ulong const * w ) {
     513  2408817103 :   state[0] ^= w[0];
     514  2408817103 :   state[1] ^= w[1];
     515  2408817103 : }
     516             : 
     517             : static void
     518             : Cipher( uchar const * in,
     519             :         uchar *       out,
     520             :         ulong const * w,
     521   218983373 :         int           nr ) {
     522   218983373 :   ulong state[2];
     523   218983373 :   int i;
     524             : 
     525   218983373 :   memcpy(state, in, 16);
     526             : 
     527   218983373 :   AddRoundKey(state, w);
     528             : 
     529  2189833730 :   for (i = 1; i < nr; i++) {
     530  1970850357 :     SubLong(&state[0]);
     531  1970850357 :     SubLong(&state[1]);
     532  1970850357 :     ShiftRows(state);
     533  1970850357 :     MixColumns(state);
     534  1970850357 :     AddRoundKey(state, w + i*2);
     535  1970850357 :   }
     536             : 
     537   218983373 :   SubLong(&state[0]);
     538   218983373 :   SubLong(&state[1]);
     539   218983373 :   ShiftRows(state);
     540   218983373 :   AddRoundKey(state, w + nr*2);
     541             : 
     542   218983373 :   memcpy(out, state, 16);
     543   218983373 : }
     544             : 
     545             : static void
     546             : InvCipher( uchar const * in,
     547             :            uchar *       out,
     548             :            ulong const * w,
     549           0 :            int           nr ) {
     550           0 :   ulong state[2];
     551           0 :   int i;
     552             : 
     553           0 :   memcpy(state, in, 16);
     554             : 
     555           0 :   AddRoundKey(state, w + nr*2);
     556             : 
     557           0 :   for (i = nr - 1; i > 0; i--) {
     558           0 :     InvShiftRows(state);
     559           0 :     InvSubLong(&state[0]);
     560           0 :     InvSubLong(&state[1]);
     561           0 :     AddRoundKey(state, w + i*2);
     562           0 :     InvMixColumns(state);
     563           0 :   }
     564             : 
     565           0 :   InvShiftRows(state);
     566           0 :   InvSubLong(&state[0]);
     567           0 :   InvSubLong(&state[1]);
     568           0 :   AddRoundKey(state, w);
     569             : 
     570           0 :   memcpy(out, state, 16);
     571           0 : }
     572             : 
     573             : static void
     574   124927350 : RotWord( uint * x ) {
     575   124927350 :   uchar *w0;
     576   124927350 :   uchar tmp;
     577             : 
     578   124927350 :   w0 = (uchar *)x;
     579   124927350 :   tmp = w0[0];
     580   124927350 :   w0[0] = w0[1];
     581   124927350 :   w0[1] = w0[2];
     582   124927350 :   w0[2] = w0[3];
     583   124927350 :   w0[3] = tmp;
     584   124927350 : }
     585             : 
     586             : static void
     587             : KeyExpansion( uchar const * key,
     588             :               ulong *       w,
     589             :               int           nr,
     590    12492735 :               int           nk ) {
     591    12492735 :   uint rcon;
     592    12492735 :   uni prev;
     593    12492735 :   uint temp;
     594    12492735 :   int i, n;
     595             : 
     596    12492735 :   memcpy( w, key, (ulong)nk*4UL );
     597    12492735 :   memcpy( &rcon, "\1\0\0\0", 4  );
     598    12492735 :   n = nk/2;
     599    12492735 :   prev.d = w[n-1];
     600   262347435 :   for (i = n; i < (nr+1)*2; i++) {
     601   249854700 :     temp = prev.w[1];
     602   249854700 :     if (i % n == 0) {
     603   124927350 :       RotWord(&temp);
     604   124927350 :       SubWord(&temp);
     605   124927350 :       temp ^= rcon;
     606   124927350 :       XtimeWord(&rcon);
     607   124927350 :     } else if (nk > 6 && i % n == 2) {
     608           0 :       SubWord(&temp);
     609           0 :     }
     610   249854700 :     prev.d = w[i-n];
     611   249854700 :     prev.w[0] ^= temp;
     612   249854700 :     prev.w[1] ^= prev.w[0];
     613   249854700 :     w[i] = prev.d;
     614   249854700 :   }
     615    12492735 : }
     616             : 
     617             : /**
     618             :  * Expand the cipher key into the encryption key schedule.
     619             :  */
     620             : int
     621             : fd_aes_ref_set_encrypt_key( uchar const *      userKey,
     622             :                             ulong const        bits,
     623    12492735 :                             fd_aes_key_ref_t * key ) {
     624    12492735 :   ulong *rk;
     625             : 
     626    12492735 :   if (!userKey || !key)
     627           0 :       return -1;
     628    12492735 :   if (bits != 128 && bits != 192 && bits != 256)
     629           0 :       return -2;
     630             : 
     631    12492735 :   rk = (ulong *)fd_type_pun( key->rd_key );  /* strict aliasing violation */
     632             : 
     633    12492735 :   if (bits == 128)
     634    12492735 :       key->rounds = 10;
     635           0 :   else if (bits == 192)
     636           0 :       key->rounds = 12;
     637           0 :   else
     638           0 :       key->rounds = 14;
     639             : 
     640    12492735 :   KeyExpansion(userKey, rk, key->rounds, (int)(bits/32UL) );
     641    12492735 :   return 0;
     642    12492735 : }
     643             : 
     644             : /**
     645             :  * Expand the cipher key into the decryption key schedule.
     646             :  */
     647             : int
     648             : fd_aes_ref_set_decrypt_key( uchar const *      userKey,
     649             :                             ulong const        bits,
     650           0 :                             fd_aes_key_ref_t * key ) {
     651           0 :   return fd_aes_ref_set_encrypt_key(userKey, bits, key);
     652           0 : }
     653             : 
     654             : /*
     655             :  * Encrypt a single block
     656             :  * in and out can overlap
     657             :  */
     658             : void
     659             : fd_aes_ref_encrypt_core( uchar const *            in,
     660             :                          uchar *                  out,
     661   218983373 :                          fd_aes_key_ref_t const * key ) {
     662             : 
     663   218983373 :   assert(in && out && key);
     664           0 :   ulong const * rk = (ulong *)fd_type_pun_const( key->rd_key );
     665             : 
     666   218983373 :   Cipher(in, out, rk, key->rounds);
     667   218983373 : }
     668             : 
     669             : /*
     670             :  * Decrypt a single block
     671             :  * in and out can overlap
     672             :  */
     673             : void
     674             : fd_aes_ref_decrypt_core( uchar const *            in,
     675             :                          uchar *                  out,
     676           0 :                          fd_aes_key_ref_t const * key ) {
     677             : 
     678           0 :   assert(in && out && key);
     679           0 :   ulong const * rk = (ulong const *)fd_type_pun_const( key->rd_key );
     680             : 
     681           0 :   InvCipher(in, out, rk, key->rounds );
     682           0 : }

Generated by: LCOV version 1.14