Line data Source code
1 : /* fd_aes_ref.c was imported from the OpenSSL project circa 2023-Aug.
2 : Original source file: crypto/aes/aes_core.c */
3 :
4 : /*
5 : * Copyright 2002-2022 The OpenSSL Project Authors. All Rights Reserved.
6 : *
7 : * Licensed under the Apache License 2.0 (the "License"). You may not use
8 : * this file except in compliance with the License. You can obtain a copy
9 : * in the file LICENSE in the source distribution or at
10 : * https://www.openssl.org/source/license.html
11 : */
12 :
13 : /**
14 : * rijndael-alg-fst.c
15 : *
16 : * @version 3.0 (December 2000)
17 : *
18 : * Optimised ANSI C code for the Rijndael cipher (now AES)
19 : *
20 : * @author Vincent Rijmen
21 : * @author Antoon Bosselaers
22 : * @author Paulo Barreto
23 : *
24 : * This code is hereby placed in the public domain.
25 : *
26 : * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
27 : * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
28 : * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 : * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
30 : * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 : * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 : * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
33 : * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
34 : * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
35 : * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
36 : * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 : */
38 :
39 : /* Note: rewritten a little bit to provide error control and an OpenSSL-
40 : compatible API */
41 :
42 : #include <stdlib.h>
43 : #include "fd_aes_gcm_ref.h"
44 :
/* uni overlays one storage area as eight bytes (b), two uint words (w) or
   a single ulong (d).  KeyExpansion uses the w/d views and the MixColumns
   routines use the b/d views, so they can change granularity without
   pointer casts.
   NOTE(review): assumes uint is 4 bytes and ulong is 8 bytes -- confirm
   against the project typedefs. */
45 : typedef union {
46 : uchar b[8];
47 : uint w[2];
48 : ulong d;
49 : } uni;
50 :
51 : /*
52 : * Compute w := (w * x) mod (x^8 + x^4 + x^3 + x^1 + 1)
53 : * Therefore the name "xtime".
54 : */
/* XtimeWord multiplies every byte covered by the 32-bit masks below by x
   in GF(2^8), reducing with the constant 0x1B, and stores the result back
   through w.  All bytes are processed in parallel inside one integer. */
55 : static void
56 46591110 : XtimeWord( uint * w ) {
57 46591110 : uint a, b;
58 :
59 46591110 : a = *w;
/* b keeps only the top bit of each byte (the bit that overflows on x2) */
60 46591110 : b = a & 0x80808080u;
/* clear those top bits so the shift below cannot spill into the next byte */
61 46591110 : a ^= b;
/* each 0x80 becomes 0x7F, which the mask then narrows to 0x1B */
62 46591110 : b -= b >> 7;
63 46591110 : b &= 0x1B1B1B1Bu;
/* combine the doubled value with the per-byte reduction */
64 46591110 : b ^= a << 1;
65 46591110 : *w = b;
66 46591110 : }
67 :
/* XtimeLong is the 64-bit-mask counterpart of XtimeWord: every byte of *w
   is multiplied by x in GF(2^8) (reduction constant 0x1B) in parallel and
   the result is written back through w. */
68 : static void
69 474406110 : XtimeLong( ulong * w ) {
70 474406110 : ulong a, b;
71 :
72 474406110 : a = *w;
/* isolate the top bit of each byte */
73 474406110 : b = a & (ulong)(0x8080808080808080);
/* remove it from a so that a << 1 stays inside each byte */
74 474406110 : a ^= b;
/* 0x80 -> 0x7F per byte, then masked down to the reduction byte 0x1B */
75 474406110 : b -= b >> 7;
76 474406110 : b &= (ulong)(0x1B1B1B1B1B1B1B1B);
77 474406110 : b ^= a << 1;
78 474406110 : *w = b;
79 474406110 : }
80 :
81 : /*
82 : * This computes w := S * w ^ -1 + c, where c = {01100011}.
83 : * Instead of using GF(2^8) mod (x^8+x^4+x^3+x+1) we do the inversion
84 : * in GF(GF(GF(2^2)^2)^2) mod (X^2+X+8)
85 : * and GF(GF(2^2)^2) mod (X^2+X+2)
86 : * and GF(2^2) mod (X^2+X+1)
87 : * The first part of the algorithm below transfers the coordinates
88 : * {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80} =>
89 : * {1,Y,Y^2,Y^3,Y^4,Y^5,Y^6,Y^7} with Y=0x41:
90 : * {0x01,0x41,0x66,0x6c,0x56,0x9a,0x58,0xc4}
91 : * The last part undoes the coordinate transfer and the final affine
92 : * transformation S:
93 : * b[i] = b[i] + b[(i+4)%8] + b[(i+5)%8] + b[(i+6)%8] + b[(i+7)%8] + c[i]
94 : * in one step.
95 : * The multiplication in GF(2^2^2^2) is done in ordinary coords:
96 : * A = (a0*1 + a1*x^4)
97 : * B = (b0*1 + b1*x^4)
98 : * AB = ((a0*b0 + 8*a1*b1)*1 + (a1*b0 + (a0+a1)*b1)*x^4)
99 : * When A = (a0,a1) is given we want to solve AB = 1:
100 : * (a) 1 = a0*b0 + 8*a1*b1
101 : * (b) 0 = a1*b0 + (a0+a1)*b1
102 : * => multiply (a) by a1 and (b) by a0
103 : * (c) a1 = a1*a0*b0 + (8*a1*a1)*b1
104 : * (d) 0 = a1*a0*b0 + (a0*a0+a1*a0)*b1
105 : * => add (c) + (d)
106 : * (e) a1 = (a0*a0 + a1*a0 + 8*a1*a1)*b1
107 : * => therefore
108 : * b1 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * a1
109 : * => and adding (a1*b0) to (b) we get
110 : * (f) a1*b0 = (a0+a1)*b1
111 : * => therefore
112 : * b0 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * (a0+a1)
113 : * Note this formula also works for the case
114 : * (a0+a1)*a0 + 8*a1*a1 = 0
115 : * if the inverse element for 0^-1 is mapped to 0.
116 : * Repeat the same for GF(2^2^2) and GF(2^2).
117 : * We get the following algorithm:
118 : * inv8(a0,a1):
119 : * x0 = a0^a1
120 : * [y0,y1] = mul4([x0,a1],[a0,a1]); (*)
121 : * y1 = mul4(8,y1);
122 : * t = inv4(y0^y1);
123 : * [b0,b1] = mul4([x0,a1],[t,t]); (*)
124 : * return [b0,b1];
125 : * The non-linear multiplies (*) can be done in parallel at no extra cost.
126 : */
/* SubWord replaces *w with its byte-wise substitution, computed with
   masks and shifts only (no lookup table); all bytes packed in the uint
   are handled in parallel.  The steps follow the algorithm description in
   the comment that precedes this function.
   The recurring expression ((v & 0xFE..) >> 1) | ((v & 0x01..) << 7)
   rotates every byte of v right by one bit. */
127 : static void
128 46591110 : SubWord( uint * w ) {
129 46591110 : uint x, y, a1, a2, a3, a4, a5, a6;
130 :
131 46591110 : x = *w;
/* Linear input stage: x is rebuilt from masked per-byte rotations of
   itself (the coordinate transfer named in the description above). */
132 46591110 : y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
133 46591110 : x &= 0xDDDDDDDDu;
134 46591110 : x ^= y & 0x57575757u;
135 46591110 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
136 46591110 : x ^= y & 0x1C1C1C1Cu;
137 46591110 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
138 46591110 : x ^= y & 0x4A4A4A4Au;
139 46591110 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
140 46591110 : x ^= y & 0x42424242u;
141 46591110 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
142 46591110 : x ^= y & 0x64646464u;
143 46591110 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
144 46591110 : x ^= y & 0xE0E0E0E0u;
/* Non-linear stage.  a1 keeps the high nibble of each byte and holds
   (low nibble ^ high nibble) in the low nibble; a2 is x with the two
   2-bit halves of every nibble swapped. */
145 46591110 : a1 = x;
146 46591110 : a1 ^= (x & 0xF0F0F0F0u) >> 4;
147 46591110 : a2 = ((x & 0xCCCCCCCCu) >> 2) | ((x & 0x33333333u) << 2);
148 46591110 : a3 = x & a1;
149 46591110 : a3 ^= (a3 & 0xAAAAAAAAu) >> 1;
150 46591110 : a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAu;
151 46591110 : a4 = a2 & a1;
152 46591110 : a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
153 46591110 : a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAu;
154 46591110 : a5 = (a3 & 0xCCCCCCCCu) >> 2;
155 46591110 : a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
156 46591110 : a4 = a5 & 0x22222222u;
157 46591110 : a4 |= a4 >> 1;
158 46591110 : a4 ^= (a5 << 1) & 0x22222222u;
159 46591110 : a3 ^= a4;
160 46591110 : a5 = a3 & 0xA0A0A0A0u;
161 46591110 : a5 |= a5 >> 1;
162 46591110 : a5 ^= (a3 << 1) & 0xA0A0A0A0u;
163 46591110 : a4 = a5 & 0xC0C0C0C0u;
164 46591110 : a6 = a4 >> 2;
165 46591110 : a4 ^= (a5 << 2) & 0xC0C0C0C0u;
166 46591110 : a5 = a6 & 0x20202020u;
167 46591110 : a5 |= a5 >> 1;
168 46591110 : a5 ^= (a6 << 1) & 0x20202020u;
169 46591110 : a4 |= a5;
170 46591110 : a3 ^= a4 >> 4;
/* From here only the low nibble of each byte is live (presumably the
   4-bit inversion step, inv4, of the description above). */
171 46591110 : a3 &= 0x0F0F0F0Fu;
172 46591110 : a2 = a3;
173 46591110 : a2 ^= (a3 & 0x0C0C0C0Cu) >> 2;
174 46591110 : a4 = a3 & a2;
/* 32-bit mask, same as the neighbouring lines; the previous 40-bit literal
   only worked because the result was truncated back to uint. */
175 46591110 : a4 ^= (a4 & 0x0A0A0A0Au) >> 1;
176 46591110 : a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0Au;
177 46591110 : a5 = a4 & 0x08080808u;
178 46591110 : a5 |= a5 >> 1;
179 46591110 : a5 ^= (a4 << 1) & 0x08080808u;
180 46591110 : a4 ^= a5 >> 2;
181 46591110 : a4 &= 0x03030303u;
182 46591110 : a4 ^= (a4 & 0x02020202u) >> 1;
183 46591110 : a4 |= a4 << 2;
184 46591110 : a3 = a2 & a4;
185 46591110 : a3 ^= (a3 & 0x0A0A0A0Au) >> 1;
186 46591110 : a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0Au;
/* copy the low-nibble result into the high nibble of every byte */
187 46591110 : a3 |= a3 << 4;
188 46591110 : a2 = ((a1 & 0xCCCCCCCCu) >> 2) | ((a1 & 0x33333333u) << 2);
189 46591110 : x = a1 & a3;
190 46591110 : x ^= (x & 0xAAAAAAAAu) >> 1;
191 46591110 : x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAu;
192 46591110 : a4 = a2 & a3;
193 46591110 : a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
194 46591110 : a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAu;
195 46591110 : a5 = (x & 0xCCCCCCCCu) >> 2;
196 46591110 : x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
197 46591110 : a4 = a5 & 0x22222222u;
198 46591110 : a4 |= a4 >> 1;
199 46591110 : a4 ^= (a5 << 1) & 0x22222222u;
200 46591110 : x ^= a4;
/* Linear output stage: masked per-byte rotations again (the second one
   rotates by two bits), followed by XOR with 0x63 in every byte, the
   constant c = {01100011} of the description above. */
201 46591110 : y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
202 46591110 : x &= 0x39393939u;
203 46591110 : x ^= y & 0x3F3F3F3Fu;
204 46591110 : y = ((y & 0xFCFCFCFCu) >> 2) | ((y & 0x03030303u) << 6);
205 46591110 : x ^= y & 0x97979797u;
206 46591110 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
207 46591110 : x ^= y & 0x9B9B9B9Bu;
208 46591110 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
209 46591110 : x ^= y & 0x3C3C3C3Cu;
210 46591110 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
211 46591110 : x ^= y & 0xDDDDDDDDu;
212 46591110 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
213 46591110 : x ^= y & 0x72727272u;
214 46591110 : x ^= 0x63636363u;
215 46591110 : *w = x;
216 46591110 : }
217 :
/* SubLong is the 64-bit-mask counterpart of SubWord: it applies the same
   sequence of mask/shift steps to every byte of *w in parallel and writes
   the substituted value back through w.  Cipher calls it once per half of
   the 16-byte state.
   The recurring expression ((v & 0xFE..) >> 1) | ((v & 0x01..) << 7)
   rotates every byte of v right by one bit. */
218 : static void
219 527102540 : SubLong( ulong * w ) {
220 527102540 : ulong x, y, a1, a2, a3, a4, a5, a6;
221 :
222 527102540 : x = *w;
/* Linear input stage built from masked per-byte rotations of x. */
223 527102540 : y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
224 527102540 : x &= (0xDDDDDDDDDDDDDDDDUL);
225 527102540 : x ^= y & (0x5757575757575757UL);
226 527102540 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
227 527102540 : x ^= y & (0x1C1C1C1C1C1C1C1CUL);
228 527102540 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
229 527102540 : x ^= y & (0x4A4A4A4A4A4A4A4AUL);
230 527102540 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
231 527102540 : x ^= y & (0x4242424242424242UL);
232 527102540 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
233 527102540 : x ^= y & (0x6464646464646464UL);
234 527102540 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
235 527102540 : x ^= y & (0xE0E0E0E0E0E0E0E0UL);
/* Non-linear stage.  a1 keeps the high nibble of each byte and holds
   (low nibble ^ high nibble) in the low nibble; a2 is x with the two
   2-bit halves of every nibble swapped. */
236 527102540 : a1 = x;
237 527102540 : a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
238 527102540 : a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
239 527102540 : a3 = x & a1;
240 527102540 : a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
241 527102540 : a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
242 527102540 : a4 = a2 & a1;
243 527102540 : a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
244 527102540 : a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
245 527102540 : a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
246 527102540 : a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
247 527102540 : a4 = a5 & (0x2222222222222222UL);
248 527102540 : a4 |= a4 >> 1;
249 527102540 : a4 ^= (a5 << 1) & (0x2222222222222222UL);
250 527102540 : a3 ^= a4;
251 527102540 : a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
252 527102540 : a5 |= a5 >> 1;
253 527102540 : a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
254 527102540 : a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
255 527102540 : a6 = a4 >> 2;
256 527102540 : a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
257 527102540 : a5 = a6 & (0x2020202020202020UL);
258 527102540 : a5 |= a5 >> 1;
259 527102540 : a5 ^= (a6 << 1) & (0x2020202020202020UL);
260 527102540 : a4 |= a5;
261 527102540 : a3 ^= a4 >> 4;
/* From here only the low nibble of each byte is live. */
262 527102540 : a3 &= (0x0F0F0F0F0F0F0F0FUL);
263 527102540 : a2 = a3;
264 527102540 : a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
265 527102540 : a4 = a3 & a2;
266 527102540 : a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
267 527102540 : a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
268 527102540 : a5 = a4 & (0x0808080808080808UL);
269 527102540 : a5 |= a5 >> 1;
270 527102540 : a5 ^= (a4 << 1) & (0x0808080808080808UL);
271 527102540 : a4 ^= a5 >> 2;
272 527102540 : a4 &= (0x0303030303030303UL);
273 527102540 : a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
274 527102540 : a4 |= a4 << 2;
275 527102540 : a3 = a2 & a4;
276 527102540 : a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
277 527102540 : a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
/* copy the low-nibble result into the high nibble of every byte */
278 527102540 : a3 |= a3 << 4;
279 527102540 : a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
280 527102540 : x = a1 & a3;
281 527102540 : x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
282 527102540 : x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
283 527102540 : a4 = a2 & a3;
284 527102540 : a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
285 527102540 : a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
286 527102540 : a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
287 527102540 : x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
288 527102540 : a4 = a5 & (0x2222222222222222UL);
289 527102540 : a4 |= a4 >> 1;
290 527102540 : a4 ^= (a5 << 1) & (0x2222222222222222UL);
291 527102540 : x ^= a4;
/* Linear output stage: masked per-byte rotations again (the second one
   rotates by two bits), then XOR with 0x63 in every byte. */
292 527102540 : y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
293 527102540 : x &= (0x3939393939393939UL);
294 527102540 : x ^= y & (0x3F3F3F3F3F3F3F3FUL);
295 527102540 : y = ((y & (0xFCFCFCFCFCFCFCFCUL)) >> 2) | ((y & (0x0303030303030303UL)) << 6);
296 527102540 : x ^= y & (0x9797979797979797UL);
297 527102540 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
298 527102540 : x ^= y & (0x9B9B9B9B9B9B9B9BUL);
299 527102540 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
300 527102540 : x ^= y & (0x3C3C3C3C3C3C3C3CUL);
301 527102540 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
302 527102540 : x ^= y & (0xDDDDDDDDDDDDDDDDUL);
303 527102540 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
304 527102540 : x ^= y & (0x7272727272727272UL);
305 527102540 : x ^= (0x6363636363636363UL);
306 527102540 : *w = x;
307 527102540 : }
308 :
309 : /*
310 : * This computes w := (S^-1 * (w + c))^-1
311 : */
/* InvSubLong applies the inverse byte substitution to every byte of *w in
   parallel and writes the result back through w.  InvCipher calls it once
   per half of the 16-byte state.
   Structure: XOR 0x63 into every byte first, then a linear stage of masked
   per-byte rotations, the same non-linear middle section as SubLong, and a
   closing linear stage of masked rotations. */
312 : static void
313 5120 : InvSubLong( ulong * w ) {
314 5120 : ulong x, y, a1, a2, a3, a4, a5, a6;
315 :
316 5120 : x = *w;
/* remove the additive constant that the forward substitution applies last */
317 5120 : x ^= (0x6363636363636363UL);
/* Linear input stage (seven one-bit rotations of y, each masked into x).
   NOTE(review): presumably the inverse affine map folded together with the
   coordinate change -- confirm against the derivation. */
318 5120 : y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
319 5120 : x &= (0xFDFDFDFDFDFDFDFDUL);
320 5120 : x ^= y & (0x5E5E5E5E5E5E5E5EUL);
321 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
322 5120 : x ^= y & (0xF3F3F3F3F3F3F3F3UL);
323 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
324 5120 : x ^= y & (0xF5F5F5F5F5F5F5F5UL);
325 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
326 5120 : x ^= y & (0x7878787878787878UL);
327 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
328 5120 : x ^= y & (0x7777777777777777UL);
329 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
330 5120 : x ^= y & (0x1515151515151515UL);
331 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
332 5120 : x ^= y & (0xA5A5A5A5A5A5A5A5UL);
/* Non-linear stage, statement-for-statement the same as in SubLong. */
333 5120 : a1 = x;
334 5120 : a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
335 5120 : a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
336 5120 : a3 = x & a1;
337 5120 : a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
338 5120 : a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
339 5120 : a4 = a2 & a1;
340 5120 : a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
341 5120 : a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
342 5120 : a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
343 5120 : a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
344 5120 : a4 = a5 & (0x2222222222222222UL);
345 5120 : a4 |= a4 >> 1;
346 5120 : a4 ^= (a5 << 1) & (0x2222222222222222UL);
347 5120 : a3 ^= a4;
348 5120 : a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
349 5120 : a5 |= a5 >> 1;
350 5120 : a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
351 5120 : a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
352 5120 : a6 = a4 >> 2;
353 5120 : a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
354 5120 : a5 = a6 & (0x2020202020202020UL);
355 5120 : a5 |= a5 >> 1;
356 5120 : a5 ^= (a6 << 1) & (0x2020202020202020UL);
357 5120 : a4 |= a5;
358 5120 : a3 ^= a4 >> 4;
359 5120 : a3 &= (0x0F0F0F0F0F0F0F0FUL);
360 5120 : a2 = a3;
361 5120 : a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
362 5120 : a4 = a3 & a2;
363 5120 : a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
364 5120 : a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
365 5120 : a5 = a4 & (0x0808080808080808UL);
366 5120 : a5 |= a5 >> 1;
367 5120 : a5 ^= (a4 << 1) & (0x0808080808080808UL);
368 5120 : a4 ^= a5 >> 2;
369 5120 : a4 &= (0x0303030303030303UL);
370 5120 : a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
371 5120 : a4 |= a4 << 2;
372 5120 : a3 = a2 & a4;
373 5120 : a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
374 5120 : a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
375 5120 : a3 |= a3 << 4;
376 5120 : a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
377 5120 : x = a1 & a3;
378 5120 : x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
379 5120 : x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
380 5120 : a4 = a2 & a3;
381 5120 : a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
382 5120 : a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
383 5120 : a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
384 5120 : x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
385 5120 : a4 = a5 & (0x2222222222222222UL);
386 5120 : a4 |= a4 >> 1;
387 5120 : a4 ^= (a5 << 1) & (0x2222222222222222UL);
388 5120 : x ^= a4;
/* Linear output stage: seven one-bit rotations of y, each masked into x.
   Unlike SubLong there is no trailing XOR constant. */
389 5120 : y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
390 5120 : x &= (0xB5B5B5B5B5B5B5B5UL);
391 5120 : x ^= y & (0x4040404040404040UL);
392 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
393 5120 : x ^= y & (0x8080808080808080UL);
394 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
395 5120 : x ^= y & (0x1616161616161616UL);
396 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
397 5120 : x ^= y & (0xEBEBEBEBEBEBEBEBUL);
398 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
399 5120 : x ^= y & (0x9797979797979797UL);
400 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
401 5120 : x ^= y & (0xFBFBFBFBFBFBFBFBUL);
402 5120 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
403 5120 : x ^= y & (0x7D7D7D7D7D7D7D7DUL);
404 5120 : *w = x;
405 5120 : }
406 :
/* ShiftRows permutes the 16 state bytes in place.  The state is read as
   bytes indexed c*4 + r (column c, row r); row r is rotated left by r
   positions, so row 0 is unchanged and row 3 moves three columns. */
407 : static void
408 263551270 : ShiftRows( ulong * state ) {
409 263551270 : uchar s[4];
410 263551270 : uchar *s0;
411 263551270 : int r;
412 :
413 263551270 : s0 = (uchar *)state;
414 1317756350 : for (r = 0; r < 4; r++) {
/* copy row r out first so the writes below cannot clobber unread bytes */
415 1054205080 : s[0] = s0[0*4 + r];
416 1054205080 : s[1] = s0[1*4 + r];
417 1054205080 : s[2] = s0[2*4 + r];
418 1054205080 : s[3] = s0[3*4 + r];
/* column c receives the byte that was in column (c + r) mod 4 */
419 1054205080 : s0[0*4 + r] = s[(r+0) % 4];
420 1054205080 : s0[1*4 + r] = s[(r+1) % 4];
421 1054205080 : s0[2*4 + r] = s[(r+2) % 4];
422 1054205080 : s0[3*4 + r] = s[(r+3) % 4];
423 1054205080 : }
424 263551270 : }
425 :
/* InvShiftRows undoes ShiftRows in place: with the state read as bytes
   indexed c*4 + r (column c, row r), row r is rotated right by r
   positions. */
426 : static void
427 2560 : InvShiftRows( ulong * state ) {
428 2560 : uchar s[4];
429 2560 : uchar *s0;
430 2560 : int r;
431 :
432 2560 : s0 = (uchar *)state;
433 12800 : for (r = 0; r < 4; r++) {
/* snapshot row r before overwriting it */
434 10240 : s[0] = s0[0*4 + r];
435 10240 : s[1] = s0[1*4 + r];
436 10240 : s[2] = s0[2*4 + r];
437 10240 : s[3] = s0[3*4 + r];
/* column c receives the byte that was in column (c - r) mod 4; the 4..7
   offsets keep the left operand of % non-negative */
438 10240 : s0[0*4 + r] = s[(4-r) % 4];
439 10240 : s0[1*4 + r] = s[(5-r) % 4];
440 10240 : s0[2*4 + r] = s[(6-r) % 4];
441 10240 : s0[3*4 + r] = s[(7-r) % 4];
442 10240 : }
443 2304 : }
444 :
/* MixColumns transforms the state in place.  Each ulong holds two 4-byte
   columns, processed together; every output byte becomes
     2*b[i] ^ 3*b[i+1] ^ b[i+2] ^ b[i+3]   (indices mod 4 within a column),
   where 2* is the GF(2^8) doubling done by XtimeLong and 3*v = 2*v ^ v. */
445 : static void
446 237196143 : MixColumns( ulong * state ) {
447 237196143 : uni s1;
448 237196143 : uni s;
449 237196143 : int c;
450 :
451 711588429 : for (c = 0; c < 2; c++) {
452 474392286 : s1.d = state[c];
453 474392286 : s.d = s1.d;
/* XOR with the 16-bit-swapped copy, then with the 8-bit-swapped copy:
   every byte of s now holds the XOR of all four bytes of its column */
454 474392286 : s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
455 474392286 : | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
456 474392286 : s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
457 474392286 : | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
/* remove the byte's own value: s = XOR of the other three bytes */
458 474392286 : s.d ^= s1.d;
/* s1 = 2*b; adding it gives 2*b[i] ^ (other three) */
459 474392286 : XtimeLong(&s1.d);
460 474392286 : s.d ^= s1.d;
/* add 2*b[i+1] so the neighbour's coefficient becomes 3 */
461 474392286 : s.b[0] ^= s1.b[1];
462 474392286 : s.b[1] ^= s1.b[2];
463 474392286 : s.b[2] ^= s1.b[3];
464 474392286 : s.b[3] ^= s1.b[0];
465 474392286 : s.b[4] ^= s1.b[5];
466 474392286 : s.b[5] ^= s1.b[6];
467 474392286 : s.b[6] ^= s1.b[7];
468 474392286 : s.b[7] ^= s1.b[4];
469 474392286 : state[c] = s.d;
470 474392286 : }
471 237196143 : }
472 :
/* InvMixColumns transforms the state in place.  It first performs the same
   steps as MixColumns (coefficients 2,3,1,1) and then adds two correction
   terms, 4*(b[i]^b[i+2]) and 8*(b[0]^b[1]^b[2]^b[3]), which raises the
   per-byte coefficients to 0x0E, 0x0B, 0x0D, 0x09. */
473 : static void InvMixColumns(ulong * state)
474 2304 : {
475 2304 : uni s1;
476 2304 : uni s;
477 2304 : int c;
478 :
479 6912 : for (c = 0; c < 2; c++) {
480 4608 : s1.d = state[c];
481 4608 : s.d = s1.d;
/* s = XOR of all four bytes of the column, replicated into each byte */
482 4608 : s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
483 4608 : | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
484 4608 : s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
485 4608 : | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
486 4608 : s.d ^= s1.d;
/* s1 = 2*b; same accumulation as the forward MixColumns */
487 4608 : XtimeLong(&s1.d);
488 4608 : s.d ^= s1.d;
489 4608 : s.b[0] ^= s1.b[1];
490 4608 : s.b[1] ^= s1.b[2];
491 4608 : s.b[2] ^= s1.b[3];
492 4608 : s.b[3] ^= s1.b[0];
493 4608 : s.b[4] ^= s1.b[5];
494 4608 : s.b[5] ^= s1.b[6];
495 4608 : s.b[6] ^= s1.b[7];
496 4608 : s.b[7] ^= s1.b[4];
/* s1 = 4*b, folded with its 16-bit swap: 4*(b[i] ^ b[i+2]) */
497 4608 : XtimeLong(&s1.d);
498 4608 : s1.d ^= ((s1.d & (0xFFFF0000FFFF0000UL)) >> 16)
499 4608 : | ((s1.d & (0x0000FFFF0000FFFFUL)) << 16);
500 4608 : s.d ^= s1.d;
/* s1 = 8*(b[i] ^ b[i+2]), folded with its 8-bit swap: 8*(all four bytes) */
501 4608 : XtimeLong(&s1.d);
502 4608 : s1.d ^= ((s1.d & (0xFF00FF00FF00FF00UL)) >> 8)
503 4608 : | ((s1.d & (0x00FF00FF00FF00FFUL)) << 8);
504 4608 : s.d ^= s1.d;
505 4608 : state[c] = s.d;
506 4608 : }
507 2304 : }
508 :
/* AddRoundKey XORs one round key (two ulongs read from w) into the
   two-ulong state, in place. */
509 : static void
510 : AddRoundKey( ulong * state,
511 289909213 : ulong const * w ) {
512 289909213 : state[0] ^= w[0];
513 289909213 : state[1] ^= w[1];
514 289909213 : }
515 :
/* Cipher encrypts one 16-byte block.
   in  : 16 input bytes (copied into a local state before anything is
         written, so in and out may refer to the same buffer)
   out : receives the 16 output bytes
   w   : expanded key, two ulongs per round key, round keys 0..nr
   nr  : number of rounds */
516 : static void
517 : Cipher( uchar const * in,
518 : uchar * out,
519 : ulong const * w,
520 26355127 : int nr ) {
521 26355127 : ulong state[2];
522 26355127 : int i;
523 :
524 26355127 : memcpy(state, in, 16);
525 :
/* initial key addition with round key 0 */
526 26355127 : AddRoundKey(state, w);
527 :
/* rounds 1 .. nr-1: substitute, shift rows, mix columns, add round key i */
528 263551270 : for (i = 1; i < nr; i++) {
529 237196143 : SubLong(&state[0]);
530 237196143 : SubLong(&state[1]);
531 237196143 : ShiftRows(state);
532 237196143 : MixColumns(state);
533 237196143 : AddRoundKey(state, w + i*2);
534 237196143 : }
535 :
/* final round omits MixColumns */
536 26355127 : SubLong(&state[0]);
537 26355127 : SubLong(&state[1]);
538 26355127 : ShiftRows(state);
539 26355127 : AddRoundKey(state, w + nr*2);
540 :
541 26355127 : memcpy(out, state, 16);
542 26355127 : }
543 :
/* InvCipher decrypts one 16-byte block using the same key schedule layout
   as Cipher, walking the round keys from nr down to 0.
   in  : 16 input bytes (copied into a local state first, so in and out
         may refer to the same buffer)
   out : receives the 16 output bytes
   w   : expanded key, two ulongs per round key, round keys 0..nr
   nr  : number of rounds */
544 : static void
545 : InvCipher( uchar const * in,
546 : uchar * out,
547 : ulong const * w,
548 256 : int nr ) {
549 256 : ulong state[2];
550 256 : int i;
551 :
552 256 : memcpy(state, in, 16);
553 :
/* undo the last key addition of the forward cipher */
554 256 : AddRoundKey(state, w + nr*2);
555 :
/* rounds nr-1 .. 1: inverse shift, inverse substitute, add key i, inverse mix */
556 2560 : for (i = nr - 1; i > 0; i--) {
557 2304 : InvShiftRows(state);
558 2304 : InvSubLong(&state[0]);
559 2304 : InvSubLong(&state[1]);
560 2304 : AddRoundKey(state, w + i*2);
561 2304 : InvMixColumns(state);
562 2304 : }
563 :
/* last step uses round key 0 and has no InvMixColumns */
564 256 : InvShiftRows(state);
565 256 : InvSubLong(&state[0]);
566 256 : InvSubLong(&state[1]);
567 256 : AddRoundKey(state, w);
568 :
569 256 : memcpy(out, state, 16);
570 256 : }
571 :
/* RotWord rotates the four bytes of *x by one position in memory order:
   byte 1 moves to byte 0, byte 2 to byte 1, byte 3 to byte 2, and the old
   byte 0 goes to byte 3.  Working on bytes keeps the result independent of
   how the uint is laid out as an integer. */
572 : static void
573 46591110 : RotWord( uint * x ) {
574 46591110 : uchar *w0;
575 46591110 : uchar tmp;
576 :
577 46591110 : w0 = (uchar *)x;
578 46591110 : tmp = w0[0];
579 46591110 : w0[0] = w0[1];
580 46591110 : w0[1] = w0[2];
581 46591110 : w0[2] = w0[3];
582 46591110 : w0[3] = tmp;
583 46591110 : }
584 :
/* KeyExpansion fills the key schedule w from the raw key.
   key : nk*4 bytes of key material
   w   : output, (nr+1)*2 ulongs (two per round key); the first nk*4 bytes
         are a verbatim copy of key
   nr  : number of rounds
   nk  : key length in 32-bit words
   The schedule is produced one ulong (two 32-bit words) per iteration. */
585 : static void
586 : KeyExpansion( uchar const * key,
587 : ulong * w,
588 : int nr,
589 4659111 : int nk ) {
590 4659111 : uint rcon;
591 4659111 : uni prev;
592 4659111 : uint temp;
593 4659111 : int i, n;
594 :
595 4659111 : memcpy( w, key, (ulong)nk*4UL );
/* round constant starts as the byte sequence 01 00 00 00, set via memcpy
   so its first byte in memory is 1 regardless of integer byte order */
596 4659111 : memcpy( &rcon, "\1\0\0\0", 4 );
/* n = key length in ulongs; prev = last ulong of the raw key */
597 4659111 : n = nk/2;
598 4659111 : prev.d = w[n-1];
599 97841331 : for (i = n; i < (nr+1)*2; i++) {
/* temp = most recently produced 32-bit word */
600 93182220 : temp = prev.w[1];
601 93182220 : if (i % n == 0) {
/* start of a new key-length group: rotate, substitute, add the round
   constant, then advance the constant by one doubling */
602 46591110 : RotWord(&temp);
603 46591110 : SubWord(&temp);
604 46591110 : temp ^= rcon;
605 46591110 : XtimeWord(&rcon);
606 46591110 : } else if (nk > 6 && i % n == 2) {
/* keys longer than 6 words get an extra substitution mid-group */
607 0 : SubWord(&temp);
608 0 : }
/* new words = words one key length back, chained with temp */
609 93182220 : prev.d = w[i-n];
610 93182220 : prev.w[0] ^= temp;
611 93182220 : prev.w[1] ^= prev.w[0];
612 93182220 : w[i] = prev.d;
613 93182220 : }
614 4659111 : }
615 :
616 : /**
617 : * Expand the cipher key into the encryption key schedule.
618 : */
/* fd_aes_ref_set_encrypt_key validates its arguments, records the round
   count in key->rounds (10, 12 or 14 for 128, 192 or 256 bits) and expands
   userKey into key->rd_key.
   Returns  0 on success,
           -1 if userKey or key is NULL,
           -2 if bits is not 128, 192 or 256.
   On failure key is not modified. */
619 : int
620 : fd_aes_ref_set_encrypt_key( uchar const * userKey,
621 : ulong const bits,
622 4659111 : fd_aes_key_ref_t * key ) {
623 4659111 : ulong *rk;
624 :
625 4659111 : if (!userKey || !key)
626 0 : return -1;
627 4659111 : if (bits != 128 && bits != 192 && bits != 256)
628 0 : return -2;
629 :
/* the schedule is written through a ulong view of rd_key */
630 4659111 : rk = (ulong *)fd_type_pun( key->rd_key ); /* strict aliasing violation */
631 :
632 4659111 : if (bits == 128)
633 4659111 : key->rounds = 10;
634 0 : else if (bits == 192)
635 0 : key->rounds = 12;
636 0 : else
637 0 : key->rounds = 14;
638 :
/* bits/32 is the key length in 32-bit words (nk) */
639 4659111 : KeyExpansion(userKey, rk, key->rounds, (int)(bits/32UL) );
640 4659111 : return 0;
641 4659111 : }
642 :
643 : /**
644 : * Expand the cipher key into the decryption key schedule.
645 : */
/* fd_aes_ref_set_decrypt_key forwards to fd_aes_ref_set_encrypt_key and
   returns its result unchanged: the decryption schedule is the same data
   as the encryption schedule, and InvCipher reads the round keys in
   reverse order. */
646 : int
647 : fd_aes_ref_set_decrypt_key( uchar const * userKey,
648 : ulong const bits,
649 256 : fd_aes_key_ref_t * key ) {
650 256 : return fd_aes_ref_set_encrypt_key(userKey, bits, key);
651 256 : }
652 :
653 : /*
654 : * Encrypt a single block
655 : * in and out can overlap
656 : */
/* fd_aes_ref_encrypt_core encrypts the 16-byte block at in into out using
   the schedule and round count stored in key.  in and out may be the same
   buffer because Cipher copies the input before writing any output.
   NOTE(review): FD_DCHECK_CRIT presumably checks the pointers only in
   builds where such checks are enabled -- confirm against its definition. */
657 : void
658 : fd_aes_ref_encrypt_core( uchar const * in,
659 : uchar * out,
660 26355127 : fd_aes_key_ref_t const * key ) {
661 :
662 26355127 : FD_DCHECK_CRIT( in && out && key, "invalid params" );
/* read-only ulong view of the schedule; the cast keeps the const qualifier,
   matching fd_aes_ref_decrypt_core */
663 26355127 : ulong const * rk = (ulong const *)fd_type_pun_const( key->rd_key );
664 :
665 26355127 : Cipher(in, out, rk, key->rounds);
666 26355127 : }
667 :
668 : /*
669 : * Decrypt a single block
670 : * in and out can overlap
671 : */
/* fd_aes_ref_decrypt_core decrypts the 16-byte block at in into out using
   the schedule and round count stored in key.  in and out may be the same
   buffer because InvCipher copies the input before writing any output.
   NOTE(review): FD_DCHECK_CRIT presumably checks the pointers only in
   builds where such checks are enabled -- confirm against its definition. */
672 : void
673 : fd_aes_ref_decrypt_core( uchar const * in,
674 : uchar * out,
675 256 : fd_aes_key_ref_t const * key ) {
676 :
677 256 : FD_DCHECK_CRIT( in && out && key, "invalid params" );
/* read-only ulong view of the schedule */
678 256 : ulong const * rk = (ulong const *)fd_type_pun_const( key->rd_key );
679 :
680 256 : InvCipher(in, out, rk, key->rounds );
681 256 : }
|