Line data Source code
1 : /* fd_aes_ref.c was imported from the OpenSSL project circa 2023-Aug.
2 : Original source file: crypto/aes/aes_core.c */
3 :
4 : /*
5 : * Copyright 2002-2022 The OpenSSL Project Authors. All Rights Reserved.
6 : *
7 : * Licensed under the Apache License 2.0 (the "License"). You may not use
8 : * this file except in compliance with the License. You can obtain a copy
9 : * in the file LICENSE in the source distribution or at
10 : * https://www.openssl.org/source/license.html
11 : */
12 :
13 : /**
14 : * rijndael-alg-fst.c
15 : *
16 : * @version 3.0 (December 2000)
17 : *
18 : * Optimised ANSI C code for the Rijndael cipher (now AES)
19 : *
20 : * @author Vincent Rijmen
21 : * @author Antoon Bosselaers
22 : * @author Paulo Barreto
23 : *
24 : * This code is hereby placed in the public domain.
25 : *
26 : * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
27 : * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
28 : * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 : * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
30 : * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 : * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 : * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
33 : * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
34 : * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
35 : * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
36 : * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 : */
38 :
39 : /* Note: rewritten a little bit to provide error control and an OpenSSL-
40 : compatible API */
41 :
42 : #include <assert.h>
43 : #include <stdlib.h>
44 : #include "fd_aes_gcm_ref.h"
45 :
/* uni overlays a single 8-byte lane with three views of the same
   storage, so the code below can address it as bytes, as two words or
   as one wide integer without pointer casts.
   NOTE(review): the layout presumes uchar/uint/ulong are 1/4/8 bytes
   wide - confirm against the project's base typedefs. */
46 : typedef union {
/* b: byte view (used by the column-mixing byte rotations) */
47 : uchar b[8];
/* w: two-word view (used by the key schedule) */
48 : uint w[2];
/* d: whole-lane view (used for loads, stores and wide XORs) */
49 : ulong d;
50 : } uni;
51 :
52 : /*
53 : * Compute w := (w * x) mod (x^8 + x^4 + x^3 + x^1 + 1)
54 : * Therefore the name "xtime".
55 : */
56 : static void
57 124927350 : XtimeWord( uint * w ) {
58 124927350 : uint a, b;
59 :
60 124927350 : a = *w;
61 124927350 : b = a & 0x80808080u;
62 124927350 : a ^= b;
63 124927350 : b -= b >> 7;
64 124927350 : b &= 0x1B1B1B1Bu;
65 124927350 : b ^= a << 1;
66 124927350 : *w = b;
67 124927350 : }
68 :
69 : static void
70 3941700714 : XtimeLong( ulong * w ) {
71 3941700714 : ulong a, b;
72 :
73 3941700714 : a = *w;
74 3941700714 : b = a & (ulong)(0x8080808080808080);
75 3941700714 : a ^= b;
76 3941700714 : b -= b >> 7;
77 3941700714 : b &= (ulong)(0x1B1B1B1B1B1B1B1B);
78 3941700714 : b ^= a << 1;
79 3941700714 : *w = b;
80 3941700714 : }
81 :
82 : /*
83 : * This computes w := S * w ^ -1 + c, where c = {01100011}.
84 : * Instead of using GF(2^8) mod (x^8+x^4+x^3+x+1) we do the inversion
85 : * in GF(GF(GF(2^2)^2)^2) mod (X^2+X+8)
86 : * and GF(GF(2^2)^2) mod (X^2+X+2)
87 : * and GF(2^2) mod (X^2+X+1)
88 : * The first part of the algorithm below transfers the coordinates
89 : * {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80} =>
90 : * {1,Y,Y^2,Y^3,Y^4,Y^5,Y^6,Y^7} with Y=0x41:
91 : * {0x01,0x41,0x66,0x6c,0x56,0x9a,0x58,0xc4}
92 : * The last part undoes the coordinate transfer and the final affine
93 : * transformation S:
94 : * b[i] = b[i] + b[(i+4)%8] + b[(i+5)%8] + b[(i+6)%8] + b[(i+7)%8] + c[i]
95 : * in one step.
96 : * The multiplication in GF(2^2^2^2) is done in ordinary coords:
97 : * A = (a0*1 + a1*x^4)
98 : * B = (b0*1 + b1*x^4)
99 : * AB = ((a0*b0 + 8*a1*b1)*1 + (a1*b0 + (a0+a1)*b1)*x^4)
100 : * When A = (a0,a1) is given we want to solve AB = 1:
101 : * (a) 1 = a0*b0 + 8*a1*b1
102 : * (b) 0 = a1*b0 + (a0+a1)*b1
103 : * => multiply (a) by a1 and (b) by a0
104 : * (c) a1 = a1*a0*b0 + (8*a1*a1)*b1
105 : * (d) 0 = a1*a0*b0 + (a0*a0+a1*a0)*b1
106 : * => add (c) + (d)
107 : * (e) a1 = (a0*a0 + a1*a0 + 8*a1*a1)*b1
108 : * => therefore
109 : * b1 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * a1
110 : * => and adding (a1*b0) to (b) we get
111 : * (f) a1*b0 = (a0+a1)*b1
112 : * => therefore
113 : * b0 = (a0*a0 + a1*a0 + 8*a1*a1)^-1 * (a0+a1)
114 : * Note this formula also works for the case
115 : * (a0+a1)*a0 + 8*a1*a1 = 0
116 : * if the inverse element for 0^-1 is mapped to 0.
117 : * Repeat the same for GF(2^2^2) and GF(2^2).
118 : * We get the following algorithm:
119 : * inv8(a0,a1):
120 : * x0 = a0^a1
121 : * [y0,y1] = mul4([x0,a1],[a0,a1]); (*)
122 : * y1 = mul4(8,y1);
123 : * t = inv4(y0^y1);
124 : * [b0,b1] = mul4([x0,a1],[t,t]); (*)
125 : * return [b0,b1];
126 : * The non-linear multiplies (*) can be done in parallel at no extra cost.
127 : */
128 : static void
129 124927350 : SubWord( uint * w ) {
130 124927350 : uint x, y, a1, a2, a3, a4, a5, a6;
131 :
132 124927350 : x = *w;
133 124927350 : y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
134 124927350 : x &= 0xDDDDDDDDu;
135 124927350 : x ^= y & 0x57575757u;
136 124927350 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
137 124927350 : x ^= y & 0x1C1C1C1Cu;
138 124927350 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
139 124927350 : x ^= y & 0x4A4A4A4Au;
140 124927350 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
141 124927350 : x ^= y & 0x42424242u;
142 124927350 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
143 124927350 : x ^= y & 0x64646464u;
144 124927350 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
145 124927350 : x ^= y & 0xE0E0E0E0u;
146 124927350 : a1 = x;
147 124927350 : a1 ^= (x & 0xF0F0F0F0u) >> 4;
148 124927350 : a2 = ((x & 0xCCCCCCCCu) >> 2) | ((x & 0x33333333u) << 2);
149 124927350 : a3 = x & a1;
150 124927350 : a3 ^= (a3 & 0xAAAAAAAAu) >> 1;
151 124927350 : a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & 0xAAAAAAAAu;
152 124927350 : a4 = a2 & a1;
153 124927350 : a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
154 124927350 : a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & 0xAAAAAAAAu;
155 124927350 : a5 = (a3 & 0xCCCCCCCCu) >> 2;
156 124927350 : a3 ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
157 124927350 : a4 = a5 & 0x22222222u;
158 124927350 : a4 |= a4 >> 1;
159 124927350 : a4 ^= (a5 << 1) & 0x22222222u;
160 124927350 : a3 ^= a4;
161 124927350 : a5 = a3 & 0xA0A0A0A0u;
162 124927350 : a5 |= a5 >> 1;
163 124927350 : a5 ^= (a3 << 1) & 0xA0A0A0A0u;
164 124927350 : a4 = a5 & 0xC0C0C0C0u;
165 124927350 : a6 = a4 >> 2;
166 124927350 : a4 ^= (a5 << 2) & 0xC0C0C0C0u;
167 124927350 : a5 = a6 & 0x20202020u;
168 124927350 : a5 |= a5 >> 1;
169 124927350 : a5 ^= (a6 << 1) & 0x20202020u;
170 124927350 : a4 |= a5;
171 124927350 : a3 ^= a4 >> 4;
172 124927350 : a3 &= 0x0F0F0F0Fu;
173 124927350 : a2 = a3;
174 124927350 : a2 ^= (a3 & 0x0C0C0C0Cu) >> 2;
175 124927350 : a4 = a3 & a2;
176 124927350 : a4 ^= (uint)(a4 & 0x0A0A0A0A0Au) >> 1u;
177 124927350 : a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & 0x0A0A0A0Au;
178 124927350 : a5 = a4 & 0x08080808u;
179 124927350 : a5 |= a5 >> 1;
180 124927350 : a5 ^= (a4 << 1) & 0x08080808u;
181 124927350 : a4 ^= a5 >> 2;
182 124927350 : a4 &= 0x03030303u;
183 124927350 : a4 ^= (a4 & 0x02020202u) >> 1;
184 124927350 : a4 |= a4 << 2;
185 124927350 : a3 = a2 & a4;
186 124927350 : a3 ^= (a3 & 0x0A0A0A0Au) >> 1;
187 124927350 : a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & 0x0A0A0A0Au;
188 124927350 : a3 |= a3 << 4;
189 124927350 : a2 = ((a1 & 0xCCCCCCCCu) >> 2) | ((a1 & 0x33333333u) << 2);
190 124927350 : x = a1 & a3;
191 124927350 : x ^= (x & 0xAAAAAAAAu) >> 1;
192 124927350 : x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & 0xAAAAAAAAu;
193 124927350 : a4 = a2 & a3;
194 124927350 : a4 ^= (a4 & 0xAAAAAAAAu) >> 1;
195 124927350 : a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & 0xAAAAAAAAu;
196 124927350 : a5 = (x & 0xCCCCCCCCu) >> 2;
197 124927350 : x ^= ((a4 << 2) ^ a4) & 0xCCCCCCCCu;
198 124927350 : a4 = a5 & 0x22222222u;
199 124927350 : a4 |= a4 >> 1;
200 124927350 : a4 ^= (a5 << 1) & 0x22222222u;
201 124927350 : x ^= a4;
202 124927350 : y = ((x & 0xFEFEFEFEu) >> 1) | ((x & 0x01010101u) << 7);
203 124927350 : x &= 0x39393939u;
204 124927350 : x ^= y & 0x3F3F3F3Fu;
205 124927350 : y = ((y & 0xFCFCFCFCu) >> 2) | ((y & 0x03030303u) << 6);
206 124927350 : x ^= y & 0x97979797u;
207 124927350 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
208 124927350 : x ^= y & 0x9B9B9B9Bu;
209 124927350 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
210 124927350 : x ^= y & 0x3C3C3C3Cu;
211 124927350 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
212 124927350 : x ^= y & 0xDDDDDDDDu;
213 124927350 : y = ((y & 0xFEFEFEFEu) >> 1) | ((y & 0x01010101u) << 7);
214 124927350 : x ^= y & 0x72727272u;
215 124927350 : x ^= 0x63636363u;
216 124927350 : *w = x;
217 124927350 : }
218 :
/* SubLong substitutes the 64-bit value at *w in place.  It runs the
   same statement sequence as SubWord with every mask widened to 64 bits
   (one byte pattern repeated eight times).
   NOTE(review): presumably this maps each of the eight bytes through
   the AES S-box independently - confirm against FIPS-197 test vectors. */
219 : static void
220 4379667460 : SubLong( ulong * w ) {
221 4379667460 : ulong x, y, a1, a2, a3, a4, a5, a6;
222 :
223 4379667460 : x = *w;
/* Input linear layer: y is rotated right by one bit inside each byte
   at every step and selected bits are folded into x. */
224 4379667460 : y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
225 4379667460 : x &= (0xDDDDDDDDDDDDDDDDUL);
226 4379667460 : x ^= y & (0x5757575757575757UL);
227 4379667460 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
228 4379667460 : x ^= y & (0x1C1C1C1C1C1C1C1CUL);
229 4379667460 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
230 4379667460 : x ^= y & (0x4A4A4A4A4A4A4A4AUL);
231 4379667460 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
232 4379667460 : x ^= y & (0x4242424242424242UL);
233 4379667460 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
234 4379667460 : x ^= y & (0x6464646464646464UL);
235 4379667460 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
236 4379667460 : x ^= y & (0xE0E0E0E0E0E0E0E0UL);
/* Non-linear core: AND/XOR/shift steps on a1..a6 (the inv8 / inv4
   steps of the derivation comment above SubWord, as far as can be told
   from the masks). */
237 4379667460 : a1 = x;
238 4379667460 : a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
239 4379667460 : a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
240 4379667460 : a3 = x & a1;
241 4379667460 : a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
242 4379667460 : a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
243 4379667460 : a4 = a2 & a1;
244 4379667460 : a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
245 4379667460 : a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
246 4379667460 : a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
247 4379667460 : a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
248 4379667460 : a4 = a5 & (0x2222222222222222UL);
249 4379667460 : a4 |= a4 >> 1;
250 4379667460 : a4 ^= (a5 << 1) & (0x2222222222222222UL);
251 4379667460 : a3 ^= a4;
252 4379667460 : a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
253 4379667460 : a5 |= a5 >> 1;
254 4379667460 : a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
255 4379667460 : a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
256 4379667460 : a6 = a4 >> 2;
257 4379667460 : a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
258 4379667460 : a5 = a6 & (0x2020202020202020UL);
259 4379667460 : a5 |= a5 >> 1;
260 4379667460 : a5 ^= (a6 << 1) & (0x2020202020202020UL);
261 4379667460 : a4 |= a5;
262 4379667460 : a3 ^= a4 >> 4;
263 4379667460 : a3 &= (0x0F0F0F0F0F0F0F0FUL);
264 4379667460 : a2 = a3;
265 4379667460 : a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
266 4379667460 : a4 = a3 & a2;
267 4379667460 : a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
268 4379667460 : a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
269 4379667460 : a5 = a4 & (0x0808080808080808UL);
270 4379667460 : a5 |= a5 >> 1;
271 4379667460 : a5 ^= (a4 << 1) & (0x0808080808080808UL);
272 4379667460 : a4 ^= a5 >> 2;
273 4379667460 : a4 &= (0x0303030303030303UL);
274 4379667460 : a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
275 4379667460 : a4 |= a4 << 2;
276 4379667460 : a3 = a2 & a4;
277 4379667460 : a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
278 4379667460 : a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
279 4379667460 : a3 |= a3 << 4;
280 4379667460 : a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
281 4379667460 : x = a1 & a3;
282 4379667460 : x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
283 4379667460 : x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
284 4379667460 : a4 = a2 & a3;
285 4379667460 : a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
286 4379667460 : a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
287 4379667460 : a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
288 4379667460 : x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
289 4379667460 : a4 = a5 & (0x2222222222222222UL);
290 4379667460 : a4 |= a4 >> 1;
291 4379667460 : a4 ^= (a5 << 1) & (0x2222222222222222UL);
292 4379667460 : x ^= a4;
/* Output linear layer: same rotate-and-fold pattern (the second step
   rotates by two bits), followed by XORing 0x63 into every byte. */
293 4379667460 : y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
294 4379667460 : x &= (0x3939393939393939UL);
295 4379667460 : x ^= y & (0x3F3F3F3F3F3F3F3FUL);
296 4379667460 : y = ((y & (0xFCFCFCFCFCFCFCFCUL)) >> 2) | ((y & (0x0303030303030303UL)) << 6);
297 4379667460 : x ^= y & (0x9797979797979797UL);
298 4379667460 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
299 4379667460 : x ^= y & (0x9B9B9B9B9B9B9B9BUL);
300 4379667460 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
301 4379667460 : x ^= y & (0x3C3C3C3C3C3C3C3CUL);
302 4379667460 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
303 4379667460 : x ^= y & (0xDDDDDDDDDDDDDDDDUL);
304 4379667460 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
305 4379667460 : x ^= y & (0x7272727272727272UL);
306 4379667460 : x ^= (0x6363636363636363UL);
/* Store the substituted value back in place. */
307 4379667460 : *w = x;
308 4379667460 : }
309 :
310 : /*
311 : * This computes w := (S^-1 * (w + c))^-1
312 : */
/* InvSubLong transforms the 64-bit value at *w in place.  It first
   XORs 0x63 into every byte, then applies a linear input layer, the
   same non-linear core statements as SubLong, and a linear output
   layer; no constant is added at the end.  Every mask repeats one byte
   pattern eight times.
   NOTE(review): presumably this is the inverse AES S-box applied to
   each byte - confirm against FIPS-197 test vectors.  The coverage
   counts in this listing show the function was never executed. */
313 : static void
314 0 : InvSubLong( ulong * w ) {
315 0 : ulong x, y, a1, a2, a3, a4, a5, a6;
316 :
317 0 : x = *w;
/* Remove the additive constant before the linear layer. */
318 0 : x ^= (0x6363636363636363UL);
/* Input linear layer: y is rotated right by one bit inside each byte
   at every step and selected bits are folded into x. */
319 0 : y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
320 0 : x &= (0xFDFDFDFDFDFDFDFDUL);
321 0 : x ^= y & (0x5E5E5E5E5E5E5E5EUL);
322 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
323 0 : x ^= y & (0xF3F3F3F3F3F3F3F3UL);
324 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
325 0 : x ^= y & (0xF5F5F5F5F5F5F5F5UL);
326 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
327 0 : x ^= y & (0x7878787878787878UL);
328 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
329 0 : x ^= y & (0x7777777777777777UL);
330 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
331 0 : x ^= y & (0x1515151515151515UL);
332 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
333 0 : x ^= y & (0xA5A5A5A5A5A5A5A5UL);
/* Non-linear core: statement-for-statement the same as in SubLong. */
334 0 : a1 = x;
335 0 : a1 ^= (x & (0xF0F0F0F0F0F0F0F0UL)) >> 4;
336 0 : a2 = ((x & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((x & (0x3333333333333333UL)) << 2);
337 0 : a3 = x & a1;
338 0 : a3 ^= (a3 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
339 0 : a3 ^= (((x << 1) & a1) ^ ((a1 << 1) & x)) & (0xAAAAAAAAAAAAAAAAUL);
340 0 : a4 = a2 & a1;
341 0 : a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
342 0 : a4 ^= (((a2 << 1) & a1) ^ ((a1 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
343 0 : a5 = (a3 & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
344 0 : a3 ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
345 0 : a4 = a5 & (0x2222222222222222UL);
346 0 : a4 |= a4 >> 1;
347 0 : a4 ^= (a5 << 1) & (0x2222222222222222UL);
348 0 : a3 ^= a4;
349 0 : a5 = a3 & (0xA0A0A0A0A0A0A0A0UL);
350 0 : a5 |= a5 >> 1;
351 0 : a5 ^= (a3 << 1) & (0xA0A0A0A0A0A0A0A0UL);
352 0 : a4 = a5 & (0xC0C0C0C0C0C0C0C0UL);
353 0 : a6 = a4 >> 2;
354 0 : a4 ^= (a5 << 2) & (0xC0C0C0C0C0C0C0C0UL);
355 0 : a5 = a6 & (0x2020202020202020UL);
356 0 : a5 |= a5 >> 1;
357 0 : a5 ^= (a6 << 1) & (0x2020202020202020UL);
358 0 : a4 |= a5;
359 0 : a3 ^= a4 >> 4;
360 0 : a3 &= (0x0F0F0F0F0F0F0F0FUL);
361 0 : a2 = a3;
362 0 : a2 ^= (a3 & (0x0C0C0C0C0C0C0C0CUL)) >> 2;
363 0 : a4 = a3 & a2;
364 0 : a4 ^= (a4 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
365 0 : a4 ^= (((a3 << 1) & a2) ^ ((a2 << 1) & a3)) & (0x0A0A0A0A0A0A0A0AUL);
366 0 : a5 = a4 & (0x0808080808080808UL);
367 0 : a5 |= a5 >> 1;
368 0 : a5 ^= (a4 << 1) & (0x0808080808080808UL);
369 0 : a4 ^= a5 >> 2;
370 0 : a4 &= (0x0303030303030303UL);
371 0 : a4 ^= (a4 & (0x0202020202020202UL)) >> 1;
372 0 : a4 |= a4 << 2;
373 0 : a3 = a2 & a4;
374 0 : a3 ^= (a3 & (0x0A0A0A0A0A0A0A0AUL)) >> 1;
375 0 : a3 ^= (((a2 << 1) & a4) ^ ((a4 << 1) & a2)) & (0x0A0A0A0A0A0A0A0AUL);
376 0 : a3 |= a3 << 4;
377 0 : a2 = ((a1 & (0xCCCCCCCCCCCCCCCCUL)) >> 2) | ((a1 & (0x3333333333333333UL)) << 2);
378 0 : x = a1 & a3;
379 0 : x ^= (x & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
380 0 : x ^= (((a1 << 1) & a3) ^ ((a3 << 1) & a1)) & (0xAAAAAAAAAAAAAAAAUL);
381 0 : a4 = a2 & a3;
382 0 : a4 ^= (a4 & (0xAAAAAAAAAAAAAAAAUL)) >> 1;
383 0 : a4 ^= (((a2 << 1) & a3) ^ ((a3 << 1) & a2)) & (0xAAAAAAAAAAAAAAAAUL);
384 0 : a5 = (x & (0xCCCCCCCCCCCCCCCCUL)) >> 2;
385 0 : x ^= ((a4 << 2) ^ a4) & (0xCCCCCCCCCCCCCCCCUL);
386 0 : a4 = a5 & (0x2222222222222222UL);
387 0 : a4 |= a4 >> 1;
388 0 : a4 ^= (a5 << 1) & (0x2222222222222222UL);
389 0 : x ^= a4;
/* Output linear layer: seven one-bit rotate-and-fold steps. */
390 0 : y = ((x & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((x & (0x0101010101010101UL)) << 7);
391 0 : x &= (0xB5B5B5B5B5B5B5B5UL);
392 0 : x ^= y & (0x4040404040404040UL);
393 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
394 0 : x ^= y & (0x8080808080808080UL);
395 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
396 0 : x ^= y & (0x1616161616161616UL);
397 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
398 0 : x ^= y & (0xEBEBEBEBEBEBEBEBUL);
399 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
400 0 : x ^= y & (0x9797979797979797UL);
401 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
402 0 : x ^= y & (0xFBFBFBFBFBFBFBFBUL);
403 0 : y = ((y & (0xFEFEFEFEFEFEFEFEUL)) >> 1) | ((y & (0x0101010101010101UL)) << 7);
404 0 : x ^= y & (0x7D7D7D7D7D7D7D7DUL);
/* Store the result back in place. */
405 0 : *w = x;
406 0 : }
407 :
408 : static void
409 2189833730 : ShiftRows( ulong * state ) {
410 2189833730 : uchar s[4];
411 2189833730 : uchar *s0;
412 2189833730 : int r;
413 :
414 2189833730 : s0 = (uchar *)state;
415 10949168650 : for (r = 0; r < 4; r++) {
416 8759334920 : s[0] = s0[0*4 + r];
417 8759334920 : s[1] = s0[1*4 + r];
418 8759334920 : s[2] = s0[2*4 + r];
419 8759334920 : s[3] = s0[3*4 + r];
420 8759334920 : s0[0*4 + r] = s[(r+0) % 4];
421 8759334920 : s0[1*4 + r] = s[(r+1) % 4];
422 8759334920 : s0[2*4 + r] = s[(r+2) % 4];
423 8759334920 : s0[3*4 + r] = s[(r+3) % 4];
424 8759334920 : }
425 2189833730 : }
426 :
427 : static void
428 0 : InvShiftRows( ulong * state ) {
429 0 : uchar s[4];
430 0 : uchar *s0;
431 0 : int r;
432 :
433 0 : s0 = (uchar *)state;
434 0 : for (r = 0; r < 4; r++) {
435 0 : s[0] = s0[0*4 + r];
436 0 : s[1] = s0[1*4 + r];
437 0 : s[2] = s0[2*4 + r];
438 0 : s[3] = s0[3*4 + r];
439 0 : s0[0*4 + r] = s[(4-r) % 4];
440 0 : s0[1*4 + r] = s[(5-r) % 4];
441 0 : s0[2*4 + r] = s[(6-r) % 4];
442 0 : s0[3*4 + r] = s[(7-r) % 4];
443 0 : }
444 0 : }
445 :
446 : static void
447 1970850357 : MixColumns( ulong * state ) {
448 1970850357 : uni s1;
449 1970850357 : uni s;
450 1970850357 : int c;
451 :
452 5912551071 : for (c = 0; c < 2; c++) {
453 3941700714 : s1.d = state[c];
454 3941700714 : s.d = s1.d;
455 3941700714 : s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
456 3941700714 : | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
457 3941700714 : s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
458 3941700714 : | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
459 3941700714 : s.d ^= s1.d;
460 3941700714 : XtimeLong(&s1.d);
461 3941700714 : s.d ^= s1.d;
462 3941700714 : s.b[0] ^= s1.b[1];
463 3941700714 : s.b[1] ^= s1.b[2];
464 3941700714 : s.b[2] ^= s1.b[3];
465 3941700714 : s.b[3] ^= s1.b[0];
466 3941700714 : s.b[4] ^= s1.b[5];
467 3941700714 : s.b[5] ^= s1.b[6];
468 3941700714 : s.b[6] ^= s1.b[7];
469 3941700714 : s.b[7] ^= s1.b[4];
470 3941700714 : state[c] = s.d;
471 3941700714 : }
472 1970850357 : }
473 :
474 : static void InvMixColumns(ulong * state)
475 0 : {
476 0 : uni s1;
477 0 : uni s;
478 0 : int c;
479 :
480 0 : for (c = 0; c < 2; c++) {
481 0 : s1.d = state[c];
482 0 : s.d = s1.d;
483 0 : s.d ^= ((s.d & (0xFFFF0000FFFF0000UL)) >> 16)
484 0 : | ((s.d & (0x0000FFFF0000FFFFUL)) << 16);
485 0 : s.d ^= ((s.d & (0xFF00FF00FF00FF00UL)) >> 8)
486 0 : | ((s.d & (0x00FF00FF00FF00FFUL)) << 8);
487 0 : s.d ^= s1.d;
488 0 : XtimeLong(&s1.d);
489 0 : s.d ^= s1.d;
490 0 : s.b[0] ^= s1.b[1];
491 0 : s.b[1] ^= s1.b[2];
492 0 : s.b[2] ^= s1.b[3];
493 0 : s.b[3] ^= s1.b[0];
494 0 : s.b[4] ^= s1.b[5];
495 0 : s.b[5] ^= s1.b[6];
496 0 : s.b[6] ^= s1.b[7];
497 0 : s.b[7] ^= s1.b[4];
498 0 : XtimeLong(&s1.d);
499 0 : s1.d ^= ((s1.d & (0xFFFF0000FFFF0000UL)) >> 16)
500 0 : | ((s1.d & (0x0000FFFF0000FFFFUL)) << 16);
501 0 : s.d ^= s1.d;
502 0 : XtimeLong(&s1.d);
503 0 : s1.d ^= ((s1.d & (0xFF00FF00FF00FF00UL)) >> 8)
504 0 : | ((s1.d & (0x00FF00FF00FF00FFUL)) << 8);
505 0 : s.d ^= s1.d;
506 0 : state[c] = s.d;
507 0 : }
508 0 : }
509 :
510 : static void
511 : AddRoundKey( ulong * state,
512 2408817103 : ulong const * w ) {
513 2408817103 : state[0] ^= w[0];
514 2408817103 : state[1] ^= w[1];
515 2408817103 : }
516 :
517 : static void
518 : Cipher( uchar const * in,
519 : uchar * out,
520 : ulong const * w,
521 218983373 : int nr ) {
522 218983373 : ulong state[2];
523 218983373 : int i;
524 :
525 218983373 : memcpy(state, in, 16);
526 :
527 218983373 : AddRoundKey(state, w);
528 :
529 2189833730 : for (i = 1; i < nr; i++) {
530 1970850357 : SubLong(&state[0]);
531 1970850357 : SubLong(&state[1]);
532 1970850357 : ShiftRows(state);
533 1970850357 : MixColumns(state);
534 1970850357 : AddRoundKey(state, w + i*2);
535 1970850357 : }
536 :
537 218983373 : SubLong(&state[0]);
538 218983373 : SubLong(&state[1]);
539 218983373 : ShiftRows(state);
540 218983373 : AddRoundKey(state, w + nr*2);
541 :
542 218983373 : memcpy(out, state, 16);
543 218983373 : }
544 :
545 : static void
546 : InvCipher( uchar const * in,
547 : uchar * out,
548 : ulong const * w,
549 0 : int nr ) {
550 0 : ulong state[2];
551 0 : int i;
552 :
553 0 : memcpy(state, in, 16);
554 :
555 0 : AddRoundKey(state, w + nr*2);
556 :
557 0 : for (i = nr - 1; i > 0; i--) {
558 0 : InvShiftRows(state);
559 0 : InvSubLong(&state[0]);
560 0 : InvSubLong(&state[1]);
561 0 : AddRoundKey(state, w + i*2);
562 0 : InvMixColumns(state);
563 0 : }
564 :
565 0 : InvShiftRows(state);
566 0 : InvSubLong(&state[0]);
567 0 : InvSubLong(&state[1]);
568 0 : AddRoundKey(state, w);
569 :
570 0 : memcpy(out, state, 16);
571 0 : }
572 :
573 : static void
574 124927350 : RotWord( uint * x ) {
575 124927350 : uchar *w0;
576 124927350 : uchar tmp;
577 :
578 124927350 : w0 = (uchar *)x;
579 124927350 : tmp = w0[0];
580 124927350 : w0[0] = w0[1];
581 124927350 : w0[1] = w0[2];
582 124927350 : w0[2] = w0[3];
583 124927350 : w0[3] = tmp;
584 124927350 : }
585 :
586 : static void
587 : KeyExpansion( uchar const * key,
588 : ulong * w,
589 : int nr,
590 12492735 : int nk ) {
591 12492735 : uint rcon;
592 12492735 : uni prev;
593 12492735 : uint temp;
594 12492735 : int i, n;
595 :
596 12492735 : memcpy( w, key, (ulong)nk*4UL );
597 12492735 : memcpy( &rcon, "\1\0\0\0", 4 );
598 12492735 : n = nk/2;
599 12492735 : prev.d = w[n-1];
600 262347435 : for (i = n; i < (nr+1)*2; i++) {
601 249854700 : temp = prev.w[1];
602 249854700 : if (i % n == 0) {
603 124927350 : RotWord(&temp);
604 124927350 : SubWord(&temp);
605 124927350 : temp ^= rcon;
606 124927350 : XtimeWord(&rcon);
607 124927350 : } else if (nk > 6 && i % n == 2) {
608 0 : SubWord(&temp);
609 0 : }
610 249854700 : prev.d = w[i-n];
611 249854700 : prev.w[0] ^= temp;
612 249854700 : prev.w[1] ^= prev.w[0];
613 249854700 : w[i] = prev.d;
614 249854700 : }
615 12492735 : }
616 :
617 : /**
618 : * Expand the cipher key into the encryption key schedule.
619 : */
620 : int
621 : fd_aes_ref_set_encrypt_key( uchar const * userKey,
622 : ulong const bits,
623 12492735 : fd_aes_key_ref_t * key ) {
624 12492735 : ulong *rk;
625 :
626 12492735 : if (!userKey || !key)
627 0 : return -1;
628 12492735 : if (bits != 128 && bits != 192 && bits != 256)
629 0 : return -2;
630 :
631 12492735 : rk = (ulong *)fd_type_pun( key->rd_key ); /* strict aliasing violation */
632 :
633 12492735 : if (bits == 128)
634 12492735 : key->rounds = 10;
635 0 : else if (bits == 192)
636 0 : key->rounds = 12;
637 0 : else
638 0 : key->rounds = 14;
639 :
640 12492735 : KeyExpansion(userKey, rk, key->rounds, (int)(bits/32UL) );
641 12492735 : return 0;
642 12492735 : }
643 :
644 : /**
645 : * Expand the cipher key into the decryption key schedule.
646 : */
647 : int
648 : fd_aes_ref_set_decrypt_key( uchar const * userKey,
649 : ulong const bits,
650 0 : fd_aes_key_ref_t * key ) {
651 0 : return fd_aes_ref_set_encrypt_key(userKey, bits, key);
652 0 : }
653 :
654 : /*
655 : * Encrypt a single block
656 : * in and out can overlap
657 : */
658 : void
659 : fd_aes_ref_encrypt_core( uchar const * in,
660 : uchar * out,
661 218983373 : fd_aes_key_ref_t const * key ) {
662 :
663 218983373 : assert(in && out && key);
664 0 : ulong const * rk = (ulong *)fd_type_pun_const( key->rd_key );
665 :
666 218983373 : Cipher(in, out, rk, key->rounds);
667 218983373 : }
668 :
669 : /*
670 : * Decrypt a single block
671 : * in and out can overlap
672 : */
673 : void
674 : fd_aes_ref_decrypt_core( uchar const * in,
675 : uchar * out,
676 0 : fd_aes_key_ref_t const * key ) {
677 :
678 0 : assert(in && out && key);
679 0 : ulong const * rk = (ulong const *)fd_type_pun_const( key->rd_key );
680 :
681 0 : InvCipher(in, out, rk, key->rounds );
682 0 : }
|