Line data Source code
1 : #include "fd_sha256.h"
2 : #include "fd_sha256_constants.h"
3 :
4 : #if FD_HAS_SHANI
5 : /* For the optimized repeated hash */
6 : #include "../../util/simd/fd_sse.h"
7 : #endif
8 :
ulong
fd_sha256_align( void ) {
  /* Returns the byte alignment required for a fd_sha256_t region */
  return FD_SHA256_ALIGN;
}
13 :
ulong
fd_sha256_footprint( void ) {
  /* Returns the byte footprint required for a fd_sha256_t region */
  return FD_SHA256_FOOTPRINT;
}
18 :
19 : void *
20 460644 : fd_sha256_new( void * shmem ) {
21 460644 : fd_sha256_t * sha = (fd_sha256_t *)shmem;
22 :
23 460644 : if( FD_UNLIKELY( !shmem ) ) {
24 3 : FD_LOG_WARNING(( "NULL shmem" ));
25 3 : return NULL;
26 3 : }
27 :
28 460641 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_sha256_align() ) ) ) {
29 3 : FD_LOG_WARNING(( "misaligned shmem" ));
30 3 : return NULL;
31 3 : }
32 :
33 460638 : ulong footprint = fd_sha256_footprint();
34 :
35 460638 : fd_memset( sha, 0, footprint );
36 :
37 460638 : FD_COMPILER_MFENCE();
38 460638 : FD_VOLATILE( sha->magic ) = FD_SHA256_MAGIC;
39 460638 : FD_COMPILER_MFENCE();
40 :
41 460638 : return (void *)sha;
42 460641 : }
43 :
44 : fd_sha256_t *
45 460644 : fd_sha256_join( void * shsha ) {
46 :
47 460644 : if( FD_UNLIKELY( !shsha ) ) {
48 3 : FD_LOG_WARNING(( "NULL shsha" ));
49 3 : return NULL;
50 3 : }
51 :
52 460641 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
53 3 : FD_LOG_WARNING(( "misaligned shsha" ));
54 3 : return NULL;
55 3 : }
56 :
57 460638 : fd_sha256_t * sha = (fd_sha256_t *)shsha;
58 :
59 460638 : if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
60 0 : FD_LOG_WARNING(( "bad magic" ));
61 0 : return NULL;
62 0 : }
63 :
64 460638 : return sha;
65 460638 : }
66 :
67 : void *
68 15 : fd_sha256_leave( fd_sha256_t * sha ) {
69 :
70 15 : if( FD_UNLIKELY( !sha ) ) {
71 3 : FD_LOG_WARNING(( "NULL sha" ));
72 3 : return NULL;
73 3 : }
74 :
75 12 : return (void *)sha;
76 15 : }
77 :
78 : void *
79 18 : fd_sha256_delete( void * shsha ) {
80 :
81 18 : if( FD_UNLIKELY( !shsha ) ) {
82 3 : FD_LOG_WARNING(( "NULL shsha" ));
83 3 : return NULL;
84 3 : }
85 :
86 15 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
87 3 : FD_LOG_WARNING(( "misaligned shsha" ));
88 3 : return NULL;
89 3 : }
90 :
91 12 : fd_sha256_t * sha = (fd_sha256_t *)shsha;
92 :
93 12 : if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
94 0 : FD_LOG_WARNING(( "bad magic" ));
95 0 : return NULL;
96 0 : }
97 :
98 12 : FD_COMPILER_MFENCE();
99 12 : FD_VOLATILE( sha->magic ) = 0UL;
100 12 : FD_COMPILER_MFENCE();
101 :
102 12 : return (void *)sha;
103 12 : }
104 :
105 : #ifndef FD_SHA256_CORE_IMPL
106 : #if FD_HAS_SHANI
107 : #define FD_SHA256_CORE_IMPL 1
108 : #else
109 : #define FD_SHA256_CORE_IMPL 0
110 : #endif
111 : #endif
112 :
113 : #if FD_SHA256_CORE_IMPL==0
114 :
115 : /* The implementation below was derived from OpenSSL's SHA-256
116 : implementation (Apache-2.0 licensed). See in particular:
117 :
118 : https://github.com/openssl/openssl/blob/master/crypto/sha/sha256.c
119 :
120 : (link valid circa 2022-Dec). It has been made more strict with more
121 : extensive implementation documentation, has been simplified and has
122 : been streamlined specifically for use inside Firedancer base machine
123 : model (no machine specific capabilities required).
124 :
125 : In particular, fd_sha256_core_ref is based on OpenSSL's
126 : OPENSSL_SMALL_FOOTPRINT SHA-256 implementation (Apache licensed).
127 : This should work anywhere but it is not the highest performance
128 : implementation possible.
129 :
130 : It is also straightforward to replace these implementations with HPC
131 : implementations that target specific machine capabilities without
132 : requiring any changes to caller code. */
133 :
/* fd_sha256_core_ref: scalar reference compression function.  Updates
   the 8-word hash state in place with block_cnt (positive) 64-byte
   message blocks starting at block.  Derived from OpenSSL's
   OPENSSL_SMALL_FOOTPRINT SHA-256 (see the note above). */

static void
fd_sha256_core_ref( uint * state,
                    uchar const * block,
                    ulong block_cnt ) {

  /* FIPS 180-4 expresses Sigma/sigma as right rotates; the rotate
     amounts below are the equivalent left-rotate amounts (32 minus the
     right amount). */

# define ROTATE fd_uint_rotate_left
# define Sigma0(x)  (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
# define Sigma1(x)  (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
# define sigma0(x)  (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
# define sigma1(x)  (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
# define Ch(x,y,z)  (((x) & (y)) ^ ((~(x)) & (z)))
# define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))

  /* NOTE(review): reads the byte stream through a uint pointer;
     assumes the platform tolerates this aliasing/alignment (block is
     typically the 64-byte aligned internal buffer) — confirm against
     target ABI. */
  uint const * W = (uint const *)block;
  do {
    uint a = state[0];
    uint b = state[1];
    uint c = state[2];
    uint d = state[3];
    uint e = state[4];
    uint f = state[5];
    uint g = state[6];
    uint h = state[7];

    uint X[16]; /* 16-entry circular message schedule window */

    ulong i;
    /* Rounds 0-15 use the (big endian) message words directly */
    for( i=0UL; i<16UL; i++ ) {
      X[i] = fd_uint_bswap( W[i] );
      uint T1 = X[i] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
      uint T2 = Sigma0(a) + Maj(a, b, c);
      h = g;
      g = f;
      f = e;
      e = d + T1;
      d = c;
      c = b;
      b = a;
      a = T1 + T2;
    }
    /* Rounds 16-63 expand the schedule in place:
       w[i] = w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2]),
       with indices reduced mod 16 into the circular window */
    for( ; i<64UL; i++ ) {
      uint s0 = X[(i + 1UL) & 0x0fUL];
      uint s1 = X[(i + 14UL) & 0x0fUL];
      s0 = sigma0(s0);
      s1 = sigma1(s1);
      X[i & 0xfUL] += s0 + s1 + X[(i + 9UL) & 0xfUL];
      uint T1 = X[i & 0xfUL ] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
      uint T2 = Sigma0(a) + Maj(a, b, c);
      h = g;
      g = f;
      f = e;
      e = d + T1;
      d = c;
      c = b;
      b = a;
      a = T1 + T2;
    }

    /* Fold the compressed block into the running hash state */

    state[0] += a;
    state[1] += b;
    state[2] += c;
    state[3] += d;
    state[4] += e;
    state[5] += f;
    state[6] += g;
    state[7] += h;

    W += 16UL; /* advance to the next 64-byte block */
  } while( --block_cnt );

# undef ROTATE
# undef Sigma0
# undef Sigma1
# undef sigma0
# undef sigma1
# undef Ch
# undef Maj

}
214 :
215 454601128 : #define fd_sha256_core fd_sha256_core_ref
216 :
217 : #elif FD_SHA256_CORE_IMPL==1
218 :
219 : /* _mm_sha256rnds2_epu32 does two rounds, one from the first uint in
220 : wk and one from the second. Since wk stores four rounds worth of
221 : message schedule values, it makes sense for the macro to do four
222 : rounds at a time. We need to permute wk in between so that the
223 : second call to the intrinsic will use the other values. */
/* Expects vu_t locals stateFEBA / stateHGDC in the expansion scope;
   wk holds four message-schedule-plus-round-constant words. */
#define FOUR_ROUNDS( wk ) do {                                                          \
  vu_t __wk = (wk);                                                                     \
  vu_t temp_state = stateFEBA;                                                          \
  stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, __wk );                      \
  stateHGDC = temp_state;                                                               \
                                                                                        \
  temp_state = stateFEBA;                                                               \
  stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, vu_permute( __wk, 2,3,0,1 ) );\
  stateHGDC = temp_state;                                                               \
} while( 0 )
234 :
235 :
236 : /* For completeness, here's the documentation for _mm_sha256msg1_epu32
237 : and _mm_sha256msg2_epu32 in a slightly reformatted way, where all
238 : values are uints, and "-" indicates a don't-care value:
239 :
240 : _mm_sha256msg1_epu32( (w[j ], w[j+1], w[j+1], w[j+3]),
241 : (w[j+4], -, -, - ) )
242 : = ( w[j ]+s0( w[j+1] ), w[j+1]+s0( w[j+2] ),
243 : w[j+2]+s0( w[j+3] ), w[j+3]+s0( w[j+4] ) ).
244 :
245 :
246 : _mm_sha256msg2_epu32( (v[j ], v[j+1], v[j+1], v[j+3]),
247 : (-, -, w[j-2], w[j-1]) )
248 : sets w[j ] = v[j ] + s1( w[j-2] ) and
249 : w[j+1] = v[j+1] + s1( w[j-1] ), and then returns
250 :
251 : ( v[j ]+s1( w[j-2] ), v[j+1]+s1( w[j-1] ),
252 : v[j+2]+s1( w[j ] ), v[j+3]+s1( w[j+1] ) ) */
253 :
254 :
255 : /* w[i] for i>= 16 is w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2])
256 : Since our vector size is 4 uints, it's only s1 that is a little
257 : problematic, because it references items in the same vector.
258 : Thankfully, the msg2 intrinsic takes care of the complexity, but we
259 : need to execute it last.
260 :
261 : We get w[i-16] and s0(s[i-15]) using the msg1 intrinsic, setting j =
262 : i-16. For example, to compute w1013, we pass in w0003 and w0407.
263 : Then we can get w[i-7] by using the alignr instruction on
264 : (w[i-8], w[i-7], w[i-6], w[i-5]) and (w[i-4], w[i-3], w[i-2], w[i-1])
265 : to concatenate them and shift by one uint. Continuing with the
266 : example of w1013, we need w080b and w0c0f. We then put
267 : v[i] = w[i-16] + s0(w[i-15]) + w[i-7],
268 : and invoke the msg2 intrinsic with j=i, which gives w[i], as desired.
269 : Each invocation of NEXT_W computes 4 values of w. */
270 :
/* Yields the next four schedule words given the previous sixteen
   (as four vectors, oldest first). */
#define NEXT_W( w_minus_16, w_minus_12, w_minus_8, w_minus_4 ) (__extension__({ \
  vu_t __w_i_16_s0_i_15 = _mm_sha256msg1_epu32( w_minus_16, w_minus_12 );       \
  vu_t __w_i_7 = _mm_alignr_epi8( w_minus_4, w_minus_8, 4 );                    \
  _mm_sha256msg2_epu32( vu_add( __w_i_7, __w_i_16_s0_i_15 ), w_minus_4 );       \
}))
276 :
/* fd_sha256_core_shaext: SHA-NI accelerated compression function.
   Same contract as fd_sha256_core_ref: updates the 8-word hash state
   in place with block_cnt (positive) 64-byte blocks at block. */

void
fd_sha256_core_shaext( uint *        state, /* 64-byte aligned, 8 entries */
                       uchar const * block, /* ideally 128-byte aligned (but not required), 64*block_cnt in size */
                       ulong         block_cnt ) {/* positive */
  vu_t stateABCD = vu_ld( state );
  vu_t stateEFGH = vu_ld( state+4UL );

  /* The sha256rnds2 intrinsic operates on state packed as (F,E,B,A)
     and (H,G,D,C); convert on entry and back on exit */

  vu_t baseFEBA = vu_permute2( stateEFGH, stateABCD, 1, 0, 1, 0 );
  vu_t baseHGDC = vu_permute2( stateEFGH, stateABCD, 3, 2, 3, 2 );

  for( ulong b=0UL; b<block_cnt; b++ ) {
    vu_t stateFEBA = baseFEBA;
    vu_t stateHGDC = baseHGDC;

    /* Load this block's 16 message words (big endian on the wire) */

    vu_t w0003 = vu_bswap( vu_ldu( block+64UL*b ) );
    vu_t w0407 = vu_bswap( vu_ldu( block+64UL*b+16UL ) );
    vu_t w080b = vu_bswap( vu_ldu( block+64UL*b+32UL ) );
    vu_t w0c0f = vu_bswap( vu_ldu( block+64UL*b+48UL ) );

    /* 64 rounds: the first 16 use the message words directly, the rest
       expand the schedule four words at a time via NEXT_W */

    /* */ FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) );
    /* */ FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) );
    /* */ FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) );
    /* */ FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) );
    vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) );
    vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) );
    vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) );
    vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) );
    vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) );
    vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) );
    vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) );
    vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) );
    vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) );
    vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) );
    vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) );
    vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) );

    /* Fold the compressed block into the running state */

    baseFEBA = vu_add( baseFEBA, stateFEBA );
    baseHGDC = vu_add( baseHGDC, stateHGDC );

  }

  stateABCD = vu_permute2( baseFEBA, baseHGDC, 3, 2, 3, 2 );
  stateEFGH = vu_permute2( baseFEBA, baseHGDC, 1, 0, 1, 0 );
  vu_st( state, stateABCD );
  vu_st( state+4UL, stateEFGH );
}
323 :
324 105853926 : #define fd_sha256_core fd_sha256_core_shaext
325 :
326 : #else
327 : #error "Unsupported FD_SHA256_CORE_IMPL"
328 : #endif
329 :
330 : fd_sha256_t *
331 1554381 : fd_sha256_init( fd_sha256_t * sha ) {
332 1554381 : sha->state[0] = FD_SHA256_INITIAL_A;
333 1554381 : sha->state[1] = FD_SHA256_INITIAL_B;
334 1554381 : sha->state[2] = FD_SHA256_INITIAL_C;
335 1554381 : sha->state[3] = FD_SHA256_INITIAL_D;
336 1554381 : sha->state[4] = FD_SHA256_INITIAL_E;
337 1554381 : sha->state[5] = FD_SHA256_INITIAL_F;
338 1554381 : sha->state[6] = FD_SHA256_INITIAL_G;
339 1554381 : sha->state[7] = FD_SHA256_INITIAL_H;
340 1554381 : sha->buf_used = 0UL;
341 1554381 : sha->bit_cnt = 0UL;
342 1554381 : return sha;
343 1554381 : }
344 :
/* Append sz bytes at _data to the in-progress calculation in sha.
   Whole 64-byte blocks are compressed immediately; any tail is
   buffered for a later append / fini.  Returns sha. */

fd_sha256_t *
fd_sha256_append( fd_sha256_t * sha,
                  void const * _data,
                  ulong sz ) {

  /* If no data to append, we are done */

  if( FD_UNLIKELY( !sz ) ) return sha; /* optimize for non-trivial append */

  /* Unpack inputs */

  uint * state = sha->state;
  uchar * buf = sha->buf;
  ulong buf_used = sha->buf_used;
  ulong bit_cnt = sha->bit_cnt;

  uchar const * data = (uchar const *)_data;

  /* Update bit_cnt (wraps mod 2^64) */
  /* FIXME: could accumulate bytes here and do bit conversion in append */
  /* FIXME: Overflow handling if more than 2^64 bits (unlikely) */

  sha->bit_cnt = bit_cnt + (sz<<3);

  /* Handle buffered bytes from previous appends */

  if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */

    /* If the append isn't large enough to complete the current block,
       buffer these bytes too and return */

    ulong buf_rem = FD_SHA256_PRIVATE_BUF_MAX - buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
    if( FD_UNLIKELY( sz < buf_rem ) ) { /* optimize for large append */
      fd_memcpy( buf + buf_used, data, sz );
      sha->buf_used = buf_used + sz;
      return sha;
    }

    /* Otherwise, buffer enough leading bytes of data to complete the
       block, update the hash and then continue processing any remaining
       bytes of data. */

    fd_memcpy( buf + buf_used, data, buf_rem );
    data += buf_rem;
    sz   -= buf_rem;

    fd_sha256_core( state, buf, 1UL );
    sha->buf_used = 0UL;
  }

  /* Append the bulk of the data (sz >> lg(64) whole blocks straight
     from the caller's buffer, no copy) */

  ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
  if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt ); /* optimized for large append */

  /* Buffer any leftover bytes */

  buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL); /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
  if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */
    fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
    sha->buf_used = buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
  }

  return sha;
}
410 :
/* Finish the in-progress calculation in sha and write the 32-byte
   digest to _hash (no alignment requirement).  Returns _hash.  Note
   that sha->state is byte swapped in place below, so sha must be
   fd_sha256_init'd again before it can be reused. */

void *
fd_sha256_fini( fd_sha256_t * sha,
                void *        _hash ) {

  /* Unpack inputs */

  uint * state = sha->state;
  uchar * buf = sha->buf;
  ulong buf_used = sha->buf_used; /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
  ulong bit_cnt = sha->bit_cnt;

  /* Append the terminating message byte */

  buf[ buf_used ] = (uchar)0x80;
  buf_used++;

  /* If there isn't enough room to save the message length in bits at
     the end of the in progress block, clear the rest of the in progress
     block, update the hash and start a new block. */

  if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) { /* optimize for well aligned use of append */
    fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
    fd_sha256_core( state, buf, 1UL );
    buf_used = 0UL;
  }

  /* Clear in progress block up to last 64-bits, append the message
     size in bits (big endian) in the last 64-bits of the in progress
     block and update the hash to finalize it. */

  fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
  FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
  fd_sha256_core( state, buf, 1UL );

  /* Unpack the result into md (annoying bswaps here) */

  state[0] = fd_uint_bswap( state[0] );
  state[1] = fd_uint_bswap( state[1] );
  state[2] = fd_uint_bswap( state[2] );
  state[3] = fd_uint_bswap( state[3] );
  state[4] = fd_uint_bswap( state[4] );
  state[5] = fd_uint_bswap( state[5] );
  state[6] = fd_uint_bswap( state[6] );
  state[7] = fd_uint_bswap( state[7] );
  return memcpy( _hash, state, 32 );
}
457 :
/* One-shot SHA-256 of the sz bytes at _data, writing the 32-byte
   digest to _hash (no alignment requirements).  Returns _hash. */

void *
fd_sha256_hash( void const * _data,
                ulong        sz,
                void *       _hash ) {
  uchar const * data = (uchar const *)_data;

  /* This is just the above streamlined to eliminate all the overheads
     to support incremental hashing. */

  uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
  uint  state[8] __attribute__((aligned(32)));

  state[0] = FD_SHA256_INITIAL_A;
  state[1] = FD_SHA256_INITIAL_B;
  state[2] = FD_SHA256_INITIAL_C;
  state[3] = FD_SHA256_INITIAL_D;
  state[4] = FD_SHA256_INITIAL_E;
  state[5] = FD_SHA256_INITIAL_F;
  state[6] = FD_SHA256_INITIAL_G;
  state[7] = FD_SHA256_INITIAL_H;

  /* Compress all whole 64-byte blocks straight from the caller's
     buffer, then copy any tail into the local padding buffer */

  ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
  if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt );

  ulong buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL);
  if( FD_UNLIKELY( buf_used ) ) fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
  buf[ buf_used ] = (uchar)0x80; /* terminating message byte */
  buf_used++;

  /* If the 64-bit length field doesn't fit, flush a padding-only block */

  if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) {
    fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
    fd_sha256_core( state, buf, 1UL );
    buf_used = 0UL;
  }

  /* Zero pad, append the message bit count (big endian) and finalize */

  ulong bit_cnt = sz << 3;
  fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
  FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
  fd_sha256_core( state, buf, 1UL );

  /* Unpack the big endian digest into _hash */

  state[0] = fd_uint_bswap( state[0] );
  state[1] = fd_uint_bswap( state[1] );
  state[2] = fd_uint_bswap( state[2] );
  state[3] = fd_uint_bswap( state[3] );
  state[4] = fd_uint_bswap( state[4] );
  state[5] = fd_uint_bswap( state[5] );
  state[6] = fd_uint_bswap( state[6] );
  state[7] = fd_uint_bswap( state[7] );
  return memcpy( _hash, state, 32 );
}
508 :
509 :
510 :
511 : void *
512 : fd_sha256_hash_32_repeated( void const * _data,
513 : void * _hash,
514 303099 : ulong cnt ) {
515 303099 : uchar const * data = (uchar const *)_data;
516 303099 : uchar * hash = (uchar *)_hash;
517 101033 : #if FD_HAS_SHANI
518 101033 : vu_t w0003 = vu_bswap( vu_ldu( data ) );
519 101033 : vu_t w0407 = vu_bswap( vu_ldu( data+16UL ) );
520 101033 : vb_t const w080b = vb( 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
521 101033 : 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 );
522 101033 : vb_t const w0c0f = vb( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
523 101033 : 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00 ); /* 32 bytes */
524 :
525 101033 : vu_t const initialFEBA = vu( FD_SHA256_INITIAL_F, FD_SHA256_INITIAL_E, FD_SHA256_INITIAL_B, FD_SHA256_INITIAL_A );
526 101033 : vu_t const initialHGDC = vu( FD_SHA256_INITIAL_H, FD_SHA256_INITIAL_G, FD_SHA256_INITIAL_D, FD_SHA256_INITIAL_C );
527 :
528 122902232 : for( ulong iter=0UL; iter<cnt; iter++ ) {
529 122801199 : vu_t stateFEBA = initialFEBA;
530 122801199 : vu_t stateHGDC = initialHGDC;
531 :
532 :
533 :
534 122801199 : /* */ FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) );
535 122801199 : /* */ FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) );
536 122801199 : /* */ FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) );
537 122801199 : /* */ FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) );
538 122801199 : vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) );
539 122801199 : vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) );
540 122801199 : vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) );
541 122801199 : vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) );
542 122801199 : vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) );
543 122801199 : vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) );
544 122801199 : vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) );
545 122801199 : vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) );
546 122801199 : vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) );
547 122801199 : vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) );
548 122801199 : vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) );
549 122801199 : vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) );
550 :
551 122801199 : stateFEBA = vu_add( stateFEBA, initialFEBA );
552 122801199 : stateHGDC = vu_add( stateHGDC, initialHGDC );
553 :
554 122801199 : vu_t stateABCD = vu_permute2( stateFEBA, stateHGDC, 3, 2, 3, 2 );
555 122801199 : vu_t stateEFGH = vu_permute2( stateFEBA, stateHGDC, 1, 0, 1, 0 );
556 :
557 122801199 : w0003 = stateABCD;
558 122801199 : w0407 = stateEFGH;
559 122801199 : }
560 101033 : vu_stu( hash, vu_bswap( w0003 ) );
561 101033 : vu_stu( hash+16UL, vu_bswap( w0407 ) );
562 101033 : #undef FOUND_ROUNDS
563 101033 : #undef NEXT_W
564 :
565 : #else
566 :
567 202066 : uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
568 :
569 : /* Prepare padding once */
570 202066 : ulong buf_used = 32UL;
571 202066 : memcpy( buf, data, 32UL );
572 202066 : buf[ buf_used ] = (uchar)0x80;
573 202066 : buf_used++;
574 :
575 202066 : ulong bit_cnt = 32UL << 3;
576 202066 : memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
577 202066 : FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
578 :
579 : /* This is just the above streamlined to eliminate all the overheads
580 : to support incremental hashing. */
581 245804464 : for( ulong iter=0UL; iter<cnt; iter++ ) {
582 :
583 245602398 : uint state[8] __attribute__((aligned(32)));
584 :
585 245602398 : state[0] = FD_SHA256_INITIAL_A;
586 245602398 : state[1] = FD_SHA256_INITIAL_B;
587 245602398 : state[2] = FD_SHA256_INITIAL_C;
588 245602398 : state[3] = FD_SHA256_INITIAL_D;
589 245602398 : state[4] = FD_SHA256_INITIAL_E;
590 245602398 : state[5] = FD_SHA256_INITIAL_F;
591 245602398 : state[6] = FD_SHA256_INITIAL_G;
592 245602398 : state[7] = FD_SHA256_INITIAL_H;
593 :
594 245602398 : fd_sha256_core( state, buf, 1UL );
595 :
596 245602398 : state[0] = fd_uint_bswap( state[0] );
597 245602398 : state[1] = fd_uint_bswap( state[1] );
598 245602398 : state[2] = fd_uint_bswap( state[2] );
599 245602398 : state[3] = fd_uint_bswap( state[3] );
600 245602398 : state[4] = fd_uint_bswap( state[4] );
601 245602398 : state[5] = fd_uint_bswap( state[5] );
602 245602398 : state[6] = fd_uint_bswap( state[6] );
603 245602398 : state[7] = fd_uint_bswap( state[7] );
604 245602398 : memcpy( buf, state, 32UL );
605 245602398 : }
606 202066 : memcpy( hash, buf, 32UL );
607 202066 : #endif
608 303099 : return _hash;
609 303099 : }
610 :
611 : #undef fd_sha256_core
|