Line data Source code
1 : #include "fd_sha256.h"
2 : #include "fd_sha256_constants.h"
3 :
4 : #if FD_HAS_SHANI
5 : /* For the optimized repeated hash */
6 : #include "../../util/simd/fd_sse.h"
7 : #endif
8 :
9 : ulong
10 929106 : fd_sha256_align( void ) {
11 929106 : return FD_SHA256_ALIGN;
12 929106 : }
13 :
14 : ulong
15 464526 : fd_sha256_footprint( void ) {
16 464526 : return FD_SHA256_FOOTPRINT;
17 464526 : }
18 :
19 : void *
20 464529 : fd_sha256_new( void * shmem ) {
21 464529 : fd_sha256_t * sha = (fd_sha256_t *)shmem;
22 :
23 464529 : if( FD_UNLIKELY( !shmem ) ) {
24 3 : FD_LOG_WARNING(( "NULL shmem" ));
25 3 : return NULL;
26 3 : }
27 :
28 464526 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_sha256_align() ) ) ) {
29 3 : FD_LOG_WARNING(( "misaligned shmem" ));
30 3 : return NULL;
31 3 : }
32 :
33 464523 : ulong footprint = fd_sha256_footprint();
34 :
35 464523 : fd_memset( sha, 0, footprint );
36 :
37 464523 : FD_COMPILER_MFENCE();
38 464523 : FD_VOLATILE( sha->magic ) = FD_SHA256_MAGIC;
39 464523 : FD_COMPILER_MFENCE();
40 :
41 464523 : return (void *)sha;
42 464526 : }
43 :
44 : fd_sha256_t *
45 464529 : fd_sha256_join( void * shsha ) {
46 :
47 464529 : if( FD_UNLIKELY( !shsha ) ) {
48 3 : FD_LOG_WARNING(( "NULL shsha" ));
49 3 : return NULL;
50 3 : }
51 :
52 464526 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
53 3 : FD_LOG_WARNING(( "misaligned shsha" ));
54 3 : return NULL;
55 3 : }
56 :
57 464523 : fd_sha256_t * sha = (fd_sha256_t *)shsha;
58 :
59 464523 : if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
60 0 : FD_LOG_WARNING(( "bad magic" ));
61 0 : return NULL;
62 0 : }
63 :
64 464523 : return sha;
65 464523 : }
66 :
67 : void *
68 51 : fd_sha256_leave( fd_sha256_t * sha ) {
69 :
70 51 : if( FD_UNLIKELY( !sha ) ) {
71 3 : FD_LOG_WARNING(( "NULL sha" ));
72 3 : return NULL;
73 3 : }
74 :
75 48 : return (void *)sha;
76 51 : }
77 :
78 : void *
79 54 : fd_sha256_delete( void * shsha ) {
80 :
81 54 : if( FD_UNLIKELY( !shsha ) ) {
82 3 : FD_LOG_WARNING(( "NULL shsha" ));
83 3 : return NULL;
84 3 : }
85 :
86 51 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
87 3 : FD_LOG_WARNING(( "misaligned shsha" ));
88 3 : return NULL;
89 3 : }
90 :
91 48 : fd_sha256_t * sha = (fd_sha256_t *)shsha;
92 :
93 48 : if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
94 0 : FD_LOG_WARNING(( "bad magic" ));
95 0 : return NULL;
96 0 : }
97 :
98 48 : FD_COMPILER_MFENCE();
99 48 : FD_VOLATILE( sha->magic ) = 0UL;
100 48 : FD_COMPILER_MFENCE();
101 :
102 48 : return (void *)sha;
103 48 : }
104 :
105 : #ifndef FD_SHA256_CORE_IMPL
106 : #if FD_HAS_SHANI
107 : #define FD_SHA256_CORE_IMPL 1
108 : #else
109 : #define FD_SHA256_CORE_IMPL 0
110 : #endif
111 : #endif
112 :
113 : #if FD_SHA256_CORE_IMPL==0
114 :
115 : /* The implementation below was derived from OpenSSL's SHA-256
116 : implementation (Apache-2.0 licensed). See in particular:
117 :
118 : https://github.com/openssl/openssl/blob/master/crypto/sha/sha256.c
119 :
120 : (link valid circa 2022-Dec). It has been made stricter, given more
121 : extensive implementation documentation, and has been simplified and
122 : streamlined specifically for use inside the Firedancer base machine
123 : model (no machine specific capabilities required).
124 :
125 : In particular, fd_sha256_core_ref is based on OpenSSL's
126 : OPENSSL_SMALL_FOOTPRINT SHA-256 implementation (Apache licensed).
127 : This should work anywhere but it is not the highest performance
128 : implementation possible.
129 :
130 : It is also straightforward to replace these implementations with HPC
131 : implementations that target specific machine capabilities without
132 : requiring any changes to caller code. */
133 :
134 : static void
135 : fd_sha256_core_ref( uint * state,
136 : uchar const * block,
137 453019068 : ulong block_cnt ) {
138 :
139 :
140 >34540*10^7 : # define ROTATE fd_uint_rotate_left
141 38378518912 : # define Sigma0(x) (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
142 38378518912 : # define Sigma1(x) (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
143 28783889184 : # define sigma0(x) (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
144 28783889184 : # define sigma1(x) (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
145 38378518912 : # define Ch(x,y,z) (((x) & (y)) ^ ((~(x)) & (z)))
146 38378518912 : # define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
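/* Note: FIPS 180-4 specifies these functions with right-rotates; the
   left-rotate forms above are equivalent because ROTR(x,n) ==
   ROTL(x,32-n) for 32-bit x.  E.g. Sigma0(x) = ROTR(x,2) ^ ROTR(x,13)
   ^ ROTR(x,22) becomes ROTATE(x,30) ^ ROTATE(x,19) ^ ROTATE(x,10). */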
147 :
148 453019068 : uint const * W = (uint const *)block;
149 599664358 : do {
150 599664358 : uint a = state[0];
151 599664358 : uint b = state[1];
152 599664358 : uint c = state[2];
153 599664358 : uint d = state[3];
154 599664358 : uint e = state[4];
155 599664358 : uint f = state[5];
156 599664358 : uint g = state[6];
157 599664358 : uint h = state[7];
158 :
159 599664358 : uint X[16];
160 :
161 599664358 : ulong i;
162 10194294086 : for( i=0UL; i<16UL; i++ ) {
163 9594629728 : X[i] = fd_uint_bswap( W[i] );
164 9594629728 : uint T1 = X[i] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
165 9594629728 : uint T2 = Sigma0(a) + Maj(a, b, c);
166 9594629728 : h = g;
167 9594629728 : g = f;
168 9594629728 : f = e;
169 9594629728 : e = d + T1;
170 9594629728 : d = c;
171 9594629728 : c = b;
172 9594629728 : b = a;
173 9594629728 : a = T1 + T2;
174 9594629728 : }
175 29383553542 : for( ; i<64UL; i++ ) {
176 28783889184 : uint s0 = X[(i + 1UL) & 0x0fUL];
177 28783889184 : uint s1 = X[(i + 14UL) & 0x0fUL];
178 28783889184 : s0 = sigma0(s0);
179 28783889184 : s1 = sigma1(s1);
180 28783889184 : X[i & 0xfUL] += s0 + s1 + X[(i + 9UL) & 0xfUL];
181 28783889184 : uint T1 = X[i & 0xfUL ] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
182 28783889184 : uint T2 = Sigma0(a) + Maj(a, b, c);
183 28783889184 : h = g;
184 28783889184 : g = f;
185 28783889184 : f = e;
186 28783889184 : e = d + T1;
187 28783889184 : d = c;
188 28783889184 : c = b;
189 28783889184 : b = a;
190 28783889184 : a = T1 + T2;
191 28783889184 : }
192 :
193 599664358 : state[0] += a;
194 599664358 : state[1] += b;
195 599664358 : state[2] += c;
196 599664358 : state[3] += d;
197 599664358 : state[4] += e;
198 599664358 : state[5] += f;
199 599664358 : state[6] += g;
200 599664358 : state[7] += h;
201 :
202 599664358 : W += 16UL;
203 599664358 : } while( --block_cnt );
204 :
205 453019068 : # undef ROTATE
206 453019068 : # undef Sigma0
207 453019068 : # undef Sigma1
208 453019068 : # undef sigma0
209 453019068 : # undef sigma1
210 453019068 : # undef Ch
211 453019068 : # undef Maj
212 :
213 453019068 : }
214 :
215 453019068 : #define fd_sha256_core fd_sha256_core_ref
216 :
217 : #elif FD_SHA256_CORE_IMPL==1
218 :
219 : /* _mm_sha256rnds2_epu32 does two rounds, one from the first uint in
220 : wk and one from the second. Since wk stores four rounds' worth of
221 : message schedule values, it makes sense for the macro to do four
222 : rounds at a time. We need to permute wk in between so that the
223 : second call to the intrinsic will use the other values. */
224 4954064256 : #define FOUR_ROUNDS( wk ) do { \
225 4954064256 : vu_t __wk = (wk); \
226 4954064256 : vu_t temp_state = stateFEBA; \
227 4954064256 : stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, __wk ); \
228 4954064256 : stateHGDC = temp_state; \
229 4954064256 : \
230 4954064256 : temp_state = stateFEBA; \
231 4954064256 : stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, vu_permute( __wk, 2,3,0,1 ) );\
232 4954064256 : stateHGDC = temp_state; \
233 4954064256 : } while( 0 )
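/* Lane layout note (per the Intel SHA extensions definition): one
   operand of _mm_sha256rnds2_epu32 holds state words {F,E,B,A} in
   lanes 0..3 and the other holds {H,G,D,C}; the stateFEBA/stateHGDC
   names used throughout follow that convention. */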
234 :
235 :
236 : /* For completeness, here's the documentation for _mm_sha256msg1_epu32
237 : and _mm_sha256msg2_epu32 in a slightly reformatted way, where all
238 : values are uints, and "-" indicates a don't-care value:
239 :
240 : _mm_sha256msg1_epu32( (w[j ], w[j+1], w[j+2], w[j+3]),
241 : (w[j+4], -, -, - ) )
242 : = ( w[j ]+s0( w[j+1] ), w[j+1]+s0( w[j+2] ),
243 : w[j+2]+s0( w[j+3] ), w[j+3]+s0( w[j+4] ) ).
244 :
245 :
246 : _mm_sha256msg2_epu32( (v[j ], v[j+1], v[j+2], v[j+3]),
247 : (-, -, w[j-2], w[j-1]) )
248 : sets w[j ] = v[j ] + s1( w[j-2] ) and
249 : w[j+1] = v[j+1] + s1( w[j-1] ), and then returns
250 :
251 : ( v[j ]+s1( w[j-2] ), v[j+1]+s1( w[j-1] ),
252 : v[j+2]+s1( w[j ] ), v[j+3]+s1( w[j+1] ) ) */
253 :
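/* As a scalar sketch (illustrative only, not compiled in), the two
   intrinsics compute, per lane, the following, where s0/s1 match the
   sigma0/sigma1 functions from the reference path above: */

#if 0
static inline uint s0( uint x ) { return fd_uint_rotate_left( x, 25 ) ^ fd_uint_rotate_left( x, 14 ) ^ (x>> 3); }
static inline uint s1( uint x ) { return fd_uint_rotate_left( x, 15 ) ^ fd_uint_rotate_left( x, 13 ) ^ (x>>10); }

/* _mm_sha256msg1_epu32: out[k] = w[j+k] + s0( w[j+k+1] ), k in [0,4) */
static void
msg1_ref( uint out[4], uint const w[5] ) {
  for( ulong k=0UL; k<4UL; k++ ) out[k] = w[k] + s0( w[k+1UL] );
}

/* _mm_sha256msg2_epu32: out[k] = v[k] + s1( w[j+k-2] ); lanes 2 and 3
   consume the freshly computed lanes 0 and 1 */
static void
msg2_ref( uint out[4], uint const v[4], uint w_jm2, uint w_jm1 ) {
  out[0] = v[0] + s1( w_jm2  );
  out[1] = v[1] + s1( w_jm1  );
  out[2] = v[2] + s1( out[0] );
  out[3] = v[3] + s1( out[1] );
}
#endif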
254 :
255 : /* w[i] for i>= 16 is w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2])
256 : Since our vector size is 4 uints, it's only s1 that is a little
257 : problematic, because it references items in the same vector.
258 : Thankfully, the msg2 intrinsic takes care of the complexity, but we
259 : need to execute it last.
260 :
261 : We get w[i-16] and s0(w[i-15]) using the msg1 intrinsic, setting j =
262 : i-16. For example, to compute w1013, we pass in w0003 and w0407.
263 : Then we can get w[i-7] by using the alignr instruction on
264 : (w[i-8], w[i-7], w[i-6], w[i-5]) and (w[i-4], w[i-3], w[i-2], w[i-1])
265 : to concatenate them and shift by one uint. Continuing with the
266 : example of w1013, we need w080b and w0c0f. We then put
267 : v[i] = w[i-16] + s0(w[i-15]) + w[i-7],
268 : and invoke the msg2 intrinsic with j=i, which gives w[i], as desired.
269 : Each invocation of NEXT_W computes 4 values of w. */
270 :
271 3715548192 : #define NEXT_W( w_minus_16, w_minus_12, w_minus_8, w_minus_4 ) (__extension__({ \
272 3715548192 : vu_t __w_i_16_s0_i_15 = _mm_sha256msg1_epu32( w_minus_16, w_minus_12 ); \
273 3715548192 : vu_t __w_i_7 = _mm_alignr_epi8( w_minus_4, w_minus_8, 4 ); \
274 3715548192 : _mm_sha256msg2_epu32( vu_add( __w_i_7, __w_i_16_s0_i_15 ), w_minus_4 ); \
275 3715548192 : }))
276 :
277 : /* Zen 5's sha256rnds2 has a reciprocal throughput of 2 cycles, while Zen 4's
278 : is 1. We can win back some performance by moving the message schedule updates
279 : earlier in the loop; this improves speed by ~1M hashes/s for repeated hashing. */
280 : #ifdef __znver5__
281 : #define FULL_ROUNDS() do { \
282 : vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) ); \
283 : vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) ); \
284 : vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) ); \
285 : vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) ); \
286 : vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) ); \
287 : vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) ); \
288 : vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) ); \
289 : vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) ); \
290 : vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) ); \
291 : vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) ); \
292 : vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) ); \
293 : vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) ); \
294 : /* */ FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) ); \
295 : /* */ FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) ); \
296 : /* */ FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) ); \
297 : /* */ FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) ); \
298 : } while ( 0 )
299 : #else
300 309629016 : #define FULL_ROUNDS() do { \
301 309629016 : /* */ FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) ); \
302 309629016 : /* */ FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) ); \
303 309629016 : /* */ FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) ); \
304 309629016 : /* */ FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) ); \
305 309629016 : vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) ); \
306 309629016 : vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) ); \
307 309629016 : vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) ); \
308 309629016 : vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) ); \
309 309629016 : vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) ); \
310 309629016 : vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) ); \
311 309629016 : vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) ); \
312 309629016 : vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) ); \
313 309629016 : vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) ); \
314 309629016 : vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) ); \
315 309629016 : vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) ); \
316 309629016 : vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) ); \
317 309629016 : } while ( 0 )
318 : #endif
319 :
320 :
321 : void
322 : fd_sha256_core_shaext( uint * state, /* 64-byte aligned, 8 entries */
323 : uchar const * block, /* ideally 128-byte aligned (but not required), 64*block_cnt in size */
324 105062896 : ulong block_cnt ) {/* positive */
325 105062896 : vu_t stateABCD = vu_ld( state );
326 105062896 : vu_t stateEFGH = vu_ld( state+4UL );
327 :
328 105062896 : vu_t baseFEBA = vu_permute2( stateEFGH, stateABCD, 1, 0, 1, 0 );
329 105062896 : vu_t baseHGDC = vu_permute2( stateEFGH, stateABCD, 3, 2, 3, 2 );
330 :
331 291890713 : for( ulong b=0UL; b<block_cnt; b++ ) {
332 186827817 : vu_t stateFEBA = baseFEBA;
333 186827817 : vu_t stateHGDC = baseHGDC;
334 :
335 186827817 : vu_t w0003 = vu_bswap( vu_ldu( block+64UL*b ) );
336 186827817 : vu_t w0407 = vu_bswap( vu_ldu( block+64UL*b+16UL ) );
337 186827817 : vu_t w080b = vu_bswap( vu_ldu( block+64UL*b+32UL ) );
338 186827817 : vu_t w0c0f = vu_bswap( vu_ldu( block+64UL*b+48UL ) );
339 :
340 186827817 : FULL_ROUNDS();
341 :
342 186827817 : baseFEBA = vu_add( baseFEBA, stateFEBA );
343 186827817 : baseHGDC = vu_add( baseHGDC, stateHGDC );
344 :
345 186827817 : }
346 :
347 105062896 : stateABCD = vu_permute2( baseFEBA, baseHGDC, 3, 2, 3, 2 );
348 105062896 : stateEFGH = vu_permute2( baseFEBA, baseHGDC, 1, 0, 1, 0 );
349 105062896 : vu_st( state, stateABCD );
350 105062896 : vu_st( state+4UL, stateEFGH );
351 105062896 : }
352 :
353 105062896 : #define fd_sha256_core fd_sha256_core_shaext
354 :
355 : #else
356 : #error "Unsupported FD_SHA256_CORE_IMPL"
357 : #endif
358 :
359 : fd_sha256_t *
360 1566987 : fd_sha256_init( fd_sha256_t * sha ) {
361 1566987 : sha->state[0] = FD_SHA256_INITIAL_A;
362 1566987 : sha->state[1] = FD_SHA256_INITIAL_B;
363 1566987 : sha->state[2] = FD_SHA256_INITIAL_C;
364 1566987 : sha->state[3] = FD_SHA256_INITIAL_D;
365 1566987 : sha->state[4] = FD_SHA256_INITIAL_E;
366 1566987 : sha->state[5] = FD_SHA256_INITIAL_F;
367 1566987 : sha->state[6] = FD_SHA256_INITIAL_G;
368 1566987 : sha->state[7] = FD_SHA256_INITIAL_H;
369 1566987 : sha->buf_used = 0UL;
370 1566987 : sha->bit_cnt = 0UL;
371 1566987 : return sha;
372 1566987 : }
373 :
374 : fd_sha256_t *
375 : fd_sha256_append( fd_sha256_t * sha,
376 : void const * _data,
377 5721051 : ulong sz ) {
378 :
379 : /* If no data to append, we are done */
380 :
381 5721051 : if( FD_UNLIKELY( !sz ) ) return sha; /* optimize for non-trivial append */
382 :
383 : /* Unpack inputs */
384 :
385 5720793 : uint * state = sha->state;
386 5720793 : uchar * buf = sha->buf;
387 5720793 : ulong buf_used = sha->buf_used;
388 5720793 : ulong bit_cnt = sha->bit_cnt;
389 :
390 5720793 : uchar const * data = (uchar const *)_data;
391 :
392 : /* Update bit_cnt */
393 : /* FIXME: could accumulate bytes here and do bit conversion in fini */
394 : /* FIXME: Overflow handling if more than 2^64 bits (unlikely) */
395 :
396 5720793 : sha->bit_cnt = bit_cnt + (sz<<3);
397 :
398 : /* Handle buffered bytes from previous appends */
399 :
400 5720793 : if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */
401 :
402 : /* If the append isn't large enough to complete the current block,
403 : buffer these bytes too and return */
404 :
405 79566 : ulong buf_rem = FD_SHA256_PRIVATE_BUF_MAX - buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
406 79566 : if( FD_UNLIKELY( sz < buf_rem ) ) { /* optimize for large append */
407 576 : fd_memcpy( buf + buf_used, data, sz );
408 576 : sha->buf_used = buf_used + sz;
409 576 : return sha;
410 576 : }
411 :
412 : /* Otherwise, buffer enough leading bytes of data to complete the
413 : block, update the hash and then continue processing any remaining
414 : bytes of data. */
415 :
416 78990 : fd_memcpy( buf + buf_used, data, buf_rem );
417 78990 : data += buf_rem;
418 78990 : sz -= buf_rem;
419 :
420 78990 : fd_sha256_core( state, buf, 1UL );
421 78990 : sha->buf_used = 0UL;
422 78990 : }
423 :
424 : /* Append the bulk of the data */
425 :
426 5720217 : ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
427 5720217 : if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt ); /* optimized for large append */
428 :
429 : /* Buffer any leftover bytes */
430 :
431 5720217 : buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL); /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
432 5720217 : if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */
433 1345872 : fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
434 1345872 : sha->buf_used = buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
435 1345872 : }
436 :
437 5720217 : return sha;
438 5720793 : }
439 :
440 : void *
441 : fd_sha256_fini( fd_sha256_t * sha,
442 1615596 : void * _hash ) {
443 :
444 : /* Unpack inputs */
445 :
446 1615596 : uint * state = sha->state;
447 1615596 : uchar * buf = sha->buf;
448 1615596 : ulong buf_used = sha->buf_used; /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
449 1615596 : ulong bit_cnt = sha->bit_cnt;
450 :
451 : /* Append the terminating message byte */
452 :
453 1615596 : buf[ buf_used ] = (uchar)0x80;
454 1615596 : buf_used++;
455 :
456 : /* If there isn't enough room to save the message length in bits at
457 : the end of the in progress block, clear the rest of the in progress
458 : block, update the hash and start a new block. */
459 :
460 1615596 : if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) { /* optimize for well aligned use of append */
461 12150 : fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
462 12150 : fd_sha256_core( state, buf, 1UL );
463 12150 : buf_used = 0UL;
464 12150 : }
465 :
466 : /* Clear the in-progress block up to the last 64 bits, append the
467 : message length in bits in the last 64 bits of the in-progress
468 : block and update the hash to finalize it. */
469 :
470 1615596 : fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
471 1615596 : FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
472 1615596 : fd_sha256_core( state, buf, 1UL );
473 :
474 : /* Unpack the result into _hash (annoying bswaps here) */
475 :
476 1615596 : state[0] = fd_uint_bswap( state[0] );
477 1615596 : state[1] = fd_uint_bswap( state[1] );
478 1615596 : state[2] = fd_uint_bswap( state[2] );
479 1615596 : state[3] = fd_uint_bswap( state[3] );
480 1615596 : state[4] = fd_uint_bswap( state[4] );
481 1615596 : state[5] = fd_uint_bswap( state[5] );
482 1615596 : state[6] = fd_uint_bswap( state[6] );
483 1615596 : state[7] = fd_uint_bswap( state[7] );
484 1615596 : return memcpy( _hash, state, 32 );
485 1615596 : }
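/* A minimal usage sketch of the incremental API above (illustrative
   only; msg and its size are hypothetical): streaming bytes through
   init/append/fini yields the same digest as one-shot fd_sha256_hash.

     uchar hash_a[ 32 ]; uchar hash_b[ 32 ];
     fd_sha256_t mem[1];
     fd_sha256_t * sha = fd_sha256_join( fd_sha256_new( mem ) );
     fd_sha256_init  ( sha );
     fd_sha256_append( sha, msg,      7UL );
     fd_sha256_append( sha, msg+7UL, 93UL );
     fd_sha256_fini  ( sha, hash_a );
     fd_sha256_hash  ( msg, 100UL, hash_b );
     assert( !memcmp( hash_a, hash_b, 32UL ) ); */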
486 :
487 : void *
488 : fd_sha256_hash( void const * _data,
489 : ulong sz,
490 187345835 : void * _hash ) {
491 187345835 : uchar const * data = (uchar const *)_data;
492 :
493 : /* This is just the above streamlined to eliminate all the overheads
494 : to support incremental hashing. */
495 :
496 187345835 : uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
497 187345835 : uint state[8] __attribute__((aligned(32)));
498 :
499 187345835 : state[0] = FD_SHA256_INITIAL_A;
500 187345835 : state[1] = FD_SHA256_INITIAL_B;
501 187345835 : state[2] = FD_SHA256_INITIAL_C;
502 187345835 : state[3] = FD_SHA256_INITIAL_D;
503 187345835 : state[4] = FD_SHA256_INITIAL_E;
504 187345835 : state[5] = FD_SHA256_INITIAL_F;
505 187345835 : state[6] = FD_SHA256_INITIAL_G;
506 187345835 : state[7] = FD_SHA256_INITIAL_H;
507 :
508 187345835 : ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
509 187345835 : if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt );
510 :
511 187345835 : ulong buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL);
512 187345835 : if( FD_UNLIKELY( buf_used ) ) fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
513 187345835 : buf[ buf_used ] = (uchar)0x80;
514 187345835 : buf_used++;
515 :
516 187345835 : if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) {
517 1176942 : fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
518 1176942 : fd_sha256_core( state, buf, 1UL );
519 1176942 : buf_used = 0UL;
520 1176942 : }
521 :
522 187345835 : ulong bit_cnt = sz << 3;
523 187345835 : fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
524 187345835 : FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
525 187345835 : fd_sha256_core( state, buf, 1UL );
526 :
527 187345835 : state[0] = fd_uint_bswap( state[0] );
528 187345835 : state[1] = fd_uint_bswap( state[1] );
529 187345835 : state[2] = fd_uint_bswap( state[2] );
530 187345835 : state[3] = fd_uint_bswap( state[3] );
531 187345835 : state[4] = fd_uint_bswap( state[4] );
532 187345835 : state[5] = fd_uint_bswap( state[5] );
533 187345835 : state[6] = fd_uint_bswap( state[6] );
534 187345835 : state[7] = fd_uint_bswap( state[7] );
535 187345835 : return memcpy( _hash, state, 32 );
536 187345835 : }
537 :
538 :
539 :
540 : void *
541 : fd_sha256_hash_32_repeated( void const * _data,
542 : void * _hash,
543 303099 : ulong cnt ) {
544 303099 : uchar const * data = (uchar const *)_data;
545 303099 : uchar * hash = (uchar *)_hash;
546 101033 : #if FD_HAS_SHANI
547 101033 : vu_t w0003 = vu_bswap( vu_ldu( data ) );
548 101033 : vu_t w0407 = vu_bswap( vu_ldu( data+16UL ) );
549 101033 : vb_t const w080b = vb( 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
550 101033 : 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 );
551 101033 : vb_t const w0c0f = vb( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
552 101033 : 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00 ); /* 32 bytes */
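/* Note: w080b/w0c0f above are the fixed SHA-256 padding for a 32-byte
   message, already in the post-bswap order the rounds expect: w[8] =
   0x80000000 is the 0x80 terminator byte and w[15] = 0x100 encodes
   the message length, 32*8 = 256 bits. */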
553 :
554 101033 : vu_t const initialFEBA = vu( FD_SHA256_INITIAL_F, FD_SHA256_INITIAL_E, FD_SHA256_INITIAL_B, FD_SHA256_INITIAL_A );
555 101033 : vu_t const initialHGDC = vu( FD_SHA256_INITIAL_H, FD_SHA256_INITIAL_G, FD_SHA256_INITIAL_D, FD_SHA256_INITIAL_C );
556 :
557 122902232 : for( ulong iter=0UL; iter<cnt; iter++ ) {
558 122801199 : vu_t stateFEBA = initialFEBA;
559 122801199 : vu_t stateHGDC = initialHGDC;
560 :
561 122801199 : FULL_ROUNDS();
562 :
563 122801199 : stateFEBA = vu_add( stateFEBA, initialFEBA );
564 122801199 : stateHGDC = vu_add( stateHGDC, initialHGDC );
565 :
566 122801199 : vu_t stateABCD = vu_permute2( stateFEBA, stateHGDC, 3, 2, 3, 2 );
567 122801199 : vu_t stateEFGH = vu_permute2( stateFEBA, stateHGDC, 1, 0, 1, 0 );
568 :
569 122801199 : w0003 = stateABCD;
570 122801199 : w0407 = stateEFGH;
571 122801199 : }
572 101033 : vu_stu( hash, vu_bswap( w0003 ) );
573 101033 : vu_stu( hash+16UL, vu_bswap( w0407 ) );
574 101033 : #undef NEXT_W
575 101033 : #undef FOUR_ROUNDS
576 101033 : #undef FULL_ROUNDS
577 :
578 : #else
579 :
580 202066 : uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
581 :
582 : /* Prepare padding once */
583 202066 : ulong buf_used = 32UL;
584 202066 : memcpy( buf, data, 32UL );
585 202066 : buf[ buf_used ] = (uchar)0x80;
586 202066 : buf_used++;
587 :
588 202066 : ulong bit_cnt = 32UL << 3;
589 202066 : memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
590 202066 : FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
591 :
592 : /* This is just the above streamlined to eliminate all the overheads
593 : to support incremental hashing. */
594 245804464 : for( ulong iter=0UL; iter<cnt; iter++ ) {
595 :
596 245602398 : uint state[8] __attribute__((aligned(32)));
597 :
598 245602398 : state[0] = FD_SHA256_INITIAL_A;
599 245602398 : state[1] = FD_SHA256_INITIAL_B;
600 245602398 : state[2] = FD_SHA256_INITIAL_C;
601 245602398 : state[3] = FD_SHA256_INITIAL_D;
602 245602398 : state[4] = FD_SHA256_INITIAL_E;
603 245602398 : state[5] = FD_SHA256_INITIAL_F;
604 245602398 : state[6] = FD_SHA256_INITIAL_G;
605 245602398 : state[7] = FD_SHA256_INITIAL_H;
606 :
607 245602398 : fd_sha256_core( state, buf, 1UL );
608 :
609 245602398 : state[0] = fd_uint_bswap( state[0] );
610 245602398 : state[1] = fd_uint_bswap( state[1] );
611 245602398 : state[2] = fd_uint_bswap( state[2] );
612 245602398 : state[3] = fd_uint_bswap( state[3] );
613 245602398 : state[4] = fd_uint_bswap( state[4] );
614 245602398 : state[5] = fd_uint_bswap( state[5] );
615 245602398 : state[6] = fd_uint_bswap( state[6] );
616 245602398 : state[7] = fd_uint_bswap( state[7] );
617 245602398 : memcpy( buf, state, 32UL );
618 245602398 : }
619 202066 : memcpy( hash, buf, 32UL );
620 202066 : #endif
621 303099 : return _hash;
622 303099 : }
623 :
624 : #undef fd_sha256_core