Line data Source code
1 : #ifndef HEADER_fd_src_ballet_sha256_fd_sha256_h
2 : #define HEADER_fd_src_ballet_sha256_fd_sha256_h
3 :
4 : /* fd_sha256 provides APIs for SHA-256 hashing of messages. */
5 :
6 : #include "../fd_ballet_base.h"
7 :
8 : /* FD_SHA256_{ALIGN,FOOTPRINT} describe the alignment and footprint needed
9 : for a memory region to hold a fd_sha256_t. ALIGN is a positive
10 : integer power of 2. FOOTPRINT is a multiple of ALIGN. ALIGN is
11 : recommended to be at least a double cache line to mitigate various
12 : kinds of false sharing. These are provided to facilitate compile
13 : time declarations. */
14 :
15 921300 : #define FD_SHA256_ALIGN (128UL) /* Also used as the aligned(...) attribute of struct fd_sha256_private */
16 460641 : #define FD_SHA256_FOOTPRINT (128UL) /* One ALIGN unit; struct fd_sha256_private is padded out to this size */
17 :
18 : /* FD_SHA256_{LG_HASH_SZ,HASH_SZ} describe the size of a SHA256 hash
19 : in bytes. HASH_SZ==2^LG_HASH_SZ==32. */
20 :
21 : #define FD_SHA256_LG_HASH_SZ (5) /* log2 of the hash size in bytes */
22 460875 : #define FD_SHA256_HASH_SZ (32UL) /* == 2^FD_SHA256_LG_HASH_SZ, explicit to work around compiler limitations */
23 :
24 : /* FD_SHA256_{LG_BLOCK_SZ,BLOCK_SZ} describe the size of a SHA256
25 : hash block in bytes. BLOCK_SZ==2^LG_BLOCK_SZ==64. */
26 :
27 573822910 : #define FD_SHA256_LG_BLOCK_SZ (6) /* log2 of the block size in bytes */
28 985122863 : #define FD_SHA256_BLOCK_SZ (64UL) /* == 2^FD_SHA256_LG_BLOCK_SZ, explicit to work around compiler limitations */
29 :
30 : /* A fd_sha256_t should be treated as an opaque handle of a sha256
31 : calculation state. (It technically isn't here to facilitate compile
32 : time declarations of fd_sha256_t memory.) */
33 :
34 460638 : #define FD_SHA256_MAGIC (0xF17EDA2CE54A2560) /* FIREDANCE SHA256 V0; expected value of the magic field of struct fd_sha256_private */
35 :
36 : /* FD_SHA256_PRIVATE_{LG_BUF_MAX,BUF_MAX} describe the size of the
37 : internal buffer used by the sha256 computation object. This is for
38 : internal use only. BUF_MAX==2^LG_BUF_MAX==2*FD_SHA256_HASH_SZ==64. */
39 :
40 573822910 : #define FD_SHA256_PRIVATE_LG_BUF_MAX FD_SHA256_LG_BLOCK_SZ /* Defined as an alias of the block size exponent */
41 952891643 : #define FD_SHA256_PRIVATE_BUF_MAX FD_SHA256_BLOCK_SZ /* Defined as an alias of the block size, i.e. the buffer holds one block */
42 :
43 : struct __attribute__((aligned(FD_SHA256_ALIGN))) fd_sha256_private { /* SHA-256 calculation state; the layout is exposed only to allow compile time declarations of fd_sha256_t memory */
44 :
45 : /* This point is 128-byte aligned */
46 :
47 : uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ]; /* Internal buffer of the computation object; buf_used tracks how much of it is occupied */
48 :
49 : /* This point is 64-byte aligned */
50 :
51 : uint state[ FD_SHA256_HASH_SZ / sizeof(uint) ]; /* FD_SHA256_HASH_SZ bytes of uint words; presumably the running hash value -- TODO confirm against the implementation */
52 :
53 : /* This point is 32-byte aligned */
54 :
55 : ulong magic; /* ==FD_SHA256_MAGIC */
56 : ulong buf_used; /* Number of buffered bytes, in [0,FD_SHA256_PRIVATE_BUF_MAX) */
57 : ulong bit_cnt; /* How many bits have been appended total */
58 :
59 : /* Padding to 128-byte here (a consequence of the aligned attribute above) */
60 : };
61 :
62 : typedef struct fd_sha256_private fd_sha256_t; /* Public name of the state; treat as an opaque handle */
63 :
64 : FD_PROTOTYPES_BEGIN
65 :
66 : /* fd_sha256_{align,footprint,new,join,leave,delete} usage is identical to
67 : that of their fd_sha512 counterparts. See ../sha512/fd_sha512.h for the authoritative contract; the per-function notes below are hedged summaries only. */
68 :
69 : FD_FN_CONST ulong
70 : fd_sha256_align( void ); /* Presumably returns FD_SHA256_ALIGN -- TODO confirm */
71 :
72 : FD_FN_CONST ulong
73 : fd_sha256_footprint( void ); /* Presumably returns FD_SHA256_FOOTPRINT -- TODO confirm */
74 :
75 : void *
76 : fd_sha256_new( void * shmem ); /* shmem: presumably a region with FD_SHA256_ALIGN alignment and FD_SHA256_FOOTPRINT size to be formatted as a fd_sha256_t -- TODO confirm */
77 :
78 : fd_sha256_t *
79 : fd_sha256_join( void * shsha ); /* Presumably yields the local join that fd_sha256_{init,append,fini} require -- TODO confirm */
80 :
81 : void *
82 : fd_sha256_leave( fd_sha256_t * sha ); /* Presumably ends a join obtained from fd_sha256_join -- TODO confirm */
83 :
84 : void *
85 : fd_sha256_delete( void * shsha ); /* Presumably unformats the region set up by fd_sha256_new -- TODO confirm */
86 :
87 : /* fd_sha256_init starts a sha256 calculation. sha is assumed to be a
88 : current local join to a sha256 calculation state with no other
89 : concurrent operations that would modify the state while this is
90 : executing. Any preexisting state for an in-progress or recently
91 : completed calculation will be discarded. Returns sha (on return, sha
92 : will have the state of a new in-progress calculation). */
93 :
94 : fd_sha256_t *
95 : fd_sha256_init( fd_sha256_t * sha ); /* Returning sha allows chaining, e.g. fd_sha256_append( fd_sha256_init( sha ), data, sz ) */
96 :
97 : /* fd_sha256_append adds sz bytes locally pointed to by data to an
98 : in-progress sha256 calculation. sha, data and sz are assumed to be
99 : valid (i.e. sha is a current local join to a sha256 calculation state
100 : with no other concurrent operations that would modify the state while
101 : this is executing, data points to the first of the sz bytes and will
102 : be unmodified while this is running with no interest retained after
103 : return ... data==NULL is fine if sz==0). Returns sha (on return, sha
104 : will have the updated state of the in-progress calculation).
105 :
106 : It does not matter how the user groups data bytes for a sha256
107 : calculation; the final hash will be identical. It is preferable for
108 : performance to try to append as many bytes as possible at a time
109 : though. It is also preferable for performance if sz is a multiple of
110 : 64 for all but the last append (it is also preferable if sz is less
111 : than 56 for the last append). */
112 :
113 : fd_sha256_t *
114 : fd_sha256_append( fd_sha256_t * sha,
115 : void const * data,
116 : ulong sz );
117 :
118 : /* fd_sha256_fini finishes a sha256 calculation. sha and hash are
119 : assumed to be valid (i.e. sha is a local join to a sha256 calculation
120 : state that has an in-progress calculation with no other concurrent
121 : operations that would modify the state while this is executing and
122 : hash points to the first byte of a 32-byte memory region where the
123 : result of the calculation should be stored). Returns hash (on
124 : return, there will be no calculation in-progress on sha and the 32-byte
125 : buffer pointed to by hash will be populated with the calculation
126 : result). */
127 : /* FIXME: THIS SHOULD PROBABLY RETURN A FD_SHA256_T */
128 :
129 : void *
130 : fd_sha256_fini( fd_sha256_t * sha,
131 : void * hash );
132 :
133 : /* fd_sha256_hash is a streamlined implementation of:
134 :
135 : fd_sha256_t sha[1];
136 : return fd_sha256_fini( fd_sha256_append( fd_sha256_init( sha ), data, sz ), hash );
137 :
138 : This can be faster for small messages because it can eliminate
139 : function call overheads, branches, copies and data marshalling under
140 : the hood (things like binary Merkle tree construction were designed
141 : to do lots of such operations). */
142 : /* FIXME: ADD NEW/JOIN/LEAVE/DELETE TO DOCUMENTATION */
143 : /* FIXME: PROBABLY SHOULD HAVE AN ABORT API */
144 : /* FIXME: UPDATE OTHER HASH FUNCTIONS SIMILARLY */
145 :
146 : void *
147 : fd_sha256_hash( void const * data,
148 : ulong sz,
149 : void * hash ); /* Returns hash, as in the equivalent init/append/fini sequence above */
150 :
151 : /* fd_sha256_hash_32_repeated hashes the 32 bytes pointed to by data,
152 : then hashes the hash, and repeats, doing a total of cnt hashes. It
153 : is a streamlined version of:
154 :
155 : uchar temp[32];
156 : memcpy( temp, data, 32UL );
157 : for( ulong i=0UL; i<cnt; i++ ) fd_sha256_hash( temp, 32UL, temp );
158 : memcpy( hash, temp, 32UL );
159 : return hash;
160 :
161 : This eliminates function call overhead and data marshalling. cnt==0
162 : is okay, in which case this just copies data to hash. Always returns
163 : hash. data and hash must be valid, non-NULL pointers, even when
164 : cnt==0. NOTE(review): the reference snippet works on a temporary copy and so tolerates data==hash; confirm the streamlined implementation does too. */
165 : void *
166 : fd_sha256_hash_32_repeated( void const * data,
167 : void * hash,
168 : ulong cnt );
169 :
170 : FD_PROTOTYPES_END
171 :
172 : #if 0 /* SHA256 batch API details */
173 :
174 : /* FD_SHA256_BATCH_{ALIGN,FOOTPRINT} return the alignment and footprint
175 : in bytes required for a region of memory to hold the state of an
176 : in-progress set of SHA-256 calculations. ALIGN will be an integer
177 : power of 2 and FOOTPRINT will be a multiple of ALIGN. These are to
178 : facilitate compile time declarations. */
179 :
180 : #define FD_SHA256_BATCH_ALIGN ...
181 : #define FD_SHA256_BATCH_FOOTPRINT ...
182 :
183 : /* FD_SHA256_BATCH_MAX returns the batch size used under the hood.
184 : Will be positive. Users should not normally need to use this for
185 : anything. */
186 :
187 : #define FD_SHA256_BATCH_MAX ...
188 :
189 : /* A fd_sha256_batch_t is an opaque handle for a set of SHA-256
190 : calculations. */
191 :
192 : struct fd_sha256_private_batch;
193 : typedef struct fd_sha256_private_batch fd_sha256_batch_t;
194 :
195 : /* fd_sha256_batch_{align,footprint} return
196 : FD_SHA256_BATCH_{ALIGN,FOOTPRINT} respectively. */
197 :
198 : ulong fd_sha256_batch_align ( void );
199 : ulong fd_sha256_batch_footprint( void );
200 :
201 : /* fd_sha256_batch_init starts a new batch of SHA-256 calculations. The
202 : state of the in-progress calculation will be held in the memory
203 : region whose first byte in the local address space is pointed to by
204 : mem. The region should have the appropriate alignment and footprint
205 : and should not be read, changed or deleted until fini or abort is
206 : called on the in-progress calculation.
207 :
208 : Returns a handle to the in-progress batch calculation. As this is
209 : used in HPC contexts, does no input validation. */
210 :
211 : fd_sha256_batch_t *
212 : fd_sha256_batch_init( void * mem );
213 :
214 : /* fd_sha256_batch_add adds the sz byte message whose first byte in the
215 : local address space is pointed to by data to the in-progress batch
216 : calculation whose handle is batch. The result of the calculation
217 : will be stored at the 32-byte memory region whose first byte in the
218 : local address space is pointed to by hash.
219 :
220 : There are _no_ alignment restrictions on data and hash and _no_
221 : restrictions on sz. After a message is added, that message should
222 : not be changed or deleted until the fini or abort is called on the
223 : in-progress calculation. Likewise, the hash memory region should not
224 : be read, written or deleted until the calculation has completed.
225 :
226 : Messages can overlap and/or be added to a batch multiple times. Each
227 : hash location added to a batch should not overlap any other hash
228 : location, calculation state or message region. (Hash reuse /
229 : overlap have indeterminate but non-crashing behavior as the
230 : implementation under the hood is free to execute the elements of the
231 : batch in whatever order it sees fit and potentially do those
232 : calculations incrementally / in the background / ... as the batch is
233 : assembled.)
234 :
235 : Depending on the implementation, it might help performance to cluster
236 : adds of similar sized messages together. Likewise, it can be
237 : advantageous to use aligned message regions, aligned hash regions and
238 : messages sizes that are a multiple of a SHA block size. None of this
239 : is required though.
240 :
241 : Returns batch (which will still be an in progress batch calculation).
242 : As this is used in HPC contexts, does no input validation. */
243 :
244 : fd_sha256_batch_t *
245 : fd_sha256_batch_add( fd_sha256_batch_t * batch,
246 : void const * data,
247 : ulong sz,
248 : void * hash );
249 :
250 : /* fd_sha256_batch_fini finishes a set of SHA-256 calculations. On
251 : return, all the hash memory regions will be populated with the
252 : corresponding message hash. Returns a pointer to the memory region
253 : used to hold the calculation state (contents undefined) and the
254 : calculation will no longer be in progress. As this is used in HPC
255 : contexts, does no input validation. */
256 :
257 : void *
258 : fd_sha256_batch_fini( fd_sha256_batch_t * batch );
259 :
260 : /* fd_sha256_batch_abort aborts an in-progress set of SHA-256
261 : calculations. There is no guarantee which individual messages (if
262 : any) had their hashes computed and the contents of the hash memory
263 : regions is undefined. Returns a pointer to the memory region used to
264 : hold the calculation state (contents undefined) and the calculation
265 : will no longer be in progress. As this is used in HPC contexts, does
266 : no input validation. */
267 :
268 : void *
269 : fd_sha256_batch_abort( fd_sha256_batch_t * batch );
270 :
271 : #endif
272 :
273 : #ifndef FD_SHA256_BATCH_IMPL /* Honor a value predefined by the build; otherwise auto-select below */
274 : #if FD_HAS_AVX512
275 : #define FD_SHA256_BATCH_IMPL 2 /* AVX-512 accelerated batching */
276 : #elif FD_HAS_AVX && !defined(__tune_znver1__) && !defined(__tune_znver2__) && !defined(__tune_znver3__) /* NOTE(review): the AVX path is skipped when tuning for znver1-3; the rationale is not stated here -- confirm */
277 : #define FD_SHA256_BATCH_IMPL 1 /* AVX accelerated batching */
278 : #else
279 : #define FD_SHA256_BATCH_IMPL 0 /* Reference batching */
280 : #endif
281 : #endif
282 :
283 : #if FD_SHA256_BATCH_IMPL==0 /* Reference batching implementation */
284 :
285 : #define FD_SHA256_BATCH_ALIGN (1UL) /* Intended to agree with alignof(fd_sha256_batch_t) for the uchar placeholder type */
286 : #define FD_SHA256_BATCH_FOOTPRINT (1UL) /* Intended to agree with sizeof(fd_sha256_batch_t) for the uchar placeholder type */
287 : #define FD_SHA256_BATCH_MAX (1UL) /* Each add is hashed immediately, i.e. batches of one */
288 :
289 : typedef uchar fd_sha256_batch_t; /* Placeholder type; the functions in this implementation never dereference the handle */
290 :
291 : FD_PROTOTYPES_BEGIN
292 :
293 : FD_FN_CONST static inline ulong fd_sha256_batch_align ( void ) { return alignof(fd_sha256_batch_t); } /* Alignment required for the batch state memory */
294 : FD_FN_CONST static inline ulong fd_sha256_batch_footprint( void ) { return sizeof (fd_sha256_batch_t); } /* Size in bytes required for the batch state memory */
295 :
296 : static inline fd_sha256_batch_t * fd_sha256_batch_init( void * mem ) { return (fd_sha256_batch_t *)mem; } /* Reference init: returns mem as the handle without writing to it */
297 :
298 : static inline fd_sha256_batch_t * /* Reference add: hashes the message right away instead of queueing it */
299 : fd_sha256_batch_add( fd_sha256_batch_t * batch, /* Handle from fd_sha256_batch_init; passed through untouched */
300 : void const * data, /* First byte of the message */
301 : ulong sz, /* Message size in bytes */
302 : void * hash ) { /* Where fd_sha256_hash stores the result */
303 : fd_sha256_hash( data, sz, hash ); /* Result is available as soon as this call returns */
304 : return batch;
305 : }
306 :
307 : static inline void * fd_sha256_batch_fini ( fd_sha256_batch_t * batch ) { return (void *)batch; } /* Nothing is pending in the reference implementation; just returns the state memory */
308 : static inline void * fd_sha256_batch_abort( fd_sha256_batch_t * batch ) { return (void *)batch; } /* Same as fini here: every added message was already hashed by add */
309 :
310 : FD_PROTOTYPES_END
311 :
312 : #elif FD_SHA256_BATCH_IMPL==1 /* AVX accelerated batching implementation */
313 :
314 : #define FD_SHA256_BATCH_ALIGN (128UL) /* Alignment applied to struct fd_sha256_private_batch */
315 : #define FD_SHA256_BATCH_FOOTPRINT (256UL) /* NOTE(review): expected to equal sizeof(fd_sha256_batch_t), assuming 8-byte pointers and ulong -- confirm */
316 : #define FD_SHA256_BATCH_MAX (8UL) /* Number of queued messages at which fd_sha256_batch_add flushes to the AVX kernel */
317 :
318 : /* This is exposed here to facilitate inlining various operations */
319 :
320 : struct __attribute__((aligned(FD_SHA256_BATCH_ALIGN))) fd_sha256_private_batch { /* Queue of pending (data,sz,hash) triples stored as parallel arrays */
321 : void const * data[ FD_SHA256_BATCH_MAX ]; /* AVX aligned; data[i] is the message pointer of the i-th queued add */
322 : ulong sz [ FD_SHA256_BATCH_MAX ]; /* AVX aligned; sz[i] is the byte size of the i-th queued message */
323 : void * hash[ FD_SHA256_BATCH_MAX ]; /* AVX aligned; hash[i] is the result location for the i-th queued message */
324 : ulong cnt; /* Number of queued entries; add resets it to 0 whenever it reaches FD_SHA256_BATCH_MAX */
325 : };
326 :
327 : typedef struct fd_sha256_private_batch fd_sha256_batch_t;
328 :
329 : FD_PROTOTYPES_BEGIN
330 :
331 : /* Internal use only: batch kernel invoked by fd_sha256_batch_add (when the queue is full) and fd_sha256_batch_fini (for a non-empty remainder) */
332 :
333 : void
334 : fd_sha256_private_batch_avx( ulong batch_cnt, /* In [1,FD_SHA256_BATCH_MAX] */
335 : void const * batch_data, /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
336 : only [0,batch_cnt) used, essentially a msg_t const * const * */
337 : ulong const * batch_sz, /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
338 : only [0,batch_cnt) used */
339 : void * const * batch_hash ); /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
340 : only [0,batch_cnt) used */
341 :
342 2 : FD_FN_CONST static inline ulong fd_sha256_batch_align ( void ) { return alignof(fd_sha256_batch_t); } /* Alignment required for the batch state memory */
343 2 : FD_FN_CONST static inline ulong fd_sha256_batch_footprint( void ) { return sizeof (fd_sha256_batch_t); } /* Size in bytes required for the batch state memory */
344 :
345 : static inline fd_sha256_batch_t * /* Starts an empty batch in the caller-provided memory; does no input validation */
346 5904402 : fd_sha256_batch_init( void * mem ) {
347 5904402 : fd_sha256_batch_t * batch = (fd_sha256_batch_t *)mem; /* mem is used in place as the batch state */
348 5904402 : batch->cnt = 0UL; /* Only cnt is initialized; the arrays are filled by later adds */
349 5904402 : return batch;
350 5904402 : }
351 :
352 : static inline fd_sha256_batch_t * /* Queues one message; flushes the whole queue to the AVX kernel once it is full */
353 : fd_sha256_batch_add( fd_sha256_batch_t * batch, /* In-progress batch from fd_sha256_batch_init */
354 : void const * data, /* First byte of the message; only the pointer is stored */
355 : ulong sz, /* Message size in bytes */
356 128856852 : void * hash ) { /* Result location; only the pointer is stored */
357 128856852 : ulong batch_cnt = batch->cnt; /* Not bounds checked; init and the flush below keep cnt below FD_SHA256_BATCH_MAX */
358 128856852 : batch->data[ batch_cnt ] = data;
359 128856852 : batch->sz [ batch_cnt ] = sz;
360 128856852 : batch->hash[ batch_cnt ] = hash;
361 128856852 : batch_cnt++;
362 128856852 : if( FD_UNLIKELY( batch_cnt==FD_SHA256_BATCH_MAX ) ) { /* Queue just became full */
363 14458758 : fd_sha256_private_batch_avx( batch_cnt, batch->data, batch->sz, batch->hash );
364 14458758 : batch_cnt = 0UL; /* Queue is empty again after the flush */
365 14458758 : }
366 128856852 : batch->cnt = batch_cnt;
367 128856852 : return batch;
368 128856852 : }
369 :
370 : static inline void * /* Finishes the batch by hashing whatever is still queued; returns the state memory */
371 5888300 : fd_sha256_batch_fini( fd_sha256_batch_t * batch ) {
372 5888300 : ulong batch_cnt = batch->cnt;
373 5888300 : if( FD_LIKELY( batch_cnt ) ) fd_sha256_private_batch_avx( batch_cnt, batch->data, batch->sz, batch->hash ); /* Skipped for an empty queue, since the kernel expects batch_cnt in [1,FD_SHA256_BATCH_MAX] */
374 5888300 : return (void *)batch; /* batch->cnt is not reset here; fd_sha256_batch_init resets it */
375 5888300 : }
376 :
377 : static inline void * /* Abandons the batch: still-queued messages are not hashed, while groups already flushed by add were */
378 16102 : fd_sha256_batch_abort( fd_sha256_batch_t * batch ) {
379 16102 : return (void *)batch; /* State memory is returned untouched */
380 16102 : }
381 :
382 : FD_PROTOTYPES_END
383 :
384 : #elif FD_SHA256_BATCH_IMPL==2 /* AVX-512 accelerated batching implementation */
385 :
386 : #define FD_SHA256_BATCH_ALIGN (128UL) /* Alignment applied to struct fd_sha256_private_batch */
387 : #define FD_SHA256_BATCH_FOOTPRINT (512UL) /* NOTE(review): expected to equal sizeof(fd_sha256_batch_t), assuming 8-byte pointers and ulong -- confirm */
388 : #define FD_SHA256_BATCH_MAX (16UL) /* Number of queued messages at which fd_sha256_batch_add flushes to the AVX-512 kernel */
389 :
390 : /* This is exposed here to facilitate inlining various operations */
391 :
392 : struct __attribute__((aligned(FD_SHA256_BATCH_ALIGN))) fd_sha256_private_batch { /* Queue of pending (data,sz,hash) triples stored as parallel arrays */
393 : void const * data[ FD_SHA256_BATCH_MAX ]; /* AVX aligned; data[i] is the message pointer of the i-th queued add */
394 : ulong sz [ FD_SHA256_BATCH_MAX ]; /* AVX aligned; sz[i] is the byte size of the i-th queued message */
395 : void * hash[ FD_SHA256_BATCH_MAX ]; /* AVX aligned; hash[i] is the result location for the i-th queued message */
396 : ulong cnt; /* Number of queued entries; add resets it to 0 whenever it reaches FD_SHA256_BATCH_MAX */
397 : };
398 :
399 : typedef struct fd_sha256_private_batch fd_sha256_batch_t;
400 :
401 : FD_PROTOTYPES_BEGIN
402 :
403 : /* Internal use only: batch kernel invoked by fd_sha256_batch_add (when the queue is full) and fd_sha256_batch_fini (for a non-empty remainder). NOTE(review): the "aligned 32" remarks below match the AVX variant word for word; confirm whether the AVX-512 kernel needs a stricter alignment. */
404 :
405 : void
406 : fd_sha256_private_batch_avx512( ulong batch_cnt, /* In [1,FD_SHA256_BATCH_MAX] */
407 : void const * batch_data, /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
408 : only [0,batch_cnt) used, essentially a msg_t const * const * */
409 : ulong const * batch_sz, /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
410 : only [0,batch_cnt) used */
411 : void * const * batch_hash ); /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
412 : only [0,batch_cnt) used */
413 :
414 1 : FD_FN_CONST static inline ulong fd_sha256_batch_align ( void ) { return alignof(fd_sha256_batch_t); } /* Alignment required for the batch state memory */
415 1 : FD_FN_CONST static inline ulong fd_sha256_batch_footprint( void ) { return sizeof (fd_sha256_batch_t); } /* Size in bytes required for the batch state memory */
416 :
417 : static inline fd_sha256_batch_t * /* Starts an empty batch in the caller-provided memory; does no input validation */
418 2952201 : fd_sha256_batch_init( void * mem ) {
419 2952201 : fd_sha256_batch_t * batch = (fd_sha256_batch_t *)mem; /* mem is used in place as the batch state */
420 2952201 : batch->cnt = 0UL; /* Only cnt is initialized; the arrays are filled by later adds */
421 2952201 : return batch;
422 2952201 : }
423 :
424 : static inline fd_sha256_batch_t * /* Queues one message; flushes the whole queue to the AVX-512 kernel once it is full */
425 : fd_sha256_batch_add( fd_sha256_batch_t * batch, /* In-progress batch from fd_sha256_batch_init */
426 : void const * data, /* First byte of the message; only the pointer is stored */
427 : ulong sz, /* Message size in bytes */
428 64428426 : void * hash ) { /* Result location; only the pointer is stored */
429 64428426 : ulong batch_cnt = batch->cnt; /* Not bounds checked; init and the flush below keep cnt below FD_SHA256_BATCH_MAX */
430 64428426 : batch->data[ batch_cnt ] = data;
431 64428426 : batch->sz [ batch_cnt ] = sz;
432 64428426 : batch->hash[ batch_cnt ] = hash;
433 64428426 : batch_cnt++;
434 64428426 : if( FD_UNLIKELY( batch_cnt==FD_SHA256_BATCH_MAX ) ) { /* Queue just became full */
435 3235949 : fd_sha256_private_batch_avx512( batch_cnt, batch->data, batch->sz, batch->hash );
436 3235949 : batch_cnt = 0UL; /* Queue is empty again after the flush */
437 3235949 : }
438 64428426 : batch->cnt = batch_cnt;
439 64428426 : return batch;
440 64428426 : }
441 :
442 : static inline void * /* Finishes the batch by hashing whatever is still queued; returns the state memory */
443 2944150 : fd_sha256_batch_fini( fd_sha256_batch_t * batch ) {
444 2944150 : ulong batch_cnt = batch->cnt;
445 2944150 : if( FD_LIKELY( batch_cnt ) ) fd_sha256_private_batch_avx512( batch_cnt, batch->data, batch->sz, batch->hash ); /* Skipped for an empty queue, since the kernel expects batch_cnt in [1,FD_SHA256_BATCH_MAX] */
446 2944150 : return (void *)batch; /* batch->cnt is not reset here; fd_sha256_batch_init resets it */
447 2944150 : }
448 :
449 : static inline void * /* Abandons the batch: still-queued messages are not hashed, while groups already flushed by add were */
450 8051 : fd_sha256_batch_abort( fd_sha256_batch_t * batch ) {
451 8051 : return (void *)batch; /* State memory is returned untouched */
452 8051 : }
453 :
454 : FD_PROTOTYPES_END
455 :
456 : #else
457 : #error "Unsupported FD_SHA256_BATCH_IMPL"
458 : #endif
459 :
460 : #endif /* HEADER_fd_src_ballet_sha256_fd_sha256_h */
|