Line data Source code
1 : #ifndef HEADER_fd_src_ballet_sha256_fd_sha256_h
2 : #define HEADER_fd_src_ballet_sha256_fd_sha256_h
3 :
4 : /* fd_sha256 provides APIs for SHA-256 hashing of messages. */
5 :
6 : #include "../fd_ballet_base.h"
7 :
8 : /* FD_SHA256_{ALIGN,FOOTPRINT} describe the alignment and footprint needed
9 : for a memory region to hold a fd_sha256_t. ALIGN is a positive
10 : integer power of 2. FOOTPRINT is a multiple of ALIGN. ALIGN is
11 : recommended to be at least double cache line to mitigate various
12 : kinds of false sharing. These are provided to facilitate compile
13 : time declarations. */
14 :
15 3 : #define FD_SHA256_ALIGN (128UL)
16 3 : #define FD_SHA256_FOOTPRINT (128UL)
17 :
18 : /* FD_SHA256_{LG_HASH_SZ,HASH_SZ} describe the size of a SHA256 hash
19 : in bytes. HASH_SZ==2^LG_HASH_SZ==32. */
20 :
21 : #define FD_SHA256_LG_HASH_SZ (5)
22 388965 : #define FD_SHA256_HASH_SZ (32UL) /* == 2^FD_SHA256_LG_HASH_SZ, explicit to workaround compiler limitations */
23 :
24 : /* FD_SHA256_{LG_BLOCK_SZ,BLOCK_SZ} describe the size of a SHA256
25 : hash block in bytes. BLOCK_SZ==2^LG_BLOCK_SZ==64. */
26 :
27 1085953477 : #define FD_SHA256_LG_BLOCK_SZ (6)
28 1483335778 : #define FD_SHA256_BLOCK_SZ (64UL) /* == 2^FD_SHA256_LG_BLOCK_SZ, explicit to workaround compiler limitations */
29 :
30 : /* A fd_sha256_t should be treated as an opaque handle of a sha256
31 : calculation state. (It technically isn't here to facilitate compile
32 : time declarations of fd_sha256_t memory.) */
33 :
34 400956 : #define FD_SHA256_MAGIC (0xF17EDA2CE54A2560) /* FIREDANCE SHA256 V0 */
35 :
36 : /* FD_SHA256_PRIVATE_{LG_BUF_MAX,BUF_MAX} describe the size of the
37 : internal buffer used by the sha256 computation object. This is for
38 : internal use only. BUF_MAX==2^LG_BUF_MAX==2*FD_SHA256_HASH_SZ==64. */
39 :
40 1085953477 : #define FD_SHA256_PRIVATE_LG_BUF_MAX FD_SHA256_LG_BLOCK_SZ
41 1456138258 : #define FD_SHA256_PRIVATE_BUF_MAX FD_SHA256_BLOCK_SZ
42 :
43 : struct __attribute__((aligned(FD_SHA256_ALIGN))) fd_sha256_private {
44 : /* In-memory layout of a sha256 calculation state; treat as opaque outside the implementation. */
45 : /* This point is 128-byte aligned */
46 :
47 : uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ]; /* Room for one FD_SHA256_BLOCK_SZ block of buffered bytes; buf_used of them are in use */
48 :
49 : /* This point is 64-byte aligned */
50 :
51 : uint state[ FD_SHA256_HASH_SZ / sizeof(uint) ]; /* FD_SHA256_HASH_SZ bytes of uint words; presumably the running hash state -- confirm against the implementation */
52 :
53 : /* This point is 32-byte aligned */
54 :
55 : ulong magic; /* ==FD_SHA256_MAGIC */
56 : ulong buf_used; /* Number of buffered bytes, in [0,FD_SHA256_PRIVATE_BUF_MAX) */
57 : ulong bit_cnt; /* How many bits have been appended total */
58 :
59 : /* Padding to 128 bytes (FD_SHA256_FOOTPRINT) here */
60 : };
61 :
62 : typedef struct fd_sha256_private fd_sha256_t;
63 :
64 : FD_PROTOTYPES_BEGIN
65 :
66 : /* fd_sha256_{align,footprint,new,join,leave,delete} usage is identical to
67 : that of their fd_sha512 counterparts. See ../sha512/fd_sha512.h */
68 :
69 : FD_FN_CONST ulong
70 : fd_sha256_align( void );
71 :
72 : FD_FN_CONST ulong
73 : fd_sha256_footprint( void );
74 :
75 : void *
76 : fd_sha256_new( void * shmem );
77 :
78 : fd_sha256_t *
79 : fd_sha256_join( void * shsha );
80 :
81 : void *
82 : fd_sha256_leave( fd_sha256_t * sha );
83 :
84 : void *
85 : fd_sha256_delete( void * shsha );
86 :
87 : /* fd_sha256_init starts a sha256 calculation. sha is assumed to be a
88 : current local join to a sha256 calculation state with no other
89 : concurrent operation that would modify the state while this is
90 : executing. Any preexisting state for an in-progress or recently
91 : completed calculation will be discarded. Returns sha (on return, sha
92 : will have the state of a new in-progress calculation). */
93 :
94 : fd_sha256_t *
95 : fd_sha256_init( fd_sha256_t * sha );
96 :
97 : /* fd_sha256_append adds sz bytes locally pointed to by data to an
98 : in-progress sha256 calculation. sha, data and sz are assumed to be
99 : valid (i.e. sha is a current local join to a sha256 calculation state
100 : with no other concurrent operations that would modify the state while
101 : this is executing, data points to the first of the sz bytes and will
102 : be unmodified while this is running with no interest retained after
103 : return ... data==NULL is fine if sz==0). Returns sha (on return, sha
104 : will have the updated state of the in-progress calculation).
105 :
106 : It does not matter how the user groups data bytes for a sha256
107 : calculation; the final hash will be identical. It is preferable for
108 : performance to try to append as many bytes as possible at a time
109 : though. It is also preferable for performance if sz is a multiple of
110 : 64 for all but the last append (it is also preferable if sz is less
111 : than 56 for the last append). */
112 :
113 : fd_sha256_t *
114 : fd_sha256_append( fd_sha256_t * sha,
115 : void const * data,
116 : ulong sz );
117 :
118 : /* fd_sha256_fini finishes a sha256 calculation. sha and hash are
119 : assumed to be valid (i.e. sha is a local join to a sha256 calculation
120 : state that has an in-progress calculation with no other concurrent
121 : operations that would modify the state while this is executing and
122 : hash points to the first byte of a 32-byte memory region where the
123 : result of the calculation should be stored). Returns hash (on
124 : return, there will be no calculation in-progress on sha and 32-byte
125 : buffer pointed to by hash will be populated with the calculation
126 : result). */
127 : /* FIXME: THIS SHOULD PROBABLY RETURN A FD_SHA256_T */
128 :
129 : void *
130 : fd_sha256_fini( fd_sha256_t * sha,
131 : void * hash );
132 :
133 : /* fd_sha256_hash is a streamlined implementation of:
134 :
135 : fd_sha256_t sha[1];
136 : return fd_sha256_fini( fd_sha256_append( fd_sha256_init( sha ), data, sz ), hash )
137 :
138 : This can be faster for small messages because it can eliminate
139 : function call overheads, branches, copies and data marshalling under
140 : the hood (things like binary Merkle tree construction were designed
141 : to do lots of such operations). */
142 : /* FIXME: ADD NEW/JOIN/LEAVE/DELETE TO DOCUMENTATION */
143 : /* FIXME: PROBABLY SHOULD HAVE AN ABORT API */
144 : /* FIXME: UPDATE OTHER HASH FUNCTIONS SIMILARLY */
145 :
146 : void *
147 : fd_sha256_hash( void const * data,
148 : ulong sz,
149 : void * hash );
150 :
151 : void *
152 : fd_sha256_hash_32( void const * data,
153 : void * hash );
154 :
155 : FD_PROTOTYPES_END
156 :
157 : #if 0 /* SHA256 batch API details */
158 :
159 : /* FD_SHA256_BATCH_{ALIGN,FOOTPRINT} return the alignment and footprint
160 : in bytes required for a region of memory to hold the state of an
161 : in-progress set of SHA-256 calculations. ALIGN will be an integer
162 : power of 2 and FOOTPRINT will be a multiple of ALIGN. These are to
163 : facilitate compile time declarations. */
164 :
165 : #define FD_SHA256_BATCH_ALIGN ...
166 : #define FD_SHA256_BATCH_FOOTPRINT ...
167 :
168 : /* FD_SHA256_BATCH_MAX returns the batch size used under the hood.
169 : Will be positive. Users should not normally need to use this for
170 : anything. */
171 :
172 : #define FD_SHA256_BATCH_MAX ...
173 :
174 : /* A fd_sha256_batch_t is an opaque handle for a set of SHA-256
175 : calculations. */
176 :
177 : struct fd_sha256_private_batch;
178 : typedef struct fd_sha256_private_batch fd_sha256_batch_t;
179 :
180 : /* fd_sha256_batch_{align,footprint} return
181 : FD_SHA256_BATCH_{ALIGN,FOOTPRINT} respectively. */
182 :
183 : ulong fd_sha256_batch_align ( void );
184 : ulong fd_sha256_batch_footprint( void );
185 :
186 : /* fd_sha256_batch_init starts a new batch of SHA-256 calculations. The
187 : state of the in-progress calculation will be held in the memory
188 : region whose first byte in the local address space is pointed to by
189 : mem. The region should have the appropriate alignment and footprint
190 : and should not be read, changed or deleted until fini or abort is
191 : called on the in-progress calculation.
192 :
193 : Returns a handle to the in-progress batch calculation. As this is
194 : used in HPC contexts, does no input validation. */
195 :
196 : fd_sha256_batch_t *
197 : fd_sha256_batch_init( void * mem );
198 :
199 : /* fd_sha256_batch_add adds the sz byte message whose first byte in the
200 : local address space is pointed to by data to the in-progress batch
201 : calculation whose handle is batch. The result of the calculation
202 : will be stored at the 32-byte memory region whose first byte in the
203 : local address space is pointed to by hash.
204 :
205 : There are _no_ alignment restrictions on data and hash and _no_
206 : restrictions on sz. After a message is added, that message should
207 : not be changed or deleted until the fini or abort is called on the
208 : in-progress calculation. Likewise, the hash memory region should not
209 : be read, written or deleted until the calculation has completed.
210 :
211 : Messages can overlap and/or be added to a batch multiple times. Each
212 : hash location added to a batch should not overlap any other hash
213 : location, calculation state or message region. (Hash reuse /
214 : overlap have indeterminate but non-crashing behavior as the
215 : implementation under the hood is free to execute the elements of the
216 : batch in whatever order it sees fit and potentially do those
217 : calculations incrementally / in the background / ... as the batch is
218 : assembled.)
219 :
220 : Depending on the implementation, it might help performance to cluster
221 : adds of similar sized messages together. Likewise, it can be
222 : advantageous to use aligned message regions, aligned hash regions and
223 : messages sizes that are a multiple of a SHA block size. None of this
224 : is required though.
225 :
226 : Returns batch (which will still be an in progress batch calculation).
227 : As this is used in HPC contexts, does no input validation. */
228 :
229 : fd_sha256_batch_t *
230 : fd_sha256_batch_add( fd_sha256_batch_t * batch,
231 : void const * data,
232 : ulong sz,
233 : void * hash );
234 :
235 : /* fd_sha256_batch_fini finishes a set of SHA-256 calculations. On
236 : return, all the hash memory regions will be populated with the
237 : corresponding message hash. Returns a pointer to the memory region
238 : used to hold the calculation state (contents undefined) and the
239 : calculation will no longer be in progress. As this is used in HPC
240 : contexts, does no input validation. */
241 :
242 : void *
243 : fd_sha256_batch_fini( fd_sha256_batch_t * batch );
244 :
245 : /* fd_sha256_batch_abort aborts an in-progress set of SHA-256
246 : calculations. There is no guarantee which individual messages (if
247 : any) had their hashes computed and the contents of the hash memory
248 : regions is undefined. Returns a pointer to the memory region used to
249 : hold the calculation state (contents undefined) and the calculation
250 : will no longer be in progress. As this is used in HPC contexts, does
251 : no input validation. */
252 :
253 : void *
254 : fd_sha256_batch_abort( fd_sha256_batch_t * batch );
255 :
256 : #endif
257 : /* FD_SHA256_BATCH_IMPL selects the batching backend below (0 reference, 1 AVX, 2 AVX-512). A value defined before this point is kept as-is. Otherwise AVX-512 is preferred, then AVX unless tuning for znver1/znver2/znver3, else the reference. NOTE(review): the reason for excluding the znver tuning targets is not stated here. */
258 : #ifndef FD_SHA256_BATCH_IMPL
259 : #if FD_HAS_AVX512
260 : #define FD_SHA256_BATCH_IMPL 2
261 : #elif FD_HAS_AVX && !defined(__tune_znver1__) && !defined(__tune_znver2__) && !defined(__tune_znver3__)
262 : #define FD_SHA256_BATCH_IMPL 1
263 : #else
264 : #define FD_SHA256_BATCH_IMPL 0
265 : #endif
266 : #endif
267 :
268 : #if FD_SHA256_BATCH_IMPL==0 /* Reference batching implementation */
269 :
270 : #define FD_SHA256_BATCH_ALIGN (1UL)
271 : #define FD_SHA256_BATCH_FOOTPRINT (1UL)
272 : #define FD_SHA256_BATCH_MAX (1UL)
273 :
274 : typedef uchar fd_sha256_batch_t;
275 :
276 : FD_PROTOTYPES_BEGIN
277 :
278 : FD_FN_CONST static inline ulong fd_sha256_batch_align ( void ) { return alignof(fd_sha256_batch_t); }
279 : FD_FN_CONST static inline ulong fd_sha256_batch_footprint( void ) { return sizeof (fd_sha256_batch_t); }
280 :
281 : static inline fd_sha256_batch_t * fd_sha256_batch_init( void * mem ) { return (fd_sha256_batch_t *)mem; }
282 :
283 : static inline fd_sha256_batch_t *
284 : fd_sha256_batch_add( fd_sha256_batch_t * batch,
285 : void const * data,
286 : ulong sz,
287 : void * hash ) {
288 : fd_sha256_hash( data, sz, hash );
289 : return batch;
290 : }
291 :
292 : static inline void * fd_sha256_batch_fini ( fd_sha256_batch_t * batch ) { return (void *)batch; }
293 : static inline void * fd_sha256_batch_abort( fd_sha256_batch_t * batch ) { return (void *)batch; }
294 :
295 : FD_PROTOTYPES_END
296 :
297 : #elif FD_SHA256_BATCH_IMPL==1 /* AVX accelerated batching implementation */
298 :
299 : #define FD_SHA256_BATCH_ALIGN (128UL)
300 : #define FD_SHA256_BATCH_FOOTPRINT (256UL)
301 : #define FD_SHA256_BATCH_MAX (8UL)
302 :
303 : /* This is exposed here to facilitate inlining various operations */
304 :
305 : struct __attribute__((aligned(FD_SHA256_BATCH_ALIGN))) fd_sha256_private_batch { /* Queue of messages waiting to be handed to fd_sha256_private_batch_avx */
306 : void const * data[ FD_SHA256_BATCH_MAX ]; /* AVX aligned, data[i] is the message pointer passed to the i-th queued add */
307 : ulong sz [ FD_SHA256_BATCH_MAX ]; /* AVX aligned, sz[i] is the byte size passed to the i-th queued add */
308 : void * hash[ FD_SHA256_BATCH_MAX ]; /* AVX aligned, hash[i] is the result location passed to the i-th queued add */
309 : ulong cnt; /* Number of queued entries; set to 0 by init and left in [0,FD_SHA256_BATCH_MAX) by add */
310 : };
311 :
312 : typedef struct fd_sha256_private_batch fd_sha256_batch_t;
313 :
314 : FD_PROTOTYPES_BEGIN
315 :
316 : /* Internal use only */
317 :
318 : void
319 : fd_sha256_private_batch_avx( ulong batch_cnt, /* In [1,FD_SHA256_BATCH_MAX] */
320 : void const * batch_data, /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
321 : only [0,batch_cnt) used, essentially a msg_t const * const * */
322 : ulong const * batch_sz, /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
323 : only [0,batch_cnt) used */
324 : void * const * batch_hash ); /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
325 : only [0,batch_cnt) used */
326 :
327 0 : FD_FN_CONST static inline ulong fd_sha256_batch_align ( void ) { return alignof(fd_sha256_batch_t); }
328 0 : FD_FN_CONST static inline ulong fd_sha256_batch_footprint( void ) { return sizeof (fd_sha256_batch_t); }
329 :
330 : static inline fd_sha256_batch_t *
331 5938202 : fd_sha256_batch_init( void * mem ) {
332 5938202 : fd_sha256_batch_t * batch = (fd_sha256_batch_t *)mem;
333 5938202 : batch->cnt = 0UL;
334 5938202 : return batch;
335 5938202 : }
336 :
337 : static inline fd_sha256_batch_t *
338 : fd_sha256_batch_add( fd_sha256_batch_t * batch,
339 : void const * data,
340 : ulong sz,
341 125845416 : void * hash ) {
342 125845416 : ulong batch_cnt = batch->cnt;
343 125845416 : batch->data[ batch_cnt ] = data;
344 125845416 : batch->sz [ batch_cnt ] = sz;
345 125845416 : batch->hash[ batch_cnt ] = hash;
346 125845416 : batch_cnt++;
347 125845416 : if( FD_UNLIKELY( batch_cnt==FD_SHA256_BATCH_MAX ) ) {
348 14244050 : fd_sha256_private_batch_avx( batch_cnt, batch->data, batch->sz, batch->hash );
349 14244050 : batch_cnt = 0UL;
350 14244050 : }
351 125845416 : batch->cnt = batch_cnt;
352 125845416 : return batch;
353 125845416 : }
354 :
355 : static inline void *
356 5922100 : fd_sha256_batch_fini( fd_sha256_batch_t * batch ) {
357 5922100 : ulong batch_cnt = batch->cnt;
358 5922100 : if( FD_LIKELY( batch_cnt ) ) fd_sha256_private_batch_avx( batch_cnt, batch->data, batch->sz, batch->hash );
359 5922100 : return (void *)batch;
360 5922100 : }
361 :
362 : static inline void *
363 16102 : fd_sha256_batch_abort( fd_sha256_batch_t * batch ) {
364 16102 : return (void *)batch;
365 16102 : }
366 :
367 : FD_PROTOTYPES_END
368 :
369 : #elif FD_SHA256_BATCH_IMPL==2 /* AVX-512 accelerated batching implementation */
370 :
371 : #define FD_SHA256_BATCH_ALIGN (128UL)
372 : #define FD_SHA256_BATCH_FOOTPRINT (512UL)
373 : #define FD_SHA256_BATCH_MAX (16UL)
374 :
375 : /* This is exposed here to facilitate inlining various operations */
376 :
377 : struct __attribute__((aligned(FD_SHA256_BATCH_ALIGN))) fd_sha256_private_batch { /* Queue of messages waiting to be handed to fd_sha256_private_batch_avx512 */
378 : void const * data[ FD_SHA256_BATCH_MAX ]; /* AVX aligned, data[i] is the message pointer passed to the i-th queued add */
379 : ulong sz [ FD_SHA256_BATCH_MAX ]; /* AVX aligned, sz[i] is the byte size passed to the i-th queued add */
380 : void * hash[ FD_SHA256_BATCH_MAX ]; /* AVX aligned, hash[i] is the result location passed to the i-th queued add */
381 : ulong cnt; /* Number of queued entries; set to 0 by init and left in [0,FD_SHA256_BATCH_MAX) by add */
382 : };
383 :
384 : typedef struct fd_sha256_private_batch fd_sha256_batch_t;
385 :
386 : FD_PROTOTYPES_BEGIN
387 :
388 : /* Internal use only */
389 :
390 : void
391 : fd_sha256_private_batch_avx512( ulong batch_cnt, /* In [1,FD_SHA256_BATCH_MAX] */
392 : void const * batch_data, /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
393 : only [0,batch_cnt) used, essentially a msg_t const * const * */
394 : ulong const * batch_sz, /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
395 : only [0,batch_cnt) used */
396 : void * const * batch_hash ); /* Indexed [0,FD_SHA256_BATCH_MAX), aligned 32,
397 : only [0,batch_cnt) used */
398 :
399 0 : FD_FN_CONST static inline ulong fd_sha256_batch_align ( void ) { return alignof(fd_sha256_batch_t); }
400 0 : FD_FN_CONST static inline ulong fd_sha256_batch_footprint( void ) { return sizeof (fd_sha256_batch_t); }
401 :
402 : static inline fd_sha256_batch_t *
403 2969101 : fd_sha256_batch_init( void * mem ) {
404 2969101 : fd_sha256_batch_t * batch = (fd_sha256_batch_t *)mem;
405 2969101 : batch->cnt = 0UL;
406 2969101 : return batch;
407 2969101 : }
408 :
409 : static inline fd_sha256_batch_t *
410 : fd_sha256_batch_add( fd_sha256_batch_t * batch,
411 : void const * data,
412 : ulong sz,
413 62922708 : void * hash ) {
414 62922708 : ulong batch_cnt = batch->cnt;
415 62922708 : batch->data[ batch_cnt ] = data;
416 62922708 : batch->sz [ batch_cnt ] = sz;
417 62922708 : batch->hash[ batch_cnt ] = hash;
418 62922708 : batch_cnt++;
419 62922708 : if( FD_UNLIKELY( batch_cnt==FD_SHA256_BATCH_MAX ) ) {
420 3230871 : fd_sha256_private_batch_avx512( batch_cnt, batch->data, batch->sz, batch->hash );
421 3230871 : batch_cnt = 0UL;
422 3230871 : }
423 62922708 : batch->cnt = batch_cnt;
424 62922708 : return batch;
425 62922708 : }
426 :
427 : static inline void *
428 2961050 : fd_sha256_batch_fini( fd_sha256_batch_t * batch ) {
429 2961050 : ulong batch_cnt = batch->cnt;
430 2961050 : if( FD_LIKELY( batch_cnt ) ) fd_sha256_private_batch_avx512( batch_cnt, batch->data, batch->sz, batch->hash );
431 2961050 : return (void *)batch;
432 2961050 : }
433 :
434 : static inline void *
435 8051 : fd_sha256_batch_abort( fd_sha256_batch_t * batch ) {
436 8051 : return (void *)batch;
437 8051 : }
438 :
439 : FD_PROTOTYPES_END
440 :
441 : #else
442 : #error "Unsupported FD_SHA256_BATCH_IMPL"
443 : #endif
444 :
445 : #endif /* HEADER_fd_src_ballet_sha256_fd_sha256_h */
|