Line data Source code
1 : #ifndef HEADER_fd_src_ballet_lthash_fd_lthash_adder_h
2 : #define HEADER_fd_src_ballet_lthash_fd_lthash_adder_h
3 :
4 : /* fd_lthash_adder.h is an optimized streaming LtHash adder.
5 :
6 : Uses two forms of SIMD parallelism internally to accelerate LtHash
7 : update throughput (multi-block and multi-message BLAKE3 hashing).
8 : A rate of 5 million LtHash updates per second was previously achieved
9 : on a 3.7 GHz AMD EPYC 9B45 (Zen 5 / Turin).
10 :
11 : Usage is as follows:
12 :
13 : fd_lthash_value_t sum[1];
14 : fd_lthash_zero( sum );
15 : fd_lthash_adder_t adder[1];
16 : fd_lthash_adder_new( adder );
17 : for( ... each value ... ) fd_lthash_adder_push( adder, sum, ... );
18 : fd_lthash_adder_flush( adder, sum );
19 : fd_lthash_adder_delete( adder ); */
20 :
21 : #include "../blake3/fd_blake3.h"
22 : #include "../lthash/fd_lthash.h"
23 :
24 : #define FD_LTHASH_ADDER_ALIGN 64
25 :
26 : #define FD_LTHASH_ADDER_PARA_MAX 16
27 :
28 : struct __attribute__((aligned(FD_LTHASH_ADDER_ALIGN))) fd_lthash_adder {
29 :
30 : uint batch_cnt;
31 :
32 : #if FD_LTHASH_ADDER_PARA_MAX>1
33 :
34 : uchar batch_data[ FD_LTHASH_ADDER_PARA_MAX*FD_BLAKE3_CHUNK_SZ ]
35 : __attribute__((aligned(64)));
36 :
37 : ulong batch_ptrs[ FD_LTHASH_ADDER_PARA_MAX ]
38 : __attribute__((aligned(64)));
39 :
40 : uint batch_sz[ FD_LTHASH_ADDER_PARA_MAX ];
41 :
42 : #endif
43 :
44 : };
45 :
46 : typedef struct fd_lthash_adder fd_lthash_adder_t;
47 :
48 : FD_PROTOTYPES_BEGIN
49 :
50 : /* fd_lthash_adder_{new,delete} {initializes,destroys} an lthash_adder. */
51 :
52 : fd_lthash_adder_t *
53 : fd_lthash_adder_new( fd_lthash_adder_t * adder );
54 :
55 : void *
56 : fd_lthash_adder_delete( fd_lthash_adder_t * adder );
57 :
58 : /* fd_lthash_adder_push enqueues the given input for hashing. sum may
59 : or may not be updated with enqueued LtHash additions. */
60 :
61 : static inline void
62 : fd_lthash_adder_push( fd_lthash_adder_t * adder,
63 : fd_lthash_value_t * sum,
64 : void const * input,
65 0 : ulong input_sz ) {
66 0 : ulong const batch_threshold = 512UL;
67 0 : fd_lthash_value_t value[1];
68 0 : if( FD_UNLIKELY( input_sz>batch_threshold ) ) {
69 0 : fd_blake3_t blake[1];
70 0 : fd_blake3_init( blake );
71 0 : fd_blake3_append( blake, input, input_sz );
72 0 : fd_blake3_fini_2048( blake, value->bytes );
73 0 : fd_lthash_add( sum, value );
74 0 : return;
75 0 : }
76 :
77 0 : uint batch_idx = adder->batch_cnt++;
78 0 : uchar * slot = (uchar *)adder->batch_ptrs[ batch_idx ];
79 0 : fd_memcpy( slot, input, input_sz );
80 0 : adder->batch_sz[ batch_idx ] = (uint)input_sz;
81 :
82 0 : if( batch_idx+1>=FD_BLAKE3_PARA_MAX ) {
83 0 : # if FD_HAS_AVX512
84 0 : fd_blake3_lthash_batch16( (void const **)fd_type_pun_const( adder->batch_ptrs ), adder->batch_sz, value->words );
85 : # elif FD_HAS_AVX
86 : fd_blake3_lthash_batch8 ( (void const **)fd_type_pun_const( adder->batch_ptrs ), adder->batch_sz, value->words );
87 0 : # endif
88 0 : adder->batch_cnt = 0;
89 0 : fd_lthash_add( sum, value );
90 0 : }
91 0 : }
92 :
93 : /* fd_lthash_adder_flush commits all previously enqueued additions to
94 : sum. */
95 :
96 : static inline void
97 : fd_lthash_adder_flush( fd_lthash_adder_t * adder,
98 0 : fd_lthash_value_t * sum ) {
99 0 : uint batch_cnt = adder->batch_cnt;
100 0 : for( uint i=0U; i<batch_cnt; i++ ) {
101 0 : fd_lthash_value_t value[1];
102 0 : fd_blake3_t blake[1];
103 0 : fd_blake3_init( blake );
104 0 : fd_blake3_append( blake, (void const *)adder->batch_ptrs[ i ], adder->batch_sz[ i ] );
105 0 : fd_blake3_fini_2048( blake, value->bytes );
106 0 : fd_lthash_add( sum, value );
107 0 : }
108 0 : adder->batch_cnt = 0U;
109 0 : }
110 :
111 : /* fd_lthash_adder_push_solana_account wraps fd_lthash_adder_push for
112 : Solana account inputs. */
113 :
114 : static inline void
115 : fd_lthash_adder_push_solana_account(
116 : fd_lthash_adder_t * adder,
117 : fd_lthash_value_t * sum,
118 : void const * pubkey,
119 : uchar const * data,
120 : ulong data_sz,
121 : ulong lamports,
122 : uchar executable,
123 : void const * owner
124 0 : ) {
125 0 : fd_lthash_value_t value[1];
126 : /* FIXME opportunities for memcpy hax here */
127 :
128 0 : ulong const static_sz = 73UL;
129 0 : ulong const batch_threshold = 512UL;
130 0 : if( FD_UNLIKELY( data_sz > batch_threshold-static_sz || /* optimize for small appends */
131 0 : FD_BLAKE3_PARA_MAX==0 ) ) {
132 0 : fd_blake3_t blake[1];
133 0 : fd_blake3_init( blake );
134 0 : fd_blake3_append( blake, &lamports, sizeof(ulong) );
135 0 : fd_blake3_append( blake, data, data_sz );
136 0 : uchar footer[ 65 ];
137 0 : footer[ 0 ] = executable;
138 0 : memcpy( footer+1, owner, 32 );
139 0 : memcpy( footer+33, pubkey, 32 );
140 0 : fd_blake3_append( blake, footer, sizeof(footer) );
141 0 : fd_blake3_fini_2048( blake, value->bytes );
142 0 : fd_lthash_add( sum, value );
143 0 : return;
144 0 : }
145 :
146 0 : uint batch_idx = adder->batch_cnt++;
147 0 : uchar * slot = (uchar *)adder->batch_ptrs[ batch_idx ];
148 0 : uchar * p = slot;
149 :
150 : /* Fixed size header */
151 0 : FD_STORE( ulong, p, lamports );
152 0 : p += sizeof(ulong);
153 : /* Variable size content */
154 0 : fd_memcpy( p, data, data_sz );
155 0 : p += data_sz;
156 : /* Fixed size footer */
157 0 : p[0] = executable; p += 1;
158 0 : fd_memcpy( p, owner, 32 ); p += 32;
159 0 : fd_memcpy( p, pubkey, 32 ); p += 32;
160 :
161 0 : adder->batch_sz[ batch_idx ] = (uint)( p-slot );
162 :
163 0 : if( batch_idx+1>=FD_BLAKE3_PARA_MAX ) {
164 0 : # if FD_HAS_AVX512
165 0 : fd_blake3_lthash_batch16( (void const **)fd_type_pun_const( adder->batch_ptrs ), adder->batch_sz, value->words );
166 : # elif FD_HAS_AVX
167 : fd_blake3_lthash_batch8 ( (void const **)fd_type_pun_const( adder->batch_ptrs ), adder->batch_sz, value->words );
168 0 : # endif
169 0 : adder->batch_cnt = 0;
170 0 : fd_lthash_add( sum, value );
171 0 : }
172 0 : }
173 :
174 : FD_PROTOTYPES_END
175 :
176 : #endif /* HEADER_fd_src_ballet_lthash_fd_lthash_adder_h */
|