Line data Source code
1 : #ifndef HEADER_fd_src_ballet_lthash_fd_lthash_adder_h
2 : #define HEADER_fd_src_ballet_lthash_fd_lthash_adder_h
3 :
4 : /* fd_lthash_adder.h is an optimized streaming LtHash adder.
5 :
6 : Uses two forms of SIMD parallelism internally to accelerate LtHash
7 : update throughput (multi-block and multi-message BLAKE3 hashing).
8 : A rate of 5 million LtHash updates per second was previously achieved
9 : on a 3.7 GHz AMD EPYC 9B45 (Zen 5 / Turin).
10 :
11 : Usage is as follows:
12 :
13 : fd_lthash_value_t sum[1];
14 : fd_lthash_zero( sum );
15 : fd_lthash_adder_t adder[1];
16 : fd_lthash_adder_new( adder );
17 : for( ... each value ... ) fd_lthash_adder_push( adder, sum, ... );
18 : fd_lthash_adder_flush( adder, sum );
19 : fd_lthash_adder_delete( adder ); */
20 :
21 : #include "../blake3/fd_blake3.h"
22 : #include "../lthash/fd_lthash.h"
23 :
/* FD_LTHASH_ADDER_ALIGN is the byte alignment applied to
   struct fd_lthash_adder. */

#define FD_LTHASH_ADDER_ALIGN    64

/* FD_LTHASH_ADDER_PARA_MAX is the number of batch slots reserved in
   struct fd_lthash_adder (it sizes batch_data, batch_ptrs and
   batch_sz), independent of how many are used on this target. */

#define FD_LTHASH_ADDER_PARA_MAX 16

/* FD_LTHASH_ADDER_PARA_CNT is the number of parallel BLAKE3 hashes
   used for batching (1, 8, or 16).  A batch is hashed once this many
   inputs have been queued; a value <=1 disables batching. */

#define FD_LTHASH_ADDER_PARA_CNT (FD_BLAKE3_PARA_MAX)
30 :
31 : struct __attribute__((aligned(FD_LTHASH_ADDER_ALIGN))) fd_lthash_adder {
32 :
33 : uint batch_cnt;
34 :
35 : #if FD_LTHASH_ADDER_PARA_MAX>1
36 :
37 : uchar batch_data[ FD_LTHASH_ADDER_PARA_MAX*FD_BLAKE3_CHUNK_SZ ]
38 : __attribute__((aligned(64)));
39 :
40 : ulong batch_ptrs[ FD_LTHASH_ADDER_PARA_MAX ]
41 : __attribute__((aligned(64)));
42 :
43 : uint batch_sz[ FD_LTHASH_ADDER_PARA_MAX ];
44 :
45 : #endif
46 :
47 : };
48 :
49 : typedef struct fd_lthash_adder fd_lthash_adder_t;
50 :
51 : FD_PROTOTYPES_BEGIN
52 :
53 : /* fd_lthash_adder_{new,delete} {initializes,destroys} an lthash_adder. */
54 :
55 : fd_lthash_adder_t *
56 : fd_lthash_adder_new( fd_lthash_adder_t * adder );
57 :
58 : void *
59 : fd_lthash_adder_delete( fd_lthash_adder_t * adder );
60 :
61 : /* fd_lthash_adder_push enqueues the given input for hashing. sum may
62 : or may not be updated with enqueued LtHash additions. */
63 :
64 : static inline void
65 : fd_lthash_adder_push( fd_lthash_adder_t * adder,
66 : fd_lthash_value_t * sum,
67 : void const * input,
68 12830334 : ulong input_sz ) {
69 12830334 : fd_lthash_value_t value[1];
70 12830334 : if( FD_LTHASH_ADDER_PARA_CNT<=1 || FD_UNLIKELY( input_sz>512UL ) ) {
71 9462 : fd_blake3_t blake[1];
72 9462 : fd_blake3_init( blake );
73 9462 : fd_blake3_append( blake, input, input_sz );
74 9462 : fd_blake3_fini_2048( blake, value->bytes );
75 9462 : fd_lthash_add( sum, value );
76 9462 : (void)adder;
77 9462 : return;
78 9462 : }
79 :
80 12820872 : uint batch_idx = adder->batch_cnt++;
81 12820872 : uchar * slot = (uchar *)adder->batch_ptrs[ batch_idx ];
82 12820872 : fd_memcpy( slot, input, input_sz );
83 12820872 : adder->batch_sz[ batch_idx ] = (uint)input_sz;
84 :
85 12820872 : if( batch_idx+1>=FD_LTHASH_ADDER_PARA_CNT ) {
86 266735 : # if FD_HAS_AVX512
87 266735 : fd_blake3_lthash_batch16( (void const **)fd_type_pun_const( adder->batch_ptrs ), adder->batch_sz, value->words );
88 : # elif FD_HAS_AVX
89 : fd_blake3_lthash_batch8 ( (void const **)fd_type_pun_const( adder->batch_ptrs ), adder->batch_sz, value->words );
90 1067724 : # endif
91 1334459 : adder->batch_cnt = 0;
92 1334459 : fd_lthash_add( sum, value );
93 1334459 : }
94 12820872 : }
95 :
96 : /* fd_lthash_adder_flush commits all previously enqueued additions to
97 : sum. */
98 :
99 : static inline void
100 : fd_lthash_adder_flush( fd_lthash_adder_t * adder,
101 4617 : fd_lthash_value_t * sum ) {
102 4617 : # if FD_LTHASH_ADDER_PARA_CNT>1
103 4617 : uint batch_cnt = adder->batch_cnt;
104 26574 : for( uint i=0U; i<batch_cnt; i++ ) {
105 21957 : fd_lthash_value_t value[1];
106 21957 : fd_blake3_t blake[1];
107 21957 : fd_blake3_init( blake );
108 21957 : fd_blake3_append( blake, (void const *)adder->batch_ptrs[ i ], adder->batch_sz[ i ] );
109 21957 : fd_blake3_fini_2048( blake, value->bytes );
110 21957 : fd_lthash_add( sum, value );
111 21957 : }
112 4617 : # endif
113 4617 : adder->batch_cnt = 0U;
114 4617 : (void)sum;
115 4617 : }
116 :
117 : /* fd_lthash_adder_push_solana_account wraps fd_lthash_adder_push for
118 : Solana account inputs. */
119 :
120 : static inline void
121 : fd_lthash_adder_push_solana_account(
122 : fd_lthash_adder_t * adder,
123 : fd_lthash_value_t * sum,
124 : void const * pubkey,
125 : uchar const * data,
126 : ulong data_sz,
127 : ulong lamports,
128 : uchar executable,
129 : void const * owner
130 146307 : ) {
131 146307 : fd_lthash_value_t value[1];
132 : /* FIXME opportunities for memcpy hax here */
133 :
134 146307 : ulong const static_sz = 73UL;
135 146307 : ulong const batch_threshold = 512UL;
136 146307 : if( FD_LTHASH_ADDER_PARA_CNT<=1 ||
137 146307 : FD_UNLIKELY( data_sz > batch_threshold-static_sz ) ) {
138 9366 : fd_blake3_t blake[1];
139 9366 : fd_blake3_init( blake );
140 9366 : fd_blake3_append( blake, &lamports, sizeof(ulong) );
141 9366 : fd_blake3_append( blake, data, data_sz );
142 9366 : uchar footer[ 65 ];
143 9366 : footer[ 0 ] = executable;
144 9366 : memcpy( footer+1, owner, 32 );
145 9366 : memcpy( footer+33, pubkey, 32 );
146 9366 : fd_blake3_append( blake, footer, sizeof(footer) );
147 9366 : fd_blake3_fini_2048( blake, value->bytes );
148 9366 : fd_lthash_add( sum, value );
149 9366 : return;
150 9366 : }
151 :
152 136941 : uint batch_idx = adder->batch_cnt++;
153 136941 : uchar * slot = (uchar *)adder->batch_ptrs[ batch_idx ];
154 136941 : uchar * p = slot;
155 :
156 : /* Fixed size header */
157 136941 : FD_STORE( ulong, p, lamports );
158 136941 : p += sizeof(ulong);
159 : /* Variable size content */
160 136941 : fd_memcpy( p, data, data_sz );
161 136941 : p += data_sz;
162 : /* Fixed size footer */
163 136941 : p[0] = executable; p += 1;
164 136941 : fd_memcpy( p, owner, 32 ); p += 32;
165 136941 : fd_memcpy( p, pubkey, 32 ); p += 32;
166 :
167 136941 : adder->batch_sz[ batch_idx ] = (uint)( p-slot );
168 :
169 136941 : if( batch_idx+1>=FD_LTHASH_ADDER_PARA_CNT ) {
170 2520 : # if FD_HAS_AVX512
171 2520 : fd_blake3_lthash_batch16( (void const **)fd_type_pun_const( adder->batch_ptrs ), adder->batch_sz, value->words );
172 : # elif FD_HAS_AVX
173 : fd_blake3_lthash_batch8 ( (void const **)fd_type_pun_const( adder->batch_ptrs ), adder->batch_sz, value->words );
174 10748 : # endif
175 13268 : adder->batch_cnt = 0;
176 13268 : fd_lthash_add( sum, value );
177 13268 : }
178 136941 : }
179 :
180 : FD_PROTOTYPES_END
181 :
182 : #endif /* HEADER_fd_src_ballet_lthash_fd_lthash_adder_h */
|