Line data Source code
1 : #include "../../ballet/shred/fd_shred.h"
2 : #include "fd_fec_set.h"
3 : #include "../../ballet/sha512/fd_sha512.h"
4 : #include "../../ballet/reedsol/fd_reedsol.h"
5 : #include "../metrics/fd_metrics.h"
6 : #include "fd_fec_resolver.h"
7 :
8 : typedef union {
9 : fd_ed25519_sig_t u;
10 : ulong l;
11 : } wrapped_sig_t;
12 :
13 : typedef struct __attribute__((packed)) {
14 : ulong slot;
15 : uint fec_idx;
16 : } slot_fec_pair_t;
17 :
18 : struct __attribute__((aligned(32UL))) set_ctx {
19 : /* The leader's signature of the root of the Merkle tree of the shreds
20 : in this FEC set. */
21 : wrapped_sig_t sig;
22 :
23 : union {
24 : /* When allocated, it's in a map_chain by signature and a treap
25 : by (shred, FEC set idx). When it's not allocated, it is either
26 : in the free list or the completed list. Both of those slists use
27 : free_next. */
28 : struct {
29 : uint map_next;
30 : uint map_prev;
31 : uint treap_parent;
32 : uint treap_left;
33 : uint treap_right;
34 : uint treap_prio;
35 : };
36 : struct {
37 : uint free_next;
38 : };
39 : };
40 :
41 : ulong slot;
42 : uint fec_set_idx;
43 :
44 : uchar data_variant;
45 : uchar parity_variant;
46 :
47 : ulong total_rx_shred_cnt;
48 :
49 : fd_fec_set_t * set;
50 :
51 : fd_bmtree_node_t root;
52 : /* If this FEC set has resigned shreds, this is our signature of the
53 : root of the Merkle tree */
54 : wrapped_sig_t retransmitter_sig;
55 :
56 : union {
57 : fd_bmtree_commit_t tree[1];
58 : uchar _footprint[ FD_BMTREE_COMMIT_FOOTPRINT( FD_SHRED_MERKLE_LAYER_CNT ) ] __attribute__((aligned(FD_BMTREE_COMMIT_ALIGN)));
59 : };
60 : };
61 : typedef struct set_ctx set_ctx_t;
62 :
63 : #define MAP_NAME ctx_map
64 513 : #define MAP_KEY sig
65 : #define MAP_KEY_T wrapped_sig_t
66 16743 : #define MAP_IDX_T uint
67 3801 : #define MAP_NEXT map_next
68 2436 : #define MAP_PREV map_prev
69 252 : #define MAP_ELE_T set_ctx_t
70 8628 : #define MAP_KEY_EQ(k0,k1) (!memcmp( (k0)->u, (k1)->u, FD_ED25519_SIG_SZ ))
71 8736 : #define MAP_KEY_HASH(key,s) (fd_ulong_hash( (key)->l ^ (s) ))
72 : #define MAP_OPTIMIZE_RANDOM_ACCESS_REMOVAL 1
73 : #include "../../util/tmpl/fd_map_chain.c"
74 :
75 :
76 : #define SLIST_NAME ctx_list
77 : #define SLIST_ELE_T set_ctx_t
78 486 : #define SLIST_IDX_T uint
79 1107 : #define SLIST_NEXT free_next
80 : #include "../../util/tmpl/fd_slist.c"
81 :
82 :
83 : static inline int
84 : slot_fec_pair_compare( slot_fec_pair_t const * q,
85 48 : set_ctx_t const * e ) {
86 : /* It seems like
87 : return (int)( q->slot!=e->slot ?
88 : q->slot - e->slot :
89 : q->fec_idx - e->fec_set_idx );
90 : should work, but I am concerned about overflow since this is all
91 : attacker controlled input. */
92 48 : if( FD_LIKELY( q->slot !=e->slot ) ) return fd_int_if( q->slot <e->slot, -1, 1 );
93 48 : if( FD_LIKELY( q->fec_idx!=e->fec_set_idx ) ) return fd_int_if( q->fec_idx<e->fec_set_idx, -1, 1 );
94 0 : return 0;
95 48 : }
96 :
97 : #define TREAP_NAME ctx_treap
98 : #define TREAP_T set_ctx_t
99 1083 : #define TREAP_IDX_T uint
100 582 : #define TREAP_PARENT treap_parent
101 588 : #define TREAP_LEFT treap_left
102 618 : #define TREAP_RIGHT treap_right
103 483 : #define TREAP_PRIO treap_prio
104 69 : #define TREAP_LT(e0,e1) (((e0)->slot < (e1)->slot) | ( ((e0)->slot==(e1)->slot) & ((e0)->fec_set_idx < (e1)->fec_set_idx)))
105 : #define TREAP_QUERY_T slot_fec_pair_t const *
106 48 : #define TREAP_CMP(q,e) slot_fec_pair_compare( (q), (e) )
107 : #include "../../util/tmpl/fd_treap.c"
108 :
109 :
110 :
111 : /* Once we're done with a FEC set, it goes into a map_chain and heap,
112 : both keyed by (slot, FEC set idx). */
113 :
114 : struct done_ele {
115 : slot_fec_pair_t key;
116 : uint heap_left; /* also used by pool when not allocated */
117 : uint heap_right;
118 : uint map_next;
119 : uint map_prev;
120 : /* In order to save space in the done_map and make this struct 32
121 : bytes, we store a 32 bit validator-specific hash of the shred
122 : signature. If a malicious leader equivocates and produces two FEC
123 : sets which have the same hash for us, a task which takes a decent
124 : but doable amount of effort, the only impact is that we would
125 : reject the shreds with SHRED_IGNORED instead of SHRED_EQUIVOC,
126 : which is not a big deal. It's documented that SHRED_EQUIVOC
127 : detection is on a best-effort basis. If we detect an equivocation
128 : for this (slot, FEC set idx), sig_hash gets set to
129 : SIG_HASH_EQUIVOC, and we start returning SHRED_IGNORED for any
130 : non-repair shred for that (slot, FEC set idx). */
131 : uint sig_hash;
132 : };
133 : typedef struct done_ele done_ele_t;
134 : FD_STATIC_ASSERT( sizeof(done_ele_t)==32UL, done_ele_t );
135 0 : #define SIG_HASH_EQUIVOC UINT_MAX
136 :
137 : #define MAP_NAME done_map
138 300 : #define MAP_KEY key
139 : #define MAP_KEY_T slot_fec_pair_t
140 2268 : #define MAP_IDX_T uint
141 1161 : #define MAP_NEXT map_next
142 642 : #define MAP_PREV map_prev
143 141 : #define MAP_ELE_T done_ele_t
144 1080 : #define MAP_KEY_EQ(k0,k1) ( ((k0)->slot==(k1)->slot) & ((k0)->fec_idx==(k1)->fec_idx) )
145 1308 : #define MAP_KEY_HASH(key,s) ((fd_ulong_hash( (key)->slot ^ (s) ) ^ fd_uint_hash( (key)->fec_idx ^ (uint)(s>>19) )))
146 : #define MAP_OPTIMIZE_RANDOM_ACCESS_REMOVAL 1
147 : #include "../../util/tmpl/fd_map_chain.c"
148 :
149 : #define HEAP_NAME done_heap
150 360 : #define HEAP_IDX_T uint
151 774 : #define HEAP_LEFT heap_left
152 774 : #define HEAP_RIGHT heap_right
153 : #define HEAP_T done_ele_t
154 414 : #define HEAP_LT(e0,e1) (((e0)->key.slot < (e1)->key.slot) | \
155 414 : ( ((e0)->key.slot==(e1)->key.slot) & ((e0)->key.fec_idx < (e1)->key.fec_idx)))
156 : #include "../../util/tmpl/fd_heap.c"
157 :
158 : #define POOL_NAME done_pool
159 87 : #define POOL_T done_ele_t
160 : #define POOL_IDX_T uint
161 465 : #define POOL_NEXT heap_left
162 : #include "../../util/tmpl/fd_pool.c"
163 :
164 : struct __attribute__((aligned(FD_FEC_RESOLVER_ALIGN))) fd_fec_resolver {
165 : /* depth stores the number of FEC sets this resolver can track
166 : simultaneously. done_depth stores the depth of the done tcache,
167 : i.e. the number of done FEC set keys that this resolver remembers.
168 : partial_depth stores the minimum size of the free FEC set list.
169 : completed_depth stores the size of the completed FEC set list. */
170 : ulong depth;
171 : ulong partial_depth;
172 : ulong complete_depth;
173 : ulong done_depth;
174 :
175 : /* expected_shred_version: discard all shreds with a shred version
176 : other than the specified value */
177 : ushort expected_shred_version;
178 :
179 : /* ctx_pool: A flat array (not an fd_pool) of the set_ctx_t
180 : structures used to back ctx_map, ctx_treap, and the ctx
181 : freelists. */
182 : set_ctx_t * ctx_pool;
183 :
184 : /* ctx_map: A map (using fd_map_chain) from signatures to
185 : the context object with its relevant data for in progress FEC sets.
186 : This map contains at most `depth` elements at any time. */
187 : ctx_map_t * ctx_map;
188 :
189 : /* ctx_treap: A treap (using fd_treap) of the context objects for in
190 : progress FEC sets. They are sorted by (slot, FEC index) from
191 : smallest to largest. In the case of equivocation, multiple
192 : elements with the same key may be present, with no particular
193 : ordering between them. */
194 : ctx_treap_t ctx_treap[1];
195 :
196 : /* free_list and complete_list are slists (using fd_slist)
197 : of FEC set contexts that are not in ctx_map. See the long comment
198 : in the header for why there are two. In order to satisfy the
199 : invariants, technically we only need to store the FEC set memory,
200 : not the full context, but it's not that big of a difference
201 : (especially if partial_depth and complete_depth are small), and it
202 : simplifies memory management.
203 :
204 : Invariant: at every entry and exit to fd_fec_resolver_add_shred:
205 : - free_list has between partial_depth and partial_depth+depth
206 : elements.
207 : - complete_list has complete_depth elements
208 : (all these counts are inclusive). */
209 : ctx_list_t free_list[1];
210 : ctx_list_t complete_list[1];
211 :
212 : /* free_list_cnt: The number of items in free_list. */
213 : ulong free_list_cnt;
214 :
215 : /* done_pool: A pool (this time using fd_pool) of the done_ele_t
216 : elements that back done_map and done_heap. Invariant: each element
217 : is either (i) released and in the pool, or (ii) in both the
218 : done_map and done_heap. */
219 : done_ele_t * done_pool;
220 :
221 : /* done_map: A map (using fd_map_chain) mapping (slot, fec_idx) to an
222 : element of done_pool. Even in the presence of equivocation, a
223 : specific (slot, fec_idx) tuple occurs at most once in the map,
224 : and it's arbitrary which version is represented by sig_hash. In
225 : the presence of equivocation, the right shreds are probably being
226 : delivered using repair, which will bypass reading the sig_hash
227 : field, so it doesn't really matter. */
228 : done_map_t * done_map;
229 :
230 : /* done_heap: A min heap (using fd_heap) based on (slot, fec_idx) used
231 : to stop tracking done elements older than slot_old, and for
232 : eviction in the unlikely case that we run out of elements in the
233 : done_map. */
234 : done_heap_t done_heap[1];
235 :
236 : /* signer is used to sign shreds that require a retransmitter
237 : signature. sign_ctx is provided as the first argument to the
238 : function. */
239 : fd_fec_resolver_sign_fn * signer;
240 : void * sign_ctx;
241 :
242 : /* max_shred_idx is the exclusive upper bound for shred indices. We
243 : need to reject any shred with an index >= max_shred_idx, but we
244 : also want to reject anything that is part of an FEC set where the
245 : highest index of a shred in the FEC set will be >= max_shred_idx.
246 : */
247 : ulong max_shred_idx;
248 :
249 : /* slot_old: slot_old is the lowest slot for which shreds will be
250 : accepted. That is any shred with slot<slot_old is rejected by
251 : add_shred with INGORED. slot_old can only increase. */
252 : ulong slot_old;
253 :
254 : /* seed: done_map uses seed to compute a 32-bute hash of the FEC set's
255 : signature. */
256 : ulong seed;
257 :
258 : /* discard_unexpected_data_complete_shreds: activation slot for
259 : discard_unexpected_data_complete_shreds.
260 :
261 : This feature rejects data shreds with the DATA_COMPLETE flag set
262 : at the wrong position within an FEC set (i.e. not the last data
263 : shred). */
264 : ulong discard_unexpected_data_complete_shreds;
265 :
266 : /* sha512 and reedsol are used for calculations while adding a shred.
267 : Their state outside a call to add_shred is indeterminate. */
268 : fd_sha512_t sha512[1];
269 : fd_reedsol_t reedsol[1];
270 :
271 : /* The footprint for the objects follows the struct and is in the same
272 : order as the pointers, namely:
273 : ctx_pool
274 : ctx_map
275 : done_pool
276 : done_map */
277 : };
278 :
279 : typedef struct fd_fec_resolver fd_fec_resolver_t;
280 :
281 : FD_FN_PURE ulong
282 : fd_fec_resolver_footprint( ulong depth,
283 : ulong partial_depth,
284 : ulong complete_depth,
285 3 : ulong done_depth ) {
286 3 : if( FD_UNLIKELY( (depth==0UL) | (partial_depth==0UL) | (complete_depth==0UL) | (done_depth==0UL) ) ) return 0UL;
287 3 : if( FD_UNLIKELY( (depth>UINT_MAX) | (partial_depth>UINT_MAX) | (complete_depth>UINT_MAX) ) ) return 0UL;
288 :
289 3 : ulong depth_sum = depth + partial_depth + complete_depth;
290 3 : if( FD_UNLIKELY( depth_sum>=UINT_MAX ) ) return 0UL;
291 :
292 3 : ulong ctx_chain_cnt = ctx_map_chain_cnt_est ( depth );
293 3 : ulong done_chain_cnt = done_map_chain_cnt_est( done_depth );
294 :
295 3 : ulong layout = FD_LAYOUT_INIT;
296 3 : layout = FD_LAYOUT_APPEND( layout, FD_FEC_RESOLVER_ALIGN, sizeof(fd_fec_resolver_t) );
297 3 : layout = FD_LAYOUT_APPEND( layout, alignof(set_ctx_t), sizeof(set_ctx_t)*depth_sum );
298 3 : layout = FD_LAYOUT_APPEND( layout, ctx_map_align(), ctx_map_footprint ( ctx_chain_cnt ) );
299 3 : layout = FD_LAYOUT_APPEND( layout, done_pool_align(), done_pool_footprint( done_depth ) );
300 3 : layout = FD_LAYOUT_APPEND( layout, done_map_align(), done_map_footprint ( done_chain_cnt ) );
301 :
302 3 : return FD_LAYOUT_FINI( layout, FD_FEC_RESOLVER_ALIGN );
303 3 : }
304 :
305 0 : FD_FN_CONST ulong fd_fec_resolver_align( void ) { return FD_FEC_RESOLVER_ALIGN; }
306 :
307 :
308 : void *
309 : fd_fec_resolver_new( void * shmem,
310 : fd_fec_resolver_sign_fn * signer,
311 : void * sign_ctx,
312 : ulong depth,
313 : ulong partial_depth,
314 : ulong complete_depth,
315 : ulong done_depth,
316 : fd_fec_set_t * sets,
317 : ulong max_shred_idx,
318 33 : ulong seed ) {
319 33 : if( FD_UNLIKELY( (depth==0UL) | (partial_depth==0UL) | (complete_depth==0UL) | (done_depth==0UL) ) ) return NULL;
320 33 : if( FD_UNLIKELY( (depth>UINT_MAX) | (partial_depth>UINT_MAX) | (complete_depth>UINT_MAX) ) ) return NULL;
321 :
322 33 : ulong depth_sum = depth + partial_depth + complete_depth;
323 33 : if( FD_UNLIKELY( depth_sum>=UINT_MAX ) ) return NULL;
324 :
325 33 : ulong ctx_chain_cnt = ctx_map_chain_cnt_est ( depth );
326 33 : ulong done_chain_cnt = done_map_chain_cnt_est( done_depth );
327 :
328 : /* round( 2^64 * ... */
329 33 : ulong seed0 = fd_ulong_hash( seed + 7640891576956012809UL ); /* sqrt(2)-1 */
330 33 : ulong seed1 = fd_ulong_hash( seed + 13503953896175478587UL ); /* sqrt(3)-1 */
331 33 : ulong seed2 = fd_ulong_hash( seed + 4354685564936845356UL ); /* sqrt(5)-2 */
332 33 : ulong seed3 = fd_ulong_hash( seed + 11912009170470909682UL ); /* sqrt(7)-2 */
333 :
334 33 : FD_SCRATCH_ALLOC_INIT( l, shmem );
335 33 : void * self = FD_SCRATCH_ALLOC_APPEND( l, FD_FEC_RESOLVER_ALIGN, sizeof(fd_fec_resolver_t) );
336 33 : void * _ctx_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(set_ctx_t), sizeof(set_ctx_t)*depth_sum );
337 33 : void * _ctx_map = FD_SCRATCH_ALLOC_APPEND( l, ctx_map_align(), ctx_map_footprint ( ctx_chain_cnt ) );
338 33 : void * _done_pool = FD_SCRATCH_ALLOC_APPEND( l, done_pool_align(), done_pool_footprint( done_depth ) );
339 33 : void * _done_map = FD_SCRATCH_ALLOC_APPEND( l, done_map_align(), done_map_footprint ( done_chain_cnt ) );
340 33 : FD_SCRATCH_ALLOC_FINI( l, FD_FEC_RESOLVER_ALIGN );
341 :
342 33 : fd_fec_resolver_t * resolver = (fd_fec_resolver_t *)self;
343 33 : void * _ctx_treap = resolver->ctx_treap;
344 33 : void * _free_list = resolver->free_list;
345 33 : void * _complete_list = resolver->complete_list;
346 33 : void * _done_heap = resolver->done_heap;
347 :
348 33 : if( FD_UNLIKELY( !ctx_map_new ( _ctx_map, ctx_chain_cnt, seed0 ) ) ) { FD_LOG_WARNING(( "ctx_map_new fail" )); return NULL; }
349 33 : if( FD_UNLIKELY( !ctx_treap_new( _ctx_treap, depth_sum ) ) ) { FD_LOG_WARNING(( "ctx_treap_new fail" )); return NULL; }
350 33 : if( FD_UNLIKELY( !ctx_list_new ( _free_list ) ) ) { FD_LOG_WARNING(( "ctx_list_new fail" )); return NULL; }
351 33 : if( FD_UNLIKELY( !ctx_list_new ( _complete_list ) ) ) { FD_LOG_WARNING(( "ctx_list_new fail" )); return NULL; }
352 33 : if( FD_UNLIKELY( !done_pool_new( _done_pool, done_depth ) ) ) { FD_LOG_WARNING(( "done_pool_new fail" )); return NULL; }
353 33 : if( FD_UNLIKELY( !done_map_new ( _done_map, done_chain_cnt, seed1 ) ) ) { FD_LOG_WARNING(( "done_map_new fail" )); return NULL; }
354 33 : if( FD_UNLIKELY( !done_heap_new( _done_heap, done_depth ) ) ) { FD_LOG_WARNING(( "done_heap_new fail" )); return NULL; }
355 :
356 33 : set_ctx_t * ctx_pool = (set_ctx_t *)_ctx_pool;
357 33 : fd_memset( ctx_pool, '\0', sizeof(set_ctx_t)*depth_sum );
358 186 : for( ulong i=0UL; i<depth_sum; i++ ) ctx_pool[i].set = sets + i;
359 33 : ctx_treap_seed( ctx_pool, depth_sum, seed2 );
360 :
361 : /* Initialize all the lists */
362 33 : ctx_list_t * free_list = ctx_list_join( _free_list ); FD_TEST( free_list ==resolver->free_list );
363 33 : ctx_list_t * complete_list = ctx_list_join( _complete_list ); FD_TEST( complete_list==resolver->complete_list );
364 :
365 141 : for( ulong i=0UL; i<depth+partial_depth; i++ ) { ctx_list_idx_push_tail( free_list, i, ctx_pool ); }
366 78 : for( ulong i=depth+partial_depth; i<depth_sum; i++ ) { ctx_list_idx_push_tail( complete_list, i, ctx_pool ); }
367 33 : ctx_list_leave( complete_list );
368 33 : ctx_list_leave( free_list );
369 :
370 33 : fd_sha512_new( resolver->sha512 );
371 :
372 33 : resolver->depth = depth;
373 33 : resolver->partial_depth = partial_depth;
374 33 : resolver->complete_depth = complete_depth;
375 33 : resolver->done_depth = done_depth;
376 33 : resolver->expected_shred_version = 0;
377 33 : resolver->free_list_cnt = depth+partial_depth;
378 33 : resolver->signer = signer;
379 33 : resolver->sign_ctx = sign_ctx;
380 33 : resolver->max_shred_idx = max_shred_idx;
381 33 : resolver->slot_old = 0UL;
382 33 : resolver->seed = seed3;
383 33 : resolver->discard_unexpected_data_complete_shreds = ULONG_MAX;
384 33 : return shmem;
385 33 : }
386 :
387 : fd_fec_resolver_t *
388 33 : fd_fec_resolver_join( void * shmem ) {
389 33 : fd_fec_resolver_t * resolver = (fd_fec_resolver_t *)shmem;
390 33 : ulong depth = resolver->depth;
391 33 : ulong partial_depth = resolver->partial_depth;
392 33 : ulong complete_depth = resolver->complete_depth;
393 33 : ulong done_depth = resolver->done_depth;
394 :
395 33 : ulong depth_sum = depth + partial_depth + complete_depth;
396 33 : if( FD_UNLIKELY( depth_sum>=UINT_MAX ) ) return NULL;
397 :
398 33 : ulong ctx_chain_cnt = ctx_map_chain_cnt_est ( depth );
399 33 : ulong done_chain_cnt = done_map_chain_cnt_est( done_depth );
400 :
401 33 : FD_SCRATCH_ALLOC_INIT( l, shmem );
402 33 : /* self */ FD_SCRATCH_ALLOC_APPEND( l, FD_FEC_RESOLVER_ALIGN, sizeof(fd_fec_resolver_t) );
403 33 : void * _ctx_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(set_ctx_t), sizeof(set_ctx_t)*depth_sum );
404 33 : void * _ctx_map = FD_SCRATCH_ALLOC_APPEND( l, ctx_map_align(), ctx_map_footprint ( ctx_chain_cnt ) );
405 33 : void * _done_pool = FD_SCRATCH_ALLOC_APPEND( l, done_pool_align(), done_pool_footprint( done_depth ) );
406 33 : void * _done_map = FD_SCRATCH_ALLOC_APPEND( l, done_map_align(), done_map_footprint ( done_chain_cnt ) );
407 33 : FD_SCRATCH_ALLOC_FINI( l, FD_FEC_RESOLVER_ALIGN );
408 :
409 33 : resolver->ctx_pool = (set_ctx_t *)_ctx_pool;
410 33 : resolver->ctx_map = ctx_map_join ( _ctx_map ); if( FD_UNLIKELY( !resolver->ctx_map ) ) return NULL;
411 33 : resolver->done_pool = done_pool_join( _done_pool ); if( FD_UNLIKELY( !resolver->done_pool ) ) return NULL;
412 33 : resolver->done_map = done_map_join ( _done_map ); if( FD_UNLIKELY( !resolver->done_map ) ) return NULL;
413 33 : if( FD_UNLIKELY( ctx_treap_join( resolver->ctx_treap )!= resolver->ctx_treap ) ) return NULL;
414 33 : if( FD_UNLIKELY( ctx_list_join ( resolver->free_list )!= resolver->free_list ) ) return NULL;
415 33 : if( FD_UNLIKELY( ctx_list_join ( resolver->complete_list )!= resolver->complete_list ) ) return NULL;
416 33 : if( FD_UNLIKELY( done_heap_join( resolver->done_heap )!= resolver->done_heap ) ) return NULL;
417 33 : if( FD_UNLIKELY( fd_sha512_join( resolver->sha512 )!= resolver->sha512 ) ) return NULL;
418 :
419 33 : return resolver;
420 33 : }
421 :
422 : void
423 : fd_fec_resolver_set_shred_version( fd_fec_resolver_t * resolver,
424 33 : ushort expected_shred_version ) {
425 33 : resolver->expected_shred_version = expected_shred_version;
426 33 : }
427 :
428 : void
429 : fd_fec_resolver_set_discard_unexpected_data_complete_shreds( fd_fec_resolver_t * resolver,
430 0 : ulong activation_slot ) {
431 0 : resolver->discard_unexpected_data_complete_shreds = activation_slot;
432 0 : }
433 :
434 : void
435 : fd_fec_resolver_advance_slot_old( fd_fec_resolver_t * resolver,
436 3 : ulong slot_old ) {
437 3 : if( FD_UNLIKELY( slot_old <= resolver->slot_old ) ) return;
438 3 : resolver->slot_old = slot_old;
439 :
440 : /* Remove from done map */
441 3 : done_heap_t * done_heap = resolver->done_heap;
442 3 : done_map_t * done_map = resolver->done_map;
443 3 : done_ele_t * done_pool = resolver->done_pool;
444 :
445 6 : while( done_heap_ele_cnt( done_heap ) ) {
446 3 : done_ele_t * min_ele = done_heap_ele_peek_min( done_heap, done_pool );
447 3 : if( FD_UNLIKELY( min_ele->key.slot>=slot_old ) ) break;
448 3 : done_map_ele_remove_fast( done_map, min_ele, done_pool );
449 3 : done_heap_idx_remove_min( done_heap, done_pool );
450 3 : done_pool_ele_release ( done_pool, min_ele );
451 3 : }
452 :
453 : /* Remove from in progress map */
454 3 : ctx_map_t * ctx_map = resolver->ctx_map;
455 3 : ctx_treap_t * ctx_treap = resolver->ctx_treap;
456 3 : set_ctx_t * ctx_pool = resolver->ctx_pool;
457 3 : ctx_list_t * free_list = resolver->free_list;
458 :
459 3 : ctx_treap_fwd_iter_t next;
460 6 : for( ctx_treap_fwd_iter_t iter=ctx_treap_fwd_iter_init( ctx_treap, ctx_pool ); !ctx_treap_fwd_iter_done( iter ); iter=next ) {
461 3 : next = ctx_treap_fwd_iter_next( iter, ctx_pool );
462 3 : set_ctx_t * min_ele = ctx_treap_fwd_iter_ele( iter, ctx_pool );
463 3 : if( FD_UNLIKELY( min_ele->slot>=slot_old ) ) break;
464 :
465 3 : ctx_treap_ele_remove ( ctx_treap, min_ele, ctx_pool );
466 3 : ctx_map_ele_remove_fast( ctx_map, min_ele, ctx_pool );
467 3 : ctx_list_ele_push_head ( free_list, min_ele, ctx_pool );
468 3 : resolver->free_list_cnt++;
469 3 : }
470 3 : }
471 :
472 : static inline void
473 : ensure_done_pool_free( done_ele_t * done_pool,
474 : done_heap_t * done_heap,
475 219 : done_map_t * done_map ) {
476 219 : if( FD_UNLIKELY( !done_pool_free( done_pool ) ) ) {
477 : /* Done map is full, so we'll forget about the oldest slot */
478 138 : ulong worst_idx = done_heap_idx_peek_min( done_heap );
479 138 : FD_TEST( worst_idx!=done_heap_idx_null() ); /* Done pool can't be empty and full at the same time */
480 138 : done_heap_idx_remove_min( done_heap, done_pool );
481 138 : done_map_idx_remove_fast( done_map, worst_idx, done_pool );
482 138 : done_pool_idx_release( done_pool, worst_idx );
483 : /* Now it's not empty */
484 138 : }
485 219 : }
486 :
487 :
488 : int
489 : fd_fec_resolver_add_shred( fd_fec_resolver_t * resolver,
490 : fd_shred_t const * shred,
491 : ulong shred_sz,
492 : int is_repair,
493 : uchar const * leader_pubkey,
494 : fd_fec_set_t const * * out_fec_set,
495 : fd_shred_t const * * out_shred,
496 : fd_bmtree_node_t * out_merkle_root,
497 8439 : fd_fec_resolver_spilled_t * out_spilled ) {
498 : /* Unpack variables */
499 8439 : ulong partial_depth = resolver->partial_depth;
500 :
501 8439 : ctx_list_t * free_list = resolver->free_list;
502 8439 : ctx_list_t * complete_list = resolver->complete_list;
503 8439 : ctx_map_t * ctx_map = resolver->ctx_map;
504 8439 : ctx_treap_t * ctx_treap = resolver->ctx_treap;
505 8439 : set_ctx_t * ctx_pool = resolver->ctx_pool;
506 8439 : done_map_t * done_map = resolver->done_map;
507 8439 : done_ele_t * done_pool = resolver->done_pool;
508 8439 : done_heap_t * done_heap = resolver->done_heap;
509 :
510 8439 : fd_reedsol_t * reedsol = resolver->reedsol;
511 8439 : fd_sha512_t * sha512 = resolver->sha512;
512 :
513 : /* Invariants:
514 : * each set_ctx_t is in exactly one of ctx_map, freelist, or
515 : complete_list */
516 :
517 : /* Is this shred for a slot we've already rooted or otherwise don't
518 : care about? */
519 8439 : if( FD_UNLIKELY( shred->slot<resolver->slot_old ) ) return FD_FEC_RESOLVER_SHRED_IGNORED;
520 :
521 : /* Do a bunch of quick validity checks */
522 8337 : if( FD_UNLIKELY( shred->version!=resolver->expected_shred_version ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
523 8334 : if( FD_UNLIKELY( shred_sz<fd_shred_sz( shred ) ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
524 8334 : if( FD_UNLIKELY( shred->idx>=resolver->max_shred_idx ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
525 8322 : if( FD_UNLIKELY( shred->fec_set_idx>resolver->max_shred_idx-FD_FEC_SHRED_CNT ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
526 8319 : if( FD_UNLIKELY( shred->idx-shred->fec_set_idx>=FD_FEC_SHRED_CNT ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
527 8319 : if( FD_UNLIKELY( shred->fec_set_idx%FD_FEC_SHRED_CNT!=0UL ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
528 :
529 8226 : uchar variant = shred->variant;
530 8226 : uchar shred_type = fd_shred_type( variant );
531 :
532 8226 : int is_data_shred = fd_shred_is_data( shred_type );
533 :
534 8226 : if( !is_data_shred ) { /* Roughly 50/50 branch */
535 4350 : if( FD_UNLIKELY( (shred->code.data_cnt!=FD_FEC_SHRED_CNT) | (shred->code.code_cnt!=FD_FEC_SHRED_CNT) ) )
536 3 : return FD_FEC_RESOLVER_SHRED_REJECTED;
537 4347 : if( FD_UNLIKELY( shred->code.idx>=FD_FEC_SHRED_CNT ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
538 4347 : if( FD_UNLIKELY( shred->code.idx!=shred->idx%FD_FEC_SHRED_CNT ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
539 4347 : } else {
540 : /* if the shred's parent is for a slot we've already pruned, ignore it. */
541 3876 : if( FD_UNLIKELY( shred->slot-shred->data.parent_off<resolver->slot_old ) ) return FD_FEC_RESOLVER_SHRED_IGNORED;
542 :
543 : /* if it has slot complete, it must be the last one in the FEC. */
544 3876 : if( FD_UNLIKELY( (shred->data.flags & FD_SHRED_DATA_FLAG_SLOT_COMPLETE) && ((1UL+shred->idx) % FD_FEC_SHRED_CNT) ) ) {
545 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
546 0 : }
547 :
548 : /* discard_unexpected_data_complete_shreds:
549 : if it has data complete, it must be the last data shred in the FEC set
550 : https://github.com/anza-xyz/agave/blob/v4.0.0-beta.1/ledger/src/shred.rs#L817-L825 */
551 3876 : if( FD_UNLIKELY( ( shred->slot >= resolver->discard_unexpected_data_complete_shreds ) &&
552 3876 : ( shred->data.flags & FD_SHRED_DATA_FLAG_DATA_COMPLETE ) &&
553 3876 : ( shred->idx != ( shred->fec_set_idx + ( FD_FEC_SHRED_CNT - 1UL ) ) ) ) ) {
554 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
555 0 : }
556 3876 : }
557 :
558 8223 : if( FD_UNLIKELY( (shred_type==FD_SHRED_TYPE_LEGACY_DATA) | (shred_type==FD_SHRED_TYPE_LEGACY_CODE) ) ) {
559 : /* Reject any legacy shreds */
560 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
561 0 : }
562 :
563 :
564 8223 : wrapped_sig_t const * w_sig = (wrapped_sig_t const *)shred->signature;
565 :
566 : /* Is this FEC set in progress? */
567 8223 : set_ctx_t * ctx = ctx_map_ele_query( ctx_map, w_sig, NULL, ctx_pool );
568 :
569 : /* If we detect a different signature for the same (slot, FEC set
570 : idx), it means either the shred is invalid, or the leader is
571 : equivocating. We can't tell which without verifying the shred
572 : though. */
573 8223 : int equivoc_or_invalid = 0;
574 :
575 : /* If it's not in progress and it's repair, we will allocate a context
576 : for it, assuming all the other checks pass. If it's from Turbine,
577 : we'll be a little more skeptical about it: if we've already seen a
578 : FEC set for that same (slot, FEC set idx) pair, then we won't take
579 : it, either rejecting it here, or setting equivoc_or_invalid to
580 : reject it later. */
581 8223 : if( FD_UNLIKELY( (ctx==NULL) & (!is_repair) ) ) {
582 : /* Most likely, it's just done. */
583 789 : slot_fec_pair_t slot_fec_pair[1] = {{ .slot = shred->slot, .fec_idx = shred->fec_set_idx }};
584 789 : done_ele_t * done_ele = done_map_ele_query( done_map, slot_fec_pair, NULL, done_pool );
585 789 : if( FD_LIKELY( done_ele ) ) {
586 552 : ulong sig_hash = fd_hash( resolver->seed, w_sig, sizeof(wrapped_sig_t) );
587 : /* It's possible (with probability 2^-32, about 1/year at current
588 : rates) for fd_hash to return SIG_HASH_EQUIVOC. In this case,
589 : we may miss an equivocation and just always return
590 : SHRED_IGNORED for subsequent Turbine shreds for that FEC set.
591 : Because the hash is validator specific, it just means we'll
592 : rely on another node to produce the equivocation proof, and
593 : we'll act as if we hadn't seen the equivocating shreds. */
594 552 : if( FD_LIKELY( ((uint)sig_hash==done_ele->sig_hash) | (done_ele->sig_hash==SIG_HASH_EQUIVOC) ) ) return FD_FEC_RESOLVER_SHRED_IGNORED;
595 0 : equivoc_or_invalid = 1;
596 0 : }
597 :
598 : /* If it's not done, then check for the unlikely case we have it
599 : in progress with a different signature. */
600 237 : if( FD_UNLIKELY( ctx_treap_ele_query_const( ctx_treap, slot_fec_pair, ctx_pool ) ) ) equivoc_or_invalid = 1;
601 237 : }
602 :
603 : /* If we've made it here, then we'll keep this shred as long as
604 : it is valid. */
605 :
606 7671 : fd_bmtree_node_t leaf[1];
607 :
608 : /* For the purposes of the shred header, tree_depth means the number
609 : of nodes, counting the leaf but excluding the root. For bmtree,
610 : depth means the number of layers, which counts both. */
611 7671 : ulong tree_depth = fd_shred_merkle_cnt( variant ); /* In [0, 15] */
612 7671 : ulong reedsol_protected_sz = 1115UL + FD_SHRED_DATA_HEADER_SZ - FD_SHRED_SIGNATURE_SZ - FD_SHRED_MERKLE_NODE_SZ*tree_depth
613 7671 : - FD_SHRED_MERKLE_ROOT_SZ*fd_shred_is_chained ( shred_type )
614 7671 : - FD_SHRED_SIGNATURE_SZ *fd_shred_is_resigned( shred_type); /* In [743, 1139] conservatively*/
615 7671 : ulong data_merkle_protected_sz = reedsol_protected_sz + FD_SHRED_MERKLE_ROOT_SZ*fd_shred_is_chained( shred_type );
616 7671 : ulong parity_merkle_protected_sz = reedsol_protected_sz + FD_SHRED_MERKLE_ROOT_SZ*fd_shred_is_chained( shred_type )
617 7671 : + FD_SHRED_CODE_HEADER_SZ - FD_ED25519_SIG_SZ;
618 7671 : ulong merkle_protected_sz = fd_ulong_if( is_data_shred, data_merkle_protected_sz, parity_merkle_protected_sz );
619 :
620 7671 : fd_bmtree_hash_leaf( leaf, (uchar const *)shred + sizeof(fd_ed25519_sig_t), merkle_protected_sz, FD_BMTREE_LONG_PREFIX_SZ );
621 :
622 : /* in_type_idx is between [0, code.data_cnt) or [0, code.code_cnt),
623 : where data_cnt <= FD_FEC_SHRED_CNT and code_cnt <= FD_FEC_SHRED_CNT
624 : On the other hand, shred_idx, goes from [0, code.data_cnt +
625 : code.code_cnt), with all the data shreds having
626 : shred_idx < code.data_cnt and all the parity shreds having
627 : shred_idx >= code.data_cnt. */
628 7671 : ulong in_type_idx = fd_ulong_if( is_data_shred, shred->idx - shred->fec_set_idx, shred->code.idx );
629 7671 : ulong shred_idx = fd_ulong_if( is_data_shred, in_type_idx, in_type_idx + shred->code.data_cnt );
630 :
631 7671 : if( FD_UNLIKELY( ( shred->fec_set_idx % FD_FEC_SHRED_CNT ) != 0UL ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
632 7671 : if( FD_UNLIKELY( in_type_idx >= FD_FEC_SHRED_CNT ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
633 :
634 : /* This, combined with the check on shred->code.data_cnt implies that
635 : shred_idx is in [0, 2*FD_FEC_SHRED_CNT). */
636 :
637 7671 : if( FD_UNLIKELY( tree_depth!=FD_SHRED_MERKLE_LAYER_CNT-1UL ) ) return FD_FEC_RESOLVER_SHRED_REJECTED;
638 :
639 7671 : if( FD_UNLIKELY( !ctx ) ) { /* This is the first shred in the FEC set */
640 :
641 264 : if( FD_UNLIKELY( resolver->free_list_cnt<=partial_depth ) ) {
642 : /* Packet loss is really high and we have a lot of in-progress FEC
643 : sets that we haven't been able to finish. Evict the context
644 : with the highest (slot, FEC idx). This is the one that is the
645 : farthest away from what we're currently replaying, which means
646 : we have the longest time to request it via repair if we
647 : actually need it. This also handles the case where a leader
648 : sends some shreds from their slots that are far in the future
649 : in this epoch. */
650 30 : set_ctx_t * victim_ctx = ctx_treap_rev_iter_ele( ctx_treap_rev_iter_init( ctx_treap, ctx_pool ), ctx_pool );
651 :
652 30 : if( FD_LIKELY( out_spilled ) ) {
653 6 : out_spilled->slot = victim_ctx->slot;
654 6 : out_spilled->fec_set_idx = victim_ctx->fec_set_idx;
655 6 : *out_spilled->merkle_root = victim_ctx->root;
656 6 : }
657 :
658 30 : fd_fec_set_t * set = victim_ctx->set;
659 :
660 : /* TODO: remove this log */
661 30 : FD_LOG_INFO(( "Spilled from fec_resolver in-progress map %lu %u, data_shreds_rcvd %x, parity_shreds_rcvd %x", victim_ctx->slot, victim_ctx->fec_set_idx, set->data_shred_rcvd, set->parity_shred_rcvd ));
662 :
663 : /* Remove from treap and map, then add to free_list */
664 30 : ctx_treap_ele_remove ( ctx_treap, victim_ctx, ctx_pool );
665 30 : ctx_map_ele_remove_fast( ctx_map, victim_ctx, ctx_pool );
666 :
667 30 : ctx_list_ele_push_tail ( free_list, victim_ctx, ctx_pool );
668 30 : resolver->free_list_cnt++;
669 :
670 30 : FD_MCNT_INC( SHRED, FEC_SET_SPILLED, 1UL );
671 30 : }
672 : /* Now we know |free_list|>partial_depth */
673 :
674 264 : ctx = ctx_list_ele_pop_head( free_list, ctx_pool );
675 264 : resolver->free_list_cnt--;
676 :
677 : /* Now we need to derive the root of the Merkle tree and verify the
678 : signature to prevent a DOS attack just by sending lots of invalid
679 : shreds. */
680 264 : fd_bmtree_commit_t * tree;
681 264 : tree = fd_bmtree_commit_init( ctx->_footprint, FD_SHRED_MERKLE_NODE_SZ, FD_BMTREE_LONG_PREFIX_SZ, FD_SHRED_MERKLE_LAYER_CNT );
682 264 : FD_TEST( tree==ctx->tree );
683 :
684 264 : fd_bmtree_node_t _root[1];
685 264 : fd_shred_merkle_t const * proof = fd_shred_merkle_nodes( shred );
686 264 : int rv = fd_bmtree_commitp_insert_with_proof( tree, shred_idx, leaf, (uchar const *)proof, tree_depth, _root );
687 264 : if( FD_UNLIKELY( !rv ) ) {
688 0 : ctx_list_ele_push_head( free_list, ctx, ctx_pool );
689 0 : resolver->free_list_cnt++;
690 0 : FD_MCNT_INC( SHRED, SHRED_REJECTED_INITIAL, 1UL );
691 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
692 0 : }
693 :
694 264 : if( FD_UNLIKELY( FD_ED25519_SUCCESS != fd_ed25519_verify( _root->hash, 32UL, shred->signature, leader_pubkey, sha512 ) ) ) {
695 0 : ctx_list_ele_push_head( free_list, ctx, ctx_pool );
696 0 : resolver->free_list_cnt++;
697 0 : FD_MCNT_INC( SHRED, SHRED_REJECTED_INITIAL, 1UL );
698 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
699 0 : }
700 :
701 : /* Copy the merkle root into the output arg. */
702 264 : if( FD_LIKELY( out_merkle_root ) ) memcpy( out_merkle_root, _root, sizeof(fd_bmtree_node_t) );
703 :
704 264 : if( FD_UNLIKELY( equivoc_or_invalid ) ) {
705 : /* It wasn't invalid, so it must be equivoc */
706 0 : ctx_list_ele_push_head( free_list, ctx, ctx_pool );
707 0 : resolver->free_list_cnt++;
708 : /* We want to record that we've sigverified the shred somewhere so
709 : that if an attacker sends it to us again, we don't have to
710 : verify it again. We do that by inserting it into the done_map
711 : with SIG_HASH_EQUIVOC. */
712 0 : slot_fec_pair_t slot_fec_pair[1] = {{ .slot = shred->slot, .fec_idx = shred->fec_set_idx }};
713 0 : done_ele_t * done = done_map_ele_query( done_map, slot_fec_pair, NULL, done_pool );
714 0 : if( FD_LIKELY( done ) ) done->sig_hash = SIG_HASH_EQUIVOC;
715 0 : else {
716 0 : ensure_done_pool_free( done_pool, done_heap, done_map );
717 :
718 0 : done = done_pool_ele_acquire( done_pool );
719 :
720 0 : done->key.slot = shred->slot;
721 0 : done->key.fec_idx = shred->fec_set_idx;
722 0 : done->sig_hash = SIG_HASH_EQUIVOC;
723 :
724 0 : done_heap_ele_insert( done_heap, done, done_pool );
725 0 : done_map_ele_insert ( done_map, done, done_pool );
726 0 : }
727 :
728 0 : return FD_FEC_RESOLVER_SHRED_EQUIVOC;
729 0 : }
730 :
731 : /* This seems like a legitimate FEC set, so we populate the rest of
732 : the fields, then add it to the map and treap. */
733 264 : ctx->sig = *w_sig;
734 264 : ctx->slot = shred->slot;
735 264 : ctx->fec_set_idx = shred->fec_set_idx;
736 264 : ctx->data_variant = fd_uchar_if( is_data_shred, variant, fd_shred_variant( fd_shred_swap_type( shred_type ), (uchar)tree_depth ) );
737 264 : ctx->parity_variant = fd_uchar_if( !is_data_shred, variant, fd_shred_variant( fd_shred_swap_type( shred_type ), (uchar)tree_depth ) );
738 264 : ctx->total_rx_shred_cnt = 0UL;
739 264 : ctx->root = *_root;
740 :
741 264 : if( FD_UNLIKELY( fd_shred_is_resigned( shred_type ) & !!(resolver->signer) ) ) {
742 3 : resolver->signer( resolver->sign_ctx, ctx->retransmitter_sig.u, _root->hash );
743 261 : } else {
744 261 : fd_memset( ctx->retransmitter_sig.u, 0, 64UL );
745 261 : }
746 :
747 : /* Reset the FEC set */
748 264 : ctx->set->data_shred_rcvd = 0U;
749 264 : ctx->set->parity_shred_rcvd = 0U;
750 :
751 264 : ctx_map_ele_insert ( ctx_map, ctx, ctx_pool );
752 264 : ctx_treap_ele_insert( ctx_treap, ctx, ctx_pool );
753 :
754 7407 : } else {
755 : /* This is not the first shred in the set */
756 :
757 : /* Verify that the shred's slot and fec_set_idx match the context
758 : established by the first shred. */
759 7407 : if( FD_UNLIKELY( shred->slot!=ctx->slot || shred->fec_set_idx!=ctx->fec_set_idx ) ) {
760 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
761 0 : }
762 :
763 : /* First ensure that all the shreds in the FEC set have consistent
764 : variants. They all must have the same tree_depth and the same
765 : chained/not chained, resigned/not resigned bits. */
766 7407 : if( FD_UNLIKELY( variant!=fd_uchar_if( is_data_shred, ctx->data_variant, ctx->parity_variant ) ) ) {
767 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
768 0 : }
769 :
770 7407 : fd_shred_merkle_t const * proof = fd_shred_merkle_nodes( shred );
771 7407 : int rv = fd_bmtree_commitp_insert_with_proof( ctx->tree, shred_idx, leaf, (uchar const *)proof, tree_depth, out_merkle_root );
772 7407 : if( !rv ) return FD_FEC_RESOLVER_SHRED_REJECTED;
773 :
774 : /* Check to make sure this is not a duplicate */
775 7398 : int shred_dup = !!(fd_uint_if( is_data_shred, ctx->set->data_shred_rcvd, ctx->set->parity_shred_rcvd ) & (1U << in_type_idx));
776 7398 : if( FD_UNLIKELY( shred_dup ) ) {
777 249 : *out_shred = is_data_shred ? ctx->set->data_shreds[ in_type_idx ].s : ctx->set->parity_shreds[ in_type_idx ].s;
778 249 : return FD_FEC_RESOLVER_SHRED_DUPLICATE;
779 249 : }
780 7398 : }
781 :
782 : /* At this point, the shred has passed Merkle validation and is new.
783 : We also know that ctx is a pointer to the set_ctx_t where this
784 : shred belongs. */
785 :
786 : /* Copy the shred to memory the FEC resolver owns */
787 7413 : uchar * dst = is_data_shred ? ctx->set->data_shreds[ in_type_idx ].b : ctx->set->parity_shreds[ in_type_idx ].b;
788 7413 : fd_memcpy( dst, shred, fd_shred_sz( shred ) );
789 :
790 : /* If the shred needs a retransmitter signature, set it */
791 7413 : if( FD_UNLIKELY( fd_shred_is_resigned( shred_type ) ) ) {
792 672 : memcpy( dst + fd_shred_retransmitter_sig_off( (fd_shred_t *)dst ), ctx->retransmitter_sig.u, 64UL );
793 672 : }
794 :
795 7413 : ctx->set->data_shred_rcvd |= (uint)(!!is_data_shred)<<in_type_idx;
796 7413 : ctx->set->parity_shred_rcvd |= (uint)( !is_data_shred)<<in_type_idx;
797 7413 : ctx->total_rx_shred_cnt++;
798 :
799 7413 : *out_shred = (fd_shred_t const *)dst;
800 :
801 : /* Do we have enough to begin reconstruction? */
802 7413 : if( FD_LIKELY( ctx->total_rx_shred_cnt < FD_FEC_SHRED_CNT ) ) return FD_FEC_RESOLVER_SHRED_OKAY;
803 :
804 : /* At this point, the FEC set is either valid or permanently invalid,
805 : so we can consider it done either way. */
806 :
807 219 : done_ele_t * done = NULL;
808 219 : ensure_done_pool_free( done_pool, done_heap, done_map );
809 :
810 : /* If it's already in the done map, we don't need to re-insert it.
811 : It's not very clear what we should do if the sig_hashes differ, but
812 : this can only happen the second insert was a repair shred, and in
813 : that case, it gets bypassed anyway, so it doesn't really matter.
814 : We'll just keep the existing value in that case. */
815 219 : slot_fec_pair_t done_key[1] = {{ .slot = ctx->slot, .fec_idx = ctx->fec_set_idx }};
816 219 : if( FD_LIKELY( !done_map_ele_query( done_map, done_key, NULL, done_pool ) ) ) {
817 219 : done = done_pool_ele_acquire( done_pool );
818 :
819 219 : done->key.slot = ctx->slot;
820 219 : done->key.fec_idx = ctx->fec_set_idx;
821 219 : done->sig_hash = (uint)fd_hash( resolver->seed, w_sig, sizeof(wrapped_sig_t) );
822 :
823 219 : done_heap_ele_insert( done_heap, done, done_pool );
824 219 : done_map_ele_insert ( done_map, done, done_pool );
825 219 : }
826 :
827 :
828 219 : ctx_map_ele_remove_fast( ctx_map, ctx, ctx_pool );
829 219 : ctx_treap_ele_remove ( ctx_treap, ctx, ctx_pool );
830 : /* At this point, ctx is not in any of the data structures, so we need
831 : to be sure to add it to one of the lists before exiting. */
832 :
833 219 : fd_fec_set_t * set = ctx->set;
834 219 : fd_bmtree_commit_t * tree = ctx->tree;
835 :
836 219 : reedsol = fd_reedsol_recover_init( (void*)reedsol, reedsol_protected_sz );
837 7227 : for( ulong i=0UL; i<FD_FEC_SHRED_CNT; i++ ) {
838 7008 : uchar * rs_payload = set->data_shreds[ i ].b + sizeof(fd_ed25519_sig_t);
839 7008 : if( set->data_shred_rcvd&(1U<<i) ) fd_reedsol_recover_add_rcvd_shred ( reedsol, 1, rs_payload );
840 3615 : else fd_reedsol_recover_add_erased_shred( reedsol, 1, rs_payload );
841 7008 : }
842 7227 : for( ulong i=0UL; i<FD_FEC_SHRED_CNT; i++ ) {
843 7008 : uchar * rs_payload = set->parity_shreds[ i ].b + FD_SHRED_CODE_HEADER_SZ;
844 7008 : if( set->parity_shred_rcvd&(1U<<i) ) fd_reedsol_recover_add_rcvd_shred ( reedsol, 0, rs_payload );
845 3393 : else fd_reedsol_recover_add_erased_shred( reedsol, 0, rs_payload );
846 7008 : }
847 :
848 219 : if( FD_UNLIKELY( FD_REEDSOL_SUCCESS != fd_reedsol_recover_fini( reedsol ) ) ) {
849 : /* A few lines up, we already checked to make sure it wasn't the
850 : insufficient case, so it must be the inconsistent case. That
851 : means the leader signed a shred with invalid Reed-Solomon FEC
852 : set. This shouldn't happen in practice, but we need to handle it
853 : for the malicious leader case. This should probably be a
854 : slash-able offense. */
855 0 : ctx_list_ele_push_tail( free_list, ctx, ctx_pool );
856 0 : resolver->free_list_cnt++;
857 0 : FD_MCNT_INC( SHRED, FEC_REJECTED_FATAL, 1UL );
858 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
859 0 : }
860 :
861 219 : uchar const * chained_root = fd_ptr_if( fd_shred_is_chained( shred_type ), (uchar *)shred+fd_shred_chain_off( variant ), NULL );
862 :
863 : /* Iterate over recovered shreds, add them to the Merkle tree,
864 : populate headers and signatures. */
865 7227 : for( ulong i=0UL; i<FD_FEC_SHRED_CNT; i++ ) {
866 7008 : if( !(set->data_shred_rcvd&(1U<<i)) ) {
867 3615 : fd_memcpy( set->data_shreds[i].b, shred, sizeof(fd_ed25519_sig_t) );
868 3615 : if( FD_LIKELY( fd_shred_is_chained( shred_type ) ) ) {
869 3615 : fd_memcpy( set->data_shreds[i].b+fd_shred_chain_off( ctx->data_variant ), chained_root, FD_SHRED_MERKLE_ROOT_SZ );
870 3615 : }
871 3615 : fd_bmtree_hash_leaf( leaf, set->data_shreds[i].b+sizeof(fd_ed25519_sig_t), data_merkle_protected_sz, FD_BMTREE_LONG_PREFIX_SZ );
872 3615 : if( FD_UNLIKELY( !fd_bmtree_commitp_insert_with_proof( tree, i, leaf, NULL, 0, NULL ) ) ) {
873 0 : ctx_list_ele_push_tail( free_list, ctx, ctx_pool );
874 0 : resolver->free_list_cnt++;
875 0 : FD_MCNT_INC( SHRED, FEC_REJECTED_FATAL, 1UL );
876 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
877 0 : }
878 3615 : }
879 7008 : }
880 :
881 7227 : for( ulong i=0UL; i<FD_FEC_SHRED_CNT; i++ ) {
882 7008 : if( !(set->parity_shred_rcvd&(1U<<i)) ) {
883 3393 : fd_shred_t * p_shred = set->parity_shreds[i].s; /* We can't parse because we haven't populated the header */
884 3393 : fd_memcpy( p_shred->signature, shred->signature, sizeof(fd_ed25519_sig_t) );
885 3393 : p_shred->variant = ctx->parity_variant;
886 3393 : p_shred->slot = shred->slot;
887 3393 : p_shred->idx = (uint)(i + ctx->fec_set_idx);
888 3393 : p_shred->version = shred->version;
889 3393 : p_shred->fec_set_idx = (uint)ctx->fec_set_idx;
890 3393 : p_shred->code.data_cnt = (ushort)FD_FEC_SHRED_CNT;
891 3393 : p_shred->code.code_cnt = (ushort)FD_FEC_SHRED_CNT;
892 3393 : p_shred->code.idx = (ushort)i;
893 :
894 3393 : if( FD_LIKELY( fd_shred_is_chained( shred_type ) ) ) {
895 3393 : fd_memcpy( set->parity_shreds[i].b+fd_shred_chain_off( ctx->parity_variant ), chained_root, FD_SHRED_MERKLE_ROOT_SZ );
896 3393 : }
897 :
898 3393 : fd_bmtree_hash_leaf( leaf, set->parity_shreds[i].b+sizeof(fd_ed25519_sig_t), parity_merkle_protected_sz, FD_BMTREE_LONG_PREFIX_SZ );
899 3393 : if( FD_UNLIKELY( !fd_bmtree_commitp_insert_with_proof( tree, FD_FEC_SHRED_CNT + i, leaf, NULL, 0, NULL ) ) ) {
900 0 : ctx_list_ele_push_tail( free_list, ctx, ctx_pool );
901 0 : resolver->free_list_cnt++;
902 0 : FD_MCNT_INC( SHRED, FEC_REJECTED_FATAL, 1UL );
903 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
904 0 : }
905 3393 : }
906 7008 : }
907 :
908 : /* Check that the whole Merkle tree is consistent. */
909 219 : if( FD_UNLIKELY( !fd_bmtree_commitp_fini( tree, FD_FEC_SHRED_CNT + FD_FEC_SHRED_CNT ) ) ) {
910 0 : ctx_list_ele_push_tail( free_list, ctx, ctx_pool );
911 0 : resolver->free_list_cnt++;
912 0 : FD_MCNT_INC( SHRED, FEC_REJECTED_FATAL, 1UL );
913 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
914 0 : }
915 :
916 : /* Check that all the fields that are supposed to be consistent across
917 : an FEC set actually are. */
918 219 : fd_shred_t const * base_data_shred = fd_shred_parse( set->data_shreds [ 0 ].b, FD_SHRED_MIN_SZ );
919 219 : fd_shred_t const * base_parity_shred = fd_shred_parse( set->parity_shreds[ 0 ].b, FD_SHRED_MAX_SZ );
920 219 : int reject = (!base_data_shred) | (!base_parity_shred);
921 :
922 : /* Check idx of base shreds */
923 219 : reject = reject || ((base_data_shred->idx!=ctx->fec_set_idx) | (base_parity_shred->idx!=ctx->fec_set_idx) |
924 219 : (base_data_shred->data.flags & FD_SHRED_DATA_FLAG_DATA_COMPLETE));
925 :
926 7008 : for( ulong i=1UL; (!reject) & (i<FD_FEC_SHRED_CNT); i++ ) {
927 : /* Technically, we only need to re-parse the ones we recovered with
928 : Reedsol, but parsing is pretty cheap and the rest of the
929 : validation we need to do on all of them. */
930 6789 : fd_shred_t const * parsed = fd_shred_parse( set->data_shreds[ i ].b, FD_SHRED_MIN_SZ );
931 6789 : if( FD_UNLIKELY( !parsed ) ) { reject = 1; break; }
932 6789 : reject |= parsed->variant != base_data_shred->variant;
933 6789 : reject |= parsed->slot != base_data_shred->slot;
934 6789 : reject |= parsed->version != base_data_shred->version;
935 6789 : reject |= parsed->fec_set_idx != base_data_shred->fec_set_idx;
936 6789 : reject |= parsed->data.parent_off != base_data_shred->data.parent_off;
937 6789 : reject |= parsed->idx != (uint)(ctx->fec_set_idx+i);
938 6789 : reject |= (i!=FD_FEC_SHRED_CNT-1UL) && (parsed->data.flags & FD_SHRED_DATA_FLAG_DATA_COMPLETE);
939 :
940 6789 : reject |= fd_shred_is_chained( fd_shred_type( parsed->variant ) ) &&
941 6789 : !fd_memeq( (uchar *)parsed +fd_shred_chain_off( parsed->variant ),
942 6789 : (uchar *)base_data_shred+fd_shred_chain_off( base_data_shred->variant ), FD_SHRED_MERKLE_ROOT_SZ );
943 6789 : }
944 :
945 7227 : for( ulong i=0UL; (!reject) & (i<FD_FEC_SHRED_CNT); i++ ) {
946 7008 : fd_shred_t const * parsed = fd_shred_parse( set->parity_shreds[ i ].b, FD_SHRED_MAX_SZ );
947 7008 : if( FD_UNLIKELY( !parsed ) ) { reject = 1; break; }
948 7008 : reject |= fd_shred_type( parsed->variant ) != fd_shred_swap_type( fd_shred_type( base_data_shred->variant ) );
949 7008 : reject |= fd_shred_merkle_cnt( parsed->variant ) != fd_shred_merkle_cnt( base_data_shred->variant );
950 7008 : reject |= parsed->slot != base_data_shred->slot;
951 7008 : reject |= parsed->version != base_data_shred->version;
952 7008 : reject |= parsed->fec_set_idx != base_data_shred->fec_set_idx;
953 7008 : reject |= parsed->idx != (uint)(ctx->fec_set_idx+i);
954 7008 : reject |= parsed->code.data_cnt != base_parity_shred->code.data_cnt;
955 7008 : reject |= parsed->code.code_cnt != base_parity_shred->code.code_cnt;
956 7008 : reject |= parsed->code.idx != (ushort)i;
957 :
958 7008 : reject |= fd_shred_is_chained( fd_shred_type( parsed->variant ) ) &&
959 7008 : !fd_memeq( (uchar *)parsed +fd_shred_chain_off( parsed->variant ),
960 7008 : (uchar *)base_data_shred+fd_shred_chain_off( base_data_shred->variant ), FD_SHRED_MERKLE_ROOT_SZ );
961 7008 : }
962 219 : if( FD_UNLIKELY( reject ) ) {
963 0 : ctx_list_ele_push_tail( free_list, ctx, ctx_pool );
964 0 : resolver->free_list_cnt++;
965 0 : FD_MCNT_INC( SHRED, FEC_REJECTED_FATAL, 1UL );
966 0 : return FD_FEC_RESOLVER_SHRED_REJECTED;
967 0 : }
968 :
969 : /* Populate missing Merkle proofs */
970 7227 : for( ulong i=0UL; i<FD_FEC_SHRED_CNT; i++ ) if( !( set->data_shred_rcvd&(1U<<i) ) )
971 3615 : fd_bmtree_get_proof( tree, set->data_shreds[i].b + fd_shred_merkle_off( set->data_shreds[i].s ), i );
972 :
973 7227 : for( ulong i=0UL; i<FD_FEC_SHRED_CNT; i++ ) if( !( set->parity_shred_rcvd&(1U<<i) ) )
974 3393 : fd_bmtree_get_proof( tree, set->parity_shreds[i].b + fd_shred_merkle_off( set->parity_shreds[i].s ), FD_FEC_SHRED_CNT+i );
975 :
976 : /* Set the retransmitter signature for shreds that need one */
977 219 : if( FD_UNLIKELY( fd_shred_is_resigned( shred_type ) ) ) {
978 693 : for( ulong i=0UL; i<FD_FEC_SHRED_CNT; i++ ) if( !( set->data_shred_rcvd&(1U<<i) ) )
979 372 : memcpy( set->data_shreds[i].b + fd_shred_retransmitter_sig_off( set->data_shreds[i].s ), ctx->retransmitter_sig.u, 64UL );
980 :
981 693 : for( ulong i=0UL; i<FD_FEC_SHRED_CNT; i++ ) if( !( set->parity_shred_rcvd&(1U<<i) ) )
982 300 : memcpy( set->parity_shreds[i].b + fd_shred_retransmitter_sig_off( set->parity_shreds[i].s ), ctx->retransmitter_sig.u, 64UL );
983 21 : }
984 :
985 : /* Finally... A valid FEC set. Forward it along. */
986 219 : ctx_list_ele_push_tail( complete_list, ctx, ctx_pool );
987 219 : ctx_list_idx_push_tail( free_list, ctx_list_idx_pop_head( complete_list, ctx_pool ), ctx_pool );
988 219 : resolver->free_list_cnt++;
989 :
990 219 : *out_fec_set = set;
991 :
992 219 : return FD_FEC_RESOLVER_SHRED_COMPLETES;
993 219 : }
994 :
995 :
996 21 : void * fd_fec_resolver_leave( fd_fec_resolver_t * resolver ) {
997 21 : fd_sha512_leave( resolver->sha512 );
998 21 : done_heap_leave( resolver->done_heap );
999 21 : ctx_list_leave ( resolver->complete_list );
1000 21 : ctx_list_leave ( resolver->free_list );
1001 21 : ctx_treap_leave( resolver->ctx_treap );
1002 21 : done_map_leave ( resolver->done_map );
1003 21 : done_pool_leave( resolver->done_pool );
1004 21 : ctx_map_leave ( resolver->ctx_map );
1005 :
1006 21 : return (void *)resolver;
1007 21 : }
1008 :
1009 21 : void * fd_fec_resolver_delete( void * shmem ) {
1010 21 : fd_fec_resolver_t * resolver = (fd_fec_resolver_t *)shmem;
1011 21 : ulong depth = resolver->depth;
1012 21 : ulong partial_depth = resolver->partial_depth;
1013 21 : ulong complete_depth = resolver->complete_depth;
1014 21 : ulong done_depth = resolver->done_depth;
1015 :
1016 21 : ulong depth_sum = depth + partial_depth + complete_depth;
1017 21 : ulong ctx_chain_cnt = ctx_map_chain_cnt_est ( depth );
1018 21 : ulong done_chain_cnt = done_map_chain_cnt_est( done_depth );
1019 :
1020 21 : FD_SCRATCH_ALLOC_INIT( l, shmem );
1021 21 : /* self */ FD_SCRATCH_ALLOC_APPEND( l, FD_FEC_RESOLVER_ALIGN, sizeof(fd_fec_resolver_t) );
1022 21 : /* _ctx_pool */ FD_SCRATCH_ALLOC_APPEND( l, alignof(set_ctx_t), sizeof(set_ctx_t)*depth_sum );
1023 21 : void * _ctx_map = FD_SCRATCH_ALLOC_APPEND( l, ctx_map_align(), ctx_map_footprint ( ctx_chain_cnt ) );
1024 21 : void * _done_pool = FD_SCRATCH_ALLOC_APPEND( l, done_pool_align(), done_pool_footprint( done_depth ) );
1025 21 : void * _done_map = FD_SCRATCH_ALLOC_APPEND( l, done_map_align(), done_map_footprint ( done_chain_cnt ) );
1026 21 : FD_SCRATCH_ALLOC_FINI( l, FD_FEC_RESOLVER_ALIGN );
1027 :
1028 21 : fd_sha512_delete( resolver->sha512 );
1029 21 : done_heap_delete( resolver->done_heap );
1030 21 : done_map_delete ( _done_map );
1031 21 : done_pool_delete( _done_pool );
1032 21 : ctx_list_delete ( resolver->complete_list );
1033 21 : ctx_list_delete ( resolver->free_list );
1034 21 : ctx_treap_delete( resolver->ctx_treap );
1035 21 : ctx_map_delete ( _ctx_map );
1036 :
1037 21 : return shmem;
1038 21 : }
|