Line data Source code
1 : #include "fd_reasm.h"
2 : #include "fd_reasm_private.h"
3 : #include "../../disco/shred/fd_fec_set.h"
4 :
5 : #define LOGGING 0
6 :
7 : FD_FN_CONST ulong
8 390 : fd_reasm_align( void ) {
9 390 : return alignof(fd_reasm_t);
10 390 : }
11 :
12 : FD_FN_CONST ulong
13 90 : fd_reasm_footprint( ulong fec_max ) {
14 90 : ulong max_slots = fd_ulong_max(fec_max / FD_FEC_BLK_MAX, 1UL); /* add capacity for a block id per slot */
15 90 : int lgf_max = fd_ulong_find_msb( fd_ulong_pow2_up( fec_max + max_slots ) ); /* capacity for fec_max fecs + (fec_max / 1024) more block ids */
16 90 : return FD_LAYOUT_FINI(
17 90 : FD_LAYOUT_APPEND(
18 90 : FD_LAYOUT_APPEND(
19 90 : FD_LAYOUT_APPEND(
20 90 : FD_LAYOUT_APPEND(
21 90 : FD_LAYOUT_APPEND(
22 90 : FD_LAYOUT_APPEND(
23 90 : FD_LAYOUT_APPEND(
24 90 : FD_LAYOUT_APPEND(
25 90 : FD_LAYOUT_INIT,
26 90 : alignof(fd_reasm_t), sizeof(fd_reasm_t) ),
27 90 : pool_align(), pool_footprint ( fec_max ) ),
28 90 : ancestry_align(), ancestry_footprint( fec_max ) ),
29 90 : frontier_align(), frontier_footprint( fec_max ) ),
30 90 : orphaned_align(), orphaned_footprint( fec_max ) ),
31 90 : subtrees_align(), subtrees_footprint( fec_max ) ),
32 90 : bfs_align(), bfs_footprint ( fec_max ) ),
33 90 : xid_align(), xid_footprint ( lgf_max ) ),
34 90 : fd_reasm_align() );
35 90 : }
36 :
37 : void *
38 : fd_reasm_new( void * shmem,
39 : ulong fec_max,
40 45 : ulong seed ) {
41 :
42 45 : if( FD_UNLIKELY( !shmem ) ) {
43 0 : FD_LOG_WARNING(( "NULL mem" ));
44 0 : return NULL;
45 0 : }
46 :
47 45 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_reasm_align() ) ) ) {
48 0 : FD_LOG_WARNING(( "misaligned mem" ));
49 0 : return NULL;
50 0 : }
51 :
52 45 : ulong footprint = fd_reasm_footprint( fec_max );
53 45 : if( FD_UNLIKELY( !footprint ) ) {
54 0 : FD_LOG_WARNING(( "bad fec_max (%lu)", fec_max ));
55 0 : return NULL;
56 0 : }
57 :
58 45 : fd_wksp_t * wksp = fd_wksp_containing( shmem );
59 45 : if( FD_UNLIKELY( !wksp ) ) {
60 0 : FD_LOG_WARNING(( "shmem must be part of a workspace" ));
61 0 : return NULL;
62 0 : }
63 :
64 45 : fd_memset( shmem, 0, footprint );
65 :
66 45 : ulong max_slots = fd_ulong_max(fec_max / FD_FEC_BLK_MAX, 1UL); /* add capacity for a block id per slot */
67 45 : int lgf_max = fd_ulong_find_msb( fd_ulong_pow2_up( fec_max + max_slots ) ); /* capacity for fec_max fecs + (fec_max / 1024) more block ids */
68 :
69 45 : fd_reasm_t * reasm;
70 45 : FD_SCRATCH_ALLOC_INIT( l, shmem );
71 45 : reasm = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_reasm_t), sizeof(fd_reasm_t) );
72 45 : void * pool = FD_SCRATCH_ALLOC_APPEND( l, pool_align(), pool_footprint ( fec_max ) );
73 45 : void * ancestry = FD_SCRATCH_ALLOC_APPEND( l, ancestry_align(), ancestry_footprint( fec_max ) );
74 45 : void * frontier = FD_SCRATCH_ALLOC_APPEND( l, frontier_align(), frontier_footprint( fec_max ) );
75 45 : void * orphaned = FD_SCRATCH_ALLOC_APPEND( l, orphaned_align(), orphaned_footprint( fec_max ) );
76 45 : void * subtrees = FD_SCRATCH_ALLOC_APPEND( l, subtrees_align(), subtrees_footprint( fec_max ) );
77 45 : void * bfs = FD_SCRATCH_ALLOC_APPEND( l, bfs_align(), bfs_footprint ( fec_max ) );
78 45 : void * xid = FD_SCRATCH_ALLOC_APPEND( l, xid_align(), xid_footprint ( lgf_max ) );
79 45 : FD_TEST( FD_SCRATCH_ALLOC_FINI( l, fd_reasm_align() ) == (ulong)shmem + footprint );
80 :
81 45 : reasm->slot0 = ULONG_MAX;
82 45 : reasm->root = pool_idx_null( pool );
83 45 : reasm->pool_gaddr = fd_wksp_gaddr_fast( wksp, pool_join( pool_new( pool, fec_max ) ) );
84 45 : reasm->wksp_gaddr = fd_wksp_gaddr_fast( wksp, reasm );
85 45 : reasm->ancestry = ancestry_new( ancestry, fec_max, seed );
86 45 : reasm->frontier = frontier_new( frontier, fec_max, seed );
87 45 : reasm->orphaned = orphaned_new( orphaned, fec_max, seed );
88 45 : reasm->subtrees = subtrees_new( subtrees, fec_max, seed );
89 45 : /* */ subtreel_new( reasm->_subtrlf );
90 45 : /* */ out_new ( reasm->_out );
91 45 : reasm->bfs = bfs_new ( bfs, fec_max );
92 45 : reasm->xid = xid_new ( xid, lgf_max, seed );
93 :
94 45 : return shmem;
95 45 : }
96 :
97 : fd_reasm_t *
98 45 : fd_reasm_join( void * shreasm ) {
99 45 : fd_reasm_t * reasm = (fd_reasm_t *)shreasm;
100 :
101 45 : if( FD_UNLIKELY( !reasm ) ) {
102 0 : FD_LOG_WARNING(( "NULL reasm" ));
103 0 : return NULL;
104 0 : }
105 : /* pool join handled in fd_reasm_new */
106 45 : reasm->ancestry = ancestry_join( reasm->ancestry );
107 45 : reasm->frontier = frontier_join( reasm->frontier );
108 45 : reasm->orphaned = orphaned_join( reasm->orphaned );
109 45 : reasm->subtrees = subtrees_join( reasm->subtrees );
110 45 : reasm->subtreel = subtreel_join( reasm->_subtrlf );
111 45 : reasm->out = out_join ( reasm->_out );
112 45 : reasm->bfs = bfs_join ( reasm->bfs );
113 45 : reasm->xid = xid_join ( reasm->xid );
114 :
115 45 : return reasm;
116 45 : }
117 :
118 : void *
119 30 : fd_reasm_leave( fd_reasm_t * reasm ) {
120 :
121 30 : if( FD_UNLIKELY( !reasm ) ) {
122 0 : FD_LOG_WARNING(( "NULL reasm" ));
123 0 : return NULL;
124 0 : }
125 :
126 30 : return (void *)reasm;
127 30 : }
128 :
129 : void *
130 30 : fd_reasm_delete( void * shreasm ) {
131 30 : fd_reasm_t * reasm = (fd_reasm_t *)shreasm;
132 :
133 30 : if( FD_UNLIKELY( !reasm ) ) {
134 0 : FD_LOG_WARNING(( "NULL reasm" ));
135 0 : return NULL;
136 0 : }
137 :
138 30 : if( FD_UNLIKELY( !fd_ulong_is_aligned((ulong)reasm, fd_reasm_align() ) ) ) {
139 0 : FD_LOG_WARNING(( "misaligned reasm" ));
140 0 : return NULL;
141 0 : }
142 :
143 30 : return reasm;
144 30 : }
145 :
146 6 : fd_reasm_fec_t * fd_reasm_root ( fd_reasm_t * reasm ) { return pool_ele ( reasm_pool ( reasm ), reasm->root ); }
147 6 : fd_reasm_fec_t const * fd_reasm_root_const ( fd_reasm_t const * reasm ) { return pool_ele_const( reasm_pool_const( reasm ), reasm->root ); }
148 84 : fd_reasm_fec_t * fd_reasm_parent ( fd_reasm_t * reasm, fd_reasm_fec_t * child ) { return pool_ele ( reasm_pool ( reasm ), child->parent ); }
149 0 : fd_reasm_fec_t const * fd_reasm_parent_const ( fd_reasm_t const * reasm, fd_reasm_fec_t const * child ) { return pool_ele_const( reasm_pool_const( reasm ), child->parent ); }
150 162 : fd_reasm_fec_t * fd_reasm_child ( fd_reasm_t * reasm, fd_reasm_fec_t * parent ) { return pool_ele ( reasm_pool ( reasm ), parent->child ); }
151 42 : fd_reasm_fec_t const * fd_reasm_child_const ( fd_reasm_t const * reasm, fd_reasm_fec_t const * parent ) { return pool_ele_const( reasm_pool_const( reasm ), parent->child ); }
152 36 : fd_reasm_fec_t * fd_reasm_sibling ( fd_reasm_t * reasm, fd_reasm_fec_t * sibling ) { return pool_ele ( reasm_pool ( reasm ), sibling->sibling ); }
153 0 : fd_reasm_fec_t const * fd_reasm_sibling_const( fd_reasm_t const * reasm, fd_reasm_fec_t const * sibling ) { return pool_ele_const( reasm_pool_const( reasm ), sibling->sibling ); }
154 :
155 : ulong
156 0 : fd_reasm_slot0( fd_reasm_t * reasm ) {
157 0 : return reasm->slot0;
158 0 : }
159 :
160 : ulong
161 0 : fd_reasm_free( fd_reasm_t * reasm ) {
162 0 : return pool_free( reasm_pool( reasm ) );
163 0 : }
164 :
165 : fd_reasm_fec_t *
166 0 : fd_reasm_peek( fd_reasm_t * reasm ) {
167 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
168 0 : for( out_iter_t iter = out_iter_fwd_init( reasm->out, pool );
169 0 : !out_iter_done ( iter, reasm->out, pool );
170 0 : iter = out_iter_fwd_next( iter, reasm->out, pool ) ) {
171 0 : fd_reasm_fec_t * fec = out_iter_ele( iter, reasm->out, pool );
172 0 : if( FD_LIKELY( fec && !fec->popped && ( !fec->eqvoc || fec->confirmed ) ) ) return fec;
173 0 : }
174 0 : return NULL;
175 0 : }
176 :
177 : fd_reasm_fec_t *
178 135 : fd_reasm_pop( fd_reasm_t * reasm ) {
179 135 : fd_reasm_fec_t * pool = reasm_pool( reasm );
180 192 : while( FD_LIKELY( !out_is_empty( reasm->out, pool ) ) ) {
181 153 : fd_reasm_fec_t * fec = out_ele_pop_head( reasm->out, pool );
182 153 : fec->in_out = 0;
183 153 : if( FD_LIKELY( !fec->popped && ( !fec->eqvoc || fec->confirmed ) ) ) {
184 96 : fec->popped = 1;
185 96 : return fec;
186 96 : }
187 153 : }
188 39 : return NULL;
189 135 : }
190 :
191 : fd_reasm_fec_t *
192 : fd_reasm_query( fd_reasm_t * reasm,
193 699 : fd_hash_t const * merkle_root ) {
194 699 : fd_reasm_fec_t * pool = reasm_pool( reasm );
195 699 : fd_reasm_fec_t * fec = NULL;
196 699 : fec = ancestry_ele_query( reasm->ancestry, merkle_root, NULL, pool );
197 699 : fec = fd_ptr_if( !fec, frontier_ele_query( reasm->frontier, merkle_root, NULL, pool ), fec );
198 699 : fec = fd_ptr_if( !fec, orphaned_ele_query( reasm->orphaned, merkle_root, NULL, pool ), fec );
199 699 : fec = fd_ptr_if( !fec, subtrees_ele_query( reasm->subtrees, merkle_root, NULL, pool ), fec );
200 699 : return fec;
201 699 : }
202 :
203 : void
204 : fd_reasm_confirm( fd_reasm_t * reasm,
205 24 : fd_hash_t const * block_id ) {
206 24 : fd_reasm_fec_t * pool = reasm_pool( reasm );
207 24 : fd_reasm_fec_t * fec = ancestry_ele_query( reasm->ancestry, block_id, NULL, pool );
208 24 : fec = fd_ptr_if( !fec, frontier_ele_query( reasm->frontier, block_id, NULL, pool ), fec );
209 :
210 : /* TODO there is a potential optimization where we don't actually need
211 : to confirm every FEC and instead just confirm at the slot-level.
212 : Given roughly ~1k shreds per slot at 32 shreds per FEC, this would
213 : save ~32 loop iterations. Punting given the additional complexity
214 : of bookkeeping and logic this would require. */
215 :
216 24 : fd_reasm_fec_t * last_inserted = NULL;
217 84 : while( FD_LIKELY( fec && !fec->confirmed ) ) {
218 60 : fec->confirmed = 1;
219 :
220 60 : xid_t * xid = xid_query( reasm->xid, (fec->slot << 32) | fec->fec_set_idx, NULL );
221 60 : xid->idx = pool_idx( pool, fec );
222 60 : if( FD_UNLIKELY( fec->slot_complete ) ) {
223 18 : xid_t * bid = xid_query( reasm->xid, ( fec->slot << 32 ) | UINT_MAX, NULL );
224 18 : bid->idx = pool_idx( pool, fec );
225 18 : }
226 :
227 60 : if( FD_LIKELY( !fec->popped && !fec->in_out ) ) {
228 : /* Let's say that the delivery queue already contains A, and
229 : we confirm A - B - C. We walk upwards from C, but we need to
230 : make sure B and C are inserted after A, and in that order. */
231 33 : if( FD_UNLIKELY( !last_inserted ) ) out_ele_push_tail( reasm->out, fec, pool );
232 18 : else out_ele_insert_before( reasm->out, fec, last_inserted, pool );
233 33 : fec->in_out = 1;
234 33 : last_inserted = fec;
235 33 : }
236 60 : fec = fd_reasm_parent( reasm, fec );
237 60 : }
238 24 : }
239 :
240 : /* This is a gross case reasm needs to handle because Agave currently
241 : does not validate chained merkle roots across slots ie. if a leader
242 : sends a bad chained merkle root on a slot boundary, the cluster
243 : might converge on the leader's block anyways. So we overwrite the
244 : chained merkle root based on the slot and parent_off metadata.
245 : There are two cases: 1. we receive the parent before the child. In
246 : this case we just overwrite the child's CMR. 2. we receive the
247 : child before the parent. In this case every time we receive a new
248 : FEC set we need to check the orphan roots for whether we can link
249 : the orphan to the new FEC via slot metadata, since the chained
250 : merkle root metadata on that orphan root might be wrong. */
251 :
252 : static void
253 : overwrite_invalid_cmr( fd_reasm_t * reasm,
254 351 : fd_reasm_fec_t * child ) {
255 351 : fd_reasm_fec_t * pool = reasm_pool( reasm );
256 351 : if( FD_UNLIKELY( child->fec_set_idx==0 && !fd_reasm_query( reasm, &child->cmr ) ) ) {
257 81 : xid_t * parent_bid = xid_query( reasm->xid, (child->slot - child->parent_off) << 32 | UINT_MAX, NULL );
258 81 : if( FD_LIKELY( parent_bid ) ) {
259 6 : fd_reasm_fec_t * parent = pool_ele( pool, parent_bid->idx );
260 6 : if( FD_LIKELY( parent ) ) {
261 6 : FD_BASE58_ENCODE_32_BYTES( child->cmr.key, cmr_b58 );
262 6 : FD_BASE58_ENCODE_32_BYTES( parent->key.key, parent_key_b58 );
263 6 : FD_LOG_INFO(( "overwriting invalid cmr for FEC slot: %lu fec_set_idx: %u from %s (CMR) to %s (parent's block id)", child->slot, child->fec_set_idx, cmr_b58, parent_key_b58 ));
264 6 : child->cmr = parent->key; /* use the parent's merkle root */
265 6 : }
266 6 : }
267 81 : }
268 351 : }
269 :
270 : /* Mark the entire subtree beginning from root as equivocating. This is
271 : linear in the number of descendants in the subtree, but amortizes
272 : because we can short-circuit the BFS at nodes that are already marked
273 : equivocating, so each node is visited at most once. */
274 :
275 : static void
276 : eqvoc( fd_reasm_t * reasm,
277 99 : fd_reasm_fec_t * root ) {
278 99 : fd_reasm_fec_t * pool = reasm_pool( reasm );
279 99 : ulong * bfs = reasm->bfs;
280 99 : bfs_push_tail( bfs, pool_idx( pool, root ) );
281 234 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
282 135 : fd_reasm_fec_t * descendant = pool_ele( pool, bfs_pop_head( bfs ) );
283 135 : if( FD_LIKELY( descendant->eqvoc ) ) continue;
284 111 : descendant->eqvoc = 1;
285 111 : fd_reasm_fec_t * child = fd_reasm_child( reasm, descendant );
286 147 : while( FD_LIKELY( child ) ) {
287 36 : bfs_push_tail( bfs, pool_idx( pool, child ) );
288 36 : child = fd_reasm_sibling( reasm, child );
289 36 : }
290 111 : }
291 99 : }
292 :
293 : static void
294 : link( fd_reasm_t * reasm,
295 : fd_reasm_fec_t * parent,
296 234 : fd_reasm_fec_t * child ) {
297 234 : fd_reasm_fec_t * pool = reasm_pool( reasm );
298 234 : child->parent = pool_idx( pool, parent );
299 234 : if( FD_LIKELY( parent->child == pool_idx_null( pool ) ) ) {
300 195 : parent->child = pool_idx( pool, child ); /* set as left-child. */
301 195 : } else {
302 39 : fd_reasm_fec_t * sibling = pool_ele( pool, parent->child );
303 39 : while( FD_LIKELY( sibling->sibling != pool_idx_null( pool ) ) ) sibling = pool_ele( pool, sibling->sibling );
304 39 : sibling->sibling = pool_idx( pool, child ); /* set as right-sibling. */
305 39 : }
306 234 : }
307 :
308 : /* Assumes caller is de-duplicating FEC sets of the same merkle root. */
309 : static xid_t *
310 423 : xid_update( fd_reasm_t * reasm, ulong slot, uint fec_set_idx, ulong pool_idx ) {
311 423 : xid_t * xid = xid_query( reasm->xid, (slot << 32) | fec_set_idx, NULL );
312 423 : if( FD_UNLIKELY( xid ) ) {
313 51 : xid->cnt++;
314 372 : } else {
315 372 : xid = xid_insert( reasm->xid, (slot << 32) | fec_set_idx );
316 372 : if( FD_UNLIKELY( !xid ) ) FD_LOG_CRIT(( "xid map full, slot=%lu fec_set_idx=%u", slot, fec_set_idx )); // TODO remove after reasm eviction is implemented
317 372 : xid->idx = pool_idx;
318 372 : xid->cnt = 1;
319 372 : }
320 423 : return xid;
321 423 : }
322 :
323 : static fd_reasm_fec_t *
324 : clear_slot_metadata( fd_reasm_t * reasm,
325 45 : fd_reasm_fec_t * fec ) {
326 : /* remove from bid and xid */
327 45 : if( FD_UNLIKELY( fec->slot_complete ) ) {
328 24 : xid_t * bid = xid_query( reasm->xid, (fec->slot << 32)|UINT_MAX, NULL );
329 24 : bid->cnt--;
330 24 : if( FD_LIKELY( !bid->cnt ) ) xid_remove( reasm->xid, bid );
331 24 : }
332 45 : xid_t * xid = xid_query( reasm->xid, ( fec->slot << 32 ) | fec->fec_set_idx, NULL );
333 45 : xid->cnt--;
334 45 : if( FD_LIKELY( !xid->cnt ) ) xid_remove( reasm->xid, xid );
335 :
336 45 : return fec;
337 45 : }
338 :
339 : void
340 : fd_reasm_pool_release( fd_reasm_t * reasm,
341 18 : fd_reasm_fec_t * ele ){
342 18 : pool_ele_release( reasm_pool( reasm ), ele );
343 18 : }
344 :
345 : ulong
346 0 : fd_reasm_pool_idx( fd_reasm_t * reasm, fd_reasm_fec_t * ele ) {
347 0 : return pool_idx( reasm_pool( reasm ), ele );
348 0 : }
349 :
350 : fd_reasm_fec_t *
351 : fd_reasm_remove( fd_reasm_t * reasm,
352 : fd_reasm_fec_t * head,
353 12 : fd_store_t * opt_store ) {
354 : /* see fd_forest.c clear_leaf */
355 :
356 12 : fd_reasm_fec_t * pool = reasm_pool( reasm );
357 12 : orphaned_t * orphaned = reasm->orphaned;
358 12 : frontier_t * frontier = reasm->frontier;
359 12 : ancestry_t * ancestry = reasm->ancestry;
360 12 : subtrees_t * subtrees = reasm->subtrees;
361 12 : subtreel_t * subtreel = reasm->subtreel;
362 :
363 12 : FD_TEST( head );
364 :
365 12 : fd_reasm_fec_t * tail = head;
366 :
367 :
368 :
369 12 : if( FD_LIKELY( orphaned_ele_query( orphaned, &head->key, NULL, pool ) ||
370 12 : subtrees_ele_query( subtrees, &head->key, NULL, pool ) ) ) {
371 3 : FD_TEST( head->child == ULONG_MAX ); /* must be a leaf node */
372 9 : } else {
373 : /* Node is in frontier or ancestry. If the leaf is in the frontier,
374 : we could be removing something that has been executed. Move the
375 : head pointer up to where we begin evicting.
376 :
377 : We search up the tree until the theoretical boundary of a bank.
378 : This is usually when we jump to a parent slot, but if an
379 : equivocation occured, this could also be the middle of the slot.
380 :
381 : 0 ── 32 ── 64 ── 96 (confirmed)
382 : └── 64' ── 96' ── 128' (eqvoc)
383 :
384 : Note we only have execute a slot twice (have 2 bank idxs for it)
385 : if the slot equivocated, we replayed the wrong version, and then
386 : replayed the confirmed version afterwards.
387 :
388 : The state after executing the wrong version first is:
389 :
390 : 0 ──── 32 ──── 64 ──── 96 ──── 128
391 : (bank_idx=1) (bank_idx=1) .... (all bank_idx=1)
392 :
393 : After receiving and executing the confirmed version, the state
394 : looks like:
395 :
396 : 0 (b=2) ── 32 (b=2) ── 64 (b=2) ── 96 (b=2) (confirmed)
397 : └── 64' (b=1) ── 96' (b=1) ── 128' (b=1) (eqvoc)
398 :
399 : Here we want to evict only until fec 64'. Or let's say we are
400 : getting around to executing the confirmed version, but we haven't
401 : executed it yet.
402 :
403 : 0 (b=1) ── 32 (b=1) ── 64 (b=ULONG_MAX) ── 96 (b=ULONG_MAX) (confirmed, but not executed yet)
404 : └── 64' (b=1) ── 96' (b=1) ── 128'(b=1) (eqvoc)
405 :
406 : Now we know we should evict until the max(parent has > 1 child, or fec set idx == 0) */
407 :
408 12 : while( FD_LIKELY( head ) ) {
409 12 : fd_reasm_fec_t * parent = fd_reasm_parent( reasm, head ); /* parent must exist. It's not possible to walk up past root */
410 12 : FD_TEST( head->bank_idx == tail->bank_idx );
411 :
412 12 : if( FD_UNLIKELY( head->fec_set_idx==0 ) ) break;
413 3 : if( FD_UNLIKELY( head->sibling != pool_idx_null( pool ) ) ) break; /* if the parent has more than 1 child, we know for sure the parent is a slot boundary or eqvoc point, so we can stop here. */
414 3 : if( FD_UNLIKELY( parent->child != pool_idx( pool, head ) ) ) break; /* if the parent has more than 1 child, we know for sure the parent is a slot boundary or eqvoc point, so we can stop here. */
415 3 : if( FD_UNLIKELY( parent->slot_complete ) ) break; /* specifically catches case where slot complete is in middle of slot. */
416 3 : head = parent;
417 3 : }
418 9 : }
419 12 : FD_LOG_INFO(( "evicting reasm slot %lu, fec idx %u, down to %u bank_idx %lu", head->slot, head->fec_set_idx, tail->fec_set_idx, head->bank_idx ));
420 :
421 : /* Orphan the entire subtree from the tail :( */
422 12 : if( FD_UNLIKELY( fd_reasm_child( reasm, tail ) ) ) {
423 : /* subtree this child. This code path should only be hit if this is
424 : banks-driven eviction, so children are guaranteed to be in main
425 : tree right now. */
426 3 : ulong * bfs = reasm->bfs;
427 3 : bfs_push_tail( bfs, pool_idx( pool, tail ) );
428 27 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
429 24 : fd_reasm_fec_t * ele = pool_ele( pool, bfs_pop_head( bfs ) );
430 24 : fd_reasm_fec_t * child = fd_reasm_child( reasm, ele );
431 45 : while( FD_LIKELY( child ) ) {
432 21 : bfs_push_tail( bfs, pool_idx( pool, child ) );
433 21 : child = pool_ele( pool, child->sibling );
434 21 : }
435 :
436 24 : if( FD_UNLIKELY( ele == tail ) ) continue;
437 : /* remove each child from the maps */
438 21 : if( FD_UNLIKELY( !ancestry_ele_remove( ancestry, &ele->key, NULL, pool ) ) ) frontier_ele_remove( frontier, &ele->key, NULL, pool );
439 21 : if( FD_LIKELY( ele->in_out ) ) { out_ele_remove( reasm->out, ele, pool ); ele->in_out = 0; }
440 :
441 21 : if( FD_UNLIKELY( ele->parent == pool_idx( pool, tail ) ) ) {
442 3 : subtrees_ele_insert( subtrees, ele, pool );
443 3 : subtreel_ele_push_tail( reasm->subtreel, ele, pool );
444 3 : ele->parent = ULONG_MAX;
445 3 : ele->sibling = ULONG_MAX;
446 18 : } else {
447 18 : orphaned_ele_insert( orphaned, ele, pool );
448 18 : }
449 21 : }
450 : /* unlink the leaf from its children. */
451 3 : tail->child = ULONG_MAX;
452 3 : }
453 :
454 12 : fd_reasm_fec_t * parent = fd_reasm_parent( reasm, head );
455 12 : if( FD_LIKELY( parent ) ) {
456 : /* Clean up the parent pointing to this head, and remove block from the maps
457 : remove the block from the parent's child list */
458 :
459 9 : fd_reasm_fec_t * child = pool_ele( pool, parent->child );
460 9 : if( FD_LIKELY( 0==memcmp( &child->key, &head->key, sizeof(fd_hash_t) ) ) ) { /* evicted is left-child (or only child) */
461 9 : parent->child = child->sibling;
462 9 : } else {
463 : /* evicted is a right-sibling */
464 0 : fd_reasm_fec_t * sibling = pool_ele( pool, child->sibling );
465 0 : fd_reasm_fec_t * prev = child;
466 0 : while( FD_LIKELY( sibling && memcmp( &sibling->key, &head->key, sizeof(fd_hash_t) ) ) ) {
467 0 : prev = sibling;
468 0 : sibling = pool_ele( pool, sibling->sibling );
469 0 : }
470 0 : prev->sibling = sibling->sibling;
471 0 : }
472 :
473 : /* remove the chain itself from the maps */
474 :
475 9 : fd_reasm_fec_t * removed_orphan = NULL;
476 9 : if( FD_LIKELY ( removed_orphan = orphaned_ele_remove( orphaned, &head->key, NULL, pool ) ) ) {
477 0 : clear_slot_metadata( reasm, head );
478 0 : if( FD_LIKELY( opt_store ) ) fd_store_remove( opt_store, &head->key );
479 0 : return head;
480 0 : }
481 :
482 : /* remove from ancestry and frontier */
483 9 : fd_reasm_fec_t * curr = head;
484 21 : while( FD_LIKELY( curr ) ) {
485 12 : fd_reasm_fec_t * removed = ancestry_ele_remove( ancestry, &curr->key, NULL, pool );
486 12 : if( !removed ) removed = frontier_ele_remove( frontier, &curr->key, NULL, pool );
487 12 : if( FD_LIKELY( removed->in_out ) ) { out_ele_remove( reasm->out, removed, pool ); removed->in_out = 0; }
488 :
489 12 : curr = fd_reasm_child( reasm, curr ); /* guaranteed only one child */
490 12 : clear_slot_metadata( reasm, removed );
491 12 : if( FD_LIKELY( opt_store ) ) fd_store_remove( opt_store, &removed->key );
492 12 : }
493 :
494 : /* We removed from the main tree, so we might need to insert parent into the frontier.
495 : Only need to add parent to the frontier if it doesn't have any other children. */
496 :
497 9 : if( parent->child == pool_idx_null( pool ) ) {
498 6 : parent = ancestry_ele_remove( ancestry, &parent->key, NULL, pool );
499 6 : FD_TEST( parent );
500 6 : frontier_ele_insert( frontier, parent, pool );
501 6 : }
502 9 : return head;
503 9 : }
504 :
505 : /* No parent, remove from subtrees and subtree list */
506 3 : subtrees_ele_remove( subtrees, &head->key, NULL, pool );
507 3 : subtreel_ele_remove( subtreel, head, pool );
508 3 : clear_slot_metadata( reasm, head );
509 3 : if( FD_LIKELY( opt_store ) ) fd_store_remove( opt_store, &head->key );
510 3 : return head;
511 12 : }
512 :
513 : fd_reasm_fec_t *
514 : latest_confirmed_fec( fd_reasm_t * reasm,
515 0 : ulong subtree_root ) {
516 0 : ulong * bfs = reasm->bfs;
517 0 : fd_reasm_fec_t * pool = reasm_pool( reasm );
518 0 : bfs_push_tail( bfs, subtree_root );
519 0 : fd_reasm_fec_t * latest_confirmed = NULL;
520 0 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
521 0 : fd_reasm_fec_t * ele = pool_ele( pool, bfs_pop_head( bfs ) );
522 0 : if( FD_LIKELY( ele->confirmed ) ) {
523 0 : if( FD_LIKELY( latest_confirmed == NULL ||
524 0 : latest_confirmed->slot < ele->slot ||
525 0 : (latest_confirmed->slot == ele->slot && latest_confirmed->fec_set_idx < ele->fec_set_idx)) )
526 0 : latest_confirmed = ele;
527 0 : }
528 0 : fd_reasm_fec_t * child = fd_reasm_child( reasm, ele );
529 0 : while( FD_LIKELY( child ) ) {
530 0 : bfs_push_tail( bfs, pool_idx( pool, child ) );
531 0 : child = pool_ele( pool, child->sibling );
532 0 : }
533 0 : }
534 0 : return latest_confirmed;
535 0 : }
536 :
537 : static fd_reasm_fec_t *
538 : gca( fd_reasm_t * reasm,
539 : fd_reasm_fec_t * a,
540 0 : fd_reasm_fec_t * b ) {
541 0 : fd_reasm_fec_t * parent1 = a;
542 0 : fd_reasm_fec_t * parent2 = b;
543 0 : while( FD_LIKELY( parent1 && parent2 ) ) {
544 0 : if( FD_LIKELY( parent1 == parent2 ) ) return parent1;
545 0 : if( parent1->slot > parent2->slot ||
546 0 : ( parent1->slot == parent2->slot && parent1->fec_set_idx > parent2->fec_set_idx ) ) parent1 = fd_reasm_parent( reasm, parent1 );
547 0 : else parent2 = fd_reasm_parent( reasm, parent2 );
548 0 : }
549 0 : return NULL;
550 0 : }
551 :
552 : /* Caller guarantees new_root and parent_root are non-NULL */
553 : static fd_reasm_fec_t *
554 : evict( fd_reasm_t * reasm,
555 : fd_store_t * opt_store,
556 : fd_hash_t const * new_root FD_PARAM_UNUSED,
557 12 : fd_hash_t const * parent_root ) {
558 12 : fd_reasm_fec_t * pool = reasm_pool( reasm );
559 12 : frontier_t * frontier = reasm->frontier;
560 12 : orphaned_t * orphaned = reasm->orphaned;
561 12 : subtrees_t * subtrees = reasm->subtrees;
562 12 : subtreel_t * subtreel = reasm->subtreel;
563 :
564 : /* Generally, best policy for eviction is to evict in the order of:
565 : 1. Highest unconfirmed orphan leaf - furthest from root
566 : 2. Highest incomplete, unconfirmed leaf in ancestry - furthest from tip of execution
567 : 3. Highest confirmed orphan leaf - evictable, since unrelated to banks, but less ideal */
568 :
569 12 : fd_reasm_fec_t * unconfrmd_orphan = NULL; /* 1st best candidate for eviction is the highest unconfirmed orphan. */
570 12 : fd_reasm_fec_t * confirmed_orphan = NULL; /* 3rd best candidate for eviction is the highest confirmed orphan. */
571 12 : for( subtreel_iter_t iter = subtreel_iter_fwd_init( subtreel, pool );
572 18 : !subtreel_iter_done ( iter, subtreel, pool );
573 12 : iter = subtreel_iter_fwd_next( iter, subtreel, pool ) ) {
574 6 : fd_reasm_fec_t * ele = subtreel_iter_ele( iter, subtreel, pool );
575 6 : if( ele->child != ULONG_MAX || memcmp( &ele->key, parent_root, sizeof(fd_hash_t) ) == 0 ) continue;
576 3 : if( FD_UNLIKELY( ele->confirmed ) ) confirmed_orphan = fd_ptr_if( !confirmed_orphan || ele->slot > confirmed_orphan->slot, ele, confirmed_orphan );
577 3 : else unconfrmd_orphan = fd_ptr_if( !unconfrmd_orphan || ele->slot > unconfrmd_orphan->slot, ele, unconfrmd_orphan );
578 3 : }
579 12 : for( orphaned_iter_t iter = orphaned_iter_init( orphaned, pool );
580 12 : !orphaned_iter_done( iter, orphaned, pool );
581 12 : iter = orphaned_iter_next( iter, orphaned, pool ) ) {
582 0 : fd_reasm_fec_t * ele = orphaned_iter_ele( iter, orphaned, pool );
583 0 : if( ele->child != ULONG_MAX || memcmp( &ele->key, parent_root, sizeof(fd_hash_t) ) == 0 ) continue;
584 0 : if( FD_UNLIKELY( ele->confirmed ) ) confirmed_orphan = fd_ptr_if( !confirmed_orphan || ele->slot > confirmed_orphan->slot, ele, confirmed_orphan );
585 0 : else unconfrmd_orphan = fd_ptr_if( !unconfrmd_orphan || ele->slot > unconfrmd_orphan->slot, ele, unconfrmd_orphan );
586 0 : }
587 :
588 12 : if( FD_UNLIKELY( unconfrmd_orphan )) {
589 3 : return fd_reasm_remove( reasm, unconfrmd_orphan, opt_store );
590 3 : }
591 :
592 9 : fd_reasm_fec_t * unconfrmd_leaf = NULL; /* 2nd best candidate for eviction is the highest unconfirmed, incomplete slot. */
593 9 : for( frontier_iter_t iter = frontier_iter_init( frontier, pool );
594 24 : !frontier_iter_done( iter, frontier, pool );
595 15 : iter = frontier_iter_next( iter, frontier, pool ) ) {
596 15 : fd_reasm_fec_t * ele = frontier_iter_ele( iter, frontier, pool );
597 15 : if( iter.ele_idx == reasm->root
598 15 : || 0==memcmp( &ele->key, parent_root, sizeof(fd_hash_t) )
599 15 : || ele->confirmed
600 15 : || ele->slot_complete
601 15 : || ele->is_leader ) continue; /* not a candidate */
602 9 : unconfrmd_leaf = fd_ptr_if( !unconfrmd_leaf || ele->slot > unconfrmd_leaf->slot, ele, unconfrmd_leaf );
603 9 : }
604 :
605 9 : if( FD_UNLIKELY( unconfrmd_leaf )) {
606 6 : return fd_reasm_remove( reasm, unconfrmd_leaf, opt_store );
607 6 : }
608 :
609 : /* Already did traversal to find best confirmed orphan candidate,
610 : which is the third choice */
611 :
612 3 : if( FD_UNLIKELY( confirmed_orphan )) {
613 0 : fd_reasm_fec_t * parent = fd_reasm_query( reasm, parent_root );
614 0 : if( !parent ) {
615 0 : return fd_reasm_remove( reasm, confirmed_orphan, opt_store );
616 0 : }
617 : /* for any subtree:
618 : 0 ── 1 ── 2 ── 3 (confirmed) ── 4(confirmed) ── 5 ── 6 ──> add 7 here is valid.
619 : └──> add 7 here is valid.
620 : └──> add 7 here is invalid. */
621 0 : ulong subtree_root = reasm->root;
622 0 : if( subtrees_ele_query( subtrees, parent_root, NULL, pool ) ||
623 0 : orphaned_ele_query( orphaned, parent_root, NULL, pool ) ) {
624 : /* if adding to an orphan, find the root of the orphan subtree. */
625 0 : fd_reasm_fec_t * root = parent;
626 0 : while( FD_LIKELY( root->parent != ULONG_MAX ) ) {
627 0 : root = pool_ele( pool, root->parent );
628 0 : }
629 0 : subtree_root = pool_idx( pool, root );
630 0 : }
631 :
632 0 : fd_reasm_fec_t * latest_confirmed_leaf = latest_confirmed_fec( reasm, subtree_root );
633 0 : if( !latest_confirmed_leaf || latest_confirmed_leaf == gca( reasm, latest_confirmed_leaf, parent )) {
634 0 : return fd_reasm_remove( reasm, confirmed_orphan, opt_store );
635 0 : }
636 : /* is a useless new fork. */
637 0 : return NULL;
638 0 : }
639 3 : return NULL; /* nothing else could be evicted */
640 3 : }
641 :
642 : fd_reasm_fec_t *
643 : fd_reasm_insert( fd_reasm_t * reasm,
644 : fd_hash_t const * merkle_root,
645 : fd_hash_t const * chained_merkle_root,
646 : ulong slot,
647 : uint fec_set_idx,
648 : ushort parent_off,
649 : ushort data_cnt,
650 : int data_complete,
651 : int slot_complete,
652 : int is_leader,
653 : fd_store_t * opt_store,
654 282 : fd_reasm_fec_t ** evicted ) {
655 :
656 : # if LOGGING
657 : FD_BASE58_ENCODE_32_BYTES( merkle_root->key, merkle_root_b58 );
658 : FD_BASE58_ENCODE_32_BYTES( chained_merkle_root->key, chained_merkle_root_b58 );
659 : FD_LOG_NOTICE(( "inserting (%lu %u) %s %s. %u %d %d", slot, fec_set_idx, merkle_root_b58, chained_merkle_root_b58, data_cnt, data_complete, slot_complete ));
660 : # endif
661 :
662 282 : fd_reasm_fec_t * pool = reasm_pool( reasm );
663 282 : # if FD_REASM_USE_HANDHOLDING
664 282 : FD_TEST( !fd_reasm_query( reasm, merkle_root ) );
665 282 : # endif
666 :
667 282 : ulong null = pool_idx_null( pool );
668 282 : ancestry_t * ancestry = reasm->ancestry;
669 282 : frontier_t * frontier = reasm->frontier;
670 282 : orphaned_t * orphaned = reasm->orphaned;
671 282 : subtrees_t * subtrees = reasm->subtrees;
672 282 : subtreel_t * subtreel = reasm->subtreel;
673 :
674 282 : ulong * bfs = reasm->bfs;
675 282 : out_t * out = reasm->out;
676 :
677 282 : *evicted = NULL;
678 :
679 282 : if( FD_UNLIKELY( pool_free( pool )==1UL ) ) {
680 12 : FD_TEST( reasm->root!=pool_idx_null( pool ) );
681 : /* The eviction removes evicted elements from the maps, but leaves
682 : the elements in the pool for caller to release. Thus, in order
683 : for the following insert/acquire to succeed, we have to start
684 : evicting when we have 1 remaining free element in the pool. This
685 : element is the one that will be acquired below. reasm is
686 : dependent on the caller to then release the evicted elements back
687 : to the pool before the next insert/acquire. */
688 12 : fd_reasm_fec_t * evicted_fec = evict( reasm, opt_store, merkle_root, chained_merkle_root );
689 12 : if( FD_UNLIKELY( evicted_fec == NULL ) ) {
690 3 : FD_LOG_INFO(("reasm failed to evict a fec set when inserting slot %lu fec set %u", slot, fec_set_idx));
691 :
692 : /* in this case we want to signal to the replay tile that we
693 : failed to insert the FEC set. This is effectively is the same
694 : logic as if we had this FEC set, and then it got evicted, and
695 : then the caller now needs to process the evicted FEC set. So
696 : here we acquire the final pool element for it and return it
697 : to the caller as the evicted FEC set. */
698 :
699 3 : fd_reasm_fec_t * fec = pool_ele_acquire( pool );
700 3 : fec->key = *merkle_root;
701 3 : fec->cmr = *chained_merkle_root;
702 3 : fec->parent = null;
703 3 : fec->child = null;
704 3 : fec->slot = slot;
705 3 : fec->parent_off = parent_off;
706 3 : fec->fec_set_idx = fec_set_idx;
707 3 : fec->bank_idx = null;
708 :
709 3 : *evicted = fec;
710 3 : return NULL;
711 3 : }
712 :
713 9 : *evicted = evicted_fec;
714 9 : }
715 :
716 279 : FD_TEST( pool_free( pool ) );
717 279 : fd_reasm_fec_t * fec = pool_ele_acquire( pool );
718 279 : fec->key = *merkle_root;
719 279 : fec->next = null;
720 279 : fec->parent = null;
721 279 : fec->child = null;
722 279 : fec->sibling = null;
723 279 : fec->slot = slot;
724 279 : fec->parent_off = parent_off;
725 279 : fec->fec_set_idx = fec_set_idx;
726 279 : fec->data_cnt = data_cnt;
727 279 : fec->data_complete = data_complete;
728 279 : fec->slot_complete = slot_complete;
729 279 : fec->is_leader = is_leader;
730 279 : fec->eqvoc = 0;
731 279 : fec->confirmed = 0;
732 279 : fec->popped = 0;
733 279 : fec->bank_dead = 0;
734 279 : fec->bank_idx = null;
735 279 : fec->parent_bank_idx = null;
736 279 : fec->bank_seq = null;
737 279 : fec->parent_bank_seq = null;
738 :
739 : /* set the out and subtreel pointers to null */
740 279 : fec->out.next = null;
741 279 : fec->out.prev = null;
742 279 : fec->in_out = 0;
743 279 : fec->subtreel.next = null;
744 279 : fec->subtreel.prev = null;
745 :
746 279 : if( FD_UNLIKELY( !chained_merkle_root ) ) { /* initialize the reasm with the root */
747 39 : FD_TEST( reasm->root==pool_idx_null( pool ) );
748 39 : fec->confirmed = 1;
749 39 : fec->popped = 1;
750 39 : /* */ xid_update( reasm, slot, UINT_MAX, pool_idx( pool, fec ) );
751 39 : /* */ xid_update( reasm, slot, fec_set_idx, pool_idx( pool, fec ) );
752 39 : reasm->root = pool_idx( pool, fec );
753 39 : reasm->slot0 = slot;
754 39 : frontier_ele_insert( reasm->frontier, fec, pool );
755 39 : return fec;
756 39 : }
757 :
758 240 : fec->cmr = *chained_merkle_root;
759 240 : FD_TEST( memcmp( &fec->cmr, chained_merkle_root, sizeof(fd_hash_t) ) == 0 );
760 :
761 240 : if( FD_UNLIKELY( slot_complete ) ) {
762 105 : xid_t * bid = xid_query( reasm->xid, (slot << 32) | UINT_MAX, NULL );
763 105 : if( FD_UNLIKELY( bid ) ) {
764 18 : fd_reasm_fec_t * orig_fec = pool_ele( pool, bid->idx );
765 18 : FD_BASE58_ENCODE_32_BYTES( orig_fec->key.key, prev_block_id_b58 );
766 18 : FD_BASE58_ENCODE_32_BYTES( fec->key.key, curr_block_id_b58 );
767 18 : FD_LOG_WARNING(( "equivocating block_id for FEC slot: %lu fec_set_idx: %u prev: %s curr: %s", fec->slot, fec->fec_set_idx, prev_block_id_b58, curr_block_id_b58 )); /* it's possible there's equivocation... */
768 18 : }
769 105 : xid_update( reasm, slot, UINT_MAX, pool_idx( pool, fec ) );
770 105 : }
771 240 : overwrite_invalid_cmr( reasm, fec ); /* handle receiving parent before child */
772 :
773 : /* First, we search for the parent of this new FEC and link if found.
774 : The new FEC set may result in a new leaf or a new orphan tree root
775 : so we need to check that. */
776 :
777 240 : fd_reasm_fec_t * parent = NULL;
778 240 : if( FD_LIKELY ( parent = ancestry_ele_query ( ancestry, &fec->cmr, NULL, pool ) ) ) { /* parent is connected non-leaf */
779 33 : frontier_ele_insert( frontier, fec, pool );
780 33 : out_ele_push_tail ( out, fec, pool );
781 33 : fec->in_out = 1;
782 207 : } else if( FD_LIKELY ( parent = frontier_ele_remove( frontier, &fec->cmr, NULL, pool ) ) ) { /* parent is connected leaf */
783 159 : ancestry_ele_insert( ancestry, parent, pool );
784 159 : frontier_ele_insert( frontier, fec, pool );
785 159 : out_ele_push_tail ( out, fec, pool );
786 159 : fec->in_out = 1;
787 159 : } else if( FD_LIKELY ( parent = orphaned_ele_query ( orphaned, &fec->cmr, NULL, pool ) ) ) { /* parent is orphaned non-root */
788 0 : orphaned_ele_insert( orphaned, fec, pool );
789 48 : } else if( FD_LIKELY ( parent = subtrees_ele_query ( subtrees, &fec->cmr, NULL, pool ) ) ) { /* parent is orphaned root */
790 3 : orphaned_ele_insert( orphaned, fec, pool );
791 45 : } else { /* parent not found */
792 45 : subtrees_ele_insert ( subtrees, fec, pool );
793 45 : subtreel_ele_push_tail( subtreel, fec, pool );
794 45 : }
795 :
796 240 : if( FD_LIKELY( parent ) ) link( reasm, parent, fec );
797 :
798 : /* Second, we search for children of this new FEC and link them to it.
799 : By definition any children must be orphaned (a child cannot be part
800 : of a connected tree before its parent). Therefore, we only search
801 : through the orphaned subtrees. As part of this operation, we also
802 : coalesce connected orphans into the same tree. This way we only
803 : need to search the orphan tree roots (vs. all orphaned nodes). */
804 :
805 240 : ulong min_descendant = ULONG_MAX; /* needed for eqvoc checks below */
806 240 : FD_TEST( bfs_empty( bfs ) );
807 240 : for( subtreel_iter_t iter = subtreel_iter_fwd_init( subtreel, pool );
808 351 : !subtreel_iter_done ( iter, subtreel, pool );
809 240 : iter = subtreel_iter_fwd_next( iter, subtreel, pool ) ) {
810 111 : bfs_push_tail( bfs, subtreel_iter_idx( iter, subtreel, pool ) );
811 111 : }
812 351 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) { /* link orphan subtrees to the new FEC */
813 111 : fd_reasm_fec_t * orphan_root = pool_ele( pool, bfs_pop_head( bfs ) );
814 111 : FD_TEST( orphan_root ); // `overwrite_invalid_cmr` relies on orphan_root being non-null
815 111 : overwrite_invalid_cmr( reasm, orphan_root ); /* case 2: received child before parent */
816 111 : if( FD_LIKELY( 0==memcmp( orphan_root->cmr.uc, fec->key.uc, sizeof(fd_hash_t) ) ) ) { /* this orphan_root is a direct child of fec */
817 39 : link( reasm, fec, orphan_root );
818 39 : subtrees_ele_remove( subtrees, &orphan_root->key, NULL, pool );
819 39 : subtreel_ele_remove( subtreel, orphan_root, pool );
820 39 : orphaned_ele_insert( orphaned, orphan_root, pool );
821 39 : min_descendant = fd_ulong_min( min_descendant, orphan_root->slot );
822 39 : }
823 111 : }
824 :
825 : /* Third, we advance the frontier outward beginning from fec as we may
826 : have connected orphaned descendants to fec in the above step. This
827 : does a BFS outward from fec until it reaches leaves, moving fec and
828 : its non-leaf descendants into ancestry and leaves into frontier.
829 :
830 : parent (ancestry) orphan root (subtrees)
831 : | |
832 : fec (frontier) orphan child (orphaned)
833 :
834 : parent
835 : |
836 : fec <- frontier is here
837 : |
838 : orphan root
839 : |
840 : orphan child <- advance to here */
841 :
842 240 : if( FD_LIKELY( frontier_ele_query( frontier, &fec->key, NULL, pool ) ) ) bfs_push_tail( bfs, pool_idx( pool, fec ) );
843 471 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
844 231 : fd_reasm_fec_t * parent = pool_ele( pool, bfs_pop_head( bfs ) );
845 231 : fd_reasm_fec_t * child = pool_ele( pool, parent->child );
846 231 : if( FD_LIKELY( child ) ) {
847 33 : frontier_ele_remove( frontier, &parent->key, NULL, pool );
848 33 : ancestry_ele_insert( ancestry, parent, pool );
849 33 : }
850 270 : while( FD_LIKELY( child ) ) {
851 39 : FD_TEST( orphaned_ele_remove( orphaned, &child->key, NULL, pool ) );
852 39 : frontier_ele_insert( frontier, child, pool );
853 39 : bfs_push_tail( bfs, pool_idx( pool, child ) );
854 39 : out_ele_push_tail( out, child, pool );
855 39 : child->in_out = 1;
856 39 : child = pool_ele( pool, child->sibling );
857 39 : }
858 231 : }
859 :
860 : /* Fourth, check and handle equivocation. There are three cases.
861 :
862 : 1. we've already seen this FEC's xid (slot, fec_set_idx)
863 : 2. this FEC's parent equivocates. */
864 :
865 240 : xid_t * xid = xid_query( reasm->xid, (slot<<32) | fec_set_idx, NULL );
866 240 : if( FD_UNLIKELY( xid ) ) {
867 33 : eqvoc( reasm, fec );
868 33 : eqvoc( reasm, pool_ele( pool, xid->idx ) ); /* first appearance of this xid */
869 33 : }
870 240 : xid_update( reasm, slot, fec_set_idx, pool_idx( pool, fec ) );
871 240 : if( FD_UNLIKELY( parent && parent->eqvoc && !parent->confirmed ) ) eqvoc( reasm, fec );
872 :
873 : /* 3. this FEC's parent is a slot_complete, but this FEC is part of
874 : the same slot. Or this fec is a slot_complete, but it's child
875 : is part of the same slot. i.e.
876 :
877 : A - B - C (slot cmpl) - D - E - F (slot cmpl)
878 :
879 : We do not want to deliver this entire slot if possible. The
880 : block has TWO slot complete flags. This is not honest behavior.
881 :
882 : Two ways this can happen:
883 : scenario 1: A - B - C (slot cmpl) - D - E - F (slot cmpl)
884 :
885 : scenario 2: A - B - C (slot cmpl)
886 : \
887 : D - E - F (slot cmpl) [true equivocation case]
888 :
889 : Scenario 2 is handled first-class in reasm, and we will only
890 : replay this slot if one version gets confirmed (or we did not
891 : see evidence of the other version until after we replayed the
892 : first).
893 :
894 : In scenario 1, it is impossible for the cluster to converge on
895 : ABC(slot cmpl)DEF(slot cmpl), but depending on the order in
896 : which each node receives the FEC sets, the cluster could either
897 : confirm ABC(slot cmpl), or mark the slot dead. In general, if
898 : the majority of nodes received and replayed the shorter version
899 : before seeing the second half, the slot could still end up
900 : getting confirmed. Whereas if the majority of nodes saw shreds
901 : from DEF before finishing replay and voting on ABC, the slot
902 : would likely be marked dead.
903 :
904 : Firedancer handles this case by marking the slot eqvoc upon
905 : detecting a slot complete in the middle of a slot. reasm will
906 : mark the earliest FEC possible in the slot as eqvoc, but may not
907 : be able to detect fec 0 because the FEC may be orphaned, or fec
908 : 0 may not exist yet. Thus, it is possible for Firedancer to
909 : replay ABC(slot cmpl), but it is impossible for reasm to deliver
910 : fecs D, E, or F. The node would then vote for ABC(slot cmpl).
911 :
912 : If the node sees D before replaying A, B, or C, then it would be
913 : able to mark ABCD as eqvoc, and prevent the corresponding FECs
914 : from being delivered. In this case, the Firedancer node would
915 : have an incompletely executed bank that eventually gets pruned
916 : away.
917 :
918 : Agave's handling differs because they key by slot, but our
919 : handling is compatible with the protocol. */
920 :
921 240 : if( FD_UNLIKELY( (parent && parent->slot_complete && parent->slot == slot) ||
922 240 : (fec->slot_complete && min_descendant == slot) ) ) {
923 : /* walk up to the earliest fec in slot */
924 6 : fd_reasm_fec_t * curr = fec;
925 15 : while( FD_LIKELY( curr->parent != pool_idx_null( pool ) && pool_ele( pool, curr->parent )->slot == slot ) ) {
926 9 : curr = pool_ele( pool, curr->parent );
927 9 : }
928 6 : eqvoc( reasm, curr );
929 6 : }
930 :
931 : /* Finally, return the newly inserted FEC. */
932 240 : return fec;
933 240 : }
934 :
935 : fd_reasm_fec_t *
936 : fd_reasm_publish( fd_reasm_t * reasm,
937 : fd_hash_t const * merkle_root,
938 9 : fd_store_t * opt_store ) {
939 :
940 9 : # if FD_REASM_USE_HANDHOLDING
941 9 : if( FD_UNLIKELY( !pool_ele( reasm_pool( reasm ), reasm->root ) ) ) { FD_LOG_WARNING(( "missing root" )); return NULL; }
942 9 : if( FD_UNLIKELY( !fd_reasm_query( reasm, merkle_root ) ) ) {
943 0 : FD_BASE58_ENCODE_32_BYTES( merkle_root->key, merkle_root_b58 );
944 0 : FD_LOG_WARNING(( "merkle root %s not found", merkle_root_b58 ));
945 0 : return NULL;
946 0 : }
947 9 : # endif
948 :
949 9 : fd_reasm_fec_t * pool = reasm_pool( reasm );
950 9 : ulong null = pool_idx_null( pool );
951 9 : fd_reasm_fec_t * oldr = pool_ele( pool, reasm->root );
952 9 : fd_reasm_fec_t * newr = fd_reasm_query( reasm, merkle_root );
953 9 : ulong * bfs = reasm->bfs;
954 :
955 9 : bfs_push_tail( bfs, pool_idx( pool, oldr ) );
956 :
957 : /* First, BFS down the tree, pruning all of root's ancestors and also
958 : any descendants of those ancestors. */
959 :
960 : /* Also, prune any subtrees who's root is less than the new root. */
961 :
962 9 : subtreel_t * subtreel = reasm->subtreel;
963 9 : for( subtreel_iter_t iter = subtreel_iter_fwd_init( subtreel, pool );
964 9 : !subtreel_iter_done ( iter, subtreel, pool );
965 9 : iter = subtreel_iter_fwd_next( iter, subtreel, pool ) ) {
966 0 : fd_reasm_fec_t * ele = subtreel_iter_ele( iter, subtreel, pool );
967 0 : if( ele->slot < newr->slot ) {
968 0 : bfs_push_tail( bfs, pool_idx( pool, ele ) );
969 0 : }
970 0 : }
971 :
972 39 : while( FD_LIKELY( !bfs_empty( bfs ) ) ) {
973 30 : fd_reasm_fec_t * head = pool_ele( pool, bfs_pop_head( bfs ) );
974 :
975 30 : fd_reasm_fec_t * fec = ancestry_ele_remove( reasm->ancestry, &head->key, NULL, pool );
976 30 : if( FD_UNLIKELY( !fec ) ) fec = frontier_ele_remove( reasm->frontier, &head->key, NULL, pool );
977 30 : if( FD_UNLIKELY( !fec ) ) fec = orphaned_ele_remove( reasm->orphaned, &head->key, NULL, pool );
978 30 : if( FD_UNLIKELY( !fec ) ) {
979 0 : fec = subtrees_ele_remove( reasm->subtrees, &head->key, NULL, pool );
980 0 : subtreel_ele_remove( reasm->subtreel, head, pool );
981 0 : }
982 :
983 30 : fd_reasm_fec_t * child = pool_ele( pool, head->child );
984 60 : while( FD_LIKELY( child ) ) { /* iterate over children */
985 30 : if( FD_LIKELY( child != newr ) ) { /* stop at new root */
986 21 : bfs_push_tail( bfs, pool_idx( pool, child ) );
987 21 : }
988 30 : child = pool_ele( pool, child->sibling ); /* right-sibling */
989 30 : }
990 30 : clear_slot_metadata( reasm, head );
991 30 : if( FD_LIKELY( opt_store ) ) fd_store_remove( opt_store, &head->key );
992 30 : if( FD_LIKELY( head->in_out ) ) { out_ele_remove( reasm->out, head, pool ); head->in_out = 0; }
993 30 : pool_ele_release( pool, head );
994 30 : }
995 :
996 9 : newr->parent = null; /* unlink old root */
997 9 : newr->sibling = null;
998 9 : reasm->root = pool_idx( pool, newr ); /* replace with new root */
999 9 : return newr;
1000 9 : }
1001 :
1002 : #include <stdio.h>
1003 :
1004 : FD_FN_UNUSED static void
1005 0 : print( fd_reasm_t const * reasm, fd_reasm_fec_t const * fec, int space, const char * prefix ) {
1006 0 : fd_reasm_fec_t const * pool = reasm_pool_const( reasm );
1007 0 :
1008 0 : if( fec == NULL ) return;
1009 0 :
1010 0 : if( space > 0 ) printf( "\n" );
1011 0 : for( int i = 0; i < space; i++ ) printf( " " );
1012 0 : FD_BASE58_ENCODE_32_BYTES( fec->key.key, key_b58 );
1013 0 : printf( "%s%s", prefix, key_b58 );
1014 0 :
1015 0 : fd_reasm_fec_t const * curr = pool_ele_const( pool, fec->child );
1016 0 : char new_prefix[1024]; /* FIXME size this correctly */
1017 0 : while( curr ) {
1018 0 : if( pool_ele_const( pool, curr->sibling ) ) {
1019 0 : sprintf( new_prefix, "├── " ); /* branch indicating more siblings follow */
1020 0 : print( reasm, curr, space + 4, new_prefix );
1021 0 : } else {
1022 0 : sprintf( new_prefix, "└── " ); /* end branch */
1023 0 : print( reasm, curr, space + 4, new_prefix );
1024 0 : }
1025 0 : curr = pool_ele_const( pool, curr->sibling );
1026 0 : }
1027 0 : }
1028 :
1029 : static void
1030 42 : ancestry_print( fd_reasm_t const * reasm, fd_reasm_fec_t const * fec, int space, const char * prefix, fd_reasm_fec_t const * prev, ulong recurse_depth ) {
1031 42 : fd_reasm_fec_t const * pool = reasm_pool_const( reasm );
1032 42 : if( fec == NULL ) return;
1033 42 : recurse_depth++;
1034 42 : if( recurse_depth == 2048 ) {
1035 0 : FD_BASE58_ENCODE_32_BYTES( fec->key.key, key_b58 );
1036 0 : FD_LOG_NOTICE(("Cutting off ancestry print at depth %lu, slot %lu. Continue printing with this root key %s.", recurse_depth, fec->slot, key_b58 ));
1037 0 : return;
1038 0 : }
1039 42 : fd_reasm_fec_t const * child = fd_reasm_child_const( reasm, fec );
1040 :
1041 42 : if( !prev || /* root OR */
1042 42 : ( fec->slot_complete || (!prev->eqvoc && fec->eqvoc) || fec->child == pool_idx_null( pool ) || child->sibling != pool_idx_null( pool ) )) {
1043 36 : if( space > 0 ) printf( "\n" );
1044 321 : for( int i = 0; i < space; i++ ) printf( " " );
1045 36 : printf( "%s", prefix );
1046 :
1047 36 : FD_BASE58_ENCODE_32_BYTES( fec->key.key, key_b58 );
1048 36 : key_b58[5] = '\0'; /* only print first 5 characters of key_b58 */
1049 36 : printf( "%lu(%u) %s", fec->slot, fec->fec_set_idx, key_b58 );
1050 36 : if( fec->eqvoc ) printf( " [eqvoc]" );
1051 36 : if( fec->is_leader ) printf( " [leader]" );
1052 36 : space += 5;
1053 36 : fflush(stdout);
1054 36 : }
1055 :
1056 42 : char new_prefix[1024]; /* FIXME size this correctly */
1057 :
1058 75 : while( child ) {
1059 33 : if( pool_ele_const( pool, child->sibling ) ) {
1060 9 : sprintf( new_prefix, "├── " ); /* branch indicating more siblings follow */
1061 9 : ancestry_print( reasm, child, space, new_prefix, fec, recurse_depth );
1062 24 : } else {
1063 24 : sprintf( new_prefix, "└── " ); /* end branch */
1064 24 : ancestry_print( reasm, child, space, new_prefix, fec, recurse_depth );
1065 24 : }
1066 33 : child = pool_ele_const( pool, child->sibling );
1067 33 : }
1068 42 : }
1069 :
1070 : void
1071 6 : fd_reasm_print( fd_reasm_t const * reasm ) {
1072 6 : FD_LOG_NOTICE( ( "\n\n[Reasm - showing only leaves, slot completes, and branches]" ) );
1073 6 : fd_reasm_fec_t const * pool = reasm_pool_const( reasm );
1074 6 : printf( "ele cnt: %lu\n", pool_used( pool ) );
1075 :
1076 6 : if( FD_LIKELY( reasm->root != pool_idx_null( pool ) ) ) {
1077 6 : printf( "\n\n[Connected Fecs]\n" );
1078 6 : ancestry_print( reasm, fd_reasm_root_const( reasm ), 0, "", NULL, 0 );
1079 6 : }
1080 :
1081 6 : printf( "\n\n[Unconnected Fecs]\n" );
1082 6 : subtreel_t const * subtreel = reasm->_subtrlf;
1083 6 : for( subtreel_iter_t iter = subtreel_iter_fwd_init( subtreel, pool );
1084 9 : !subtreel_iter_done ( iter, subtreel, pool );
1085 6 : iter = subtreel_iter_fwd_next( iter, subtreel, pool ) ) {
1086 3 : fd_reasm_fec_t const * fec = subtreel_iter_ele_const( iter, subtreel, pool );
1087 3 : ancestry_print( reasm, fec, 0, "", NULL, 0 );
1088 3 : }
1089 :
1090 6 : printf( "\n\n" );
1091 : fflush(stdout);
1092 6 : }
|