Line data Source code
1 : #ifndef HEADER_fd_src_choreo_forest_fd_forest_h
2 : #define HEADER_fd_src_choreo_forest_fd_forest_h
3 :
4 : /* Forest is an API for repairing blocks as they are discovered from the
5 : cluster via Turbine or Gossip. Shreds (from Turbine) and votes (from
6 : Gossip) inform forest that a block with the given slot they are
7 : associated with exists. Blk repair ensures that this block is
8 : received in its entirety by requesting repairs for missing shreds for
9 : the block.
10 :
11 : Like other fork-aware structures, forest maintains a tree that
12 : records the ancestry of slots. It also maintains a frontier, which
13 : models the leaves of the tree ie. the oldest (in ancestry) blocks
14 : that still need to be repaired (across multiple forks).
15 :
16 : Forest constructs the ancestry tree backwards, and then repairs the
17 : tree forwards (using BFS). */
18 :
19 : /* FD_FOREST_USE_HANDHOLDING: Define this to non-zero at compile time
20 : to turn on additional runtime checks and logging. */
21 :
22 : #include "../../disco/fd_disco_base.h"
23 :
24 : #ifndef FD_FOREST_USE_HANDHOLDING
25 : #define FD_FOREST_USE_HANDHOLDING 1
26 : #endif
27 :
28 0 : #define FD_FOREST_VER_UNINIT (0UL)
29 0 : #define FD_FOREST_VER_INVAL (ULONG_MAX)
30 :
31 0 : #define FD_FOREST_MAGIC (0xf17eda2ce7b1c0UL) /* firedancer forest version 0 */
32 :
33 : /* The below is a non-restrictive estimate on the number of forks at
34 : once. In particular this controls the number of chains/buckets in the
35 : consumed map and subtrees map, although both maps element count are
36 : still bounded by the forest_pool. This is mainly a performance
37 : optimization; repair does a lot of repeated iterator_init and
38 : iterator_next calls on the consumed map, and almost always the
39 : consumed map is very sparse, which makes the iterator very slow. Thus
40 : we can afford to drastically decrease the number of chains in return
41 : for a slightly higher chance of collisions. */
42 :
43 : #define SET_NAME fd_forest_blk_idxs
44 0 : #define SET_MAX FD_SHRED_BLK_MAX
45 : #include "../../util/tmpl/fd_set.c"
46 :
47 : /* fd_forest_blk_t implements a left-child, right-sibling n-ary
48 : tree. Each ele maintains the `pool` index of its left-most child
49 : (`child_idx`), its immediate-right sibling (`sibling_idx`), and its
50 : parent (`parent_idx`).
51 :
52 : This tree structure is gaddr-safe and supports accesses and
53 : operations from processes with separate local forest joins. */
54 :
55 : struct __attribute__((aligned(128UL))) fd_forest_blk {
56 : ulong slot; /* map key */
57 : ulong parent_slot; /* map key of the parent. invariant: if parent is populated, parent_slot is populated. the converse is not necessarily true. */
58 : ulong next; /* internal use by fd_pool, fd_map_chain */
59 : ulong parent; /* pool idx of the parent in the tree */
60 : ulong child; /* pool idx of the left-child */
61 : ulong sibling; /* pool idx of the right-sibling */
62 :
63 : uint consumed_idx; /* highest contiguous consumed shred idx */
64 : uint buffered_idx; /* highest contiguous buffered shred idx */
65 : uint complete_idx; /* shred_idx with SLOT_COMPLETE_FLAG ie. last shred idx in the slot */
66 :
67 : fd_forest_blk_idxs_t fecs[fd_forest_blk_idxs_word_cnt]; /* fec set idxs - 1, or the idx of the last shred in every FEC set */
68 : fd_forest_blk_idxs_t idxs[fd_forest_blk_idxs_word_cnt]; /* data shred idxs */
69 : fd_forest_blk_idxs_t cmpl[fd_forest_blk_idxs_word_cnt]; /* last shred idx of every FEC set that has been completed by shred_tile */
70 :
71 : /* i.e. when fecs == cmpl, the slot is truly complete and everything
72 : is contained in fec store. Look at fec_clear for more details.*/
73 :
74 : int est_buffered_tick_recv; /* tick of shred at buffered_idx. Note since we don't track all the
75 : ticks received, this will be a lower bound estimate on the highest tick we have seen.
76 : But this is only used for limiting eager repair, so an exact value is not necessary. */
77 :
78 : /* Metrics */
79 :
80 : fd_forest_blk_idxs_t code[fd_forest_blk_idxs_word_cnt]; /* code shred idxs */
81 : long first_shred_ts; /* timestamp of first shred rcved in slot != complete_idx */
82 : long first_req_ts; /* timestamp of first request received in slot != complete_idx */
83 : uint turbine_cnt; /* number of shreds received from turbine */
84 : uint repair_cnt; /* number of data shreds received from repair */
85 : uint recovered_cnt; /* number of shreds recovered from reedsol recovery */
86 : };
87 : typedef struct fd_forest_blk fd_forest_blk_t;
88 :
89 : #define POOL_NAME fd_forest_pool
90 0 : #define POOL_T fd_forest_blk_t
91 : #include "../../util/tmpl/fd_pool.c"
92 :
93 : #define MAP_NAME fd_forest_ancestry
94 : #define MAP_ELE_T fd_forest_blk_t
95 0 : #define MAP_KEY slot
96 : #include "../../util/tmpl/fd_map_chain.c"
97 :
98 : #define MAP_NAME fd_forest_frontier
99 : #define MAP_ELE_T fd_forest_blk_t
100 0 : #define MAP_KEY slot
101 : #include "../../util/tmpl/fd_map_chain.c"
102 :
103 : #define MAP_NAME fd_forest_orphaned
104 : #define MAP_ELE_T fd_forest_blk_t
105 0 : #define MAP_KEY slot
106 : #include "../../util/tmpl/fd_map_chain.c"
107 :
108 : #define MAP_NAME fd_forest_subtrees
109 : #define MAP_ELE_T fd_forest_blk_t
110 0 : #define MAP_KEY slot
111 : #include "../../util/tmpl/fd_map_chain.c"
112 :
113 : struct fd_forest_cns {
114 : ulong slot;
115 : ulong forest_pool_idx;
116 : ulong hash; /* reserved by pool and map_chain */
117 : ulong next; /* reserved by dlist */
118 : ulong prev; /* reserved by dlist */
119 : };
120 : typedef struct fd_forest_cns fd_forest_cns_t;
121 :
122 : #define MAP_NAME fd_forest_consumed
123 : #define MAP_ELE_T fd_forest_cns_t
124 0 : #define MAP_KEY slot
125 0 : #define MAP_NEXT hash
126 : #include "../../util/tmpl/fd_map_chain.c"
127 :
128 : #define DLIST_NAME fd_forest_conslist
129 : #define DLIST_ELE_T fd_forest_cns_t
130 0 : #define DLIST_PREV prev
131 0 : #define DLIST_NEXT next
132 : #include "../../util/tmpl/fd_dlist.c"
133 :
134 : #define POOL_NAME fd_forest_conspool
135 0 : #define POOL_T fd_forest_cns_t
136 0 : #define POOL_NEXT hash
137 : #include "../../util/tmpl/fd_pool.c"
138 :
139 : /* Internal use only for BFSing */
140 : #define DEQUE_NAME fd_forest_deque
141 0 : #define DEQUE_T ulong
142 : #include "../../util/tmpl/fd_deque_dynamic.c"
143 :
144 :
145 : /* fd_forest_t is the top-level structure that holds the root of
146 : the tree, as well as the memory pools and map structures.
147 :
148 : These structures are bump-allocated and laid out contiguously in
149 : memory from the fd_forest_t * pointer which points to the
150 : beginning of the memory region.
151 :
152 : --------------------- <- fd_forest_t *
153 : | metadata |
154 : |-------------------|
155 : | pool |
156 : |-------------------|
157 : | ancestry |
158 : |-------------------|
159 : | frontier |
160 : |-------------------|
161 : | subtrees |
162 : |-------------------|
163 : | orphaned |
164 : ---------------------
165 :
166 :
167 : A valid, initialized forest is always non-empty. After
168 : `fd_forest_init` the forest will always have a root ele unless
169 : modified improperly out of forest's API.*/
170 :
171 : struct __attribute__((aligned(128UL))) fd_forest {
172 : ulong root; /* pool idx of the root */
173 : ulong iter; /* pool idx of the iterator */
174 : ulong wksp_gaddr; /* wksp gaddr of fd_forest in the backing wksp, non-zero gaddr */
175 : ulong ver_gaddr; /* wksp gaddr of version fseq, incremented on write ops */
176 : ulong pool_gaddr; /* wksp gaddr of fd_pool */
177 : ulong ancestry_gaddr; /* wksp_gaddr of fd_forest_ancestry */
178 : ulong frontier_gaddr; /* leaves that needs repair */
179 : ulong subtrees_gaddr; /* head of orphaned trees */
180 : ulong orphaned_gaddr; /* map of parent_slot to singly-linked list of ele orphaned by that parent slot */
181 :
182 : ulong consumed_gaddr; /* map of slot to pool idx of the completed repair frontier */
183 : ulong conslist_gaddr; /* wksp gaddr of fd_forest_conslist */
184 : ulong conspool_gaddr; /* wksp gaddr of fd_forest_conspool */
185 : ulong deque_gaddr; /* wksp gaddr of fd_forest_deque. internal use only for BFSing */
186 : ulong magic; /* ==FD_FOREST_MAGIC */
187 :
188 : ulong subtree_cnt; /* number of subtrees in the forest - particularly useful for iterating */
189 : };
190 : typedef struct fd_forest fd_forest_t;
191 :
192 : FD_PROTOTYPES_BEGIN
193 :
194 : /* Constructors */
195 :
196 : /* fd_forest_{align,footprint} return the required alignment and
197 : footprint of a memory region suitable for use as forest with up to
198 : ele_max eles and vote_max votes. */
199 :
200 : FD_FN_CONST static inline ulong
201 0 : fd_forest_align( void ) {
202 0 : return alignof(fd_forest_t);
203 0 : }
204 :
205 : FD_FN_CONST static inline ulong
206 0 : fd_forest_footprint( ulong ele_max ) {
207 0 : return FD_LAYOUT_FINI(
208 0 : FD_LAYOUT_APPEND(
209 0 : FD_LAYOUT_APPEND(
210 0 : FD_LAYOUT_APPEND(
211 0 : FD_LAYOUT_APPEND(
212 0 : FD_LAYOUT_APPEND(
213 0 : FD_LAYOUT_APPEND(
214 0 : FD_LAYOUT_APPEND(
215 0 : FD_LAYOUT_APPEND(
216 0 : FD_LAYOUT_APPEND(
217 0 : FD_LAYOUT_APPEND(
218 0 : FD_LAYOUT_APPEND(
219 0 : FD_LAYOUT_INIT,
220 0 : alignof(fd_forest_t), sizeof(fd_forest_t) ),
221 0 : fd_fseq_align(), fd_fseq_footprint() ),
222 0 : fd_forest_pool_align(), fd_forest_pool_footprint ( ele_max ) ),
223 0 : fd_forest_ancestry_align(), fd_forest_ancestry_footprint( ele_max ) ),
224 0 : fd_forest_frontier_align(), fd_forest_frontier_footprint( ele_max ) ),
225 0 : fd_forest_subtrees_align(), fd_forest_subtrees_footprint( ele_max ) ),
226 0 : fd_forest_orphaned_align(), fd_forest_orphaned_footprint( ele_max ) ),
227 0 : fd_forest_consumed_align(), fd_forest_consumed_footprint( ele_max ) ),
228 0 : fd_forest_conslist_align(), fd_forest_conslist_footprint( ) ),
229 0 : fd_forest_conspool_align(), fd_forest_conspool_footprint( ele_max ) ),
230 0 : fd_forest_deque_align(), fd_forest_deque_footprint ( ele_max ) ),
231 0 : fd_forest_align() );
232 0 : }
233 :
234 : /* fd_forest_new formats an unused memory region for use as a
235 : forest. mem is a non-NULL pointer to this region in the local
236 : address space with the required footprint and alignment. */
237 :
238 : void *
239 : fd_forest_new( void * shmem, ulong ele_max, ulong seed );
240 :
241 : /* fd_forest_join joins the caller to the forest. forest
242 : points to the first byte of the memory region backing the forest
243 : in the caller's address space. Returns a pointer in the local
244 : address space to forest on success. */
245 :
246 : fd_forest_t *
247 : fd_forest_join( void * forest );
248 :
249 : /* fd_forest_leave leaves a current local join. Returns a pointer
250 : to the underlying shared memory region on success and NULL on failure
251 : (logs details). Reasons for failure include forest is NULL. */
252 :
253 : void *
254 : fd_forest_leave( fd_forest_t const * forest );
255 :
256 : /* fd_forest_delete unformats a memory region used as a
257 : forest. Assumes only the nobody is joined to the region.
258 : Returns a pointer to the underlying shared memory region or NULL if
259 : used obviously in error (e.g. forest is obviously not a
260 : forest ... logs details). The ownership of the memory region is
261 : transferred to the caller. */
262 :
263 : void *
264 : fd_forest_delete( void * forest );
265 :
266 : /* fd_forest_init initializes a forest. Assumes forest
267 : is a valid local join and no one else is joined. root is the initial
268 : root forest will use. This is the snapshot slot if booting from
269 : a snapshot, 0 if the genesis slot.
270 :
271 : In general, this should be called by the same process that formatted
272 : forest's memory, ie. the caller of fd_forest_new. */
273 :
274 : fd_forest_t *
275 : fd_forest_init( fd_forest_t * forest, ulong root );
276 :
277 : /* fd_forest_fini finishes an forest. Assumes forest is
278 : a valid local join and no one else is joined. */
279 :
280 : fd_forest_t *
281 : fd_forest_fini( fd_forest_t * forest );
282 :
283 : /* Accessors */
284 :
285 : /* fd_forest_wksp returns the local join to the wksp backing the
286 : forest. The lifetime of the returned pointer is at least as
287 : long as the lifetime of the local join. Assumes forest is a
288 : current local join. */
289 :
290 : FD_FN_PURE static inline fd_wksp_t *
291 0 : fd_forest_wksp( fd_forest_t const * forest ) {
292 0 : return (fd_wksp_t *)( ( (ulong)forest ) - forest->wksp_gaddr );
293 0 : }
294 :
295 : /* fd_forest_{ver, ver_const} returns the local join to the version
296 : number fseq. The lifetime of the returned pointer is at least as
297 : long as the lifetime of the local join. Assumes forest is a
298 : current local join. If value is ULONG_MAX, ghost is uninitialized or
299 : invalid. Query pre- & post-read:
300 :
301 : odd: if either pre or post is odd, discard read.
302 : even: if pre == post, read is consistent. */
303 :
304 : FD_FN_PURE static inline ulong *
305 0 : fd_forest_ver( fd_forest_t * forest ) {
306 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ver_gaddr );
307 0 : }
308 :
309 : FD_FN_PURE static inline ulong const *
310 0 : fd_forest_ver_const( fd_forest_t const * forest ) {
311 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ver_gaddr );
312 0 : }
313 :
314 : /* fd_forest_{pool, pool_const} returns a pointer in the caller's address
315 : space to forest's element pool. */
316 :
317 : FD_FN_PURE static inline fd_forest_blk_t *
318 0 : fd_forest_pool( fd_forest_t * forest ) {
319 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->pool_gaddr );
320 0 : }
321 :
322 : FD_FN_PURE static inline fd_forest_blk_t const *
323 0 : fd_forest_pool_const( fd_forest_t const * forest ) {
324 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->pool_gaddr );
325 0 : }
326 :
327 : /* fd_forest_{ancestry, ancestry_const} returns a pointer in the caller's
328 : address space to forest's ancestry map. */
329 :
330 : FD_FN_PURE static inline fd_forest_ancestry_t *
331 0 : fd_forest_ancestry( fd_forest_t * forest ) {
332 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ancestry_gaddr );
333 0 : }
334 :
335 : FD_FN_PURE static inline fd_forest_ancestry_t const *
336 0 : fd_forest_ancestry_const( fd_forest_t const * forest ) {
337 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ancestry_gaddr );
338 0 : }
339 :
340 : /* fd_forest_{frontier, frontier_const} returns a pointer in the caller's
341 : address space to forest's frontier map. */
342 :
343 : FD_FN_PURE static inline fd_forest_frontier_t *
344 0 : fd_forest_frontier( fd_forest_t * forest ) {
345 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->frontier_gaddr );
346 0 : }
347 :
348 : FD_FN_PURE static inline fd_forest_frontier_t const *
349 0 : fd_forest_frontier_const( fd_forest_t const * forest ) {
350 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->frontier_gaddr );
351 0 : }
352 :
353 : /* fd_forest_{subtrees, subtrees_const} returns a pointer in the caller's
354 : address space to forest's subtrees map. */
355 :
356 : FD_FN_PURE static inline fd_forest_subtrees_t *
357 0 : fd_forest_subtrees( fd_forest_t * forest ) {
358 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->subtrees_gaddr );
359 0 : }
360 :
361 : FD_FN_PURE static inline fd_forest_subtrees_t const *
362 0 : fd_forest_subtrees_const( fd_forest_t const * forest ) {
363 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->subtrees_gaddr );
364 0 : }
365 :
366 : /* fd_forest_{orphaned, orphaned_const} returns a pointer in the caller's
367 : address space to forest's orphaned map. */
368 :
369 : FD_FN_PURE static inline fd_forest_orphaned_t *
370 0 : fd_forest_orphaned( fd_forest_t * forest ) {
371 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->orphaned_gaddr );
372 0 : }
373 :
374 : FD_FN_PURE static inline fd_forest_orphaned_t const *
375 0 : fd_forest_orphaned_const( fd_forest_t const * forest ) {
376 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->orphaned_gaddr );
377 0 : }
378 :
379 : /* fd_forest_{consumed, consumed_const} returns a pointer in the caller's
380 : address space to forest's consumed map. */
381 :
382 : FD_FN_PURE static inline fd_forest_consumed_t *
383 0 : fd_forest_consumed( fd_forest_t * forest ) {
384 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->consumed_gaddr );
385 0 : }
386 :
387 : FD_FN_PURE static inline fd_forest_consumed_t const *
388 0 : fd_forest_consumed_const( fd_forest_t const * forest ) {
389 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->consumed_gaddr );
390 0 : }
391 :
392 : /* fd_forest_{conslist, conslist_const} returns a pointer in the caller's
393 : address space to forest's conslist. */
394 :
395 : FD_FN_PURE static inline fd_forest_conslist_t *
396 0 : fd_forest_conslist( fd_forest_t * forest ) {
397 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->conslist_gaddr );
398 0 : }
399 :
400 : FD_FN_PURE static inline fd_forest_conslist_t const *
401 0 : fd_forest_conslist_const( fd_forest_t const * forest ) {
402 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->conslist_gaddr );
403 0 : }
404 :
405 : /* fd_forest_{conspool, conspool_const} returns a pointer in the caller's
406 : address space to forest's conspool pool. */
407 :
408 : FD_FN_PURE static inline fd_forest_cns_t *
409 0 : fd_forest_conspool( fd_forest_t * forest ) {
410 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->conspool_gaddr );
411 0 : }
412 :
413 : FD_FN_PURE static inline fd_forest_cns_t const *
414 0 : fd_forest_conspool_const( fd_forest_t const * forest ) {
415 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->conspool_gaddr );
416 0 : }
417 :
418 : /* fd_forest_root_slot returns forest's root slot. Assumes
419 : forest is a current local join. */
420 :
421 : FD_FN_PURE static inline ulong
422 0 : fd_forest_root_slot( fd_forest_t const * forest ) {
423 0 : if( FD_UNLIKELY( forest->root == fd_forest_pool_idx_null( fd_forest_pool_const( forest ) ) )) return ULONG_MAX; /* uninitialized */
424 0 : return fd_forest_pool_ele_const( fd_forest_pool_const( forest ), forest->root )->slot;
425 0 : }
426 :
427 : fd_forest_blk_t *
428 : fd_forest_query( fd_forest_t * forest, ulong slot );
429 :
430 : /* Operations */
431 :
432 : /* fd_forest_blk_insert inserts a new block into the forest. Assumes
433 : slot >= forest->smr, and the blk pool has a free element (if
434 : handholding is enabled, explicitly checks and errors). This blk
435 : insert is idempotent, and can be called multiple times with the same
436 : slot. Returns the inserted forest ele. */
437 :
438 : fd_forest_blk_t *
439 : fd_forest_blk_insert( fd_forest_t * forest, ulong slot, ulong parent_slot );
440 :
441 0 : #define SHRED_SRC_TURBINE 0
442 0 : #define SHRED_SRC_REPAIR 1
443 0 : #define SHRED_SRC_RECOVERED 2
444 :
445 : /* fd_forest_shred_insert inserts a new shred into the forest.
446 : Assumes slot is already in forest, and should typically be called
447 : directly after fd_forest_block_insert. Returns the forest ele
448 : corresponding to the shred slot. */
449 :
450 : fd_forest_blk_t *
451 : fd_forest_data_shred_insert( fd_forest_t * forest, ulong slot, ulong parent_slot, uint shred_idx, uint fec_set_idx, int slot_complete, int ref_tick, int src );
452 :
453 : fd_forest_blk_t *
454 : fd_forest_code_shred_insert( fd_forest_t * forest, ulong slot, uint shred_idx );
455 :
456 : /* fd_forest_fec_insert inserts a new fully completed FEC set into the
457 : forest. Assumes slot is already in forest, and should typically be
458 : called directly after fd_forest_block_insert. Returns the forest ele
459 : corresponding to the shred slot. */
460 :
461 : fd_forest_blk_t *
462 : fd_forest_fec_insert( fd_forest_t * forest, ulong slot, ulong parent_slot, uint last_shred_idx, uint fec_set_idx, int slot_complete, int ref_tick );
463 :
464 : /* fd_forest_fec_clear clears the FEC set at the given slot and
465 : fec_set_idx.
466 : Can fec_clear break consumed frontier invariants? No. Why?
467 :
468 : 1. consumed frontier moves past slot n iff
469 : child && (we have received evidence of a child of slot n)
470 : head->complete_idx != UINT_MAX && head->complete_idx == head->buffered_idx && (all shreds of a slot have been received)
471 : 0==memcmp( head->cmpl, head->fecs, sizeof(fd_forest_blk_idxs_t) * fd_forest_blk_idxs_word_cnt ) (our fec completion record matches the observed FECs)
472 :
473 : If we have received all the shreds of `n`, we must have all FEC
474 : sets idxs of `n` recorded in the fecs bitset. The cmpl bitset
475 : is updated only on fd_forest_fec_insert, which is only called on
476 : fec_completes message from shed tile.
477 :
478 : Which means we must have received fec completion for all the FEC
479 : sets of `n` from shred tile in order for the cmpl bitset to
480 : match the fecs bitset, for the consumed to move past `n`.
481 :
482 : Since we only call fd_forest_fec_insert on the fec_completes
483 : message from shred tile, this means fec_resolver must have
484 : COMPLETED each FEC set of slot n at least once. After the first
485 : fec_completes for each FEC set in `n`, the fec_resolver will
486 : have the FEC sets of `n` in the done_map of the fec_resolver.
487 : This prevents any more duplicate fec_completes for `n`
488 : happening for usually a buffer of 65k fec sets, until they get
489 : evicted from the done_map.
490 :
491 : Now consider that we have moved the consumed frontier past slot
492 : n.
493 :
494 : It is technically possible for us to receive again a FEC set of
495 : slot n many many slots later, after the done_map has erased all
496 : records of the FEC sets of slot n. This is not a problem.
497 : 1) If slot n is not in scope of the forest root, then
498 : fec_resolver will build a ctx for that FEC set, but repair
499 : will ignore any messages related to that FEC set, as the slot
500 : <= forest_root. Eventually the fec_resolver will either
501 : complete or evict the ctx, in which either the fec_completes
502 : or fec_clear msg will be ignored by repair.
503 : 2) If slot n is in scope of the forest root, then the shred
504 : delivered to repair will trigger a data_shred_insert call
505 : that does nothing, as repair already has record of that
506 : shred. Eventually the fec_completes or fec_clear msg will be
507 : delivered to repair. fec_insert will do nothing. fec_clear
508 : will remove the idxs for the shreds from the bitset, and
509 : update the buffered_idx. This doesn't matter though! because
510 : we already have moved past slot n on the consumed frontier.
511 : No need to request those shreds again.
512 :
513 : Except 2) breaks a bit with in specific leader slot cases. See
514 : fd_forest_fec_clear for more details. */
515 :
516 : void
517 : fd_forest_fec_clear( fd_forest_t * forest, ulong slot, uint fec_set_idx, uint max_shred_idx );
518 :
519 : /* fd_forest_publish publishes slot as the new forest root, setting
520 : the subtree beginning from slot as the new forest tree (ie. slot
521 : and all its descendants). Prunes all eles not in slot's forest.
522 : Assumes slot is present in forest. Returns the new root. */
523 :
524 : fd_forest_blk_t const *
525 : fd_forest_publish( fd_forest_t * forest, ulong slot );
526 :
527 : struct fd_forest_iter {
528 : ulong ele_idx;
529 : uint shred_idx;
530 : ulong frontier_ver; /* the frontier version number of forest at time of initialization */
531 : fd_forest_conslist_iter_t head; /* the frontier node "root" of our current iteration, provided NO insertions or deletions in the frontier. */
532 : };
533 : typedef struct fd_forest_iter fd_forest_iter_t;
534 :
535 : /* fd_forest_iter_* supports iteration over a frontier node of the
536 : forest and its children. iter_init selects the frontier_iter_init
537 : node from the frontier. iter_next advances the iterator to the next
538 : shred currently not yet existing in the forest, and will always
539 : choose the left most child to iterate down. This iterator is safe
540 : with concurrent query/insert/remove. If the forest has not been
541 : modified, the iterator will continue down all frontier nodes. If not,
542 : the iterator will return done.
543 :
544 : An iterator is done when the ele_idx is null, i.e. the leaf of the
545 : original selected frontier node has been reached.
546 :
547 : An iterator signifies to the repair tile to request the
548 : highest_window_index when the ele_idx is not null and shred_idx is
549 : UINT_MAX.
550 :
551 : Otherwise, the iterator signifies to the repair tile to request a
552 : regular shred window_idx.
553 :
554 : Caller should generally have a timeout that resets the iterator. In
555 : addition, since the iterator always chooses the leftmost child,
556 : reaching new forks under one frontier node relies on repair reponses
557 : -> shreds being inserted. Thus the frontier nodes can advance to the
558 : slot where the fork branched. */
559 :
560 : fd_forest_iter_t
561 : fd_forest_iter_init( fd_forest_t * forest );
562 :
563 : fd_forest_iter_t
564 : fd_forest_iter_next( fd_forest_iter_t iter, fd_forest_t const * forest );
565 :
566 : int
567 : fd_forest_iter_done( fd_forest_iter_t iter, fd_forest_t const * forest );
568 :
569 : /* Misc */
570 :
571 : /* fd_forest_verify checks the forest is not obviously corrupt.
572 : Returns 0 if verify succeeds, -1 otherwise. */
573 :
574 : int
575 : fd_forest_verify( fd_forest_t const * forest );
576 :
577 : void
578 : fd_forest_frontier_print( fd_forest_t const * forest );
579 :
580 : /* fd_forest_print pretty-prints a formatted forest tree. Printing begins
581 : from `ele` (it will appear as the root in the print output).
582 :
583 : The most straightforward and commonly used printing pattern is:
584 : `fd_forest_print( forest, fd_forest_root( forest ) )`
585 :
586 : This would print forest beginning from the root.
587 :
588 : Alternatively, caller can print a more localized view, for example
589 : starting from the grandparent of the most recently executed slot:
590 :
591 : ```
592 : fd_forest_blk_t const * ele = fd_forest_query( slot );
593 : fd_forest_print( forest, fd_forest_parent( fd_forest_parent( ele ) ) )
594 : ```
595 :
596 : Callers should add null-checks as appropriate in actual usage. */
597 :
598 : void
599 : fd_forest_print( fd_forest_t const * forest );
600 :
601 : FD_PROTOTYPES_END
602 :
603 : #endif /* HEADER_fd_src_choreo_forest_fd_forest_h */
|