Line data Source code
1 : #ifndef HEADER_fd_src_choreo_forest_fd_forest_h
2 : #define HEADER_fd_src_choreo_forest_fd_forest_h
3 :
4 : /* Forest is an API for repairing blocks as they are discovered from the
5 : cluster via Turbine or Gossip. Shreds (from Turbine) and votes (from
6 : Gossip) inform forest that a block with the given slot they are
7 : associated with exists. Blk repair ensures that this block is
8 : received in its entirety by requesting repairs for missing shreds for
9 : the block.
10 :
11 : Like other fork-aware structures, forest maintains a tree that
12 : records the ancestry of slots. It also maintains a frontier, which
13 : models the leaves of the tree ie. the oldest (in ancestry) blocks
14 : that still need to be repaired (across multiple forks).
15 :
16 : Forest constructs the ancestry tree backwards, and then repairs the
17 : tree forwards (using BFS). */
18 :
19 : /* FD_FOREST_USE_HANDHOLDING: Define this to non-zero at compile time
20 : to turn on additional runtime checks and logging. */
21 :
22 : #include "../../disco/fd_disco_base.h"
23 :
24 : #ifndef FD_FOREST_USE_HANDHOLDING
25 : #define FD_FOREST_USE_HANDHOLDING 1
26 : #endif
27 :
28 0 : #define FD_FOREST_VER_UNINIT (0UL)
29 : #define FD_FOREST_VER_INVAL (ULONG_MAX)
30 :
31 0 : #define FD_FOREST_MAGIC (0xf17eda2ce7b1c0UL) /* firedancer forest version 0 */
32 :
33 : #define SET_NAME fd_forest_ele_idxs
34 : #define SET_MAX FD_SHRED_BLK_MAX
35 : #include "../../util/tmpl/fd_set.c"
36 :
37 :
38 : /* fd_forest_ele_t implements a left-child, right-sibling n-ary
39 : tree. Each ele maintains the `pool` index of its left-most child
40 : (`child_idx`), its immediate-right sibling (`sibling_idx`), and its
41 : parent (`parent_idx`).
42 :
43 : This tree structure is gaddr-safe and supports accesses and
44 : operations from processes with separate local forest joins. */
45 :
46 : struct __attribute__((aligned(128UL))) fd_forest_ele {
47 : ulong slot; /* map key */
48 : ulong prev; /* internal use by link_orphans */
49 : ulong next; /* internal use by fd_pool, fd_map_chain */
50 : ulong parent; /* pool idx of the parent in the tree */
51 : ulong child; /* pool idx of the left-child */
52 : ulong sibling; /* pool idx of the right-sibling */
53 :
54 : uint buffered_idx; /* highest contiguous buffered shred idx */
55 : uint complete_idx; /* shred_idx with SLOT_COMPLETE_FLAG ie. last shred idx in the slot */
56 :
57 : fd_forest_ele_idxs_t cmpl[fd_forest_ele_idxs_word_cnt]; /* fec complete idxs */
58 : fd_forest_ele_idxs_t fecs[fd_forest_ele_idxs_word_cnt]; /* fec set idxs */
59 : fd_forest_ele_idxs_t idxs[fd_forest_ele_idxs_word_cnt]; /* data shred idxs */
60 : };
61 : typedef struct fd_forest_ele fd_forest_ele_t;
62 :
63 : #define POOL_NAME fd_forest_pool
64 0 : #define POOL_T fd_forest_ele_t
65 : #include "../../util/tmpl/fd_pool.c"
66 :
67 : #define MAP_NAME fd_forest_ancestry
68 : #define MAP_ELE_T fd_forest_ele_t
69 0 : #define MAP_KEY slot
70 : #include "../../util/tmpl/fd_map_chain.c"
71 :
72 : #define MAP_NAME fd_forest_frontier
73 : #define MAP_ELE_T fd_forest_ele_t
74 0 : #define MAP_KEY slot
75 : #include "../../util/tmpl/fd_map_chain.c"
76 :
77 : #define MAP_NAME fd_forest_orphaned
78 : #define MAP_ELE_T fd_forest_ele_t
79 0 : #define MAP_KEY slot
80 : #include "../../util/tmpl/fd_map_chain.c"
81 :
82 : /* fd_forest_t is the top-level structure that holds the root of
83 : the tree, as well as the memory pools and map structures.
84 :
85 : These structures are bump-allocated and laid out contiguously in
86 : memory from the fd_forest_t * pointer which points to the
87 : beginning of the memory region.
88 :
89 : --------------------- <- fd_forest_t *
90 : | metadata |
91 : |-------------------|
92 : | pool |
93 : |-------------------|
94 : | ancestry |
95 : |-------------------|
96 : | frontier |
97 : |-------------------|
98 : | orphaned |
99 : ---------------------
100 :
101 : A valid, initialized forest is always non-empty. After
102 : `fd_forest_init` the forest will always have a root ele unless
103 : modified improperly out of forest's API.*/
104 :
105 : struct __attribute__((aligned(128UL))) fd_forest {
106 : ulong root; /* pool idx of the root */
107 : ulong iter; /* pool idx of the iterator */
108 : ulong wksp_gaddr; /* wksp gaddr of fd_forest in the backing wksp, non-zero gaddr */
109 : ulong ver_gaddr; /* wksp gaddr of version fseq, incremented on write ops */
110 : ulong pool_gaddr; /* wksp gaddr of fd_pool */
111 : ulong ancestry_gaddr; /* wksp_gaddr of fd_forest_ancestry */
112 : ulong frontier_gaddr; /* map of slot to ele (leaf that needs repair) */
113 : ulong orphaned_gaddr; /* map of parent_slot to singly-linked list of ele orphaned by that parent slot */
114 : ulong magic; /* ==FD_FOREST_MAGIC */
115 : };
116 : typedef struct fd_forest fd_forest_t;
117 :
118 : FD_PROTOTYPES_BEGIN
119 :
120 : /* Constructors */
121 :
122 : /* fd_forest_{align,footprint} return the required alignment and
123 : footprint of a memory region suitable for use as forest with up to
124 : ele_max eles and vote_max votes. */
125 :
126 : FD_FN_CONST static inline ulong
127 0 : fd_forest_align( void ) {
128 0 : return alignof(fd_forest_t);
129 0 : }
130 :
131 : FD_FN_CONST static inline ulong
132 0 : fd_forest_footprint( ulong ele_max ) {
133 0 : return FD_LAYOUT_FINI(
134 0 : FD_LAYOUT_APPEND(
135 0 : FD_LAYOUT_APPEND(
136 0 : FD_LAYOUT_APPEND(
137 0 : FD_LAYOUT_APPEND(
138 0 : FD_LAYOUT_APPEND(
139 0 : FD_LAYOUT_APPEND(
140 0 : FD_LAYOUT_INIT,
141 0 : alignof(fd_forest_t), sizeof(fd_forest_t) ),
142 0 : fd_fseq_align(), fd_fseq_footprint() ),
143 0 : fd_forest_pool_align(), fd_forest_pool_footprint( ele_max ) ),
144 0 : fd_forest_ancestry_align(), fd_forest_ancestry_footprint( ele_max ) ),
145 0 : fd_forest_frontier_align(), fd_forest_frontier_footprint( ele_max ) ),
146 0 : fd_forest_orphaned_align(), fd_forest_orphaned_footprint( ele_max ) ),
147 0 : fd_forest_align() );
148 0 : }
149 :
150 : /* fd_forest_new formats an unused memory region for use as a
151 : forest. mem is a non-NULL pointer to this region in the local
152 : address space with the required footprint and alignment. */
153 :
154 : void *
155 : fd_forest_new( void * shmem, ulong ele_max, ulong seed );
156 :
157 : /* fd_forest_join joins the caller to the forest. forest
158 : points to the first byte of the memory region backing the forest
159 : in the caller's address space. Returns a pointer in the local
160 : address space to forest on success. */
161 :
162 : fd_forest_t *
163 : fd_forest_join( void * forest );
164 :
165 : /* fd_forest_leave leaves a current local join. Returns a pointer
166 : to the underlying shared memory region on success and NULL on failure
167 : (logs details). Reasons for failure include forest is NULL. */
168 :
169 : void *
170 : fd_forest_leave( fd_forest_t const * forest );
171 :
172 : /* fd_forest_delete unformats a memory region used as a
173 : forest. Assumes only the nobody is joined to the region.
174 : Returns a pointer to the underlying shared memory region or NULL if
175 : used obviously in error (e.g. forest is obviously not a
176 : forest ... logs details). The ownership of the memory region is
177 : transferred to the caller. */
178 :
179 : void *
180 : fd_forest_delete( void * forest );
181 :
182 : /* fd_forest_init initializes a forest. Assumes forest
183 : is a valid local join and no one else is joined. root is the initial
184 : root forest will use. This is the snapshot slot if booting from
185 : a snapshot, 0 if the genesis slot.
186 :
187 : In general, this should be called by the same process that formatted
188 : forest's memory, ie. the caller of fd_forest_new. */
189 :
190 : fd_forest_t *
191 : fd_forest_init( fd_forest_t * forest, ulong root );
192 :
193 : /* fd_forest_fini finishes an forest. Assumes forest is
194 : a valid local join and no one else is joined. */
195 :
196 : void *
197 : fd_forest_fini( fd_forest_t * forest );
198 :
199 : /* Accessors */
200 :
201 : /* fd_forest_wksp returns the local join to the wksp backing the
202 : forest. The lifetime of the returned pointer is at least as
203 : long as the lifetime of the local join. Assumes forest is a
204 : current local join. */
205 :
206 : FD_FN_PURE static inline fd_wksp_t *
207 0 : fd_forest_wksp( fd_forest_t const * forest ) {
208 0 : return (fd_wksp_t *)( ( (ulong)forest ) - forest->wksp_gaddr );
209 0 : }
210 :
211 : /* fd_forest_{ver, ver_const} returns the local join to the version
212 : number fseq. The lifetime of the returned pointer is at least as
213 : long as the lifetime of the local join. Assumes forest is a
214 : current local join. If value is ULONG_MAX, ghost is uninitialized or
215 : invalid. Query pre- & post-read:
216 :
217 : odd: if either pre or post is odd, discard read.
218 : even: if pre == post, read is consistent. */
219 :
220 : FD_FN_PURE static inline ulong *
221 0 : fd_forest_ver( fd_forest_t * forest ) {
222 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ver_gaddr );
223 0 : }
224 :
225 : FD_FN_PURE static inline ulong const *
226 0 : fd_forest_ver_const( fd_forest_t const * forest ) {
227 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ver_gaddr );
228 0 : }
229 :
230 : /* fd_forest_{pool, pool_const} returns a pointer in the caller's address
231 : space to forest's element pool. */
232 :
233 : FD_FN_PURE static inline fd_forest_ele_t *
234 0 : fd_forest_pool( fd_forest_t * forest ) {
235 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->pool_gaddr );
236 0 : }
237 :
238 : FD_FN_PURE static inline fd_forest_ele_t const *
239 0 : fd_forest_pool_const( fd_forest_t const * forest ) {
240 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->pool_gaddr );
241 0 : }
242 :
243 : /* fd_forest_{ancestry, ancestry_const} returns a pointer in the caller's
244 : address space to forest's ancestry map. */
245 :
246 : FD_FN_PURE static inline fd_forest_ancestry_t *
247 0 : fd_forest_ancestry( fd_forest_t * forest ) {
248 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ancestry_gaddr );
249 0 : }
250 :
251 : FD_FN_PURE static inline fd_forest_ancestry_t const *
252 0 : fd_forest_ancestry_const( fd_forest_t const * forest ) {
253 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ancestry_gaddr );
254 0 : }
255 :
256 : /* fd_forest_{frontier, frontier_const} returns a pointer in the caller's
257 : address space to forest's frontier map. */
258 :
259 : FD_FN_PURE static inline fd_forest_frontier_t *
260 0 : fd_forest_frontier( fd_forest_t * forest ) {
261 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->frontier_gaddr );
262 0 : }
263 :
264 : FD_FN_PURE static inline fd_forest_frontier_t const *
265 0 : fd_forest_frontier_const( fd_forest_t const * forest ) {
266 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->frontier_gaddr );
267 0 : }
268 :
269 : /* fd_forest_{orphaned, orphaned_const} returns a pointer in the caller's
270 : address space to forest's orphaned map. */
271 :
272 : FD_FN_PURE static inline fd_forest_orphaned_t *
273 0 : fd_forest_orphaned( fd_forest_t * forest ) {
274 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->orphaned_gaddr );
275 0 : }
276 :
277 : FD_FN_PURE static inline fd_forest_orphaned_t const *
278 0 : fd_forest_orphaned_const( fd_forest_t const * forest ) {
279 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->orphaned_gaddr );
280 0 : }
281 :
282 : /* fd_forest_root_slot returns forest's root slot. Assumes
283 : forest is a current local join. */
284 :
285 : FD_FN_PURE static inline ulong
286 0 : fd_forest_root_slot( fd_forest_t const * forest ) {
287 0 : if( FD_UNLIKELY( forest->root == fd_forest_pool_idx_null( fd_forest_pool_const( forest ) ) )) return ULONG_MAX; /* uninitialized */
288 0 : return fd_forest_pool_ele_const( fd_forest_pool_const( forest ), forest->root )->slot;
289 0 : }
290 :
291 : fd_forest_ele_t *
292 : fd_forest_query( fd_forest_t * forest, ulong slot );
293 :
294 : /* Operations */
295 :
296 : /* fd_forest_shred_insert inserts a new shred into the forest.
297 : Assumes slot >= forest->smr, slot is not already in forest,
298 : parent_slot is already in forest, and the ele pool has a free
299 : element (if handholding is enabled, explicitly checks and errors).
300 : Returns the inserted forest ele. */
301 :
302 : fd_forest_ele_t *
303 : fd_forest_data_shred_insert( fd_forest_t * forest, ulong slot, ushort parent_off, uint shred_idx, uint fec_set_idx, int data_complete, int slot_complete );
304 :
305 : /* fd_forest_publish publishes slot as the new forest root, setting
306 : the subtree beginning from slot as the new forest tree (ie. slot
307 : and all its descendants). Prunes all eles not in slot's forest.
308 : Assumes slot is present in forest. Returns the new root. */
309 :
310 : fd_forest_ele_t const *
311 : fd_forest_publish( fd_forest_t * forest, ulong slot );
312 :
313 : struct fd_forest_iter {
314 : ulong ele_idx;
315 : uint shred_idx;
316 : ulong frontier_ver; /* the frontier version number of forest at time of initialization */
317 : fd_forest_frontier_iter_t head; /* the frontier node "root" of our current iteration, provided NO insertions or deletions in the frontier. */
318 : };
319 : typedef struct fd_forest_iter fd_forest_iter_t;
320 :
321 : /* fd_forest_iter_* supports iteration over a frontier node of the
322 : forest and its children. iter_init selects the frontier_iter_init
323 : node from the frontier. iter_next advances the iterator to the next
324 : shred currently not yet existing in the forest, and will always
325 : choose the left most child to iterate down. This iterator is safe
326 : with concurrent query/insert/remove. If the forest has not been
327 : modified, the iterator will continue down all frontier nodes. If not,
328 : the iterator will return done.
329 :
330 : An iterator is done when the ele_idx is null, i.e. the leaf of the
331 : original selected frontier node has been reached.
332 :
333 : An iterator signifies to the repair tile to request the
334 : highest_window_index when the ele_idx is not null and shred_idx is
335 : UINT_MAX.
336 :
337 : Otherwise, the iterator signifies to the repair tile to request a
338 : regular shred window_idx.
339 :
340 : Caller should generally have a timeout that resets the iterator. In
341 : addition, since the iterator always chooses the leftmost child,
342 : reaching new forks under one frontier node relies on repair reponses
343 : -> shreds being inserted. Thus the frontier nodes can advance to the
344 : slot where the fork branched. */
345 :
346 : fd_forest_iter_t
347 : fd_forest_iter_init( fd_forest_t * forest );
348 :
349 : fd_forest_iter_t
350 : fd_forest_iter_next( fd_forest_iter_t iter, fd_forest_t const * forest );
351 :
352 : int
353 : fd_forest_iter_done( fd_forest_iter_t iter, fd_forest_t const * forest );
354 :
355 : /* Misc */
356 :
357 : /* fd_forest_verify checks the forest is not obviously corrupt.
358 : Returns 0 if verify succeeds, -1 otherwise. */
359 :
360 : int
361 : fd_forest_verify( fd_forest_t const * forest );
362 :
363 : void
364 : fd_forest_frontier_print( fd_forest_t const * forest );
365 :
366 : /* fd_forest_print pretty-prints a formatted forest tree. Printing begins
367 : from `ele` (it will appear as the root in the print output).
368 :
369 : The most straightforward and commonly used printing pattern is:
370 : `fd_forest_print( forest, fd_forest_root( forest ) )`
371 :
372 : This would print forest beginning from the root.
373 :
374 : Alternatively, caller can print a more localized view, for example
375 : starting from the grandparent of the most recently executed slot:
376 :
377 : ```
378 : fd_forest_ele_t const * ele = fd_forest_query( slot );
379 : fd_forest_print( forest, fd_forest_parent( fd_forest_parent( ele ) ) )
380 : ```
381 :
382 : Callers should add null-checks as appropriate in actual usage. */
383 :
384 : void
385 : fd_forest_print( fd_forest_t const * forest );
386 :
387 : FD_PROTOTYPES_END
388 :
389 : #endif /* HEADER_fd_src_choreo_forest_fd_forest_h */
|