Line data Source code
1 : #ifndef HEADER_fd_src_choreo_forest_fd_forest_h
2 : #define HEADER_fd_src_choreo_forest_fd_forest_h
3 :
4 : /* Forest is an API for repairing blocks as they are discovered from the
5 : cluster via Turbine or Gossip. Shreds (from Turbine) and votes (from
6 : Gossip) inform forest that a block with the given slot they are
7 : associated with exists. Blk repair ensures that this block is
8 : received in its entirety by requesting repairs for missing shreds for
9 : the block.
10 :
11 : Like other fork-aware structures, forest maintains a tree that
12 : records the ancestry of slots. It also maintains a frontier, which
13 : models the leaves of the tree ie. the oldest (in ancestry) blocks
14 : that still need to be repaired (across multiple forks).
15 :
16 : Forest constructs the ancestry tree backwards, and then repairs the
17 : tree forwards (using BFS). */
18 :
19 : /* FD_FOREST_USE_HANDHOLDING: Define this to non-zero at compile time
20 : to turn on additional runtime checks and logging. */
21 :
22 : #include "../../disco/fd_disco_base.h"
23 :
24 : #ifndef FD_FOREST_USE_HANDHOLDING
25 : #define FD_FOREST_USE_HANDHOLDING 1
26 : #endif
27 :
28 0 : #define FD_FOREST_VER_UNINIT (0UL)
29 : #define FD_FOREST_VER_INVAL (ULONG_MAX)
30 :
31 0 : #define FD_FOREST_MAGIC (0xf17eda2ce7b1c0UL) /* firedancer forest version 0 */
32 :
33 : #define SET_NAME fd_forest_ele_idxs
34 : #define SET_MAX FD_SHRED_BLK_MAX
35 : #include "../../util/tmpl/fd_set.c"
36 :
37 :
38 : /* fd_forest_ele_t implements a left-child, right-sibling n-ary
39 : tree. Each ele maintains the `pool` index of its left-most child
40 : (`child_idx`), its immediate-right sibling (`sibling_idx`), and its
41 : parent (`parent_idx`).
42 :
43 : This tree structure is gaddr-safe and supports accesses and
44 : operations from processes with separate local forest joins. */
45 :
46 : struct __attribute__((aligned(128UL))) fd_forest_ele {
47 : ulong slot; /* map key */
48 : ulong next; /* internal use by fd_pool, fd_map_chain */
49 : ulong parent; /* pool idx of the parent in the tree */
50 : ulong child; /* pool idx of the left-child */
51 : ulong sibling; /* pool idx of the right-sibling */
52 :
53 : uint buffered_idx; /* highest contiguous buffered shred idx */
54 : uint complete_idx; /* shred_idx with SLOT_COMPLETE_FLAG ie. last shred idx in the slot */
55 :
56 : fd_forest_ele_idxs_t cmpl[fd_forest_ele_idxs_word_cnt]; /* fec complete idxs */
57 : fd_forest_ele_idxs_t fecs[fd_forest_ele_idxs_word_cnt]; /* fec set idxs */
58 : fd_forest_ele_idxs_t idxs[fd_forest_ele_idxs_word_cnt]; /* data shred idxs */
59 : };
60 : typedef struct fd_forest_ele fd_forest_ele_t;
61 :
62 : #define POOL_NAME fd_forest_pool
63 0 : #define POOL_T fd_forest_ele_t
64 : #include "../../util/tmpl/fd_pool.c"
65 :
66 : #define MAP_NAME fd_forest_ancestry
67 : #define MAP_ELE_T fd_forest_ele_t
68 0 : #define MAP_KEY slot
69 : #include "../../util/tmpl/fd_map_chain.c"
70 :
71 : #define MAP_NAME fd_forest_frontier
72 : #define MAP_ELE_T fd_forest_ele_t
73 0 : #define MAP_KEY slot
74 : #include "../../util/tmpl/fd_map_chain.c"
75 :
76 : #define MAP_NAME fd_forest_orphaned
77 : #define MAP_ELE_T fd_forest_ele_t
78 0 : #define MAP_KEY slot
79 : #include "../../util/tmpl/fd_map_chain.c"
80 :
81 : /* Internal use only for BFSing */
82 : #define DEQUE_NAME fd_forest_deque
83 0 : #define DEQUE_T ulong
84 : #include "../../util/tmpl/fd_deque_dynamic.c"
85 :
86 :
87 : /* fd_forest_t is the top-level structure that holds the root of
88 : the tree, as well as the memory pools and map structures.
89 :
90 : These structures are bump-allocated and laid out contiguously in
91 : memory from the fd_forest_t * pointer which points to the
92 : beginning of the memory region.
93 :
94 : --------------------- <- fd_forest_t *
95 : | metadata |
96 : |-------------------|
97 : | pool |
98 : |-------------------|
99 : | ancestry |
100 : |-------------------|
101 : | frontier |
102 : |-------------------|
103 : | orphaned |
104 : ---------------------
105 :
106 : A valid, initialized forest is always non-empty. After
107 : `fd_forest_init` the forest will always have a root ele unless
108 : modified improperly out of forest's API.*/
109 :
110 : struct __attribute__((aligned(128UL))) fd_forest {
111 : ulong root; /* pool idx of the root */
112 : ulong iter; /* pool idx of the iterator */
113 : ulong wksp_gaddr; /* wksp gaddr of fd_forest in the backing wksp, non-zero gaddr */
114 : ulong ver_gaddr; /* wksp gaddr of version fseq, incremented on write ops */
115 : ulong pool_gaddr; /* wksp gaddr of fd_pool */
116 : ulong ancestry_gaddr; /* wksp_gaddr of fd_forest_ancestry */
117 : ulong frontier_gaddr; /* map of slot to ele (leaf that needs repair) */
118 : ulong orphaned_gaddr; /* map of parent_slot to singly-linked list of ele orphaned by that parent slot */
119 : ulong deque_gaddr; /* wksp gaddr of fd_forest_deque. internal use only for BFSing */
120 : ulong magic; /* ==FD_FOREST_MAGIC */
121 : };
122 : typedef struct fd_forest fd_forest_t;
123 :
124 : FD_PROTOTYPES_BEGIN
125 :
126 : /* Constructors */
127 :
128 : /* fd_forest_{align,footprint} return the required alignment and
129 : footprint of a memory region suitable for use as forest with up to
130 : ele_max eles and vote_max votes. */
131 :
132 : FD_FN_CONST static inline ulong
133 0 : fd_forest_align( void ) {
134 0 : return alignof(fd_forest_t);
135 0 : }
136 :
137 : FD_FN_CONST static inline ulong
138 0 : fd_forest_footprint( ulong ele_max ) {
139 0 : return FD_LAYOUT_FINI(
140 0 : FD_LAYOUT_APPEND(
141 0 : FD_LAYOUT_APPEND(
142 0 : FD_LAYOUT_APPEND(
143 0 : FD_LAYOUT_APPEND(
144 0 : FD_LAYOUT_APPEND(
145 0 : FD_LAYOUT_APPEND(
146 0 : FD_LAYOUT_APPEND(
147 0 : FD_LAYOUT_INIT,
148 0 : alignof(fd_forest_t), sizeof(fd_forest_t) ),
149 0 : fd_fseq_align(), fd_fseq_footprint() ),
150 0 : fd_forest_pool_align(), fd_forest_pool_footprint ( ele_max ) ),
151 0 : fd_forest_ancestry_align(), fd_forest_ancestry_footprint( ele_max ) ),
152 0 : fd_forest_frontier_align(), fd_forest_frontier_footprint( ele_max ) ),
153 0 : fd_forest_orphaned_align(), fd_forest_orphaned_footprint( ele_max ) ),
154 0 : fd_forest_deque_align(), fd_forest_deque_footprint ( ele_max ) ),
155 0 : fd_forest_align() );
156 0 : }
157 :
158 : /* fd_forest_new formats an unused memory region for use as a
159 : forest. mem is a non-NULL pointer to this region in the local
160 : address space with the required footprint and alignment. */
161 :
162 : void *
163 : fd_forest_new( void * shmem, ulong ele_max, ulong seed );
164 :
165 : /* fd_forest_join joins the caller to the forest. forest
166 : points to the first byte of the memory region backing the forest
167 : in the caller's address space. Returns a pointer in the local
168 : address space to forest on success. */
169 :
170 : fd_forest_t *
171 : fd_forest_join( void * forest );
172 :
173 : /* fd_forest_leave leaves a current local join. Returns a pointer
174 : to the underlying shared memory region on success and NULL on failure
175 : (logs details). Reasons for failure include forest is NULL. */
176 :
177 : void *
178 : fd_forest_leave( fd_forest_t const * forest );
179 :
180 : /* fd_forest_delete unformats a memory region used as a
181 : forest. Assumes only the nobody is joined to the region.
182 : Returns a pointer to the underlying shared memory region or NULL if
183 : used obviously in error (e.g. forest is obviously not a
184 : forest ... logs details). The ownership of the memory region is
185 : transferred to the caller. */
186 :
187 : void *
188 : fd_forest_delete( void * forest );
189 :
190 : /* fd_forest_init initializes a forest. Assumes forest
191 : is a valid local join and no one else is joined. root is the initial
192 : root forest will use. This is the snapshot slot if booting from
193 : a snapshot, 0 if the genesis slot.
194 :
195 : In general, this should be called by the same process that formatted
196 : forest's memory, ie. the caller of fd_forest_new. */
197 :
198 : fd_forest_t *
199 : fd_forest_init( fd_forest_t * forest, ulong root );
200 :
201 : /* fd_forest_fini finishes an forest. Assumes forest is
202 : a valid local join and no one else is joined. */
203 :
204 : void *
205 : fd_forest_fini( fd_forest_t * forest );
206 :
207 : /* Accessors */
208 :
209 : /* fd_forest_wksp returns the local join to the wksp backing the
210 : forest. The lifetime of the returned pointer is at least as
211 : long as the lifetime of the local join. Assumes forest is a
212 : current local join. */
213 :
214 : FD_FN_PURE static inline fd_wksp_t *
215 0 : fd_forest_wksp( fd_forest_t const * forest ) {
216 0 : return (fd_wksp_t *)( ( (ulong)forest ) - forest->wksp_gaddr );
217 0 : }
218 :
219 : /* fd_forest_{ver, ver_const} returns the local join to the version
220 : number fseq. The lifetime of the returned pointer is at least as
221 : long as the lifetime of the local join. Assumes forest is a
222 : current local join. If value is ULONG_MAX, ghost is uninitialized or
223 : invalid. Query pre- & post-read:
224 :
225 : odd: if either pre or post is odd, discard read.
226 : even: if pre == post, read is consistent. */
227 :
228 : FD_FN_PURE static inline ulong *
229 0 : fd_forest_ver( fd_forest_t * forest ) {
230 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ver_gaddr );
231 0 : }
232 :
233 : FD_FN_PURE static inline ulong const *
234 0 : fd_forest_ver_const( fd_forest_t const * forest ) {
235 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ver_gaddr );
236 0 : }
237 :
238 : /* fd_forest_{pool, pool_const} returns a pointer in the caller's address
239 : space to forest's element pool. */
240 :
241 : FD_FN_PURE static inline fd_forest_ele_t *
242 0 : fd_forest_pool( fd_forest_t * forest ) {
243 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->pool_gaddr );
244 0 : }
245 :
246 : FD_FN_PURE static inline fd_forest_ele_t const *
247 0 : fd_forest_pool_const( fd_forest_t const * forest ) {
248 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->pool_gaddr );
249 0 : }
250 :
251 : /* fd_forest_{ancestry, ancestry_const} returns a pointer in the caller's
252 : address space to forest's ancestry map. */
253 :
254 : FD_FN_PURE static inline fd_forest_ancestry_t *
255 0 : fd_forest_ancestry( fd_forest_t * forest ) {
256 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ancestry_gaddr );
257 0 : }
258 :
259 : FD_FN_PURE static inline fd_forest_ancestry_t const *
260 0 : fd_forest_ancestry_const( fd_forest_t const * forest ) {
261 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->ancestry_gaddr );
262 0 : }
263 :
264 : /* fd_forest_{frontier, frontier_const} returns a pointer in the caller's
265 : address space to forest's frontier map. */
266 :
267 : FD_FN_PURE static inline fd_forest_frontier_t *
268 0 : fd_forest_frontier( fd_forest_t * forest ) {
269 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->frontier_gaddr );
270 0 : }
271 :
272 : FD_FN_PURE static inline fd_forest_frontier_t const *
273 0 : fd_forest_frontier_const( fd_forest_t const * forest ) {
274 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->frontier_gaddr );
275 0 : }
276 :
277 : /* fd_forest_{orphaned, orphaned_const} returns a pointer in the caller's
278 : address space to forest's orphaned map. */
279 :
280 : FD_FN_PURE static inline fd_forest_orphaned_t *
281 0 : fd_forest_orphaned( fd_forest_t * forest ) {
282 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->orphaned_gaddr );
283 0 : }
284 :
285 : FD_FN_PURE static inline fd_forest_orphaned_t const *
286 0 : fd_forest_orphaned_const( fd_forest_t const * forest ) {
287 0 : return fd_wksp_laddr_fast( fd_forest_wksp( forest ), forest->orphaned_gaddr );
288 0 : }
289 :
290 : /* fd_forest_root_slot returns forest's root slot. Assumes
291 : forest is a current local join. */
292 :
293 : FD_FN_PURE static inline ulong
294 0 : fd_forest_root_slot( fd_forest_t const * forest ) {
295 0 : if( FD_UNLIKELY( forest->root == fd_forest_pool_idx_null( fd_forest_pool_const( forest ) ) )) return ULONG_MAX; /* uninitialized */
296 0 : return fd_forest_pool_ele_const( fd_forest_pool_const( forest ), forest->root )->slot;
297 0 : }
298 :
299 : fd_forest_ele_t *
300 : fd_forest_query( fd_forest_t * forest, ulong slot );
301 :
302 : /* Operations */
303 :
304 : /* fd_forest_shred_insert inserts a new shred into the forest.
305 : Assumes slot >= forest->smr, slot is not already in forest,
306 : parent_slot is already in forest, and the ele pool has a free
307 : element (if handholding is enabled, explicitly checks and errors).
308 : Returns the inserted forest ele. */
309 :
310 : fd_forest_ele_t *
311 : fd_forest_data_shred_insert( fd_forest_t * forest, ulong slot, ushort parent_off, uint shred_idx, uint fec_set_idx, int data_complete, int slot_complete );
312 :
313 : /* fd_forest_publish publishes slot as the new forest root, setting
314 : the subtree beginning from slot as the new forest tree (ie. slot
315 : and all its descendants). Prunes all eles not in slot's forest.
316 : Assumes slot is present in forest. Returns the new root. */
317 :
318 : fd_forest_ele_t const *
319 : fd_forest_publish( fd_forest_t * forest, ulong slot );
320 :
321 : struct fd_forest_iter {
322 : ulong ele_idx;
323 : uint shred_idx;
324 : ulong frontier_ver; /* the frontier version number of forest at time of initialization */
325 : fd_forest_frontier_iter_t head; /* the frontier node "root" of our current iteration, provided NO insertions or deletions in the frontier. */
326 : };
327 : typedef struct fd_forest_iter fd_forest_iter_t;
328 :
329 : /* fd_forest_iter_* supports iteration over a frontier node of the
330 : forest and its children. iter_init selects the frontier_iter_init
331 : node from the frontier. iter_next advances the iterator to the next
332 : shred currently not yet existing in the forest, and will always
333 : choose the left most child to iterate down. This iterator is safe
334 : with concurrent query/insert/remove. If the forest has not been
335 : modified, the iterator will continue down all frontier nodes. If not,
336 : the iterator will return done.
337 :
338 : An iterator is done when the ele_idx is null, i.e. the leaf of the
339 : original selected frontier node has been reached.
340 :
341 : An iterator signifies to the repair tile to request the
342 : highest_window_index when the ele_idx is not null and shred_idx is
343 : UINT_MAX.
344 :
345 : Otherwise, the iterator signifies to the repair tile to request a
346 : regular shred window_idx.
347 :
348 : Caller should generally have a timeout that resets the iterator. In
349 : addition, since the iterator always chooses the leftmost child,
350 : reaching new forks under one frontier node relies on repair reponses
351 : -> shreds being inserted. Thus the frontier nodes can advance to the
352 : slot where the fork branched. */
353 :
354 : fd_forest_iter_t
355 : fd_forest_iter_init( fd_forest_t * forest );
356 :
357 : fd_forest_iter_t
358 : fd_forest_iter_next( fd_forest_iter_t iter, fd_forest_t const * forest );
359 :
360 : int
361 : fd_forest_iter_done( fd_forest_iter_t iter, fd_forest_t const * forest );
362 :
363 : /* Misc */
364 :
365 : /* fd_forest_verify checks the forest is not obviously corrupt.
366 : Returns 0 if verify succeeds, -1 otherwise. */
367 :
368 : int
369 : fd_forest_verify( fd_forest_t const * forest );
370 :
371 : void
372 : fd_forest_frontier_print( fd_forest_t const * forest );
373 :
374 : /* fd_forest_print pretty-prints a formatted forest tree. Printing begins
375 : from `ele` (it will appear as the root in the print output).
376 :
377 : The most straightforward and commonly used printing pattern is:
378 : `fd_forest_print( forest, fd_forest_root( forest ) )`
379 :
380 : This would print forest beginning from the root.
381 :
382 : Alternatively, caller can print a more localized view, for example
383 : starting from the grandparent of the most recently executed slot:
384 :
385 : ```
386 : fd_forest_ele_t const * ele = fd_forest_query( slot );
387 : fd_forest_print( forest, fd_forest_parent( fd_forest_parent( ele ) ) )
388 : ```
389 :
390 : Callers should add null-checks as appropriate in actual usage. */
391 :
392 : void
393 : fd_forest_print( fd_forest_t const * forest );
394 :
395 : FD_PROTOTYPES_END
396 :
397 : #endif /* HEADER_fd_src_choreo_forest_fd_forest_h */
|