Line data Source code
1 : #ifndef HEADER_fd_src_discof_repair_fd_fec_chainer_h
2 : #define HEADER_fd_src_discof_repair_fd_fec_chainer_h
3 :
4 : /* FEC chainer is an API for "chaining" FEC sets as they are received
5 : asynchronously out-of-order over the network (via Turbine and
6 : Repair). The chainer both validates and reorders those FEC sets, and
7 : delivers them in-order to the calling application.
8 :
9 : Every FEC set has a parent (the immediately preceding FEC set in the
10 : slot or parent slot) and children (immediately succeeding FEC set(s)
11 : for the same slot or child slots). Because of forks, FEC sets can
12 : have multiple children, but this will only be true across slots (ie.
13 : the parent and child must be different slots). The chainer treats
14 : forks as "concurrent" with no particular order, so the calling
15 : application will receive forking FEC sets in the order in which the
16 : chainer is able to chain them.
17 :
18 : There is a protocol violation called equivocation (also known as
19 : "duplicates") that breaks the invariant that forks must be across
20 : slots. For example, equivocation can result in observing two or more
21 : child FEC sets for a given parent FEC set in the same slot.
22 : Equivocation can also result in other anomalies such as keying
23 : collisions (this is detailed later in the documentation). The
24 : chainer makes a best-effort attempt to detect and error on
25 : equivocation. Examples include checking merkle roots chain correctly
26 : and checking FEC sets are unique. Not all cases of equivocation can
27 : be detected by the chainer however, as not all the necessary
28 : information is yet available at this stage in the validator pipeline.
29 : Ultimately, if there is equivocation, it is the responsibility of the
30 : consensus module to handle it. */
31 :
32 : #include "../../ballet/shred/fd_shred.h"
33 : #include "../../flamenco/types/fd_types_custom.h"
34 :
35 : /* FD_FEC_CHAINER_USE_HANDHOLDING: Define this to non-zero at compile time
36 : to turn on additional runtime checks and logging. */
37 :
38 : #ifndef FD_FEC_CHAINER_USE_HANDHOLDING
39 : #define FD_FEC_CHAINER_USE_HANDHOLDING 1
40 : #endif
41 :
42 0 : #define FD_FEC_CHAINER_SUCCESS ( 0)
43 0 : #define FD_FEC_CHAINER_ERR_UNIQUE (-1) /* key uniqueness conflict */
44 0 : #define FD_FEC_CHAINER_ERR_MERKLE (-2) /* chained merkle root conflict */
45 :
46 : /* fd_fec_chainer is a tree-like structure backed by three maps. At any
47 : given point in time, an element (FEC set) in the chainer is in one of
48 : three possible positions with respect to the tree: a non-leaf, leaf,
49 : or not connected. This corresponds to the ancestry, frontier, or
50 : orphaned maps, respectively. Therefore, a given element will always
51 : be present in exactly one of these maps, depending on where (and
52 : whether) it currently is in the tree.
53 :
54 : KEYING
55 :
56 : The chainer keys FEC sets by concatenating the slot with fec_set_idx.
57 : This uniquely identifies a FEC set in most cases. It is possible to
58 : receive over the network two or more different FEC sets with the same
59 : slot and fec_set_idx due to equivocation as mentioned earlier. In
60 : general, the chainer expects the caller to handle equivocation and
61 : assumes unique FEC sets will have unique keys (handholding is
62 : available to verify this).
63 :
64 : The map key is an encoding of the slot and fec_set_idx which uniquely
65 : keys every FEC set. The 32 msb of the key are the 32 lsb of the slot
66 : and the 32 lsb of the key are the fec_set_idx, except when the FEC
67 : set is the last one for the slot, in which case the 32 lsb are set to
68 : UINT_MAX. By setting fec_set_idx to UINT_MAX, the chainer can easily
69 : query for the last FEC set in any given slot
70 :
71 : A useful property of the keying scheme above is a FEC set can infer
72 : the key of its immediate child by adding data_cnt to its fec_set_idx.
73 : For example, a FEC set for slot 0, fec_set_idx 0, data_cnt 32 knows
74 : its child key is slot 0, fec_set_idx 32. The last FEC set in the slot
75 : is special because the child(s) FEC set will have a different slot
76 : number, so we know the fec_set_idx will be zero.
77 :
78 : There is one exception to this keying scheme. When the FEC set is
79 : the last one in the slot, an extra insertion to the parent_map is
80 : done. In the standard procedure, the second to last fec will create
81 : the (n-1, n) entry, and the following child slot will create a
82 : (UINT_MAX, 0) entry. Thus we insert an extra entry (n, UINT_MAX) in
83 : the parent_map to connect the chain. This double insertion is only
84 : done for the parent_map - the pool_ele will have an element with key
85 : slot | fec_set_idx, not UINT_MAX.
86 :
87 : Visually, the parent_map / elements looks like this:
88 :
89 : - Arrows denote a child->parent entry in the parent_map.
90 :
91 : parent_map | pool_ele (slot, fec_set_idx, completes)
92 : ──────────────────────────────────────────────────────────────────────────────────────────────────
93 : (slot, 0) | (slot, 0, 0)
94 : ▲ |
95 : | |
96 : (slot, 32) | (slot, 32, 0)
97 : ▲ |
98 : | |
99 : (slot, 64) <-- (slot, UINT_MAX) | (slot, 64, 1)
100 : ▲ |
101 : | |
102 : (slot + 1, 0) | (slot + 1, 0, 0)
103 : ▲ |
104 : | |
105 : (slot + 1, 32) | (slot + 1, 32, 0)
106 : ▲ |
107 : | |
108 : (slot + 1, 64) ◄── (slot + 1, UINT_MAX) | (slot + 1, 64, 1)
109 : ▲ |
110 : | ... |
111 :
112 : Thus we will have double entries for the last FEC set in a slot in
113 : the parent map, but only one entry in the ancestry/orphan/frontier
114 : pool. This means if we want to query for the last FEC set in a slot,
115 : we need to query the parent_map twice - once with the fec_set_idx set
116 : to UINT_MAX and once with the parent_key of the result.
117 :
118 : INSERTING
119 :
120 : When inserting a new FEC set, the chainer first checks whether the
121 : parent is a FEC set already in the frontier map. This indicates that
122 : the new FEC set directly chains off the frontier. If it does, the
123 : parent FEC set is removed, and the new FEC set is inserted into the
124 : frontier map. This is the common case because we expect FEC sets to
125 : chain linearly the vast majority (ie. not start new forks), so the
126 : new FEC set is simply "advancing" the frontier. The parent FEC set
127 : is also added to the ancestry map, so that every leaf can trace back
128 : to the root.
129 :
130 : If the FEC set's parent is not already in the frontier, the chainer
131 : checks the ancestry map next. If the parent is in the ancestry map,
132 : the chainer knows that this FEC set is starting a new fork, because
133 : it is part of the tree (the ancestry) but not one of the leaves (the
134 : frontier). In this case, the new FEC set is simply inserted into the
135 : frontier map, and now the frontier has an additional fork (leaf).
136 :
137 : Lastly, if the FEC set's parent is not in the ancestry map, the
138 : chainer knows that this FEC set is orphaned. It is inserted into the
139 : orphaned map for later retry of tree insertion when its ancestors
140 : have been inserted.
141 :
142 : Here are some more important details on forks. Note a FEC set can
143 : only start a new fork when it is across a slot boundary (different
144 : slot than its parent). It is invalid for two FEC sets to chain off
145 : the same parent FEC within the same slot - this would imply there are
146 : two FECs keyed by the (same slot, fec_set_idx) combination, which as
147 : detailed earlier, is equivocation. Therefore, only the first FEC set
148 : in a slot can start a fork from the last FEC set in a parent slot. We
149 : know a FEC set is the first one in a slot when the fec_set_idx is 0,
150 : and we know it is the last one when the last shred in the FEC set has
151 : the SLOT_COMPLETE flag set.
152 :
153 : QUERYING
154 :
155 : The chainer can fast O(1) query any FEC set using the key. As
156 : mentioned earlier, any FEC set except the last one in a slot can
157 : derive its direct child's key and therefore query for it.
158 :
159 : For the special case of the first FEC set in a slot, the chainer can
160 : derive the parent key by subtracting the parent_off from the slot and
161 : querying for (slot, UINT_MAX).
162 :
163 : CHAINING
164 :
165 : As mentioned in the top-level documentation, the purpose of the
166 : chainer is to chain FEC sets. On insertion, the chainer will attempt
167 : to chain as many FEC sets as possible to the frontier. The chainer
168 : does this by conducting a BFS from the just-inserted FEC set, looking
169 : for parents and orphans to traverse. See `chain` in the .c file for
170 : the implementation. */
171 :
172 : typedef struct fd_fec_chainer fd_fec_chainer_t; /* forward decl */
173 :
174 : struct fd_fec_ele {
175 : ulong key; /* map key */
176 : ulong next; /* reserved for use by fd_pool and fd_map_chain */
177 :
178 : ulong slot;
179 : uint fec_set_idx;
180 : ushort data_cnt;
181 : int data_complete;
182 : int slot_complete;
183 : ushort parent_off;
184 : uchar merkle_root[FD_SHRED_MERKLE_ROOT_SZ];
185 : uchar chained_merkle_root[FD_SHRED_MERKLE_ROOT_SZ];
186 : };
187 : typedef struct fd_fec_ele fd_fec_ele_t;
188 :
189 : #define POOL_NAME fd_fec_pool
190 0 : #define POOL_T fd_fec_ele_t
191 : #include "../../util/tmpl/fd_pool.c"
192 :
193 : #define MAP_NAME fd_fec_ancestry
194 : #define MAP_ELE_T fd_fec_ele_t
195 : #include "../../util/tmpl/fd_map_chain.c"
196 :
197 : #define MAP_NAME fd_fec_frontier
198 : #define MAP_ELE_T fd_fec_ele_t
199 : #include "../../util/tmpl/fd_map_chain.c"
200 :
201 : #define MAP_NAME fd_fec_orphaned
202 : #define MAP_ELE_T fd_fec_ele_t
203 : #include "../../util/tmpl/fd_map_chain.c"
204 :
205 : struct fd_fec_parent {
206 : ulong key;
207 : ulong parent_key;
208 : };
209 : typedef struct fd_fec_parent fd_fec_parent_t;
210 :
211 : /* There are no FEC sets for the genesis block, so (0, 0) represents the
212 : NULL map key. */
213 :
214 : #define MAP_NAME fd_fec_parents
215 0 : #define MAP_T fd_fec_parent_t
216 : #define MAP_MEMOIZE 0
217 : #include "../../util/tmpl/fd_map_dynamic.c"
218 :
219 : #define FD_FEC_CHILDREN_MAX (64) /* TODO size this more reasonably */
220 : FD_STATIC_ASSERT( FD_FEC_CHILDREN_MAX % 64 == 0, FD_FEC_CHILDREN_MAX must be a multiple of 64 bits per word );
221 :
222 : #define SET_NAME fd_slot_child_offs
223 : #define SET_MAX FD_FEC_CHILDREN_MAX
224 : #include "../../util/tmpl/fd_set.c"
225 :
226 : /* FIXME consider alternate pooled tree-like representation eg. fd_ghost
227 : maybe the ghost generic in tmpl? */
228 :
229 : struct fd_fec_children {
230 : ulong slot;
231 : fd_slot_child_offs_t child_offs[FD_FEC_CHILDREN_MAX / 64];
232 : };
233 : typedef struct fd_fec_children fd_fec_children_t;
234 :
235 : #define MAP_NAME fd_fec_children
236 0 : #define MAP_T fd_fec_children_t
237 0 : #define MAP_KEY slot
238 : #define MAP_MEMOIZE 0
239 : #include "../../util/tmpl/fd_map_dynamic.c"
240 :
241 : #define DEQUE_NAME fd_fec_queue
242 0 : #define DEQUE_T ulong
243 : #include "../../util/tmpl/fd_deque_dynamic.c"
244 :
245 : struct fd_fec_out {
246 : int err; /* FD_FEC_CHAINER_{SUCCESS,ERR} */
247 : ulong slot; /* The slot of the FEC set */
248 : ushort parent_off; /* The index of first shred in the FEC set */
249 : uint fec_set_idx; /* The offset for the parent slot of the FEC set */
250 : ushort data_cnt; /* The number of data shreds in the FEC set */
251 : int data_complete; /* Whether the FEC set completes an entry batch */
252 : int slot_complete; /* Whether this FEC set completes the slot */
253 : fd_hash_t merkle_root; /* The merkle root of the FEC set */
254 : fd_hash_t chained_root; /* The chained merkle root of the FEC set */
255 : };
256 : typedef struct fd_fec_out fd_fec_out_t;
257 :
258 : #define DEQUE_NAME fd_fec_out
259 0 : #define DEQUE_T fd_fec_out_t
260 : #include "../../util/tmpl/fd_deque_dynamic.c"
261 :
262 : /* TODO deque probably not needed if reuse ele->next. */
263 :
264 : struct __attribute__((aligned(128UL))) fd_fec_chainer {
265 : ulong root; /* pool idx of the root FEC set */
266 : ulong slot0; /* special initialization slot. chains first FEC */
267 : fd_fec_ancestry_t * ancestry; /* map of key->fec. non-leaves of FEC tree */
268 : fd_fec_frontier_t * frontier; /* map of key->fec. leaves */
269 : fd_fec_orphaned_t * orphaned; /* map of key->fec. FECs not yet inserted to tree */
270 : fd_fec_ele_t * pool; /* pool of FEC nodes backing the above maps / tree */
271 : fd_fec_parent_t * parents; /* map of key->parent_key for fast O(1) querying */
272 : fd_fec_children_t * children; /* map of slot->child_offs for fast O(1) querying */
273 : ulong * queue; /* queue of FEC keys for BFS chaining */
274 : fd_fec_out_t * out; /* queue of FEC keys to deliver to application */
275 : };
276 :
277 : FD_PROTOTYPES_BEGIN
278 :
279 : /* Constructors */
280 :
281 : /* fd_fec_chainer_{align,footprint} return the required alignment and
282 : footprint of a memory region suitable for use as chainer with up to
283 : fec_max elements and slot_max slots. */
284 :
285 : FD_FN_CONST static inline ulong
286 0 : fd_fec_chainer_align( void ) {
287 0 : return alignof(fd_fec_chainer_t);
288 0 : }
289 :
290 : FD_FN_CONST static inline ulong
291 0 : fd_fec_chainer_footprint( ulong fec_max ) {
292 0 : int lg_fec_max = fd_ulong_find_msb( fd_ulong_pow2_up( fec_max ) );
293 0 : return FD_LAYOUT_FINI(
294 0 : FD_LAYOUT_APPEND(
295 0 : FD_LAYOUT_APPEND(
296 0 : FD_LAYOUT_APPEND(
297 0 : FD_LAYOUT_APPEND(
298 0 : FD_LAYOUT_APPEND(
299 0 : FD_LAYOUT_APPEND(
300 0 : FD_LAYOUT_APPEND(
301 0 : FD_LAYOUT_APPEND(
302 0 : FD_LAYOUT_APPEND(
303 0 : FD_LAYOUT_INIT,
304 0 : alignof(fd_fec_chainer_t), sizeof(fd_fec_chainer_t) ),
305 0 : fd_fec_ancestry_align(), fd_fec_ancestry_footprint( fec_max ) ),
306 0 : fd_fec_frontier_align(), fd_fec_frontier_footprint( fec_max ) ),
307 0 : fd_fec_orphaned_align(), fd_fec_orphaned_footprint( fec_max ) ),
308 0 : fd_fec_pool_align(), fd_fec_pool_footprint( fec_max ) ),
309 0 : fd_fec_parents_align(), fd_fec_parents_footprint( lg_fec_max ) ),
310 0 : fd_fec_children_align(), fd_fec_children_footprint( lg_fec_max ) ),
311 0 : fd_fec_queue_align(), fd_fec_queue_footprint( fec_max ) ),
312 0 : fd_fec_out_align(), fd_fec_out_footprint( fec_max ) ),
313 0 : fd_fec_chainer_align() );
314 0 : }
315 :
316 : /* fd_fec_chainer_new formats an unused memory region for use as a
317 : chainer. mem is a non-NULL pointer to this region in the local
318 : address space with the required footprint and alignment. */
319 :
320 : void *
321 : fd_fec_chainer_new( void * shmem, ulong fec_max, ulong seed );
322 :
323 : /* fd_fec_chainer_join joins the caller to the chainer. chainer points
324 : to the first byte of the memory region backing the chainer in the
325 : caller's address space.
326 :
327 : Returns a pointer in the local address space to chainer on
328 : success. */
329 :
330 : fd_fec_chainer_t *
331 : fd_fec_chainer_join( void * chainer );
332 :
333 : /* fd_fec_chainer_leave leaves a current local join. Returns a pointer
334 : to the underlying shared memory region on success and NULL on failure
335 : (logs details). Reasons for failure include chainer is NULL. */
336 :
337 : void *
338 : fd_fec_chainer_leave( fd_fec_chainer_t * chainer );
339 :
340 : /* fd_fec_chainer_delete unformats a memory region used as a chainer.
341 : Assumes only the nobody is joined to the region. Returns a pointer
342 : to the underlying shared memory region or NULL if used obviously in
343 : error (e.g. chainer is obviously not a chainer ... logs details).
344 : The ownership of the memory region is transferred to the caller. */
345 :
346 : void *
347 : fd_fec_chainer_delete( void * chainer );
348 :
349 : fd_fec_ele_t *
350 : fd_fec_chainer_init( fd_fec_chainer_t * chainer, ulong slot, fd_hash_t const * merkle_root );
351 :
352 : FD_FN_PURE fd_fec_ele_t *
353 : fd_fec_chainer_query( fd_fec_chainer_t * chainer, ulong slot, uint fec_set_idx );
354 :
355 : /* fd_fec_chainer inserts a new FEC set into chainer. Returns the newly
356 : inserted fd_fec_ele_t, NULL on error. Inserting this FEC set may
357 : result in one or more FEC sets being ready for in-order delivery.
358 : Caller can consume these FEC sets via the deque in chainer->out.
359 :
360 : See top-level documentation for further details on insertion. */
361 :
362 : fd_fec_ele_t *
363 : fd_fec_chainer_insert( fd_fec_chainer_t * chainer,
364 : ulong slot,
365 : uint fec_set_idx,
366 : ushort data_cnt,
367 : int data_complete,
368 : int slot_complete,
369 : ushort parent_off,
370 : fd_hash_t const * merkle_root,
371 : fd_hash_t const * chained_merkle_root );
372 :
373 : /* fd_fec_chainer_publish prunes the fec tree when the wmk is updated. */
374 :
375 : void
376 : fd_fec_chainer_publish( fd_fec_chainer_t * chainer, ulong new_root );
377 :
378 : FD_PROTOTYPES_END
379 :
380 : #endif /* HEADER_fd_src_discof_repair_fd_fec_chainer_h */
|