Line data Source code
1 : #ifndef HEADER_fd_src_choreo_ghost_fd_ghost_h
2 : #define HEADER_fd_src_choreo_ghost_fd_ghost_h
3 :
4 : /* fd_ghost implements Solana's LMD-GHOST ("latest message-driven greedy
5 : heaviest-observed subtree") fork choice rule.
6 :
7 : Protocol details:
8 :
9 : - LMD is an acronym for "latest message-driven". It describes how
10 : votes are counted when picking the best fork. In this scheme, only
11 : a validator's latest vote counts. So if a validator votes for slot
12 : 3 and then slot 5, the vote for 5 overwrites the vote for 3.
13 :
14 : - GHOST is an acronym for "greedy heaviest-observed subtree":
15 :
16 : greedy: for each depth of the tree, pick the locally optimal
17 : child / subtree / fork. This will result in the global
18 : optimal choice.
19 :
20 : heaviest: pick based on the highest stake weight.
21 :
22 : observed: this is the validator's local view, and other validators
23 : may have differing views because they've observed
24 : different votes.
25 :
26 : subtree: pick based on the weight of an entire subtree, not just
27 : an individual node. For example, if slot 3 has 10 stake
28 : and slot 5 has 5 stake, but slot 5 has two children 6
29 : and 7 that each have 3 stake, our weights are
30 :
31 : slot 3 subtree [3] = 10
32 : slot 5 subtree [5, 6, 7] = 11 (5+3+3)
33 :
34 : Therefore slot 5 would be the heaviest.
35 :
36 : In-memory representation:
37 :
38 : - Each tree node is keyed by slot number.
39 :
40 : - Each tree node tracks the amount of stake (`stake`) that has voted
41 : for its slot, as well as the recursive sum of stake for the subtree
42 : rooted at that node (`weight`).
43 :
44 : Link to original GHOST paper: https://eprint.iacr.org/2013/881.pdf.
45 : This is simply a reference for those curious about the etymology, and
46 : not prerequisite reading for understanding this implementation. */
47 :
48 : #include "../fd_choreo_base.h"
49 : #include "../epoch/fd_epoch.h"
50 : #include "../../tango/fseq/fd_fseq.h"
51 :
52 : /* FD_GHOST_USE_HANDHOLDING: Define this to non-zero at compile time
53 : to turn on additional runtime checks and logging. */
54 :
55 : #ifndef FD_GHOST_USE_HANDHOLDING
56 : #define FD_GHOST_USE_HANDHOLDING 1
57 : #endif
58 :
59 : /* fd_ghost_node_t implements a left-child, right-sibling n-ary tree.
60 : Each node maintains the `node_pool` index of its left-most child
61 : (`child_idx`), its immediate-right sibling (`sibling_idx`), and its
62 : parent (`parent_idx`).
63 :
64 : This tree structure is gaddr-safe and supports accesses and
65 : operations from processes with separate local ghost joins. */
66 :
67 : struct __attribute__((aligned(128UL))) fd_ghost_node {
68 : ulong slot; /* slot this node is tracking, also the map key */
69 : ulong next; /* reserved for internal use by fd_pool, fd_map_chain and fd_ghost_publish */
70 : ulong weight; /* amount of stake that has voted (via replay) for this slot or any of its descendants */
71 : ulong replay_stake; /* amount of stake that has voted (via replay) for this slot */
72 : ulong gossip_stake; /* amount of stake that has voted (via gossip) for this slot */
73 : ulong rooted_stake; /* amount of stake that has rooted this slot */
74 : int valid; /* whether this node is valid for fork choice (fd_ghost_head) */
75 : ulong parent_idx; /* index of the parent in the node pool */
76 : ulong child_idx; /* index of the left-child in the node pool */
77 : ulong sibling_idx; /* index of the right-sibling in the node pool */
78 : };
79 : typedef struct fd_ghost_node fd_ghost_node_t;
80 :
81 : #define POOL_NAME fd_ghost_node_pool
82 0 : #define POOL_T fd_ghost_node_t
83 : #include "../../util/tmpl/fd_pool.c"
84 :
85 : #define MAP_NAME fd_ghost_node_map
86 : #define MAP_ELE_T fd_ghost_node_t
87 0 : #define MAP_KEY slot
88 : #include "../../util/tmpl/fd_map_chain.c"
89 :
90 : /* fd_ghost_t is the top-level structure that holds the root of the
91 : tree, as well as the memory pools and map structures for tracking
92 : ghost nodes and votes.
93 :
94 : These structures are bump-allocated and laid out contiguously in
95 : memory from the fd_ghost_t * pointer which points to the beginning of
96 : the memory region.
97 :
98 : ---------------------- <- fd_ghost_t *
99 : | metadata |
100 : ----------------------
101 : | node_pool |
102 : ----------------------
103 : | node_map |
104 : ----------------------
105 :
106 : A valid, initialized ghost is always non-empty. After
107 : `fd_ghost_init` the ghost will always have a root node unless
108 : modified improperly out of ghost's API. */
109 :
110 0 : #define FD_GHOST_MAGIC (0xf17eda2ce7940570UL) /* firedancer ghost version 0 */
111 :
112 : struct __attribute__((aligned(128UL))) fd_ghost {
113 :
114 : /* Metadata */
115 :
116 : ulong magic; /* ==FD_GHOST_MAGIC */
117 : ulong ghost_gaddr; /* wksp gaddr of this in the backing wksp, non-zero gaddr */
118 : ulong seed; /* seed for various hashing function used under the hood, arbitrary */
119 : ulong root_idx; /* node_pool idx of the root */
120 :
121 : /* version fseq. query pre & post read. if value is ULONG_MAX, ghost
122 : is uninitialized or invalid.
123 :
124 : odd: if either pre or post is odd, discard read.
125 : even: if pre == post, read is consistent. */
126 :
127 : ulong ver_gaddr;
128 :
129 : /* The ghost node_pool is a memory pool of tree nodes from which one
130 : is allocated for each slot. The node map is a fd_map_chain to
131 : support fast O(1) querying of ghost nodes by slot. */
132 :
133 : ulong node_pool_gaddr;
134 : ulong node_map_gaddr;
135 : };
136 : typedef struct fd_ghost fd_ghost_t;
137 :
138 : FD_PROTOTYPES_BEGIN
139 :
140 : /* Constructors */
141 :
142 : /* fd_ghost_{align,footprint} return the required alignment and
143 : footprint of a memory region suitable for use as ghost with up to
144 : node_max nodes and vote_max votes. */
145 :
146 : FD_FN_CONST static inline ulong
147 0 : fd_ghost_align( void ) {
148 0 : return alignof(fd_ghost_t);
149 0 : }
150 :
151 : FD_FN_CONST static inline ulong
152 0 : fd_ghost_footprint( ulong node_max ) {
153 0 : return FD_LAYOUT_FINI(
154 0 : FD_LAYOUT_APPEND(
155 0 : FD_LAYOUT_APPEND(
156 0 : FD_LAYOUT_APPEND(
157 0 : FD_LAYOUT_APPEND(
158 0 : FD_LAYOUT_INIT,
159 0 : alignof(fd_ghost_t), sizeof(fd_ghost_t) ),
160 0 : fd_fseq_align(), fd_fseq_footprint() ),
161 0 : fd_ghost_node_pool_align(), fd_ghost_node_pool_footprint( node_max ) ),
162 0 : fd_ghost_node_map_align(), fd_ghost_node_map_footprint( node_max ) ),
163 0 : fd_ghost_align() );
164 0 : }
165 :
166 : /* fd_ghost_new formats an unused memory region for use as a ghost.
167 : mem is a non-NULL pointer to this region in the local address space
168 : with the required footprint and alignment. */
169 :
170 : void *
171 : fd_ghost_new( void * shmem, ulong seed, ulong node_max );
172 :
173 : /* fd_ghost_join joins the caller to the ghost. ghost points to the
174 : first byte of the memory region backing the ghost in the caller's
175 : address space.
176 :
177 : Returns a pointer in the local address space to ghost on success. */
178 :
179 : fd_ghost_t *
180 : fd_ghost_join( void * ghost );
181 :
182 : /* fd_ghost_leave leaves a current local join. Returns a pointer to the
183 : underlying shared memory region on success and NULL on failure (logs
184 : details). Reasons for failure include ghost is NULL. */
185 :
186 : void *
187 : fd_ghost_leave( fd_ghost_t const * ghost );
188 :
189 : /* fd_ghost_delete unformats a memory region used as a ghost.
190 : Assumes only the nobody is joined to the region. Returns a
191 : pointer to the underlying shared memory region or NULL if used
192 : obviously in error (e.g. ghost is obviously not a ghost ... logs
193 : details). The ownership of the memory region is transferred to the
194 : caller. */
195 :
196 : void *
197 : fd_ghost_delete( void * ghost );
198 :
199 : /* fd_ghost_init initializes a ghost. Assumes ghost is a valid local
200 : join and no one else is joined. root is the initial root ghost will
201 : use. This is the snapshot slot if booting from a snapshot, 0 if the
202 : genesis slot.
203 :
204 : In general, this should be called by the same process that formatted
205 : ghost's memory, ie. the caller of fd_ghost_new. */
206 :
207 : void
208 : fd_ghost_init( fd_ghost_t * ghost, ulong root );
209 :
210 : /* Accessors */
211 :
212 : /* fd_ghost_wksp returns the local join to the wksp backing the ghost.
213 : The lifetime of the returned pointer is at least as long as the
214 : lifetime of the local join. Assumes ghost is a current local
215 : join. */
216 :
217 : FD_FN_PURE static inline fd_wksp_t *
218 0 : fd_ghost_wksp( fd_ghost_t const * ghost ) {
219 0 : return (fd_wksp_t *)( ( (ulong)ghost ) - ghost->ghost_gaddr );
220 0 : }
221 :
222 : FD_FN_PURE static inline ulong *
223 0 : fd_ghost_ver( fd_ghost_t const * ghost ) {
224 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->ver_gaddr );
225 0 : }
226 :
227 : FD_FN_PURE static inline fd_ghost_node_t *
228 0 : fd_ghost_node_pool( fd_ghost_t * ghost ) {
229 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->node_pool_gaddr );
230 0 : }
231 :
232 : FD_FN_PURE static inline fd_ghost_node_t const *
233 0 : fd_ghost_node_pool_const( fd_ghost_t const * ghost ) {
234 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->node_pool_gaddr );
235 0 : }
236 :
237 : FD_FN_PURE static inline fd_ghost_node_map_t *
238 0 : fd_ghost_node_map( fd_ghost_t * ghost ) {
239 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->node_map_gaddr );
240 0 : }
241 :
242 : FD_FN_PURE static inline fd_ghost_node_map_t const *
243 0 : fd_ghost_node_map_const( fd_ghost_t const * ghost ) {
244 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->node_map_gaddr );
245 0 : }
246 :
247 : /* fd_ghost_root returns a pointer to the ghost root. Assumes ghost is
248 : a current local join. */
249 :
250 : FD_FN_PURE static inline fd_ghost_node_t const *
251 0 : fd_ghost_root( fd_ghost_t const * ghost ) {
252 0 : return fd_ghost_node_pool_ele_const( fd_ghost_node_pool_const( ghost ), ghost->root_idx );
253 0 : }
254 :
255 : /* fd_ghost_parent returns a pointer to the `parent` of `child`.
256 : Assumes ghost is a current local join and child is a valid pointer
257 : to a node_pool element inside ghost. */
258 :
259 : FD_FN_PURE static inline fd_ghost_node_t const *
260 0 : fd_ghost_parent( fd_ghost_t const * ghost, fd_ghost_node_t const * child ) {
261 0 : return fd_ghost_node_pool_ele_const( fd_ghost_node_pool_const( ghost ), child->parent_idx );
262 0 : }
263 :
264 : /* fd_ghost_child returns a pointer to the left-most child of `parent`.
265 : Assumes ghost is a current local join and parent is a valid pointer
266 : to a node_pool element inside ghost. */
267 :
268 : FD_FN_PURE static inline fd_ghost_node_t const *
269 0 : fd_ghost_child( fd_ghost_t const * ghost, fd_ghost_node_t const * parent ) {
270 0 : return fd_ghost_node_pool_ele_const( fd_ghost_node_pool_const( ghost ), parent->child_idx );
271 0 : }
272 :
273 : /* fd_ghost_head greedily traverses the ghost beginning from `node`,
274 : returning the ending leaf of the traversal (see top-level
275 : documentation for traversal details). Assumes ghost is a current
276 : local join and has been initialized with fd_ghost_init and is
277 : therefore non-empty. */
278 :
279 : FD_FN_PURE fd_ghost_node_t const *
280 : fd_ghost_head( fd_ghost_t const * ghost, fd_ghost_node_t const * node );
281 :
282 : /* fd_ghost_query returns the node keyed by `slot` or NULL if not
283 : found. */
284 :
285 : FD_FN_PURE static inline fd_ghost_node_t const *
286 0 : fd_ghost_query( fd_ghost_t const * ghost, ulong slot ) {
287 0 : fd_ghost_node_map_t const * node_map = fd_ghost_node_map_const( ghost );
288 0 : fd_ghost_node_t const * node_pool = fd_ghost_node_pool_const( ghost );
289 0 : return fd_ghost_node_map_ele_query_const( node_map, &slot, NULL, node_pool );
290 0 : }
291 :
292 : /* fd_ghost_gca returns the greatest common ancestor of slot1, slot2 in
293 : ghost. Assumes slot1 or slot2 are present in ghost (warns and
294 : returns NULL with handholding enabled). This is guaranteed to be
295 : non-NULL if slot1 and slot2 are both present. */
296 :
297 : FD_FN_PURE fd_ghost_node_t const *
298 : fd_ghost_gca( fd_ghost_t const * ghost, ulong slot1, ulong slot2 );
299 :
300 : /* fd_ghost_is_ancestor returns 1 if `ancestor` is `slot`'s ancestor, 0
301 : otherwise. Also returns 0 if either `ancestor` or `slot` are not in
302 : ghost. */
303 :
304 : FD_FN_PURE int
305 : fd_ghost_is_ancestor( fd_ghost_t const * ghost, ulong ancestor, ulong slot );
306 :
307 : /* Operations */
308 :
309 : /* fd_ghost_insert inserts a new node keyed by `slot` into the ghost.
310 : Assumes slot >= ghost->smr, slot is not already in ghost, parent_slot
311 : is already in ghost, and the node pool has a free element (if
312 : handholding is enabled, explicitly checks and errors). Returns the
313 : inserted ghost node. */
314 :
315 : fd_ghost_node_t *
316 : fd_ghost_insert( fd_ghost_t * ghost, ulong parent_slot, ulong slot );
317 :
318 : /* fd_ghost_replay_vote votes for slot, adding pubkey's stake to the
319 : `stake` field for slot and to the `weight` field for both slot and
320 : slot's ancestors. If pubkey has previously voted, pubkey's stake is
321 : also subtracted from `weight` for its previous vote slot and its
322 : ancestors.
323 :
324 : Assumes slot is present in ghost (if handholding is enabled,
325 : explicitly checks and errors). Returns the ghost node keyed by slot.
326 :
327 : TODO the implementation can be made more efficient by
328 : short-circuiting and doing fewer traversals. Currently this is
329 : bounded to O(h), where h is the height of ghost. */
330 :
331 : void
332 : fd_ghost_replay_vote( fd_ghost_t * ghost, fd_voter_t * voter, ulong slot );
333 :
334 : /* fd_ghost_gossip_vote adds stake amount to the gossip_stake field of
335 : slot.
336 :
337 : Assumes slot is present in ghost (if handholding is enabled,
338 : explicitly checks and errors). Returns the ghost node keyed by slot.
339 :
340 : Unlike fd_ghost_replay_vote, this stake is not propagated to
341 : the weight field for slot and slot's ancestors. It is only counted
342 : towards slot itself, as gossip votes are only used for optimistic
343 : confirmation and not fork choice. */
344 :
345 : void
346 : fd_ghost_gossip_vote( fd_ghost_t * ghost, fd_voter_t * voter, ulong slot );
347 :
348 : /* fd_ghost_rooted_vote adds stake amount to the rooted_stake field of
349 : slot.
350 :
351 : Assumes slot is present in ghost (if handholding is enabled,
352 : explicitly checks and errors). Returns the ghost node keyed by slot.
353 :
354 : Note rooting a slot implies rooting its ancestor, but ghost does not
355 : explicitly track this. */
356 :
357 : void
358 : fd_ghost_rooted_vote( fd_ghost_t * ghost, fd_voter_t * voter, ulong root );
359 :
360 : /* fd_ghost_publish publishes slot as the new ghost root, setting the
361 : subtree beginning from slot as the new ghost tree (ie. slot and all
362 : its descendants). Prunes all nodes not in slot's ancestry. Assumes
363 : slot is present in ghost. Returns the new root. */
364 :
365 : fd_ghost_node_t const *
366 : fd_ghost_publish( fd_ghost_t * ghost, ulong slot );
367 :
368 : /* Misc */
369 :
370 : /* fd_ghost_verify checks the ghost is not obviously corrupt, as well as
371 : that ghost invariants are being preserved ie. the weight of every
372 : node is >= the sum of weights of its direct children. Returns 0 if
373 : verify succeeds, -1 otherwise. */
374 :
375 : FD_FN_PURE int
376 : fd_ghost_verify( fd_ghost_t const * ghost );
377 :
378 : /* fd_ghost_print pretty-prints a formatted ghost tree. Printing begins
379 : from `node` (it will appear as the root in the print output).
380 :
381 : The most straightforward and commonly used printing pattern is:
382 : `fd_ghost_print( ghost, fd_ghost_root( ghost ) )`
383 :
384 : This would print ghost beginning from the root.
385 :
386 : Alternatively, caller can print a more localized view, for example
387 : starting from the grandparent of the most recently executed slot:
388 :
389 : ```
390 : fd_ghost_node_t const * node = fd_ghost_query( slot );
391 : fd_ghost_print( ghost, fd_ghost_parent( fd_ghost_parent( node ) ) )
392 : ```
393 :
394 : Callers should add null-checks as appropriate in actual usage. */
395 :
396 : void
397 : fd_ghost_print( fd_ghost_t const * ghost, fd_epoch_t const * epoch, fd_ghost_node_t const * node );
398 :
399 : FD_PROTOTYPES_END
400 :
401 : #endif /* HEADER_fd_src_choreo_ghost_fd_ghost_h */
|