Line data Source code
1 : #ifndef HEADER_fd_src_choreo_ghost_fd_ghost_h
2 : #define HEADER_fd_src_choreo_ghost_fd_ghost_h
3 :
4 : /* fd_ghost implements Solana's LMD-GHOST ("latest message-driven greedy
5 : heaviest-observed subtree") fork choice rule.
6 :
7 : Protocol details:
8 :
9 : - LMD is an acronym for "latest message-driven". It describes how
10 : votes are counted when picking the best fork. In this scheme, only
11 : a validator's latest vote counts. So if a validator votes for slot
12 : 3 and then slot 5, the vote for 5 overwrites the vote for 3.
13 :
14 : - GHOST is an acronym for "greedy heaviest-observed subtree":
15 :
16 : greedy: for each depth of the tree, pick the locally optimal
17 : child / subtree / fork. This will result in the global
18 : optimal choice.
19 :
20 : heaviest: pick based on the highest stake weight.
21 :
22 : observed: this is the validator's local view, and other validators
23 : may have differing views because they've observed
24 : different votes.
25 :
26 : subtree: pick based on the weight of an entire subtree, not just
27 : an individual node. For example, if slot 3 has 10 stake
28 : and slot 5 has 5 stake, but slot 5 has two children 6
29 : and 7 that each have 3 stake, our weights are
30 :
31 : slot 3 subtree [3] = 10
32 : slot 5 subtree [5, 6, 7] = 11 (5+3+3)
33 :
34 : Therefore slot 5 would be the heaviest.
35 :
36 : In-memory representation:
37 :
38 : - Each tree node is keyed by slot number.
39 :
40 : - Each tree node tracks the amount of stake (`stake`) that has voted
41 : for its slot, as well as the recursive sum of stake for the subtree
42 : rooted at that node (`weight`).
43 :
44 : Link to original GHOST paper: https://eprint.iacr.org/2013/881.pdf.
45 : This is simply a reference for those curious about the etymology, and
46 : not prerequisite reading for understanding this implementation. */
47 :
48 : #include "../fd_choreo_base.h"
49 : #include "../epoch/fd_epoch.h"
50 : #include "../../tango/fseq/fd_fseq.h"
51 :
52 : /* FD_GHOST_USE_HANDHOLDING: Define this to non-zero at compile time
53 : to turn on additional runtime checks and logging. */
54 :
55 : #ifndef FD_GHOST_USE_HANDHOLDING
56 : #define FD_GHOST_USE_HANDHOLDING 1
57 : #endif
58 :
59 : /* fd_ghost_node_t implements a left-child, right-sibling n-ary tree.
60 : Each node maintains the `node_pool` index of its left-most child
61 : (`child_idx`), its immediate-right sibling (`sibling_idx`), and its
62 : parent (`parent_idx`).
63 :
64 : This tree structure is gaddr-safe and supports accesses and
65 : operations from processes with separate local ghost joins. */
66 :
67 : struct __attribute__((aligned(128UL))) fd_ghost_node {
68 : ulong slot; /* slot this node is tracking, also the map key */
69 : ulong next; /* reserved for internal use by fd_pool, fd_map_chain and fd_ghost_publish */
70 : ulong weight; /* amount of stake that has voted (via replay) for this slot or any of its descendants */
71 : ulong replay_stake; /* amount of stake that has voted (via replay) for this slot */
72 : ulong gossip_stake; /* amount of stake that has voted (via gossip) for this slot */
73 : ulong rooted_stake; /* amount of stake that has rooted this slot */
74 : int valid; /* whether this node is valid for fork choice (fd_ghost_head) */
75 : ulong parent_idx; /* index of the parent in the node pool */
76 : ulong child_idx; /* index of the left-child in the node pool */
77 : ulong sibling_idx; /* index of the right-sibling in the node pool */
78 : };
79 : typedef struct fd_ghost_node fd_ghost_node_t;
80 :
81 : #define POOL_NAME fd_ghost_node_pool
82 0 : #define POOL_T fd_ghost_node_t
83 : #include "../../util/tmpl/fd_pool.c"
84 :
85 : #define MAP_NAME fd_ghost_node_map
86 : #define MAP_ELE_T fd_ghost_node_t
87 0 : #define MAP_KEY slot
88 : #include "../../util/tmpl/fd_map_chain.c"
89 :
90 : /* fd_ghost_t is the top-level structure that holds the root of the
91 : tree, as well as the memory pools and map structures for tracking
92 : ghost nodes and votes.
93 :
94 : These structures are bump-allocated and laid out contiguously in
95 : memory from the fd_ghost_t * pointer which points to the beginning of
96 : the memory region.
97 :
98 : ---------------------- <- fd_ghost_t *
99 : | metadata |
100 : ----------------------
101 : | node_pool |
102 : ----------------------
103 : | node_map |
104 : ----------------------
105 :
106 : A valid, initialized ghost is always non-empty. After
107 : `fd_ghost_init` the ghost will always have a root node unless
108 : modified improperly out of ghost's API. */
109 :
110 0 : #define FD_GHOST_MAGIC (0xf17eda2ce7940570UL) /* firedancer ghost version 0 */
111 :
112 : struct __attribute__((aligned(128UL))) fd_ghost {
113 :
114 : /* Metadata */
115 :
116 : ulong magic; /* ==FD_GHOST_MAGIC */
117 : ulong ghost_gaddr; /* wksp gaddr of this in the backing wksp, non-zero gaddr */
118 : ulong seed; /* seed for various hashing function used under the hood, arbitrary */
119 : ulong root_idx; /* node_pool idx of the root */
120 :
121 : /* version fseq. query pre & post read. if value is ULONG_MAX, ghost
122 : is uninitialized or invalid.
123 :
124 : odd: if either pre or post is odd, discard read.
125 : even: if pre == post, read is consistent. */
126 :
127 : ulong ver_gaddr;
128 :
129 : /* The ghost node_pool is a memory pool of tree nodes from which one
130 : is allocated for each slot. The node map is a fd_map_chain to
131 : support fast O(1) querying of ghost nodes by slot. */
132 :
133 : ulong node_pool_gaddr;
134 : ulong node_map_gaddr;
135 : };
136 : typedef struct fd_ghost fd_ghost_t;
137 :
138 : FD_PROTOTYPES_BEGIN
139 :
140 : /* Constructors */
141 :
142 : /* fd_ghost_{align,footprint} return the required alignment and
143 : footprint of a memory region suitable for use as ghost with up to
144 : node_max nodes and vote_max votes. */
145 :
146 : FD_FN_CONST static inline ulong
147 0 : fd_ghost_align( void ) {
148 0 : return alignof(fd_ghost_t);
149 0 : }
150 :
151 : FD_FN_CONST static inline ulong
152 0 : fd_ghost_footprint( ulong node_max ) {
153 : /* clang-format on */
154 0 : return FD_LAYOUT_FINI(
155 0 : FD_LAYOUT_APPEND(
156 0 : FD_LAYOUT_APPEND(
157 0 : FD_LAYOUT_APPEND(
158 0 : FD_LAYOUT_APPEND(
159 0 : FD_LAYOUT_INIT,
160 0 : alignof(fd_ghost_t), sizeof(fd_ghost_t) ),
161 0 : fd_fseq_align(), fd_fseq_footprint() ),
162 0 : fd_ghost_node_pool_align(), fd_ghost_node_pool_footprint( node_max ) ),
163 0 : fd_ghost_node_map_align(), fd_ghost_node_map_footprint( node_max ) ),
164 0 : fd_ghost_align() );
165 0 : }
166 :
167 : /* fd_ghost_new formats an unused memory region for use as a ghost.
168 : mem is a non-NULL pointer to this region in the local address space
169 : with the required footprint and alignment. */
170 :
171 : void *
172 : fd_ghost_new( void * shmem, ulong seed, ulong node_max );
173 :
174 : /* fd_ghost_join joins the caller to the ghost. ghost points to the
175 : first byte of the memory region backing the ghost in the caller's
176 : address space.
177 :
178 : Returns a pointer in the local address space to ghost on success. */
179 :
180 : fd_ghost_t *
181 : fd_ghost_join( void * ghost );
182 :
183 : /* fd_ghost_leave leaves a current local join. Returns a pointer to the
184 : underlying shared memory region on success and NULL on failure (logs
185 : details). Reasons for failure include ghost is NULL. */
186 :
187 : void *
188 : fd_ghost_leave( fd_ghost_t const * ghost );
189 :
190 : /* fd_ghost_delete unformats a memory region used as a ghost.
191 : Assumes only the nobody is joined to the region. Returns a
192 : pointer to the underlying shared memory region or NULL if used
193 : obviously in error (e.g. ghost is obviously not a ghost ... logs
194 : details). The ownership of the memory region is transferred to the
195 : caller. */
196 :
197 : void *
198 : fd_ghost_delete( void * ghost );
199 :
200 : /* fd_ghost_init initializes a ghost. Assumes ghost is a valid local
201 : join and no one else is joined. root is the initial root ghost will
202 : use. This is the snapshot slot if booting from a snapshot, 0 if the
203 : genesis slot.
204 :
205 : In general, this should be called by the same process that formatted
206 : ghost's memory, ie. the caller of fd_ghost_new. */
207 :
208 : void
209 : fd_ghost_init( fd_ghost_t * ghost, ulong root );
210 :
211 : /* Accessors */
212 :
213 : /* fd_ghost_wksp returns the local join to the wksp backing the ghost.
214 : The lifetime of the returned pointer is at least as long as the
215 : lifetime of the local join. Assumes ghost is a current local
216 : join. */
217 :
218 : FD_FN_PURE static inline fd_wksp_t *
219 0 : fd_ghost_wksp( fd_ghost_t const * ghost ) {
220 0 : return (fd_wksp_t *)( ( (ulong)ghost ) - ghost->ghost_gaddr );
221 0 : }
222 :
223 : FD_FN_PURE static inline ulong *
224 0 : fd_ghost_ver( fd_ghost_t const * ghost ) {
225 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->ver_gaddr );
226 0 : }
227 :
228 : FD_FN_PURE static inline fd_ghost_node_t *
229 0 : fd_ghost_node_pool( fd_ghost_t * ghost ) {
230 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->node_pool_gaddr );
231 0 : }
232 :
233 : FD_FN_PURE static inline fd_ghost_node_t const *
234 0 : fd_ghost_node_pool_const( fd_ghost_t const * ghost ) {
235 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->node_pool_gaddr );
236 0 : }
237 :
238 : FD_FN_PURE static inline fd_ghost_node_map_t *
239 0 : fd_ghost_node_map( fd_ghost_t * ghost ) {
240 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->node_map_gaddr );
241 0 : }
242 :
243 : FD_FN_PURE static inline fd_ghost_node_map_t const *
244 0 : fd_ghost_node_map_const( fd_ghost_t const * ghost ) {
245 0 : return fd_wksp_laddr_fast( fd_ghost_wksp( ghost ), ghost->node_map_gaddr );
246 0 : }
247 :
248 : /* fd_ghost_root returns a pointer to the ghost root. Assumes ghost is
249 : a current local join. */
250 :
251 : FD_FN_PURE static inline fd_ghost_node_t const *
252 0 : fd_ghost_root( fd_ghost_t const * ghost ) {
253 0 : return fd_ghost_node_pool_ele_const( fd_ghost_node_pool_const( ghost ), ghost->root_idx );
254 0 : }
255 :
256 : /* fd_ghost_parent returns a pointer to the `parent` of `child`.
257 : Assumes ghost is a current local join and child is a valid pointer
258 : to a node_pool element inside ghost. */
259 :
260 : FD_FN_PURE static inline fd_ghost_node_t const *
261 0 : fd_ghost_parent( fd_ghost_t const * ghost, fd_ghost_node_t const * child ) {
262 0 : return fd_ghost_node_pool_ele_const( fd_ghost_node_pool_const( ghost ), child->parent_idx );
263 0 : }
264 :
265 : /* fd_ghost_child returns a pointer to the left-most child of `parent`.
266 : Assumes ghost is a current local join and parent is a valid pointer
267 : to a node_pool element inside ghost. */
268 :
269 : FD_FN_PURE static inline fd_ghost_node_t const *
270 0 : fd_ghost_child( fd_ghost_t const * ghost, fd_ghost_node_t const * parent ) {
271 0 : return fd_ghost_node_pool_ele_const( fd_ghost_node_pool_const( ghost ), parent->child_idx );
272 0 : }
273 :
274 : /* fd_ghost_head greedily traverses the ghost beginning from `root` (not
275 : necessarily the root of the ghost tree) and returns the heaviest leaf
276 : of the traversal (see top-level documentation for traversal details).
277 : Assumes ghost is a current local join and has been initialized with
278 : fd_ghost_init and is therefore non-empty. */
279 :
280 : fd_ghost_node_t const *
281 : fd_ghost_head( fd_ghost_t const * ghost, fd_ghost_node_t const * root );
282 :
283 : /* fd_ghost_query returns the node keyed by `slot` or NULL if not
284 : found. */
285 :
286 : FD_FN_PURE static inline fd_ghost_node_t const *
287 0 : fd_ghost_query( fd_ghost_t const * ghost, ulong slot ) {
288 0 : fd_ghost_node_map_t const * node_map = fd_ghost_node_map_const( ghost );
289 0 : fd_ghost_node_t const * node_pool = fd_ghost_node_pool_const( ghost );
290 0 : return fd_ghost_node_map_ele_query_const( node_map, &slot, NULL, node_pool );
291 0 : }
292 :
293 : /* fd_ghost_gca returns the greatest common ancestor of slot1, slot2 in
294 : ghost. Assumes slot1 or slot2 are present in ghost (warns and
295 : returns NULL with handholding enabled). This is guaranteed to be
296 : non-NULL if slot1 and slot2 are both present. */
297 :
298 : fd_ghost_node_t const *
299 : fd_ghost_gca( fd_ghost_t const * ghost, ulong slot1, ulong slot2 );
300 :
301 : /* fd_ghost_is_ancestor returns 1 if `ancestor` is `slot`'s ancestor, 0
302 : otherwise. Also returns 0 if either `ancestor` or `slot` are not in
303 : ghost. */
304 :
305 : int
306 : fd_ghost_is_ancestor( fd_ghost_t const * ghost, ulong ancestor, ulong slot );
307 :
308 : /* Operations */
309 :
310 : /* fd_ghost_insert inserts a new node keyed by `slot` into the ghost.
311 : Assumes slot >= ghost->smr, slot is not already in ghost, parent_slot
312 : is already in ghost, and the node pool has a free element (if
313 : handholding is enabled, explicitly checks and errors). Returns the
314 : inserted ghost node. */
315 :
316 : fd_ghost_node_t *
317 : fd_ghost_insert( fd_ghost_t * ghost, ulong parent_slot, ulong slot );
318 :
319 : /* fd_ghost_replay_vote votes for slot, adding pubkey's stake to the
320 : `stake` field for slot and to the `weight` field for both slot and
321 : slot's ancestors. If pubkey has previously voted, pubkey's stake is
322 : also subtracted from `weight` for its previous vote slot and its
323 : ancestors.
324 :
325 : Assumes slot is present in ghost (if handholding is enabled,
326 : explicitly checks and errors). Returns the ghost node keyed by slot.
327 :
328 : TODO the implementation can be made more efficient by
329 : short-circuiting and doing fewer traversals. Currently this is
330 : bounded to O(h), where h is the height of ghost. */
331 :
332 : void
333 : fd_ghost_replay_vote( fd_ghost_t * ghost, fd_voter_t * voter, ulong slot );
334 :
335 : /* fd_ghost_gossip_vote adds stake amount to the gossip_stake field of
336 : slot.
337 :
338 : Assumes slot is present in ghost (if handholding is enabled,
339 : explicitly checks and errors). Returns the ghost node keyed by slot.
340 :
341 : Unlike fd_ghost_replay_vote, this stake is not propagated to
342 : the weight field for slot and slot's ancestors. It is only counted
343 : towards slot itself, as gossip votes are only used for optimistic
344 : confirmation and not fork choice. */
345 :
346 : void
347 : fd_ghost_gossip_vote( fd_ghost_t * ghost, fd_voter_t * voter, ulong slot );
348 :
349 : /* fd_ghost_rooted_vote adds stake amount to the rooted_stake field of
350 : slot.
351 :
352 : Assumes slot is present in ghost (if handholding is enabled,
353 : explicitly checks and errors). Returns the ghost node keyed by slot.
354 :
355 : Note rooting a slot implies rooting its ancestor, but ghost does not
356 : explicitly track this. */
357 :
358 : void
359 : fd_ghost_rooted_vote( fd_ghost_t * ghost, fd_voter_t * voter, ulong root );
360 :
361 : /* fd_ghost_publish publishes slot as the new ghost root, setting the
362 : subtree beginning from slot as the new ghost tree (ie. slot and all
363 : its descendants). Prunes all nodes not in slot's ancestry. Assumes
364 : slot is present in ghost. Returns the new root. */
365 :
366 : fd_ghost_node_t const *
367 : fd_ghost_publish( fd_ghost_t * ghost, ulong slot );
368 :
369 : /* Misc */
370 :
371 : /* fd_ghost_verify checks the ghost is not obviously corrupt, as well as
372 : that ghost invariants are being preserved ie. the weight of every
373 : node is >= the sum of weights of its direct children. Returns 0 if
374 : verify succeeds, -1 otherwise. */
375 :
376 : int
377 : fd_ghost_verify( fd_ghost_t const * ghost );
378 :
379 : /* fd_ghost_print pretty-prints a formatted ghost tree. Printing begins
380 : from `node` (it will appear as the root in the print output).
381 :
382 : The most straightforward and commonly used printing pattern is:
383 : `fd_ghost_print( ghost, fd_ghost_root( ghost ) )`
384 :
385 : This would print ghost beginning from the root.
386 :
387 : Alternatively, caller can print a more localized view, for example
388 : starting from the grandparent of the most recently executed slot:
389 :
390 : ```
391 : fd_ghost_node_t const * node = fd_ghost_query( slot );
392 : fd_ghost_print( ghost, fd_ghost_parent( fd_ghost_parent( node ) ) )
393 : ```
394 :
395 : Callers should add null-checks as appropriate in actual usage. */
396 :
397 : void
398 : fd_ghost_print( fd_ghost_t const * ghost, fd_epoch_t const * epoch, fd_ghost_node_t const * node );
399 :
400 : FD_PROTOTYPES_END
401 :
402 : #endif /* HEADER_fd_src_choreo_ghost_fd_ghost_h */
|