LCOV - code coverage report
Current view: top level - choreo/ghost - fd_ghost.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 2 4 50.0 %
Date: 2026-07-02 05:42:57 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_choreo_ghost_fd_ghost_h
       2             : #define HEADER_fd_src_choreo_ghost_fd_ghost_h
       3             : 
       4             : /* fd_ghost implements Solana's LMD-GHOST ("latest message-driven greedy
       5             :    heaviest-observed subtree") fork choice rule.
       6             : 
       7             :    LMD ("latest message-driven") means only a validator's latest vote
       8             :    counts.  If a validator votes for one fork than subsequently votes
       9             :    for a different fork, their vote only counts towards the latter fork
      10             :    and not the former.
      11             : 
      12             :    GHOST ("greedy heaviest-observed subtree") describes the fork choice
      13             :    rule.  Forks form a tree, where each node is a block.  Here's an
      14             :    example of a fork tree in which every block is labeled with its slot:
      15             : 
      16             :          /-- 3
      17             :    1-- 2
      18             :          \-- 4
      19             : 
      20             :    In the above tree 3 and 4 are different forks.  The responsibility of
      21             :    fork choice is to decide whether the validator should vote for 3 or 4
      22             :    which ultimately determines which fork the cluster converges on.
      23             : 
      24             :    In Solana, votes are stake-weighted.  Here is the same tree with
      25             :    stakes associated with each block.
      26             : 
      27             :                      /-- 3 (30%)
      28             :    1 (80%) -- 2 (70%)
      29             :                      \-- 4 (38%)
      30             : 
      31             :    80% of stake voted for 1, 70% for 2, 30% for 3 and 38% for 4.  How
      32             :    does fork choice pick 3 or 4?  It traverses down the tree, beginning
      33             :    from the root (1), then picks (2), then picks (4) where it terminates
      34             :    and returns 4 as the best leaf.
      35             : 
      36             :    greedy:   fork choice is a greedy algorithm.  During traversal, on
      37             :              each level of the tree it picks the locally optimal value.
      38             : 
      39             :    heaviest: pick based on the heaviest (highest) stake.
      40             : 
      41             :    observed: this is the validator's local view, and other validators
      42             :              may have differing views because they've observed
      43             :              different votes or different forks.
      44             : 
      45             :    subtree:  sum the vote stake of an entire subtree rooted at a given
      46             :              block, not just votes for the individual block itself.  In
      47             :              the tree above, 1 and 2 both have strictly more stake than
      48             :              3 or 4.  That is because the stake for 3 and 4 both rolled
      49             :              up into 2, and the stake for 2 rolled up into 1.  There
      50             :              were also votes for 1 and 2 that weren't just roll-ups so
      51             :              the total stake for a parent can exceed (>=) the sum of its
      52             :              children.
      53             : 
      54             :    The above diagrams used slot numbers for simplicity but ghost in fact
      55             :    uses the `block_id`, a 32-byte hash that uniquely identifies a block,
      56             :    to key the elements of the tree.  The block_id ensures that when
      57             :    there is equivocation (two or more blocks labeled with the same slot)
      58             :    the blocks can be disambiguated.
      59             : 
      60             :    Ghost handles equivocation by marking forks invalid for fork choice
      61             :    if any block along that fork equivocates.  For example, consider the
      62             :    following tree:
      63             : 
      64             :                       /-- 4'(30%)
      65             :    1 (80%) -- 2 (70%)
      66             :                      \-- 4 (38%)
      67             : 
      68             :    This is the same tree from earlier except 3 has been replaced with
      69             :    4'.  There are two equivocating blocks for slot 4: how does ghost
      70             :    handle this?  Ghost marks both 4 and 4' as invalid for fork choice.
      71             :    So in this example, fork choice will pick 2 as the best leaf.
      72             : 
      73             :    Ghost can mark a fork valid again if it becomes "duplicate confirmed"
      74             :    ie. it has received votes from >=52% of the cluster.  Revisiting the
      75             :    above tree, modified:
      76             : 
      77             :                       /-- 4'(30%)
      78             :    1 (80%) -- 2 (70%)
      79             :                      \-- 4 (52%) <- gossip duplicate confirmed (>=52%)
      80             : 
      81             :    Now that 4 is "duplicate confirmed", ghost marks 4 as valid again.
      82             :    Fork choice picks 4 as the best leaf.  Note gossip duplicate
      83             :    confirmation is separately tracked outside of fd_ghost.  See the
      84             :    fd_ghost API for how that works. */
      85             : 
      86             : #include "../fd_choreo_base.h"
      87             : 
      88             : typedef struct fd_ghost fd_ghost_t; /* forward decl*/
      89             : 
      90             : /* fd_ghost_blk_t implements a left-child, right-sibling n-ary tree.
      91             :    Each ele maintains the `pool` index of its left-most child
      92             :    (`child_idx`), its immediate-right sibling (`sibling_idx`), and its
      93             :    parent (`parent_idx`).
      94             : 
      95             :    This tree structure is gaddr-safe and supports accesses and
      96             :    operations from processes with separate local ghost joins. */
      97             : 
      98             : struct __attribute__((aligned(128UL))) fd_ghost_blk {
      99             :   fd_hash_t id;          /* block_id (merkle root of the last FEC set in the slot) */
     100             :   ulong     slot;        /* slot this ele is tracking */
     101             :   ulong     next;        /* reserved for internal use by fd_pool, fd_map_chain and fd_ghost_publish */
     102             :   ulong     parent;      /* pool idx of the parent */
     103             :   ulong     child;       /* pool idx of the left-child */
     104             :   ulong     sibling;     /* pool idx of the right-sibling */
     105             :   ulong     stake;       /* sum of stake that has voted for this slot or any of its descendants */
     106             :   ulong     total_stake; /* total stake for this blk */
     107             :   int       valid;       /* whether this block is valid for fork choice. an equivocating block is valid iff duplicate confirmed */
     108             :   ulong     vtr_dlist_gaddr; /* wksp gaddr of the dlist of vtrs whose prev_block_id is this blk's id */
     109             :   ulong     bank_seq;    /* app-wide bank sequence number of this block (fd_bank.bank_seq), or ULONG_MAX until associated with a replayed bank */
     110             : };
     111             : typedef struct fd_ghost_blk fd_ghost_blk_t;
     112             : 
     113             : /* fd_ghost_vtr_t keeps track of what a voter previously voted for.  A
     114             :    vtr lives on exactly one ghost blk's vtr_dlist at any time, identified
     115             :    by prev_block_id.  When the voter switches forks, the vtr is moved
     116             :    off the old blk's dlist onto the new blk's dlist.  When a blk is
     117             :    pruned by fd_ghost_publish, every vtr on its dlist is released back
     118             :    to the vtr_pool; if those voters vote again on a non-pruned blk, a
     119             :    fresh vtr is acquired. */
     120             : 
     121             : struct __attribute__((aligned(128UL))) fd_ghost_vtr {
     122             :   fd_pubkey_t addr;          /* map key, vote account address */
     123             :   ulong       next;          /* reserved for internal use by fd_pool */
     124             :   struct {
     125             :     ulong prev;
     126             :     ulong next;
     127             :   } map;                     /* reserved for internal use by vtr_map_chain */
     128             :   struct {
     129             :     ulong prev;
     130             :     ulong next;
     131             :   } dlist;                   /* reserved for internal use by vtr_dlist */
     132             :   ulong       prev_stake;    /* previous vote stake (vote can be from prior epoch) */
     133             :   ulong       prev_slot;     /* previous vote slot */
     134             :   fd_hash_t   prev_block_id; /* previous vote block_id  */
     135             : };
     136             : typedef struct fd_ghost_vtr fd_ghost_vtr_t;
     137             : 
     138             : FD_PROTOTYPES_BEGIN
     139             : 
     140             : /* Constructors */
     141             : 
     142             : /* fd_ghost_{align,footprint} return the required alignment and
     143             :    footprint of a memory region suitable for use as ghost with up to
     144             :    blk_max blocks and vtr_max voters. */
     145             : 
     146             : FD_FN_CONST ulong
     147             : fd_ghost_align( void );
     148             : 
     149             : FD_FN_CONST ulong
     150             : fd_ghost_footprint( ulong blk_max,
     151             :                     ulong vtr_max );
     152             : 
     153             : /* fd_ghost_new formats an unused memory region for use as a ghost.
     154             :    mem is a non-NULL pointer to this region in the local address space
     155             :    with the required footprint and alignment. */
     156             : 
     157             : void *
     158             : fd_ghost_new( void * shmem,
     159             :               ulong  blk_max,
     160             :               ulong  vtr_max,
     161             :               ulong  seed );
     162             : 
     163             : /* fd_ghost_join joins the caller to the ghost.  ghost points to the
     164             :    first byte of the memory region backing the ghost in the caller's
     165             :    address space.
     166             : 
     167             :    Returns a pointer in the local address space to ghost on success. */
     168             : 
     169             : fd_ghost_t *
     170             : fd_ghost_join( void * ghost );
     171             : 
     172             : /* fd_ghost_leave leaves a current local join.  Returns a pointer to the
     173             :    underlying shared memory region on success and NULL on failure (logs
     174             :    details).  Reasons for failure include ghost is NULL. */
     175             : 
     176             : void *
     177             : fd_ghost_leave( fd_ghost_t const * ghost );
     178             : 
     179             : /* fd_ghost_delete unformats a memory region used as a ghost.
     180             :    Assumes only the nobody is joined to the region.  Returns a
     181             :    pointer to the underlying shared memory region or NULL if used
     182             :    obviously in error (e.g. ghost is obviously not a ghost ... logs
     183             :    details).  The ownership of the memory region is transferred to the
     184             :    caller. */
     185             : 
     186             : void *
     187             : fd_ghost_delete( void * ghost );
     188             : 
     189             : /* Accessors */
     190             : 
     191             : /* fd_ghost_root returns a pointer in the caller's address space to the
     192             :    root.  Assumes ghost is a current local join. */
     193             : 
     194             : fd_ghost_blk_t *
     195             : fd_ghost_root( fd_ghost_t * ghost );
     196             : 
     197             : /* fd_ghost_parent returns a pointer in the caller's address space to
     198             :    blk's parent.  Assumes ghost is a current local join and blk is a
     199             :    valid pointer to a pool element inside ghost. */
     200             : 
     201             : fd_ghost_blk_t *
     202             : fd_ghost_parent( fd_ghost_t * ghost, fd_ghost_blk_t * blk );
     203             : 
     204             : /* fd_ghost_width returns the width of the ghost tree. */
     205             : 
     206             : ulong
     207             : fd_ghost_width( fd_ghost_t * ghost );
     208             : 
     209             : /* fd_ghost_query returns the block keyed by block_id.  Returns NULL if
     210             :    not found. */
     211             : 
     212             : fd_ghost_blk_t *
     213             : fd_ghost_query( fd_ghost_t       * ghost,
     214             :                 fd_hash_t  const * block_id );
     215             : 
     216             : /* fd_ghost_best returns the best block (according to fork choice) in
     217             :    the subtree beginning at root.  This is the ideal block to vote on or
     218             :    reset to, and feeds into downstream TowerBFT rules.  This is usually
     219             :    a leaf node in the tree but may not when blocks are marked invalid
     220             :    due to unconfirmed duplicates. Assumes root is marked valid so this
     221             :    will never return NULL. */
     222             : 
     223             : fd_ghost_blk_t *
     224             : fd_ghost_best( fd_ghost_t     * ghost,
     225             :                fd_ghost_blk_t * root );
     226             : 
     227             : /* fd_ghost_deepest returns the deepest ghost block (highest tree depth)
     228             :    in the subtree beginning at root.  Unlike fd_ghost_best, deepest can
     229             :    return a block marked invalid for fork choice.  In case of ties, the
     230             :    returned block will be the most recently inserted one.  This will
     231             :    never return NULL. */
     232             : 
     233             : fd_ghost_blk_t *
     234             : fd_ghost_deepest( fd_ghost_t     * ghost,
     235             :                   fd_ghost_blk_t * root );
     236             : 
     237             : 
     238             : /* fd_ghost_ancestor returns the ancestor on the same fork as descendant
     239             :    keyed by ancestor_id.  Returns NULL if not found. */
     240             : 
     241             : fd_ghost_blk_t *
     242             : fd_ghost_ancestor( fd_ghost_t      * ghost,
     243             :                    fd_ghost_blk_t  * descendant,
     244             :                    fd_hash_t const * ancestor_id );
     245             : 
     246             : /* fd_ghost_slot_ancestor returns the ancestor on the same fork as
     247             :    descendant with ancestor_slot.  Returns NULL if not found. */
     248             : 
     249             : fd_ghost_blk_t *
     250             : fd_ghost_slot_ancestor( fd_ghost_t     * ghost,
     251             :                         fd_ghost_blk_t * descendant,
     252             :                         ulong            ancestor_slot );
     253             : 
     254             : /* fd_ghost_invalid_ancestor returns the first ancestor on the same fork
     255             :    as descendant that is marked invalid.  Includes checking descendant
     256             :    itself.  Returns NULL if there are no invalid ancestors. */
     257             : 
     258             : fd_ghost_blk_t *
     259             : fd_ghost_invalid_ancestor( fd_ghost_t     * ghost,
     260             :                            fd_ghost_blk_t * descendant );
     261             : 
     262             : /* Operations */
     263             : 
     264             : /* fd_ghost_init initializes the ghost tree with a root block keyed by
     265             :    block_id at the given slot.  bank_seq is the app-wide bank sequence
     266             :    number of the block (or ULONG_MAX if unknown).  Must be called exactly
     267             :    once before any fd_ghost_insert calls.  Returns the new root block. */
     268             : 
     269             : fd_ghost_blk_t *
     270             : fd_ghost_init( fd_ghost_t      * ghost,
     271             :                ulong             bank_seq,
     272             :                ulong             slot,
     273             :                fd_hash_t const * block_id );
     274             : 
     275             : /* fd_ghost_insert inserts a new tree node representing a block keyed by
     276             :    block_id (and slot).  bank_seq is the app-wide bank sequence number of
     277             :    the block (or ULONG_MAX if unknown).  parent_block_id is used to link
     278             :    this new block to its parent in the ghost tree.  Returns the new
     279             :    block. */
     280             : 
     281             : fd_ghost_blk_t *
     282             : fd_ghost_insert( fd_ghost_t      * ghost,
     283             :                  ulong             bank_seq,
     284             :                  ulong             slot,
     285             :                  fd_hash_t const * block_id,
     286             :                  fd_hash_t const * parent_block_id );
     287             : 
     288             : /* fd_ghost_count_vote return codes. */
     289             : 
     290          30 : #define FD_GHOST_SUCCESS           ( 0) /* vote counted successfully */
     291           0 : #define FD_GHOST_ERR_NOT_VOTED     (-1) /* slot == ULONG_MAX (hasn't voted) */
     292           0 : #define FD_GHOST_ERR_VOTE_TOO_OLD  (-2) /* slot < root->slot */
     293           3 : #define FD_GHOST_ERR_ALREADY_VOTED (-3) /* slot <= prev_slot (not newer) */
     294             : 
     295             : /* fd_ghost_count_vote updates ghost's stake weights with the latest
     296             :    vote on the given voter's tower.  This counts pubkey's stake towards
     297             :    all ancestors on the same fork as block_id.  If the voter has
     298             :    previously voted, it subtracts the voter's previous stake from all
     299             :    ancestors of vtr->prev_block_id.  This ensures that only the most
     300             :    recent vote from a voter is counted (see top-level documentation
     301             :    about the LMD rule).  Returns FD_GHOST_SUCCESS on success, or a
     302             :    negative FD_GHOST_ERR_* code if the vote was not counted.
     303             : 
     304             :    Note that the vote is not counted if it's not newer than the voter's
     305             :    previous slot.  This is done intentionally: equivocating votes will
     306             :    be ignored.
     307             : 
     308             :    TODO the implementation can be made more efficient by incrementally
     309             :    updating and short-circuiting ancestor traversals.  Currently this is
     310             :    bounded to O(h), where h is the height of ghost ie. O(block_max) in
     311             :    the worst case. */
     312             : 
     313             : int
     314             : fd_ghost_count_vote( fd_ghost_t *        ghost,
     315             :                      fd_ghost_blk_t *    blk,
     316             :                      fd_pubkey_t const * vote_acc,
     317             :                      ulong               stake,
     318             :                      ulong               slot );
     319             : 
     320             : /* fd_ghost_publish publishes newr as the new ghost root, pruning any
     321             :    blocks not in the subtree beginning from the new root (ie. new root
     322             :    and all its descendants).  Returns the new root. */
     323             : 
     324             : void
     325             : fd_ghost_publish( fd_ghost_t     * ghost,
     326             :                   fd_ghost_blk_t * newr );
     327             : 
     328             : /* fd_ghost_confirm marks the block keyed by confirmed_block_id and its
     329             :    entire ancestry (up to root) as valid for fork choice, short-
     330             :    circuiting at the first ancestor that is already valid.  No-op if
     331             :    confirmed_block_id is not in ghost.  Does not invalidate equivocating
     332             :    blocks — the caller is responsible for calling fd_ghost_eqvoc on any
     333             :    known equivocating block_ids. */
     334             : 
     335             : void
     336             : fd_ghost_confirm( fd_ghost_t      * ghost,
     337             :                   fd_hash_t const * confirmed_block_id );
     338             : 
     339             : /* fd_ghost_eqvoc marks the block keyed by block_id and all of its
     340             :    descendants as invalid for fork choice.  No-op if block_id is not
     341             :    in ghost. */
     342             : 
     343             : void
     344             : fd_ghost_eqvoc( fd_ghost_t      * ghost,
     345             :                 fd_hash_t const * block_id );
     346             : 
     347             : /* Misc */
     348             : 
     349             : /* fd_ghost_verify checks the ghost is not obviously corrupt.  Returns 0
     350             :    if verify succeeds, -1 otherwise. */
     351             : 
     352             : int
     353             : fd_ghost_verify( fd_ghost_t * ghost );
     354             : 
     355             : /* fd_ghost_to_cstr pretty-prints a formatted ghost tree beginning from
     356             :    root (arbitrary node, not necessarily the current ghost root) to the
     357             :    buffer cstr of at least cstr_max capacity.  Returns a pointer to cstr
     358             :    and on return cstr_len contains actual bytes written including NUL.
     359             : 
     360             :    Typical usage:
     361             : 
     362             :      fd_ghost_to_cstr( ghost, fd_ghost_root( ghost ) );  // stringify ghost beginning from the current ghost root
     363             : 
     364             :    Alternative usage:
     365             : 
     366             :      fd_ghost_ele_t const * ele = fd_ghost_query( slot );
     367             :      fd_ghost_to_cstr( ghost, fd_ghost_parent( fd_ghost_parent( ele ) ) ); // print from ele's grandparent
     368             : 
     369             :    */
     370             : char *
     371             : fd_ghost_to_cstr( fd_ghost_t const *     ghost,
     372             :                   fd_ghost_blk_t const * root,
     373             :                   char *                 cstr,
     374             :                   ulong                  cstr_max,
     375             :                   ulong *                cstr_len );
     376             : 
     377             : /* fd_ghost_blk_map_remove removes blk from the ghost map.  Assumes blk
     378             :    is in the map.  Returns blk. */
     379             : 
     380             : fd_ghost_blk_t *
     381             : fd_ghost_blk_map_remove( fd_ghost_t     * ghost,
     382             :                          fd_ghost_blk_t * blk );
     383             : 
     384             : /* fd_ghost_blk_map_insert inserts a ghost blk into the ghost map. */
     385             : 
     386             : void
     387             : fd_ghost_blk_map_insert( fd_ghost_t     * ghost,
     388             :                          fd_ghost_blk_t * blk );
     389             : 
     390             : /* fd_ghost_blk_child returns the left-most child of blk, or NULL if
     391             :    blk has no children. */
     392             : 
     393             : fd_ghost_blk_t *
     394             : fd_ghost_blk_child( fd_ghost_t     * ghost,
     395             :                     fd_ghost_blk_t * blk );
     396             : 
     397             : /* fd_ghost_blk_sibling returns the next sibling of blk, or NULL if
     398             :    blk has no more siblings. */
     399             : 
     400             : fd_ghost_blk_t *
     401             : fd_ghost_blk_sibling( fd_ghost_t     * ghost,
     402             :                       fd_ghost_blk_t * blk );
     403             : 
     404             : /* fd_ghost_blk_next returns the block linked via blk->next (used for
     405             :    traversing a temporary linked list built from removed map elements),
     406             :    or NULL if at the end of the list. */
     407             : 
     408             : fd_ghost_blk_t *
     409             : fd_ghost_blk_next( fd_ghost_t     * ghost,
     410             :                    fd_ghost_blk_t * blk );
     411             : 
     412             : /* fd_ghost_blk_idx returns the pool index corresponding to blk. */
     413             : 
     414             : ulong
     415             : fd_ghost_blk_idx( fd_ghost_t     * ghost,
     416             :                   fd_ghost_blk_t * blk );
     417             : 
     418             : /* fd_ghost_blk_idx_null returns the sentinel null index for the ghost
     419             :    block pool. */
     420             : 
     421             : ulong
     422             : fd_ghost_blk_idx_null( fd_ghost_t * ghost );
     423             : 
     424             : 
     425             : FD_PROTOTYPES_END
     426             : 
     427             : #endif /* HEADER_fd_src_choreo_ghost_fd_ghost_h */

Generated by: LCOV version 1.14