LCOV - code coverage report
Current view: top level - flamenco/stakes - fd_vote_stakes.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 1 1 100.0 %
Date: 2026-05-15 07:18:56 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_flamenco_stakes_fd_vote_stakes_h
       2             : #define HEADER_fd_src_flamenco_stakes_fd_vote_stakes_h
       3             : 
       4             : #include "../fd_flamenco_base.h"
       5             : 
       6             : /* fd_vote_stakes_t is a data structure that stores vote account stake
       7             :    updates across epoch boundaries.  It offers a mapping from vote
       8             :    account pubkeys to their t_1 and t_2 stakes.  The structure is
       9             :    designed to work with a large amount of vote accounts (in the order
      10             :    of 10s of millions) along with a relatively small number of forks
      11             :    across epoch boundaries.
      12             : 
      13             :    The structure is designed around these operations:
      14             :    1. Inserting updated vote account stakes into a given fork.
      15             :    2. Querying the stake for a given vote account with a given fork.
      16             : 
      17             :    Concurrent queries are allowed but concurrent inserts are not.  This
      18             :    is fine because the structure is only modified during boot and during
      19             :    the epoch boundary.
      20             : 
      21             :    Given a large number of vote accounts (e.g. 2^25 = 33,554,432), we
      22             :    need to store the pubkey, t_1 stake, and t_2 stake for each vote
      23             :    account.  We also need to store potentially ~32 forks across each
      24             :    epoch boundary.  If done naively, this would require
      25             :    2^25 * (32 + 8 + 8) * 32 = 51GB of memory not including any overhead
      26             :    for maintaining lookups for accounts.
      27             : 
      28             :    To avoid this, we can use some important runtime protocol properties.
      29             :    The most notable is that across forks, we will only have a small
      30             :    number of differences in vote account stakes: this is because
      31             :    realistically very few vote accounts will have differences in stakes
      32             :    that are caused by forks right before the epoch boundary.  So we can
      33             :    set our bound on elements as the total number of vote accounts +
      34             :    stakes across forks.  Let's say this is 2^26 if the max number of
      35             :    vote accounts we want to support is 2^25.  Now to store our index we
      36             :    only need:
      37             :    2^26 * (32 + 8 + 8) = 3GB of memory (not including overhead).
      38             :    Our index structure will look like this:
      39             :    pool<pubkey, stake_t_1, stake_t_2>
      40             : 
      41             :    What is described above the index of all vote accounts.  We need to
      42             :    account for the stakes across forks.  We have to maintain a list of
      43             :    all of the index entries used by each fork.  It is sufficient to use
      44             :    a uint list of indices into the index.  So each fork is just:
      45             :    pool<uint>.
      46             : 
      47             :    4 (uint pool idx) * 2^25 (# vote accounts) * 32 (# forks) = 4GiB
      48             : 
      49             :    For a given fork, when we insert a vote account we check if:
      50             :    1. The vote account + stake pair is already in the index.  If it is,
      51             :       we just increment a reference to the pair.  Add it to the list of
      52             :       pool indices that the fork maintains.
      53             :    2. The vote account + stake pair is not in the index.  If it is not,
      54             :       we need to insert it into the index and assume a reference of 1.
      55             :       Add it to the local list of pool indices that the fork maintains.
      56             :    In order to make both of these cases performant we need a mapping
      57             :    from pubkey + stake to the index pool element.  This is simply
      58             :    represented as:
      59             :    map<(pubkey+stake), index_pool_idx>.
      60             : 
      61             :    Now for queries, we need a way to query the t_1 and t_2 stake for a
      62             :    pubkey on a given fork.  The problem is the above map requires the
      63             :    t_1 stake as part of the key and there are potentially multiple
      64             :    entries for a given pubkey.  This is solved with a
      65             :    multi_map<pubkey, index_pool_idx>.  We query for a given pubkey and
      66             :    iterate through all of the entries: this is an acceptable trade-off
      67             :    since there will almost always be one element for a given pubkey
      68             :    (except around the epoch boundary).  However, for each index entry
      69             :    we need a way to make sure that it's the one that is in our fork's
      70             :    list of indices.  This is solved with a map for each fork that is
      71             :    keyed by the index pool idx.
      72             :    map<index_pool_idx, fork list entry>.
      73             : 
      74             :    Now, we can quickly insert entries for a given fork and also do fast
      75             :    queries for a given pubkey.
      76             : 
      77             :    The only remaining operation is updating the root fork.  If we are
      78             :    updating which fork is the root, we can safely assume that all other
      79             :    forks are no longer valid:
      80             :    1. either the fork was a competing fork that executed the epoch
      81             :       boundary and is no longer needed
      82             :    2. the fork corresponds to a fork for the previous epoch boundary.
      83             : 
      84             :    For any fork that's being removed, we need to reset its fork's pool
      85             :    and remove any references to the index pool entries.  If an index
      86             :    entry has a reference count of 0, we can remove it from the index
      87             :    entirely.  Under the hood, the forks in use are stored in a deque;
      88             :    when a root is being advanced, all entries from the deque are removed
      89             :    and each removed fork's entries are released.
      90             : 
      91             :    The memory footprint of what is actually described above is larger
      92             :    because each key of the index needs to be a compound of
      93             :    the pubkey, stake_t_1, node_account_t_1, and epoch.
      94             : 
      95             :    All public APIs internally acquire and release a read-write lock
      96             :    so the caller does not need to manage synchronization.  Read-only
      97             :    operations (query, ele_cnt, get_root_idx) acquire a shared read
      98             :    lock.  Mutating operations and the iterator acquire an exclusive
      99             :    write lock.
     100             : 
     101             :    fd_vote_stakes is the Firedancer equivalent to the Agave client
     102             :    epoch stakes data structure. */
     103             : 
     104             : FD_PROTOTYPES_BEGIN
     105             : 
     106       14247 : #define FD_VOTE_STAKES_ALIGN (128UL)
     107             : 
     108             : struct fd_vote_stakes;
     109             : typedef struct fd_vote_stakes fd_vote_stakes_t;
     110             : 
     111             : /* fd_vote_stakes_align returns the minimum alignment required for the
     112             :    fd_vote_stakes_t struct. */
     113             : 
     114             : ulong
     115             : fd_vote_stakes_align( void );
     116             : 
     117             : /* fd_vote_stakes_footprint returns the minimum footprint required for
     118             :    the fd_vote_stakes_t object given the max number of vote accounts,
     119             :    the max fork width (number of forks that can cross the epoch
     120             :    boundary), and the max number of map chains.  The map chains should
     121             :    be a power of 2 that is roughly equal to the expected number of vote
     122             :    accounts and not the maximum. */
     123             : 
     124             : ulong
     125             : fd_vote_stakes_footprint( ulong max_vote_accounts,
     126             :                           ulong expected_vote_accounts,
     127             :                           ulong max_fork_width );
     128             : 
     129             : 
     130             : /* fd_vote_stakes_new creates a new fd_vote_stakes_t object given a
     131             :    region of memory sized out according to fd_vote_stakes_footprint.
     132             :    The underlying data storage will actually support
     133             :    2 * max_vote_accounts entries because entries are deduped across
     134             :    forks after all of the entries have already been inserted. */
     135             : 
     136             : void *
     137             : fd_vote_stakes_new( void * shmem,
     138             :                     ulong  max_vote_accounts,
     139             :                     ulong  expected_vote_accounts,
     140             :                     ulong  max_fork_width,
     141             :                     ulong  seed );
     142             : 
     143             : 
     144             : /* fd_vote_stakes_join joins a valid fd_vote_stakes_t object from a
     145             :    region of memory. */
     146             : 
     147             : fd_vote_stakes_t *
     148             : fd_vote_stakes_join( void * shmem );
     149             : 
     150             : /* fd_vote_stakes_root_{insert, update}_key are APIs for
     151             :    inserting and updating keys for the root fork.  These
     152             :    operations are split out in order to support the snapshot loading
     153             :    process.  The set of stakes from the T-1 epoch are inserted into
     154             :    the root fork with a call to fd_vote_stakes_root_insert_key.  The
     155             :    set of stakes from the T-2 epoch are updated with a call to
     156             :    fd_vote_stakes_root_update_meta.  The caller is responsible for
     157             :    ensuring that for a given pubkey, insert_key is called before
     158             :    update_meta.  It is important that these APIs should only be called
     159             :    while the root fork is the only and current fork in use.
     160             : 
     161             :    If update_meta is called on a key that has not had a corresponding
     162             :    insert_key call, a key is created into the root fork with a t-1 stake
     163             :    of 0.  This usually means the vote account has been deleted, but it
     164             :    can be possible in the case where the only staker of a vote account
     165             :    has been marked delinquent in epoch T-1 and needs to be counted
     166             :    towards clock calculation for the rest of the epoch. */
     167             : 
     168             : void
     169             : fd_vote_stakes_root_insert_key( fd_vote_stakes_t *  vote_stakes,
     170             :                                 fd_pubkey_t const * pubkey,
     171             :                                 fd_pubkey_t const * node_account_t_1,
     172             :                                 ulong               stake_t_1,
     173             :                                 uchar               commission_t_1,
     174             :                                 ulong               epoch );
     175             : 
     176             : void
     177             : fd_vote_stakes_root_update_meta( fd_vote_stakes_t *  vote_stakes,
     178             :                                  fd_pubkey_t const * pubkey,
     179             :                                  fd_pubkey_t const * node_account_t_2,
     180             :                                  ulong               stake_t_2,
     181             :                                  uchar               commission_t_2,
     182             :                                  ulong               epoch );
     183             : 
     184             : /* fd_vote_stakes_insert inserts a new vote account stake into a given
     185             :    fork.  If the element already exists on a different fork, then the
     186             :    reference is incremented in the index and the fork will now have an
     187             :    element which points to the vote stake index element. */
     188             : 
     189             : void
     190             : fd_vote_stakes_insert( fd_vote_stakes_t *  vote_stakes,
     191             :                        ushort              fork_idx,
     192             :                        fd_pubkey_t const * pubkey,
     193             :                        fd_pubkey_t const * node_account_t_1,
     194             :                        fd_pubkey_t const * node_account_t_2,
     195             :                        ulong               stake_t_1,
     196             :                        ulong               stake_t_2,
     197             :                        uchar               commission_t_1,
     198             :                        uchar               commission_t_2,
     199             :                        uchar               exists_t_1,
     200             :                        uchar               exists_t_2,
     201             :                        ulong               epoch );
     202             : 
     203             : /* fd_vote_stakes_genesis_fini finalizes the vote stakes on the genesis
     204             :    block.  Any vote stakes that have been inserted will be updated to
     205             :    have identical T-1/T-2 stakes and node accounts.  This function
     206             :    assumes that all vote accounts have already been inserted into the
     207             :    genesis fork. */
     208             : 
     209             : void
     210             : fd_vote_stakes_genesis_fini( fd_vote_stakes_t * vote_stakes );
     211             : 
     212             : /* fd_vote_stakes_new_child creates a new child fork and returns the
     213             :    index identifier for the new fork. */
     214             : 
     215             : ushort
     216             : fd_vote_stakes_new_child( fd_vote_stakes_t * vote_stakes );
     217             : 
     218             : /* fd_vote_stakes_advance_root will move the root fork to the new
     219             :    candidate root fork.  If the root_idx is equal to the root, this
     220             :    function is a no-op.  However, if the root is different, all other
     221             :    child nodes will be removed from the structure. */
     222             : 
     223             : void
     224             : fd_vote_stakes_advance_root( fd_vote_stakes_t * vote_stakes,
     225             :                              ushort             root_idx );
     226             : 
     227             : /* fd_vote_stakes_query_stake queries the stake for a given vote account
     228             :    in the given fork.  If the element is found returns 1, otherwise
     229             :    returns 0.  If any of the optional fields are set to NULL, then their
     230             :    corresponding value will not be set.  An account can have zero stake
     231             :    but still exist. */
     232             : 
     233             : int
     234             : fd_vote_stakes_query( fd_vote_stakes_t *  vote_stakes,
     235             :                       ushort              fork_idx,
     236             :                       fd_pubkey_t const * pubkey,
     237             :                       ulong *             stake_t_1_out_opt,
     238             :                       ulong *             stake_t_2_out_opt,
     239             :                       fd_pubkey_t *       node_account_t_1_out_opt,
     240             :                       fd_pubkey_t *       node_account_t_2_out_opt,
     241             :                       uchar *             commission_t_1_out_opt,
     242             :                       uchar *             commission_t_2_out_opt,
     243             :                       uchar *             exists_t_1_out_opt,
     244             :                       uchar *             exists_t_2_out_opt );
     245             : 
     246             : /* fd_vote_stakes_query_t_1 and fd_vote_stakes_query_t_2 are shortcuts
     247             :    for querying the t_1 and t_2 stake for a given vote account in the
     248             :    given fork.  0 is returned if the vote account does not exist for the
     249             :    epoch.  If the account is found, stake_out, node_account_out, and
     250             :    commission_out will be set. */
     251             : 
     252             : int
     253             : fd_vote_stakes_query_t_1( fd_vote_stakes_t *  vote_stakes,
     254             :                           ushort              fork_idx,
     255             :                           fd_pubkey_t const * pubkey,
     256             :                           ulong *             stake_out,
     257             :                           fd_pubkey_t *       node_account_out,
     258             :                           uchar *             commission_out );
     259             : 
     260             : int
     261             : fd_vote_stakes_query_t_2( fd_vote_stakes_t *  vote_stakes,
     262             :                           ushort              fork_idx,
     263             :                           fd_pubkey_t const * pubkey,
     264             :                           ulong *             stake_out,
     265             :                           fd_pubkey_t *       node_account_out,
     266             :                           uchar *             commission_out );
     267             : 
     268             : /* fd_vote_stakes_ele_cnt returns the number of entries for a given
     269             :    fork. */
     270             : 
     271             : uint
     272             : fd_vote_stakes_ele_cnt( fd_vote_stakes_t * vote_stakes,
     273             :                         ushort             fork_idx );
     274             : 
     275             : /* fd_vote_stakes_get_root_idx returns the index of the root fork. */
     276             : 
     277             : ushort
     278             : fd_vote_stakes_get_root_idx( fd_vote_stakes_t * vote_stakes );
     279             : 
     280             : /* fd_vote_stakes_reset resets the vote stakes object to the initial
     281             :    state.  This is useful for resetting vote stakes if a new snapshot
     282             :    manifest is being loaded. */
     283             : 
     284             : void
     285             : fd_vote_stakes_reset( fd_vote_stakes_t * vote_stakes );
     286             : 
     287             : #define FD_VOTE_STAKES_ITER_FOOTPRINT (16UL)
     288             : #define FD_VOTE_STAKES_ITER_ALIGN     (8UL)
     289             : struct stakes_map_iter_t;
     290             : typedef struct stakes_map_iter_t fd_vote_stakes_iter_t;
     291             : 
     292             : /* A caller can iterate through the entries for a given fork.  The
     293             :    iterator is initialized by a call to fd_vote_stakes_fork_iter_init.
     294             :    The caller is responsible for managing the memory for the iterator.
     295             :    It is safe to call fd_vote_stakes_fork_iter_next if the result of
     296             :    fd_vote_stakes_fork_iter_done() == 0.  It is safe to call
     297             :    fd_vote_stakes_fork_iter_ele() to get the current entry if there is
     298             :    a valid initialized iterator.  fd_vote_stakes_fork_iter_next is
     299             :    called to advance the iterator.
     300             : 
     301             :    fd_vote_stakes_fork_iter_init acquires a write lock on the vote
     302             :    stakes.  The caller MUST call fd_vote_stakes_fork_iter_fini after
     303             :    the iteration loop completes to release the lock.
     304             : 
     305             :    It is not safe to call any other vote stakes apis while an iteration
     306             :    is in progress.
     307             : 
     308             :    Example use:
     309             :    uchar __attribute__((aligned(FD_VOTE_STAKES_ITER_ALIGN))) iter_mem[ FD_VOTE_STAKES_ITER_FOOTPRINT ];
     310             :    for( fd_vote_stakes_iter_t * iter = fd_vote_stakes_fork_iter_init( vote_stakes, fork_idx, iter_mem );
     311             :         !fd_vote_stakes_fork_iter_done( vote_stakes, fork_idx, iter );
     312             :         fd_vote_stakes_fork_iter_next( vote_stakes, fork_idx, iter ) ) {
     313             :      fd_vote_stakes_fork_iter_ele( vote_stakes, fork_idx, iter, &pubkey, &stake_t_1, &stake_t_2, &node_account_t_1, &node_account_t_2, &commission_t_1, &commission_t_2 );
     314             :    }
     315             :    fd_vote_stakes_fork_iter_fini( vote_stakes );
     316             : 
     317             :    Under the hood, the vote stakes iterator is a wrapper of the map
     318             :    chain iterator.
     319             : 
     320             :    TODO: fork_idx can probably get absorbed into the iterator. */
     321             : 
     322             : fd_vote_stakes_iter_t *
     323             : fd_vote_stakes_fork_iter_init( fd_vote_stakes_t * vote_stakes,
     324             :                                ushort             fork_idx,
     325             :                                uchar              iter_mem[ static FD_VOTE_STAKES_ITER_FOOTPRINT ] );
     326             : 
     327             : int
     328             : fd_vote_stakes_fork_iter_done( fd_vote_stakes_t *      vote_stakes,
     329             :                                ushort                  fork_idx,
     330             :                                fd_vote_stakes_iter_t * iter );
     331             : 
     332             : void
     333             : fd_vote_stakes_fork_iter_next( fd_vote_stakes_t *      vote_stakes,
     334             :                                ushort                  fork_idx,
     335             :                                fd_vote_stakes_iter_t * iter );
     336             : 
     337             : void
     338             : fd_vote_stakes_fork_iter_ele( fd_vote_stakes_t *      vote_stakes,
     339             :                               ushort                  fork_idx,
     340             :                               fd_vote_stakes_iter_t * iter,
     341             :                               fd_pubkey_t *           pubkey_out,
     342             :                               ulong *                 stake_t_1_out_opt,
     343             :                               ulong *                 stake_t_2_out_opt,
     344             :                               fd_pubkey_t *           node_account_t_1_out_opt,
     345             :                               fd_pubkey_t *           node_account_t_2_out_opt,
     346             :                               uchar *                 commission_t_1_out_opt,
     347             :                               uchar *                 commission_t_2_out_opt );
     348             : 
     349             : void
     350             : fd_vote_stakes_fork_iter_fini( fd_vote_stakes_t * vote_stakes );
     351             : 
     352             : FD_PROTOTYPES_END
     353             : 
     354             : #endif

Generated by: LCOV version 1.14