LCOV - code coverage report
Current view: top level - discof/replay - fd_replay_tile_private.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 4 0.0 %
Date: 2026-05-15 07:18:56 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_discof_replay_fd_replay_tile_private_h
       2             : #define HEADER_fd_src_discof_replay_fd_replay_tile_private_h
       3             : 
       4             : #include "fd_replay_tile.h"
       5             : #include "fd_vote_tracker.h"
       6             : #include "../../disco/topo/fd_wksp_mon.h"
       7             : #include "../../disco/store/fd_store.h"
       8             : #include "../../disco/bundle/fd_bundle_crank.h"
       9             : #include "../../disco/keyguard/fd_keyswitch.h"
      10             : #include "../../discof/reasm/fd_reasm.h"
      11             : #include "../../discof/replay/fd_sched.h"
      12             : #include "../../flamenco/accdb/fd_accdb_admin.h"
      13             : #include "../../flamenco/capture/fd_capture_ctx.h"
      14             : #include "../../flamenco/genesis/fd_genesis_parse.h"
      15             : #include "../../flamenco/leaders/fd_multi_epoch_leaders.h"
      16             : #include "../../flamenco/progcache/fd_progcache.h"
      17             : #include "../../flamenco/runtime/fd_bank.h"
      18             : #include "../../flamenco/runtime/fd_txncache.h"
      19             : #include "../../flamenco/runtime/tests/fd_dump_pb.h"
      20             : #include <stdio.h>
      21             : 
/* fd_replay_in_link_t describes one incoming link consumed by the
   replay tile.  Fields follow the usual fd_tango dcache addressing
   convention (NOTE(review): semantics inferred from naming used across
   the codebase — confirm against the tile's unprivileged_init). */

struct fd_replay_in_link {
  fd_wksp_t * mem;    /* workspace backing the link's dcache */
  ulong       chunk0; /* presumably the lowest legal chunk idx into mem */
  ulong       wmark;  /* presumably the highest legal chunk idx into mem */
  ulong       mtu;    /* max frag payload size, in bytes */
};

typedef struct fd_replay_in_link fd_replay_in_link_t;
      30             : 
/* fd_replay_out_link_t describes one outgoing link produced by the
   replay tile.  Fields follow the usual fd_tango dcache addressing
   convention (NOTE(review): semantics inferred from naming used across
   the codebase — confirm against the tile's unprivileged_init). */

struct fd_replay_out_link {
  ulong       idx;    /* index of this out link in the tile's topology */
  fd_wksp_t * mem;    /* workspace backing the link's dcache */
  ulong       chunk0; /* presumably the lowest legal chunk idx into mem */
  ulong       wmark;  /* presumably the highest legal chunk idx into mem */
  ulong       chunk;  /* presumably the next chunk to publish to */
};

typedef struct fd_replay_out_link fd_replay_out_link_t;
      40             : 
/* fd_block_id_map is a simple map of block-ids to bank indices.  The
   map sits on top of an array of fd_block_id_ele_t.  This serves as a
   translation layer between block ids to bank indices.  The data
   array is indexed by bank index and the latest observed merkle root
   for the bank index is stored in the array.  Once the block id has
   been observed, the entry is keyed by the latest merkle root (aka the
   block id). */

struct fd_block_id_ele {
  fd_hash_t latest_mr;      /* latest observed merkle root; map key once block_id_seen */
  uint      latest_fec_idx; /* index of the latest observed FEC set for this bank */
  int       block_id_seen;  /* 1 once the final block id has been observed, 0 otherwise */
  ulong     slot;           /* slot number this bank index corresponds to */
  ulong     next_;          /* internal chaining field used by fd_block_id_map */
};
typedef struct fd_block_id_ele fd_block_id_ele_t;
      57             : 
/* Instantiate a chained map (fd_map_chain template) named
   fd_block_id_map over the fd_block_id_ele_t array above, keyed by the
   latest_mr hash.  Keys compare by raw memcmp of the 32-byte hash and
   hash via fd_hash seeded with a per-tile seed. */

#define MAP_NAME               fd_block_id_map
#define MAP_ELE_T              fd_block_id_ele_t
#define MAP_KEY_T              fd_hash_t
#define MAP_KEY                latest_mr
#define MAP_NEXT               next_
#define MAP_KEY_EQ(k0,k1)      (!memcmp((k0),(k1), sizeof(fd_hash_t)))
#define MAP_KEY_HASH(key,seed) (fd_hash((seed),(key),sizeof(fd_hash_t)))
#include "../../util/tmpl/fd_map_chain.c"
      66             : 
/* fd_replay_tile_t is the tile-private context of the replay tile: all
   state the tile needs across housekeeping/credit/frag callbacks,
   including joins to shared fork-aware structures (store, banks,
   txncache, accdb), link descriptors, leader-slot state, and metrics. */

struct fd_replay_tile {
  fd_wksp_t * wksp; /* workspace backing this tile's scratch region */

  uint rng_seed;    /* seed used to initialize rng below */
  fd_rng_t rng[ 1 ];

  fd_accdb_admin_t    accdb_admin[1];          /* admin join to the account database */
  fd_accdb_user_t     accdb[1];                /* user join to the account database */
  fd_progcache_join_t progcache[1];            /* join to the program cache */
  fd_wksp_mon_t       progcache_wksp_mon[1];   /* usage monitor for the progcache wksp */
  fd_wksp_mon_t       accdb_cache_wksp_mon[1]; /* usage monitor for the accdb cache wksp */

  fd_txncache_t * txncache;
  fd_store_t *    store;
  fd_banks_t *    banks;
  ulong           frontier_indices[ FD_BANKS_MAX_BANKS ]; /* bank indices of the current fork frontier */
  ulong           frontier_cnt;                           /* number of valid entries in frontier_indices */

  /* This flag is 1 if we have seen a vote signature that our node has
     sent out get rooted at least one time.  The value is 0 otherwise.
     We can't become leader and pack blocks until this flag has been
     set.  This parallels the Agave 'has_new_vote_been_rooted'. */
  int identity_vote_rooted;
  int wait_for_vote_to_start_leader;

  /* wfs_enabled is 1 if the validator is booted in
     wait_for_supermajority mode. In this mode replay (and, by extension,
     downstream consumers) is not allowed to make progress until 80% of
     the cluster has published their ContactInfo in Gossip with a
     shred version matching expected_shred_version. When this happens,
     wfs_complete will be set to 1. */
  int   wfs_enabled;
  int   wfs_complete;

  fd_hash_t expected_bank_hash;

  ulong            reasm_seed;
  fd_reasm_t     * reasm;
  fd_reasm_fec_t * reasm_evicted; /* evicted FEC by reasm_insert must be stored in returnable_frag, and then drained in after_credit */

  fd_sched_t * sched;            /* transaction scheduler / dispatcher */
  ulong        in_cnt;           /* number of in links actually in use (<= 128) */
  ulong        execrp_idle_cnt;  /* NOTE(review): presumably count of idle exec/replay workers — confirm */

  ulong                vote_tracker_seed;
  fd_vote_tracker_t *  vote_tracker;

  int          has_genesis_hash; /* 1 if genesis_hash below is valid */
  char         genesis_path[ PATH_MAX ];
  fd_hash_t    genesis_hash[1];
  fd_genesis_t genesis[1];
  ulong        cluster_type;

  int   has_genesis_timestamp;          /* 1 if genesis_timestamp is valid */
  ulong genesis_timestamp;
  int   has_expected_genesis_timestamp; /* 1 if expected_genesis_timestamp is valid */
  ulong expected_genesis_timestamp;

  ulong          hard_fork_cnt; /* number of valid entries in hard_forks */
  fd_hard_fork_t hard_forks[ FD_HARD_FORKS_MAX ];

  ushort expected_shred_version;
  ushort ipecho_shred_version;

  /* A note on publishing ...

     The watermarks are used to publish our fork-aware structures.  For
     example, store, banks, and txncache need to be published to release
     resources occupied by rooted or dead blocks.  In general,
     publishing has the effect of pruning forks in those structures,
     indicating that it is ok to release the memory being occupied by
     the blocks on said forks.  Tower is responsible for informing us of
     the latest block on the consensus rooted fork.  As soon as we can,
     we should move the published root as close as possible to the
     latest consensus root, publishing/pruning everything on the fork
     tree along the way.  That is, all the blocks that directly descend
     from the current published root (inclusive) to the new published
     root (exclusive) on the rooted fork, as well as all the minority
     forks that branch from said blocks.

     Ideally, we'd move the published root to the consensus root
     immediately upon receiving a new consensus root.  However, that's
     not always safe to do.  One thing we need to be careful about is
     making sure that there are no more users/consumers of
     soon-to-be-pruned blocks, lest a use-after-free occurs.  This can
     be done by using a reference counter for each block.  Any
     concurrent activity, such as transaction execution in the exec
     tiles, should retain a refcnt on the block for as
     long as it needs access to the shared fork-aware structures related
     to that block.  Eventually, refcnt on a given block will drop down
     to 0 as the block either finishes replaying or gets marked as dead,
     and any other tile that has retained a refcnt on the block releases
     it.  At that point, it becomes a candidate for pruning.  The key to
     safe publishing then becomes figuring out how far we could advance
     the published root, such that every minority fork branching off of
     blocks in between the current published root (inclusive) and the
     new published root (exclusive) is safe to be pruned.  This is a
     straightforward tree traversal, where if a block B on the rooted
     fork has refcnt 0, and all minority forks branching off of B also
     have refcnt 0, then B is safe to be pruned.  We advance the
     published root to the farthest consecutively prunable block on the
     rooted fork.  Note that reasm presents the replay tile with a clean
     view of the world where every block is chained off of a parent
     block.  So there are no orphaned/dangling tree nodes to worry
     about.  The world is a nice single tree as far as replay is
     concerned.

     In the following fork tree, every node is a block and the number in
     parentheses is the refcnt on the block.  The chain marked with
     double slashes is the rooted fork.  Suppose the published root is
     at block P, and consensus root is at block T.  We can't publish
     past block P because Q has refcnt 1.


          P(0)
        /    \\
      Q(1)    A(0)
            / ||  \
        X(0) B(0)  C(0)
       /      || \
      Y(0)   M(0) R(0)
            / ||   /  \
        D(2) T(0) J(0) L(0)
              ||
              ..
              ..
              ..
              ||
      blocks we might be actively replaying


     When refcnt on Q drops to 0, we would be able to advance the
     published root to block M, because blocks P, A, and B, as well as
     all subtrees branching off of them, have refcnt 0, and therefore
     can be pruned.  Block M itself cannot be pruned yet because its
     child block D has refcnt 2.  After publishing/pruning, the fork
     tree would be:


             M(0)
            / ||
        D(2) T(0)
              ||
              ..
              ..
              ..
              ||
      blocks we might be actively replaying


     As a result, the shared fork-aware structures can free resources
     for blocks P, A, B, and all subtrees branching off of them.

     For the reference counting part, the replay tile is the sole entity
     that can update the refcnt.  This ensures that all refcnt increment
     and decrement attempts are serialized at the replay tile, and that
     there are no racy resurrection of a soon-to-be-pruned block.  If a
     refcnt increment request arrives after a block has been pruned,
     replay simply rejects the request.

     A note on the implementation of the above ...

     Upon receiving a new consensus root, we descend down the rooted
     fork from the current published root to the new consensus root.  On
     each node/block of the rooted fork, we do a summation of the refcnt
     on the block and all the minority fork blocks branching from the
     block.  If the summation is 0, the block is safe for pruning.  We
     advance the published root to the far end of the consecutive run of
     0 refcnt sums originating from the current published root.  On our
     descent down the minority forks, we also mark any block that hasn't
     finished replaying as dead, so we don't waste time executing them.
     No more transactions shall be dispatched for execution from dead
     blocks.

     Blocks start out with a refcnt of 0.  Other tiles may send a
     request to the replay tile for a reference on a block.  The
     transaction dispatcher is another source of refcnt updates.  On
     every dispatch of a transaction for block B, we increment the
     refcnt for B.  And on every transaction finalization, we decrement
     the refcnt for B.  This means that whenever the refcnt on a block
     is 0, there is no more reference on that block from the execution
     pipeline.  While it might be tempting to simply increment the
     refcnt once when we start replaying a block, and decrement the
     refcnt once when we finish a block, this more fine-grained refcnt
     update strategy allows for aborting and potentially immediate
     pruning of blocks under interleaved block replay.  Upon receiving a
     new consensus root, we can simply look at the refcnt on minority
     fork blocks, and a refcnt of 0 would imply that the block is safe
     for pruning, even if we haven't finished replaying it.  Without the
     fine-grained refcnt, we would need to first stop dispatching from
     the aborted block, and then wait for a full drain of the execution
     pipeline to know for sure that there are no more in-flight
     transactions executing on the aborted block.  Note that this will
     allow the refcnt on any block to transiently drop down to 0.  We
     will not mistakenly prune an actively replaying block, aka a leaf
     node, that is chaining off of the rooted fork, because the
     consensus root is always an ancestor of the actively replaying tip.
     */
  fd_hash_t consensus_root;          /* The most recent block to have reached max lockout in the tower. */
  ulong     consensus_root_slot;     /* slot number of the above. */
  ulong     consensus_root_bank_idx; /* bank index of the above. */
  ulong     published_root_slot;     /* slot number of the published root. */
  ulong     published_root_bank_idx; /* bank index of the published root. */

  /* Randomly generated block id for the initial genesis/snapshot slot.
     To be replaced with block id in the snapshot manifest when SIMD-333
     is activated. */

  fd_hash_t initial_block_id;

  /* We need to maintain a tile-local mapping of block-ids to bank index
     and vice versa.  This translation layer is needed for conversion
     since tower operates on block-ids and downstream consumers of FEC
     sets operate on bank indices.  This mapping must happen both ways:
     1. tower sends us block ids and we must map them to bank indices.
     2. when a block is completed, we must map the bank index to a block
        id to send a slot complete message to tower. */
  ulong               block_id_len;      /* number of entries in block_id_arr */
  fd_block_id_ele_t * block_id_arr;      /* indexed by bank index */
  ulong               block_id_map_seed; /* hash seed for block_id_map */
  fd_block_id_map_t * block_id_map;      /* block id -> block_id_arr entry */

  /* Capture-related configs */
  fd_capture_ctx_t *     capture_ctx;
  FILE *                 capture_file;
  fd_capture_link_buf_t  cap_repl_out[1];

  /* Protobuf dumping context for debugging runtime execution and
     collecting seed corpora. */
  fd_dump_proto_ctx_t * dump_proto_ctx;

  /* Whether the runtime has been booted either from snapshot loading
     or from genesis. */
  int is_booted;

  /* Buffer to store vote towers that need to be published to the Tower
     tile. */

  fd_multi_epoch_leaders_t * mleaders; /* joined view over mleaders_mem below */

  int larger_max_cost_per_block;

  /* When we transition to becoming leader, we can only unbecome the
     leader if we have received a block id from the FEC reassembler, and
     a message from PoH that the leader slot has ended.  After both of
     these conditions are met, then we are free to unbecome the leader.
  */
  uint        is_leader : 1;
  uint        supports_leader : 1;
  int         recv_poh;              /* 1 once PoH signals the leader slot has ended */
  ulong       next_leader_slot;
  long        next_leader_tickcount;
  ulong       highwater_leader_slot;
  ulong       reset_slot;
  fd_bank_t * reset_bank;
  fd_hash_t   reset_block_id;
  long        reset_timestamp_nanos;
  double      slot_duration_nanos;
  double      slot_duration_ticks;
  fd_bank_t * leader_bank;

  fd_pubkey_t      identity_pubkey[1];
  ulong            identity_idx;

  fd_keyswitch_t * keyswitch;
  int              halt_leader;

  ulong  resolv_tile_cnt;

  int in_kind[ 128 ];              /* per-in-link kind tag; parallel to in */
  fd_replay_in_link_t in[ 128 ];

  fd_replay_out_link_t exec_out[ 1 ];

  fd_replay_out_link_t replay_out[1];

  fd_replay_out_link_t epoch_out[1];

  /* The rpc tile needs to occasionally own a reference to a live bank.
     Replay needs to know if the rpc as a consumer is enabled so it can
     increment the bank's refcnt before publishing bank_idx. */
  int rpc_enabled;

  /* For dumping blocks to protobuf. For backtest only. */
  fd_block_dump_ctx_t * block_dump_ctx;

  /* We need a few pieces of information to compute the right addresses
     for bundle crank information that we need to send to pack. */
  struct {
    int                   enabled;
    fd_pubkey_t           vote_account;
    fd_bundle_crank_gen_t gen[1];
  } bundle;

  /* Counters and histograms exported via the tile's metrics. */
  struct {
    ulong      store_query_acquire;
    ulong      store_query_release;
    fd_histf_t store_query_wait[1];
    fd_histf_t store_query_work[1];
    ulong      store_query_cnt;
    ulong      store_query_missing_cnt;
    ulong      store_query_mr;
    ulong      store_query_missing_mr;

    ulong slots_total;
    ulong transactions_total;

    ulong reasm_latest_slot;
    ulong reasm_latest_fec_idx;

    ulong sched_full;
    ulong reasm_empty;
    ulong leader_bid_wait;
    ulong banks_full;
    ulong storage_root_behind;

    fd_histf_t root_slot_dur[1];
    fd_histf_t root_account_dur[1];
  } metrics;

  /* Backing memory for mleaders, joined during tile init. */
  uchar __attribute__((aligned(FD_MULTI_EPOCH_LEADERS_ALIGN))) mleaders_mem[ FD_MULTI_EPOCH_LEADERS_FOOTPRINT ];

  ulong                runtime_stack_seed;
  fd_runtime_stack_t * runtime_stack;
};

typedef struct fd_replay_tile fd_replay_tile_t;
     394             : 
     395             : #endif /* HEADER_fd_src_discof_replay_fd_replay_tile_private_h */

Generated by: LCOV version 1.14