LCOV - code coverage report
Current view: top level - discof/replay - fd_sched.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 11 0.0 %
Date: 2026-03-30 06:35:28 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_discof_replay_fd_sched_h
       2             : #define HEADER_fd_src_discof_replay_fd_sched_h
       3             : 
       4             : #include "fd_rdisp.h"
       5             : #include "../../disco/store/fd_store.h" /* for fd_store_fec_t */
       6             : #include "../../disco/pack/fd_microblock.h" /* for fd_txn_p_t */
       7             : 
       8             : #include "../../flamenco/accdb/fd_accdb_user.h"
       9             : #include "../../util/spad/fd_spad.h" /* for ALUTs */
      10             : 
      11             : /* fd_sched wraps all the smarts and mechanical chores around scheduling
      12             :    transactions for replay execution.  It is built on top of the
      13             :    dispatcher fd_rdisp.  The dispatcher is responsible for high
      14             :    performance lane-based scheduling of transactions.  On top of that,
      15             :    we add fork-aware management of lanes, and policies regarding which
      16             :    lanes to prioritize for execution.
      17             : 
      18             :    Conceptually, transactions in a block form a DAG.  We would like to
      19             :    make our way through a block with a sufficient degree of parallelism,
      20             :    such that the execution time of the critical path of the DAG is the
      21             :    limiting factor.  The dispatcher does a good job of emerging the
      22             :    critical path of the DAG on the fly.  Blocks are tracked by the
      23             :    dispatcher either as a block staged on a lane, or as an unstaged
      24             :    block.  When a block is staged, it will enjoy the most intelligent
      25             :    online scheduling that the dispatcher has to offer.  Lanes have to
      26             :    consist of linear chains of blocks down a fork.  So to map a fork
      27             :    tree to lanes, we will need multiple lanes.  Ideally, every branch in
      28             :    the fork tree sits on some lane.  However, memory footprint limits us
      29             :    to a few number of lanes.
      30             : 
      31             :    This module implements a state machine for ensuring that blocks enter
      32             :    into and exit ouf of lanes in an orderly fashion.  The public APIs of
      33             :    this module are invoked to drive state transitions on a small number
      34             :    of events, such as new transactions arriving, or transactions
      35             :    completing, or a block being aborted/abandoned.  We also implement
      36             :    policies for deciding which blocks get staged onto lanes, or evicted
      37             :    from lanes, as well as which lanes to prioritize for execution.
      38             : 
      39             : 
      40             :    The general order in which calls happen under the normal case is:
      41             : 
      42             :    fd_sched_fec_ingest()* ... fd_sched_txn_next_ready()* ... fd_sched_txn_done()* ...
      43             :    more ingest, more ready, more done ...
      44             :    ...
      45             :    fd_sched_txn_next_ready() indicates that the last transaction in the block is being scheduled
      46             :    fd_sched_txn_done()*
      47             :    fd_sched_block_is_done()
      48             :    end-of-block processing in caller
      49             :    fd_sched_txn_next_ready() starts returning transactions from the next block
      50             :    more ingest, more ready, more done ...
      51             :    ... */
      52             : 
      53             : #define FD_SCHED_MIN_DEPTH 478
      54             : #define FD_SCHED_MAX_DEPTH FD_RDISP_MAX_DEPTH
      55             : 
      56             : struct fd_sched;
      57             : typedef struct fd_sched fd_sched_t;
      58             : 
      59             : struct fd_sched_alut_ctx {
      60             :   fd_accdb_user_t   accdb[1];
      61             :   fd_funk_txn_xid_t xid[1];
      62             :   ulong             els; /* Effective lookup slot. */
      63             : };
      64             : typedef struct fd_sched_alut_ctx fd_sched_alut_ctx_t;
      65             : 
      66             : struct fd_sched_fec {
      67             :   ulong            bank_idx;            /* Index of the block.  Assumed to be in [0, block_cnt_max).  Caller
      68             :                                            is responsible for ensuring that bank idx is in bounds and unique
      69             :                                            across equivocated blocks. */
      70             :   ulong            parent_bank_idx;     /* Index of the parent block.  Assumed to be in [0, block_cnt_max).
      71             :                                            Caller is responsible for ensuring that parent bank idx is in
      72             :                                            bounds and unique across equivocated blocks. */
      73             :   ulong            slot;                /* Slot number of the block. */
      74             :   ulong            parent_slot;         /* Slot number of the parent block. */
      75             :   fd_store_fec_t * fec;                 /* FEC set data. */
      76             :   uint             shred_cnt;           /* Number of shreds in the FEC set. */
      77             :   uint             is_last_in_batch:1;  /* Set if this is the last FEC set in the batch; relevant because the
      78             :                                            parser should ignore trailing bytes at the end of a batch. */
      79             :   uint             is_last_in_block:1;  /* Set if this is the last FEC set in the block. */
      80             :   uint             is_first_in_block:1; /* Set if this is the first FEC set in the block.  Bank should increment refcnt for sched if such a FEC set has been ingested by sched. */
      81             : 
      82             :   fd_sched_alut_ctx_t alut_ctx[ 1 ];
      83             : };
      84             : typedef struct fd_sched_fec fd_sched_fec_t;
      85             : 
      86             : /* The state of a transaction.  Non mutually exclusive. */
      87           0 : #define FD_SCHED_TXN_EXEC_DONE      (0x0001UL)
      88           0 : #define FD_SCHED_TXN_SIGVERIFY_DONE (0x0002UL)
      89           0 : #define FD_SCHED_TXN_IS_COMMITTABLE (0x0004UL)
      90           0 : #define FD_SCHED_TXN_IS_FEES_ONLY   (0x0008UL)
      91             : #define FD_SCHED_TXN_REPLAY_DONE    (FD_SCHED_TXN_EXEC_DONE|FD_SCHED_TXN_SIGVERIFY_DONE)
      92             : 
      93             : struct fd_sched_txn_info {
      94             :    ulong flags;
      95             :    int   txn_err;
      96             :    long  tick_parsed;
      97             :    long  tick_sigverify_disp;
      98             :    long  tick_sigverify_done;
      99             :    long  tick_exec_disp;
     100             :    long  tick_exec_done;
     101             : };
     102             : typedef struct fd_sched_txn_info fd_sched_txn_info_t;
     103             : 
     104             : /* The scheduler may return one of the following types of tasks for the
     105             :    replay tile.
     106             : 
     107             :    e - passed down to exec tiles.
     108             :    i - replay completes the task immediately.
     109             :    q - replay may either do it immediately or queue the task up. */
     110           0 : #define FD_SCHED_TT_NULL          (0UL)
     111           0 : #define FD_SCHED_TT_BLOCK_START   (1UL) /* (i) Start-of-block processing. */
     112           0 : #define FD_SCHED_TT_BLOCK_END     (2UL) /* (q) End-of-block processing. */
     113           0 : #define FD_SCHED_TT_TXN_EXEC      (3UL) /* (e) Transaction execution. */
     114           0 : #define FD_SCHED_TT_TXN_SIGVERIFY (4UL) /* (e) Transaction sigverify. */
     115             : #define FD_SCHED_TT_LTHASH        (5UL) /* (e) Account lthash. */
     116           0 : #define FD_SCHED_TT_POH_HASH      (6UL) /* (e) PoH hashing. */
     117           0 : #define FD_SCHED_TT_MARK_DEAD     (7UL) /* (i) Mark the block dead. */
     118             : 
     119             : struct fd_sched_block_start {
     120             :   ulong bank_idx;        /* Same as in fd_sched_fec_t. */
     121             :   ulong parent_bank_idx; /* Same as in fd_sched_fec_t. */
     122             :   ulong slot;            /* Slot number of the block. */
     123             : };
     124             : typedef struct fd_sched_block_start fd_sched_block_start_t;
     125             : 
     126             : struct fd_sched_block_end {
     127             :   ulong bank_idx;
     128             : };
     129             : typedef struct fd_sched_block_end fd_sched_block_end_t;
     130             : 
     131             : struct fd_sched_txn_exec {
     132             :   ulong bank_idx;
     133             :   ulong slot;
     134             :   ulong txn_idx;
     135             :   ulong exec_idx;
     136             : };
     137             : typedef struct fd_sched_txn_exec fd_sched_txn_exec_t;
     138             : 
     139             : struct fd_sched_txn_sigverify {
     140             :   ulong bank_idx;
     141             :   ulong txn_idx;
     142             :   ulong exec_idx;
     143             : };
     144             : typedef struct fd_sched_txn_sigverify fd_sched_txn_sigverify_t;
     145             : 
     146             : struct fd_sched_poh_hash {
     147             :   ulong     bank_idx;
     148             :   ulong     mblk_idx;
     149             :   ulong     exec_idx;
     150             :   ulong     hashcnt;
     151             :   fd_hash_t hash[ 1 ];
     152             : };
     153             : typedef struct fd_sched_poh_hash fd_sched_poh_hash_t;
     154             : 
     155             : struct fd_sched_mark_dead {
     156             :   ulong     bank_idx;
     157             : };
     158             : typedef struct fd_sched_mark_dead fd_sched_mark_dead_t;
     159             : 
     160             : struct fd_sched_task {
     161             :   ulong task_type; /* Set to one of the task types defined above. */
     162             :   union {
     163             :     fd_sched_block_start_t   block_start[ 1 ];
     164             :     fd_sched_block_end_t     block_end[ 1 ];
     165             :     fd_sched_txn_exec_t      txn_exec[ 1 ];
     166             :     fd_sched_txn_sigverify_t txn_sigverify[ 1 ];
     167             :     fd_sched_poh_hash_t      poh_hash[ 1 ];
     168             :     fd_sched_mark_dead_t     mark_dead[ 1 ];
     169             :   };
     170             : };
     171             : typedef struct fd_sched_task fd_sched_task_t;
     172             : 
     173             : FD_PROTOTYPES_BEGIN
     174             : 
     175             : /* fd_sched_{align,footprint} return the required alignment and
     176             :    footprint in bytes for a region of memory to be used as a scheduler.
     177             :    footprint silently returns 0 if params are invalid (thus convenient
     178             :    to validate params).
     179             : 
     180             :    depth controls the reorder buffer transaction count (~1 million
     181             :    recommended for live replay, ~10k recommended for async replay).
     182             :    block_cnt_max is the maximum number of blocks that will be tracked by
     183             :    the scheduler. */
     184             : 
     185             : ulong
     186             : fd_sched_align( void );
     187             : 
     188             : ulong
     189             : fd_sched_footprint( ulong depth,           /* in [FD_SCHED_MIN_DEPTH,FD_SCHED_MAX_DEPTH] */
     190             :                     ulong block_cnt_max ); /* >= 1 */
     191             : 
     192             : /* fd_sched_new creates a sched object backed by the given memory region
     193             :    (conforming to align() and footprint()).  Returns NULL if any
     194             :    parameter is invalid. */
     195             : 
     196             : void *
     197             : fd_sched_new( void *     mem,
     198             :               fd_rng_t * rng,
     199             :               ulong      depth,
     200             :               ulong      block_cnt_max,
     201             :               ulong      exec_cnt );
     202             : 
     203             : fd_sched_t *
     204             : fd_sched_join( void * mem );
     205             : 
     206             : /* Add the data in the FEC set to the scheduler.  If is_last_fec is 1,
     207             :    then this is the last FEC set in the block.  Transactions may span
     208             :    FEC set boundaries.  The scheduler is responsible for incrementally
     209             :    parsing transactions from concatenated FEC set data.  Assumes that
     210             :    FEC sets are delivered in replay order.  That is, forks form a
     211             :    partial ordering over FEC sets: in-order per fork, but arbitrary
     212             :    ordering across forks.  The fork tree is implied by the stream of
     213             :    parent-child relationships delivered in FEC sets.  Also assumes that
     214             :    there is enough space in the scheduler to ingest the FEC set.  The
     215             :    caller should generally call fd_sched_fec_can_ingest() first.
     216             : 
     217             :    Returns 1 on success, 0 if the block is bad and should be marked
     218             :    dead. */
     219             : FD_WARN_UNUSED int
     220             : fd_sched_fec_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );
     221             : 
     222             : /* Check if there is enough space in the scheduler to ingest the data in
     223             :    the FEC set.  Returns 1 if there is, 0 otherwise.  This is a cheap
     224             :    and conservative check. */
     225             : int
     226             : fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );
     227             : 
     228             : /* Returns the number of worst-case FEC sets sched can ingest. This is a
     229             :    cheap and conservative check. */
     230             : ulong
     231             : fd_sched_can_ingest_cnt( fd_sched_t * sched );
     232             : 
     233             : /* Returns 1 if sched is drained, 0 otherwise.  A drained scheduler will
     234             :    not return more work.  Otherwise, next_ready will return more work,
     235             :    so long as there are exec tiles available. */
     236             : int
     237             : fd_sched_is_drained( fd_sched_t * sched );
     238             : 
     239             : /* Obtain a transaction eligible for execution.  This implies that all
     240             :    prior transactions with w-r or w-w conflicts have completed.
     241             :    Information regarding the scheduled transaction is written to the out
     242             :    pointer.  Returns 1 on success, 0 on failure.  Failures are generally
     243             :    transient and non-fatal, and are simply an indication that no
     244             :    transaction is ready for execution yet.  When in-flight transactions
     245             :    retire or when more FEC sets are ingested, more transactions may
     246             :    become ready for execution.
     247             : 
     248             :    Transactions on the same fork will be returned in a way that
     249             :    maintains the serial fiction.  That is, reordering can happen, but
     250             :    only within the constraint that transactions appear to be ready in
     251             :    the order in which they occur in the block.  Transactions from
     252             :    different forks may interleave, and the caller should be prepared to
     253             :    switch execution context in response to interleavings.  The scheduler
     254             :    will barrier on block boundaries, in the sense that transactions from
     255             :    a subsequent block will not be returned for execution until all
     256             :    transactions from the previous block have completed.  This gives the
     257             :    caller a chance to perform end-of-block processing before
     258             :    transactions from a subsequent block start executing.  In general,
     259             :    the caller should check if the last transaction in the current block
     260             :    is done, and if so, do end-of-block processing before calling this
     261             :    function to start the next block.
     262             : 
     263             :    In addition to returning transactions for execution, this function
     264             :    may also return a sigverify task.  Sigverify can be completed
     265             :    aynschronously outside the critical path of transaction execution, as
     266             :    long as every transaction in a block passes sigverify before we
     267             :    commit the block.  The scheduler prioritizes actual execution of
     268             :    transactions over sigverify, and in general sigverify tasks are only
     269             :    returned when no real transaction can be dispatched.  In other words,
     270             :    the scheduler tries to exploit idle cycles in the exec tiles during
     271             :    times of low parallelism critical path progression.
     272             : 
     273             :    This function may also return a PoH hashing task.  These tasks are
     274             :    lower priority than transaction execution, but higher priority than
     275             :    sigverify.  This is because sigverify tasks are generally bite-sized,
     276             :    whereas PoH hashing can be longer, so we would like to get started on
     277             :    hashing sooner rather than later. */
     278             : ulong
     279             : fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out );
     280             : 
     281             : /* Mark a task as complete.  For transaction execution, this means that
     282             :    the effects of the execution are now visible on any core that could
     283             :    execute a subsequent transaction.  Returns 0 on success, -1 if given
     284             :    the result of the task, the block turns out to be bad.  -1 is only
     285             :    returned from PoH tasks.
     286             : 
     287             :    If a block has been abandoned or marked dead for any reason, it'll be
     288             :    pruned the moment in-flight task count hits 0 due to the last task
     289             :    completing.  Then, in the immediate ensuing stem run loop,
     290             :    sched_pruned_next() will return the index for the corresponding bank
     291             :    so the refcnt can be decremented for sched.
     292             : 
     293             :    The transaction at the given index may be freed upon return from this
     294             :    function.  Nonetheless, as long as there is no intervening FEC
     295             :    ingestion, it would still be safe to query the transaction using
     296             :    get_txn(). */
     297             : int
     298             : fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx, void * data );
     299             : 
     300             : /* Abandon a block.  This means that we are no longer interested in
     301             :    executing the block.  This also implies that any block which chains
     302             :    off of the provided block shall be abandoned.  This is mainly used
     303             :    when a block is aborted because we decided that it would be a
     304             :    dead/invalid block, and so there's no point in spending resources
     305             :    executing it.  The scheduler will no longer return transactions from
     306             :    abandoned blocks for execution.  This should only be invoked on an
     307             :    actively replayed block, and should only be invoked once on it.
     308             : 
     309             :    An abandoned block will be pruned from sched as soon as, and only if,
     310             :    the block has no more in-flight tasks associated with it.  No sooner,
     311             :    no later.  In the immediate ensuing stem run loop,
     312             :    sched_pruned_next() will return the index for the corresponding bank
     313             :    so the refcnt can be decremented for sched.  After that point, the
     314             :    bank_idx may be recycled for another block. */
     315             : void
     316             : fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx );
     317             : 
     318             : /* Add a block as immediately done to the scheduler.  This is useful for
     319             :    installing the snapshot slot, or for informing the scheduler of a
     320             :    packed leader block.  Parent block should be ULONG_MAX for the
     321             :    snapshot slot, and otherwise a block that hasn't been pruned. */
     322             : void
     323             : fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx, ulong slot );
     324             : 
     325             : /* Advance the root, pruning all blocks across forks that do not descend
     326             :    from the new root.  Assumes the new root is in the fork tree and
     327             :    connected to the current root.  Also assumes that there are no more
     328             :    in-flight transactions from the soon-to-be-pruned blocks.  This
     329             :    should be called after root_notify() and the caller is responsible
     330             :    for figuring out the new root to safely prune to. */
     331             : void
     332             : fd_sched_advance_root( fd_sched_t * sched, ulong root_idx );
     333             : 
     334             : /* Notify the scheduler of a new root.  This has the effect of calling
     335             :    abandon() on all minority forks that do not descend from the new
     336             :    root.  Shortly after a call to this function, in-flight transactions
     337             :    from these abandoned blocks should retire from the execution
     338             :    pipeline, and the new root will be safe for pruning. */
     339             : void
     340             : fd_sched_root_notify( fd_sched_t * sched, ulong root_idx );
     341             : 
     342             : /* Returns the index of a bank whose refcnt should be decremented for
     343             :    sched.  This function should be called in a loop to drain all
     344             :    outstanding refcnt decrements before any other sched API is called in
     345             :    a stem run loop.  Returns ULONG_MAX when there are no more
     346             :    outstanding refrences from sched and the loop should break. */
     347             : ulong
     348             : fd_sched_pruned_block_next( fd_sched_t * sched );
     349             : 
     350             : void
     351             : fd_sched_set_poh_params( fd_sched_t * sched, ulong bank_idx, ulong tick_height, ulong max_tick_height, ulong hashes_per_tick, fd_hash_t const * start_poh );
     352             : 
     353             : fd_txn_p_t *
     354             : fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx );
     355             : 
     356             : fd_sched_txn_info_t *
     357             : fd_sched_get_txn_info( fd_sched_t * sched, ulong txn_idx );
     358             : 
     359             : fd_hash_t *
     360             : fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx );
     361             : 
     362             : uint
     363             : fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx );
     364             : 
     365             : void
     366             : fd_sched_metrics_write( fd_sched_t * sched );
     367             : 
     368             : /* Serialize the current state as a cstr to the returned buffer.  Caller
     369             :    may read from the buffer until the next invocation of any fd_sched
     370             :    function. */
     371             : char *
     372             : fd_sched_get_state_cstr( fd_sched_t * sched );
     373             : 
     374             : void *
     375             : fd_sched_leave( fd_sched_t * sched );
     376             : 
     377             : void *
     378             : fd_sched_delete( void * mem );
     379             : 
     380             : FD_PROTOTYPES_END
     381             : 
     382             : #endif /* HEADER_fd_src_discof_replay_fd_sched_h */

Generated by: LCOV version 1.14