LCOV - code coverage report
Current view: top level - discof/replay - fd_sched.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 7 0.0 %
Date: 2026-02-12 05:50:35 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_discof_replay_fd_sched_h
       2             : #define HEADER_fd_src_discof_replay_fd_sched_h
       3             : 
       4             : #include "fd_rdisp.h"
       5             : #include "../../disco/store/fd_store.h" /* for fd_store_fec_t */
       6             : #include "../../disco/pack/fd_microblock.h" /* for fd_txn_p_t */
       7             : 
       8             : #include "../../flamenco/accdb/fd_accdb_user.h"
       9             : #include "../../util/spad/fd_spad.h" /* for ALUTs */
      10             : 
      11             : /* fd_sched wraps all the smarts and mechanical chores around scheduling
      12             :    transactions for replay execution.  It is built on top of the
      13             :    dispatcher fd_rdisp.  The dispatcher is responsible for high
      14             :    performance lane-based scheduling of transactions.  On top of that,
      15             :    we add fork-aware management of lanes, and policies regarding which
      16             :    lanes to prioritize for execution.
      17             : 
      18             :    Conceptually, transactions in a block form a DAG.  We would like to
      19             :    make our way through a block with a sufficient degree of parallelism,
      20             :    such that the execution time of the critical path of the DAG is the
      21             :    limiting factor.  The dispatcher does a good job of emerging the
      22             :    critical path of the DAG on the fly.  Blocks are tracked by the
      23             :    dispatcher either as a block staged on a lane, or as an unstaged
      24             :    block.  When a block is staged, it will enjoy the most intelligent
      25             :    online scheduling that the dispatcher has to offer.  Lanes have to
      26             :    consist of linear chains of blocks down a fork.  So to map a fork
      27             :    tree to lanes, we will need multiple lanes.  Ideally, every branch in
      28             :    the fork tree sits on some lane.  However, memory footprint limits us
      29             :    to a few number of lanes.
      30             : 
      31             :    This module implements a state machine for ensuring that blocks enter
      32             :    into and exit ouf of lanes in an orderly fashion.  The public APIs of
      33             :    this module are invoked to drive state transitions on a small number
      34             :    of events, such as new transactions arriving, or transactions
      35             :    completing, or a block being aborted/abandoned.  We also implement
      36             :    policies for deciding which blocks get staged onto lanes, or evicted
      37             :    from lanes, as well as which lanes to prioritize for execution.
      38             : 
      39             : 
      40             :    The general order in which calls happen under the normal case is:
      41             : 
      42             :    fd_sched_fec_ingest()* ... fd_sched_txn_next_ready()* ... fd_sched_txn_done()* ...
      43             :    more ingest, more ready, more done ...
      44             :    ...
      45             :    fd_sched_txn_next_ready() indicates that the last transaction in the block is being scheduled
      46             :    fd_sched_txn_done()*
      47             :    fd_sched_block_is_done()
      48             :    end-of-block processing in caller
      49             :    fd_sched_txn_next_ready() starts returning transactions from the next block
      50             :    more ingest, more ready, more done ...
      51             :    ... */
      52             : 
      53             : #define FD_SCHED_MIN_DEPTH 478
      54             : #define FD_SCHED_MAX_DEPTH FD_RDISP_MAX_DEPTH
      55             : 
      56             : struct fd_sched;
      57             : typedef struct fd_sched fd_sched_t;
      58             : 
      59             : 
      60             : struct fd_sched_alut_ctx {
      61             :   fd_accdb_user_t   accdb[1];
      62             :   fd_funk_txn_xid_t xid[1];
      63             :   ulong             els; /* Effective lookup slot. */
      64             : };
      65             : typedef struct fd_sched_alut_ctx fd_sched_alut_ctx_t;
      66             : 
      67             : struct fd_sched_fec {
      68             :   ulong            bank_idx;            /* Index of the block.  Assumed to be in [0, block_cnt_max).  Caller
      69             :                                            is responsible for ensuring that bank idx is in bounds and unique
      70             :                                            across equivocated blocks. */
      71             :   ulong            parent_bank_idx;     /* Index of the parent block.  Assumed to be in [0, block_cnt_max).
      72             :                                            Caller is responsible for ensuring that parent bank idx is in
      73             :                                            bounds and unique across equivocated blocks. */
      74             :   ulong            slot;                /* Slot number of the block. */
      75             :   ulong            parent_slot;         /* Slot number of the parent block. */
      76             :   fd_store_fec_t * fec;                 /* FEC set data. */
      77             :   uint             shred_cnt;           /* Number of shreds in the FEC set. */
      78             :   uint             is_last_in_batch:1;  /* Set if this is the last FEC set in the batch; relevant because the
      79             :                                            parser should ignore trailing bytes at the end of a batch. */
      80             :   uint             is_last_in_block:1;  /* Set if this is the last FEC set in the block. */
      81             :   uint             is_first_in_block:1; /* Set if this is the first FEC set in the block.  Bank should increment refcnt for sched if such a FEC set has been ingested by sched. */
      82             : 
      83             :   fd_sched_alut_ctx_t alut_ctx[ 1 ];
      84             : };
      85             : typedef struct fd_sched_fec fd_sched_fec_t;
      86             : 
      87             : /* The scheduler may return one of the following types of tasks for the
      88             :    replay tile.
      89             : 
      90             :    e - passed down to exec tiles.
      91             :    i - replay completes the task immediately.
      92             :    q - replay may either do it immediately or queue the task up. */
      93           0 : #define FD_SCHED_TT_NULL          (0UL)
      94           0 : #define FD_SCHED_TT_BLOCK_START   (1UL) /* (i) Start-of-block processing. */
      95           0 : #define FD_SCHED_TT_BLOCK_END     (2UL) /* (q) End-of-block processing. */
      96           0 : #define FD_SCHED_TT_TXN_EXEC      (3UL) /* (e) Transaction execution. */
      97           0 : #define FD_SCHED_TT_TXN_SIGVERIFY (4UL) /* (e) Transaction sigverify. */
      98             : #define FD_SCHED_TT_LTHASH        (5UL) /* (e) Account lthash. */
      99           0 : #define FD_SCHED_TT_POH_HASH      (6UL) /* (e) PoH hashing. */
     100           0 : #define FD_SCHED_TT_MARK_DEAD     (7UL) /* (i) Mark the block dead. */
     101             : 
     102             : struct fd_sched_block_start {
     103             :   ulong bank_idx;        /* Same as in fd_sched_fec_t. */
     104             :   ulong parent_bank_idx; /* Same as in fd_sched_fec_t. */
     105             :   ulong slot;            /* Slot number of the block. */
     106             : };
     107             : typedef struct fd_sched_block_start fd_sched_block_start_t;
     108             : 
     109             : struct fd_sched_block_end {
     110             :   ulong bank_idx;
     111             : };
     112             : typedef struct fd_sched_block_end fd_sched_block_end_t;
     113             : 
     114             : struct fd_sched_txn_exec {
     115             :   ulong bank_idx;
     116             :   ulong slot;
     117             :   ulong txn_idx;
     118             :   ulong exec_idx;
     119             : };
     120             : typedef struct fd_sched_txn_exec fd_sched_txn_exec_t;
     121             : 
     122             : struct fd_sched_txn_sigverify {
     123             :   ulong bank_idx;
     124             :   ulong txn_idx;
     125             :   ulong exec_idx;
     126             : };
     127             : typedef struct fd_sched_txn_sigverify fd_sched_txn_sigverify_t;
     128             : 
     129             : struct fd_sched_poh_hash {
     130             :   ulong     bank_idx;
     131             :   ulong     mblk_idx;
     132             :   ulong     exec_idx;
     133             :   ulong     hashcnt;
     134             :   fd_hash_t hash[ 1 ];
     135             : };
     136             : typedef struct fd_sched_poh_hash fd_sched_poh_hash_t;
     137             : 
     138             : struct fd_sched_mark_dead {
     139             :   ulong     bank_idx;
     140             : };
     141             : typedef struct fd_sched_mark_dead fd_sched_mark_dead_t;
     142             : 
     143             : struct fd_sched_task {
     144             :   ulong task_type; /* Set to one of the task types defined above. */
     145             :   union {
     146             :     fd_sched_block_start_t   block_start[ 1 ];
     147             :     fd_sched_block_end_t     block_end[ 1 ];
     148             :     fd_sched_txn_exec_t      txn_exec[ 1 ];
     149             :     fd_sched_txn_sigverify_t txn_sigverify[ 1 ];
     150             :     fd_sched_poh_hash_t      poh_hash[ 1 ];
     151             :     fd_sched_mark_dead_t     mark_dead[ 1 ];
     152             :   };
     153             : };
     154             : typedef struct fd_sched_task fd_sched_task_t;
     155             : 
     156             : FD_PROTOTYPES_BEGIN
     157             : 
     158             : /* fd_sched_{align,footprint} return the required alignment and
     159             :    footprint in bytes for a region of memory to be used as a scheduler.
     160             :    footprint silently returns 0 if params are invalid (thus convenient
     161             :    to validate params).
     162             : 
     163             :    depth controls the reorder buffer transaction count (~1 million
     164             :    recommended for live replay, ~10k recommended for async replay).
     165             :    block_cnt_max is the maximum number of blocks that will be tracked by
     166             :    the scheduler. */
     167             : 
     168             : ulong
     169             : fd_sched_align( void );
     170             : 
     171             : ulong
     172             : fd_sched_footprint( ulong depth,           /* in [FD_SCHED_MIN_DEPTH,FD_SCHED_MAX_DEPTH] */
     173             :                     ulong block_cnt_max ); /* >= 1 */
     174             : 
     175             : /* fd_sched_new creates a sched object backed by the given memory region
     176             :    (conforming to align() and footprint()).  Returns NULL if any
     177             :    parameter is invalid. */
     178             : 
     179             : void *
     180             : fd_sched_new( void * mem,
     181             :               ulong  depth,
     182             :               ulong  block_cnt_max,
     183             :               ulong  exec_cnt );
     184             : 
     185             : fd_sched_t *
     186             : fd_sched_join( void * mem );
     187             : 
     188             : /* Add the data in the FEC set to the scheduler.  If is_last_fec is 1,
     189             :    then this is the last FEC set in the block.  Transactions may span
     190             :    FEC set boundaries.  The scheduler is responsible for incrementally
     191             :    parsing transactions from concatenated FEC set data.  Assumes that
     192             :    FEC sets are delivered in replay order.  That is, forks form a
     193             :    partial ordering over FEC sets: in-order per fork, but arbitrary
     194             :    ordering across forks.  The fork tree is implied by the stream of
     195             :    parent-child relationships delivered in FEC sets.  Also assumes that
     196             :    there is enough space in the scheduler to ingest the FEC set.  The
     197             :    caller should generally call fd_sched_fec_can_ingest() first.
     198             : 
     199             :    Returns 1 on success, 0 if the block is bad and should be marked
     200             :    dead. */
     201             : FD_WARN_UNUSED int
     202             : fd_sched_fec_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );
     203             : 
     204             : /* Check if there is enough space in the scheduler to ingest the data in
     205             :    the FEC set.  Returns 1 if there is, 0 otherwise.  This is a cheap
     206             :    and conservative check. */
     207             : int
     208             : fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec );
     209             : 
     210             : /* Check if there is enough space in the scheduler to ingest fec_cnt
     211             :    worst-case FEC sets.  Returns 1 if there is, 0 otherwise.  This is a
     212             :    cheap and conservative check, and has less precision than
     213             :    fd_sched_fec_can_ingest(). */
     214             : int
     215             : fd_sched_can_ingest( fd_sched_t * sched, ulong fec_cnt );
     216             : 
     217             : /* Obtain a transaction eligible for execution.  This implies that all
     218             :    prior transactions with w-r or w-w conflicts have completed.
     219             :    Information regarding the scheduled transaction is written to the out
     220             :    pointer.  Returns 1 on success, 0 on failure.  Failures are generally
     221             :    transient and non-fatal, and are simply an indication that no
     222             :    transaction is ready for execution yet.  When in-flight transactions
     223             :    retire or when more FEC sets are ingested, more transactions may
     224             :    become ready for execution.
     225             : 
     226             :    Transactions on the same fork will be returned in a way that
     227             :    maintains the serial fiction.  That is, reordering can happen, but
     228             :    only within the constraint that transactions appear to be ready in
     229             :    the order in which they occur in the block.  Transactions from
     230             :    different forks may interleave, and the caller should be prepared to
     231             :    switch execution context in response to interleavings.  The scheduler
     232             :    will barrier on block boundaries, in the sense that transactions from
     233             :    a subsequent block will not be returned for execution until all
     234             :    transactions from the previous block have completed.  This gives the
     235             :    caller a chance to perform end-of-block processing before
     236             :    transactions from a subsequent block start executing.  In general,
     237             :    the caller should check if the last transaction in the current block
     238             :    is done, and if so, do end-of-block processing before calling this
     239             :    function to start the next block.
     240             : 
     241             :    In addition to returning transactions for execution, this function
     242             :    may also return a sigverify task.  Sigverify can be completed
     243             :    aynschronously outside the critical path of transaction execution, as
     244             :    long as every transaction in a block passes sigverify before we
     245             :    commit the block.  The scheduler prioritizes actual execution of
     246             :    transactions over sigverify, and in general sigverify tasks are only
     247             :    returned when no real transaction can be dispatched.  In other words,
     248             :    the scheduler tries to exploit idle cycles in the exec tiles during
     249             :    times of low parallelism critical path progression.
     250             : 
     251             :    This function may also return a PoH hashing task.  These tasks are
     252             :    lower priority than transaction execution, but higher priority than
     253             :    sigverify.  This is because sigverify tasks are generally bite-sized,
     254             :    whereas PoH hashing can be longer, so we would like to get started on
     255             :    hashing sooner rather than later. */
     256             : ulong
     257             : fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out );
     258             : 
     259             : /* Mark a task as complete.  For transaction execution, this means that
     260             :    the effects of the execution are now visible on any core that could
     261             :    execute a subsequent transaction.  Returns 0 on success, -1 if given
     262             :    the result of the task, the block turns out to be bad.  -1 is only
     263             :    returned from PoH tasks.
     264             : 
     265             :    If a block has been abandoned or marked dead for any reason, it'll be
     266             :    pruned the moment in-flight task count hits 0 due to the last task
     267             :    completing.  Then, in the immediate ensuing stem run loop,
     268             :    sched_pruned_next() will return the index for the corresponding bank
     269             :    so the refcnt can be decremented for sched. */
     270             : int
     271             : fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx, void * data );
     272             : 
     273             : /* Abandon a block.  This means that we are no longer interested in
     274             :    executing the block.  This also implies that any block which chains
     275             :    off of the provided block shall be abandoned.  This is mainly used
     276             :    when a block is aborted because we decided that it would be a
     277             :    dead/invalid block, and so there's no point in spending resources
     278             :    executing it.  The scheduler will no longer return transactions from
     279             :    abandoned blocks for execution.  This should only be invoked on an
     280             :    actively replayed block, and should only be invoked once on it.
     281             : 
     282             :    An abandoned block will be pruned from sched as soon as, and only if,
     283             :    the block has no more in-flight tasks associated with it.  No sooner,
     284             :    no later.  In the immediate ensuing stem run loop,
     285             :    sched_pruned_next() will return the index for the corresponding bank
     286             :    so the refcnt can be decremented for sched.  After that point, the
     287             :    bank_idx may be recycled for another block. */
     288             : void
     289             : fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx );
     290             : 
     291             : /* Add a block as immediately done to the scheduler.  This is useful for
     292             :    installing the snapshot slot, or for informing the scheduler of a
     293             :    packed leader block.  Parent block should be ULONG_MAX for the
     294             :    snapshot slot, and otherwise a block that hasn't been pruned. */
     295             : void
     296             : fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx, ulong slot );
     297             : 
     298             : /* Advance the root, pruning all blocks across forks that do not descend
     299             :    from the new root.  Assumes the new root is in the fork tree and
     300             :    connected to the current root.  Also assumes that there are no more
     301             :    in-flight transactions from the soon-to-be-pruned blocks.  This
     302             :    should be called after root_notify() and the caller is responsible
     303             :    for figuring out the new root to safely prune to. */
     304             : void
     305             : fd_sched_advance_root( fd_sched_t * sched, ulong root_idx );
     306             : 
     307             : /* Notify the scheduler of a new root.  This has the effect of calling
     308             :    abandon() on all minority forks that do not descend from the new
     309             :    root.  Shortly after a call to this function, in-flight transactions
     310             :    from these abandoned blocks should retire from the execution
     311             :    pipeline, and the new root will be safe for pruning. */
     312             : void
     313             : fd_sched_root_notify( fd_sched_t * sched, ulong root_idx );
     314             : 
     315             : /* Returns the index of a bank whose refcnt should be decremented for
     316             :    sched.  This function should be called in a loop to drain all
     317             :    outstanding refcnt decrements before any other sched API is called in
     318             :    a stem run loop.  Returns ULONG_MAX when there are no more
     319             :    outstanding refrences from sched and the loop should break. */
     320             : ulong
     321             : fd_sched_pruned_block_next( fd_sched_t * sched );
     322             : 
     323             : void
     324             : fd_sched_set_poh_params( fd_sched_t * sched, ulong bank_idx, ulong tick_height, ulong max_tick_height, ulong hashes_per_tick, fd_hash_t const * start_poh );
     325             : 
     326             : fd_txn_p_t *
     327             : fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx );
     328             : 
     329             : fd_hash_t *
     330             : fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx );
     331             : 
     332             : uint
     333             : fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx );
     334             : 
     335             : /* Serialize the current state as a cstr to the returned buffer.  Caller
     336             :    may read from the buffer until the next invocation of any fd_sched
     337             :    function. */
     338             : char *
     339             : fd_sched_get_state_cstr( fd_sched_t * sched );
     340             : 
     341             : void *
     342             : fd_sched_leave( fd_sched_t * sched );
     343             : 
     344             : void *
     345             : fd_sched_delete( void * mem );
     346             : 
     347             : FD_PROTOTYPES_END
     348             : 
     349             : #endif /* HEADER_fd_src_discof_replay_fd_sched_h */

Generated by: LCOV version 1.14