LCOV - code coverage report
Current view: top level - discof/replay - fd_sched.c (source / functions)
Test:         cov.lcov
Date:         2026-02-12 05:50:35

                 Hit    Total    Coverage
Lines:             0     1638       0.0 %
Functions:         0       64       0.0 %

          Line data    Source code
       1             : #include <stdio.h> /* for vsnprintf */
       2             : #include <stdarg.h> /* for va_list */
       3             : 
       4             : #include "fd_sched.h"
       5             : #include "fd_execrp.h" /* for poh hash value */
       6             : #include "../../util/math/fd_stat.h" /* for sorted search */
       7             : #include "../../disco/fd_disco_base.h" /* for FD_MAX_TXN_PER_SLOT */
        8             : #include "../../disco/metrics/fd_metrics.h" /* for fd_metrics_convert_seconds_to_ticks, etc. */
       9             : #include "../../discof/poh/fd_poh.h" /* for MAX_SKIPPED_TICKS */
      10             : #include "../../flamenco/runtime/fd_runtime.h" /* for fd_runtime_load_txn_address_lookup_tables */
      11             : #include "../../flamenco/runtime/sysvar/fd_sysvar_slot_hashes.h" /* for ALUTs */
      12             : 
      13           0 : #define FD_SCHED_MAX_STAGING_LANES_LOG     (2)
      14           0 : #define FD_SCHED_MAX_STAGING_LANES         (1UL<<FD_SCHED_MAX_STAGING_LANES_LOG)
      15           0 : #define FD_SCHED_MAX_EXEC_TILE_CNT         (64UL)
      16           0 : #define FD_SCHED_MAX_PRINT_BUF_SZ          (2UL<<20)
      17             : 
      18             : #define FD_SCHED_MAX_MBLK_PER_SLOT             (MAX_SKIPPED_TICKS)
      19           0 : #define FD_SCHED_MAX_MBLK_IN_PROGRESS_PER_SLOT (2UL*FD_SCHED_MAX_EXEC_TILE_CNT)
      20           0 : #define FD_SCHED_MAX_POH_HASHES_PER_TASK       (4096UL) /* This seems to be the sweet spot. */
      21             : 
      22             : /* 64 ticks per slot, and a single gigantic microblock containing min
      23             :    size transactions. */
      24             : FD_STATIC_ASSERT( FD_MAX_TXN_PER_SLOT_SHRED==((FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT-65UL*sizeof(fd_microblock_hdr_t))/FD_TXN_MIN_SERIALIZED_SZ), max_txn_per_slot_shred );
      25             : 
      26             : /* We size the buffer to be able to hold residual data from the previous
      27             :    FEC set that only becomes parseable after the next FEC set is
      28             :    ingested, as well as the incoming FEC set.  The largest minimally
      29             :    parseable unit of data is a transaction.  So that much data may
      30             :    straddle FEC set boundaries.  Other minimally parseable units of data
      31             :    include the microblock header and the microblock count within a
      32             :    batch. */
      33           0 : #define FD_SCHED_MAX_PAYLOAD_PER_FEC       (FD_STORE_DATA_MAX)
      34             : #define FD_SCHED_MAX_FEC_BUF_SZ            (FD_SCHED_MAX_PAYLOAD_PER_FEC+FD_TXN_MTU)
      35             : FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(fd_microblock_hdr_t), resize buffer for residual data );
      36             : FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(ulong),               resize buffer for residual data );
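
/* Illustrative sketch, not part of this file: one way the residual-data
   handling described above can be realized.  At most FD_TXN_MTU bytes
   of unparsed residue from the previous FEC set are slid to the front
   of the buffer and the incoming payload (at most
   FD_SCHED_MAX_PAYLOAD_PER_FEC bytes, checked by the caller) is
   appended after it, which is why FD_SCHED_MAX_FEC_BUF_SZ carries the
   extra FD_TXN_MTU bytes.  The helper and its parameter names are
   hypothetical; the real parser state lives in fd_sched_block_t below. */
#include <string.h> /* for memmove, memcpy (sketch only) */
FD_FN_UNUSED static void
fec_buf_refill( uchar *       fec_buf,      /* FD_SCHED_MAX_FEC_BUF_SZ bytes  */
                uint *        fec_buf_sz,   /* valid bytes in fec_buf         */
                uint *        fec_buf_soff, /* start of unparsed bytes        */
                uchar const * payload,      /* incoming FEC payload           */
                ulong         payload_sz ) {
  uint residual_sz = *fec_buf_sz - *fec_buf_soff;           /* residue is at most FD_TXN_MTU  */
  memmove( fec_buf, fec_buf + *fec_buf_soff, residual_sz ); /* slide residue to the front     */
  memcpy( fec_buf + residual_sz, payload, payload_sz );     /* append the incoming FEC data   */
  *fec_buf_sz   = residual_sz + (uint)payload_sz;           /* new valid size                 */
  *fec_buf_soff = 0U;                                       /* parsing resumes at offset 0    */
}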
      37             : 
      38           0 : #define FD_SCHED_MAX_TXN_PER_FEC           ((FD_SCHED_MAX_PAYLOAD_PER_FEC-1UL)/FD_TXN_MIN_SERIALIZED_SZ+1UL) /* 478 */
      39             : 
      40             : FD_STATIC_ASSERT( FD_SCHED_MIN_DEPTH>=FD_SCHED_MAX_TXN_PER_FEC, limits );
      41             : FD_STATIC_ASSERT( FD_SCHED_MAX_DEPTH<=FD_RDISP_MAX_DEPTH,       limits );
      42             : 
      43           0 : #define FD_SCHED_MAGIC (0xace8a79c181f89b6UL) /* echo -n "fd_sched_v0" | sha512sum | head -c 16 */
      44             : 
      45           0 : #define FD_SCHED_OK          (0)
      46           0 : #define FD_SCHED_AGAIN_LATER (1)
      47           0 : #define FD_SCHED_BAD_BLOCK   (2)
      48             : 
      49             : 
      50             : /* Structs. */
      51             : 
      52             : struct fd_sched_mblk {
      53             :   ulong end_txn_idx; /* non-inclusive parse idx */
      54             :   ulong hashcnt; /* number of hashes to do, excluding the final mixin */
      55             :   fd_hash_t end_hash[ 1 ];
      56             : };
      57             : typedef struct fd_sched_mblk fd_sched_mblk_t;
      58             : 
      59             : struct fd_sched_mblk_in_progress {
      60             :   ulong curr_txn_idx; /* next up to be added into bmtree, parse idx */
      61             :   ulong curr_hashcnt;
      62             :   fd_hash_t curr_hash[ 1 ];
      63             :   uint curr_sig_cnt;
      64             :   uint mblk_idx; /* pointer back to the original mblk descriptor */
      65             :   uint next; /* slist */
      66             :   int  is_tick;
      67             : };
      68             : typedef struct fd_sched_mblk_in_progress fd_sched_mblk_in_progress_t;
      69             : 
      70             : #define SLIST_NAME  mblk_in_progress_slist
      71             : #define SLIST_ELE_T fd_sched_mblk_in_progress_t
      72           0 : #define SLIST_IDX_T uint
      73           0 : #define SLIST_NEXT  next
      74             : #include "../../util/tmpl/fd_slist.c"
      75             : 
      76             : #define SET_NAME txn_bitset
      77             : #define SET_MAX  FD_SCHED_MAX_DEPTH
      78             : #include "../../util/tmpl/fd_set.c"
      79             : 
      80             : #define SET_NAME mblk_in_progress_bitset
      81           0 : #define SET_MAX  FD_SCHED_MAX_MBLK_IN_PROGRESS_PER_SLOT
      82             : #include "../../util/tmpl/fd_set.c"
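
/* For orientation, the fd_set.c template above generates a fixed-size
   bitset keyed by index.  Rough usage sketch; txn_bitset_new and
   txn_bitset_join are exercised verbatim later in this file, while
   txn_bitset_insert/txn_bitset_test are assumed to follow the usual
   template naming:

     txn_bitset_t done[ txn_bitset_word_cnt ];
     txn_bitset_join( txn_bitset_new( done ) );
     txn_bitset_insert( done, txn_idx );              // mark txn_idx as done
     int is_done = txn_bitset_test( done, txn_idx );  // membership query
*/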
      83             : 
      84             : struct fd_sched_block {
      85             :   ulong               slot;
      86             :   ulong               parent_slot;
      87             :   ulong               parent_idx;  /* Index of the parent in the pool. */
      88             :   ulong               child_idx;   /* Index of the left-child in the pool. */
      89             :   ulong               sibling_idx; /* Index of the right-sibling in the pool. */
      90             : 
      91             :   /* Counters. */
      92             :   uint                txn_parsed_cnt;
      93             :   /*                  txn_queued_cnt = txn_parsed_cnt-txn_in_flight_cnt-txn_done_cnt */
      94             :   uint                txn_exec_in_flight_cnt;
      95             :   uint                txn_exec_done_cnt;
      96             :   uint                txn_sigverify_in_flight_cnt;
      97             :   uint                txn_sigverify_done_cnt;
      98             :   uint                poh_hashing_in_flight_cnt;
      99             :   uint                poh_hashing_done_cnt;
     100             :   uint                poh_hash_cmp_done_cnt; /* poh_hashing_done_cnt==poh_hash_cmp_done_cnt+len(mixin_in_progress) */
     101             :   uint                txn_done_cnt; /* A transaction is considered done when all types of tasks associated with it are done. */
     102             :   uint                shred_cnt;
     103             :   uint                fec_cnt;
      104             :   uint                mblk_cnt;      /* Total number of microblocks, including ticks and non-ticks. */
     105             :   uint                mblk_tick_cnt; /* Total number of tick microblocks. */
     106             :   uint                mblk_unhashed_cnt; /* mblk_cnt==mblk_unhashed_cnt+len(hashing_in_progress)+len(mixin_in_progress)+poh_hash_cmp_done_cnt */
     107             :   ulong               hashcnt; /* How many hashes this block wants replay to do.  A mixin/record counts as one hash. */
     108             :   ulong               txn_pool_max_popcnt;   /* Peak transaction pool occupancy during the time this block was replaying. */
     109             :   ulong               block_pool_max_popcnt; /* Peak block pool occupancy. */
     110             :   ulong               txn_idx[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */
     111             :   long                txn_disp_ticks[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */
     112             :   long                txn_done_ticks[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */
     113             :   fd_ed25519_sig_t    txn_sigs[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */
     114             : 
     115             :   /* PoH verify. */
     116             :   fd_hash_t start_hash[ 1 ];
     117             :   fd_sched_mblk_t mblk_pool[ FD_SCHED_MAX_MBLK_PER_SLOT ]; /* Indexed by parse order. */
     118             :   mblk_in_progress_slist_t mblks_hashing_in_progress[ 1 ]; /* Microblocks progress from the unhashed state into hashing ... */
     119             :   mblk_in_progress_slist_t mblks_mixin_in_progress[ 1 ];   /* ... and then from hashing into the mixin queue.  FIFO. */
     120             :   mblk_in_progress_bitset_t mblk_in_progress_pool_free_bitset[ mblk_in_progress_bitset_word_cnt ];
     121             :   fd_sched_mblk_in_progress_t mblk_in_progress_pool[ FD_SCHED_MAX_MBLK_IN_PROGRESS_PER_SLOT ];
     122             :   uchar bmtree_mem[ FD_BMTREE_COMMIT_FOOTPRINT(0) ] __attribute__((aligned(FD_BMTREE_COMMIT_ALIGN)));
     123             :   fd_bmtree_commit_t * bmtree;
     124             :   ulong prev_tick_hashcnt;
     125             :   ulong curr_tick_hashcnt; /* Starts at 0, accumulates hashcnt, resets to 0 on the next tick. */
     126             :   ulong tick_height;       /* Block is built off of a parent block with this many ticks. */
     127             :   ulong max_tick_height;   /* Block should end with precisely this many ticks. */
     128             :   ulong hashes_per_tick;   /* Fixed per block, feature gated, known after bank clone. */
     129             :   int inconsistent_hashes_per_tick;
     130             : 
     131             :   /* Parser state. */
     132             :   uchar               txn[ FD_TXN_MAX_SZ ] __attribute__((aligned(alignof(fd_txn_t))));
     133             :   ulong               mblks_rem;    /* Number of microblocks remaining in the current batch. */
     134             :   ulong               txns_rem;     /* Number of transactions remaining in the current microblock. */
     135             :   fd_acct_addr_t      aluts[ 256 ]; /* Resolve ALUT accounts into this buffer for more parallelism. */
     136             :   uint                fec_buf_sz;   /* Size of the fec_buf in bytes. */
     137             :   uint                fec_buf_soff; /* Starting offset into fec_buf for unparsed transactions. */
      138             :   uint                fec_buf_boff; /* Byte offset into raw block data of the first byte currently in fec_buf. */
     139             :   uint                fec_eob:1;    /* FEC end-of-batch: set if the last FEC set in the batch is being
     140             :                                        ingested. */
     141             :   uint                fec_sob:1;    /* FEC start-of-batch: set if the parser expects to be receiving a new
     142             :                                        batch. */
     143             : 
     144             :   /* Block state. */
     145             :   uint                fec_eos:1;                          /* FEC end-of-stream: set if the last FEC set in the block has been
     146             :                                                              ingested. */
     147             :   uint                rooted:1;                           /* Set if the block is rooted. */
     148             :   uint                dying:1;                            /* Set if the block has been abandoned and no transactions should be
     149             :                                                              scheduled from it. */
     150             :   uint                refcnt:1;                           /* Starts at 1 when the block is added, set to 0 if caller has been
     151             :                                                              informed to decrement refcnt for sched. */
     152             :   uint                in_sched:1;                         /* Set if the block is being tracked by the scheduler. */
     153             :   uint                in_rdisp:1;                         /* Set if the block is being tracked by the dispatcher, either as staged
     154             :                                                              or unstaged. */
     155             :   uint                block_start_signaled:1;             /* Set if the start-of-block sentinel has been dispatched. */
     156             :   uint                block_end_signaled:1;               /* Set if the end-of-block sentinel has been dispatched. */
     157             :   uint                block_start_done:1;                 /* Set if the start-of-block processing has been completed. */
     158             :   uint                block_end_done:1;                   /* Set if the end-of-block processing has been completed. */
     159             :   uint                staged:1;                           /* Set if the block is in a dispatcher staging lane; a staged block is
     160             :                                                              tracked by the dispatcher. */
     161             :   ulong               staging_lane;                       /* Ignored if staged==0. */
     162             :   ulong               luf_depth;                          /* Depth of longest unstaged fork starting from this node; only
     163             :                                                              stageable unstaged descendants are counted. */
     164             :   uchar               fec_buf[ FD_SCHED_MAX_FEC_BUF_SZ ]; /* The previous FEC set could have some residual data that only becomes
     165             :                                                              parseable after the next FEC set is ingested. */
      166             :   uint                shred_blk_offs[ FD_SHRED_BLK_MAX ]; /* The byte offsets into block data of ingested shreds. */
     167             : };
     168             : typedef struct fd_sched_block fd_sched_block_t;
     169             : 
     170             : FD_STATIC_ASSERT( sizeof(fd_hash_t)==sizeof(((fd_microblock_hdr_t *)0)->hash), unexpected poh hash size );
     171             : 
     172             : 
     173             : struct fd_sched_metrics {
     174             :   uint  block_added_cnt;
     175             :   uint  block_added_staged_cnt;
     176             :   uint  block_added_unstaged_cnt;
     177             :   uint  block_added_dead_ood_cnt;
     178             :   uint  block_removed_cnt;
     179             :   uint  block_abandoned_cnt;
     180             :   uint  block_bad_cnt;
     181             :   uint  block_promoted_cnt;
     182             :   uint  block_demoted_cnt;
     183             :   uint  deactivate_no_child_cnt;
     184             :   uint  deactivate_no_txn_cnt;
     185             :   uint  deactivate_pruned_cnt;
     186             :   uint  deactivate_abandoned_cnt;
     187             :   uint  lane_switch_cnt;
     188             :   uint  lane_promoted_cnt;
     189             :   uint  lane_demoted_cnt;
     190             :   uint  fork_observed_cnt;
     191             :   uint  alut_success_cnt;
     192             :   uint  alut_serializing_cnt;
     193             :   uint  txn_abandoned_parsed_cnt;
     194             :   uint  txn_abandoned_exec_done_cnt;
     195             :   uint  txn_abandoned_done_cnt;
     196             :   uint  txn_max_in_flight_cnt;
     197             :   ulong txn_weighted_in_flight_cnt;
     198             :   ulong txn_weighted_in_flight_tickcount;
     199             :   ulong txn_none_in_flight_tickcount;
     200             :   ulong txn_parsed_cnt;
     201             :   ulong txn_exec_done_cnt;
     202             :   ulong txn_sigverify_done_cnt;
     203             :   ulong txn_done_cnt;
     204             :   ulong bytes_ingested_cnt;
     205             :   ulong bytes_ingested_unparsed_cnt;
     206             :   ulong bytes_dropped_cnt;
     207             :   ulong fec_cnt;
     208             : };
     209             : typedef struct fd_sched_metrics fd_sched_metrics_t;
     210             : 
     211             : #define DEQUE_NAME ref_q
     212           0 : #define DEQUE_T    ulong
     213             : #include "../../util/tmpl/fd_deque_dynamic.c"
     214             : 
     215             : struct fd_sched {
     216             :   char                print_buf[ FD_SCHED_MAX_PRINT_BUF_SZ ];
     217             :   ulong               print_buf_sz;
     218             :   fd_sched_metrics_t  metrics[ 1 ];
     219             :   ulong               canary; /* == FD_SCHED_MAGIC */
     220             :   ulong               depth;         /* Immutable. */
     221             :   ulong               block_cnt_max; /* Immutable. */
     222             :   ulong               exec_cnt;      /* Immutable. */
     223             :   long                txn_in_flight_last_tick;
     224             :   ulong               root_idx;
     225             :   fd_rdisp_t *        rdisp;
     226             :   ulong               txn_exec_ready_bitset[ 1 ];
     227             :   ulong               sigverify_ready_bitset[ 1 ];
     228             :   ulong               poh_ready_bitset[ 1 ];
     229             :   ulong               active_bank_idx; /* Index of the actively replayed block, or ULONG_MAX if no block is
     230             :                                           actively replayed; has to have a transaction to dispatch; staged
     231             :                                           blocks that have no transactions to dispatch are not eligible for
     232             :                                           being active. */
     233             :   ulong               last_active_bank_idx;
     234             :   ulong               staged_bitset;    /* Bit i set if staging lane i is occupied. */
     235             :   ulong               staged_head_bank_idx[ FD_SCHED_MAX_STAGING_LANES ]; /* Head of the linear chain in each staging lane, ignored if bit i is
     236             :                                                                              not set in the bitset. */
     237             :   ulong               txn_pool_free_cnt;
     238             :   fd_txn_p_t *        txn_pool;
     239             :   uint *              txn_idx_to_parse_idx;
     240             :   ulong               tile_to_bank_idx[ FD_SCHED_MAX_EXEC_TILE_CNT ]; /* Index of the bank that the exec tile is executing against. */
     241             :   txn_bitset_t        exec_done_set[ txn_bitset_word_cnt ];      /* Indexed by txn_idx. */
     242             :   txn_bitset_t        sigverify_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
     243             :   txn_bitset_t        poh_mixin_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
     244             :   fd_sched_block_t *  block_pool; /* Just a flat array. */
     245             :   ulong               block_pool_popcnt;
     246             :   ulong *             ref_q;
     247             : };
     248             : typedef struct fd_sched fd_sched_t;
     249             : 
     250             : 
     251             : /* Internal helpers. */
     252             : 
     253             : static int
     254             : verify_ticks( fd_sched_block_t * block );
     255             : 
     256             : static void
     257             : add_block( fd_sched_t * sched,
     258             :            ulong        bank_idx,
     259             :            ulong        parent_bank_idx );
     260             : 
     261             : FD_WARN_UNUSED static int
     262             : fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );
     263             : 
     264             : FD_WARN_UNUSED static int
     265             : fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );
     266             : 
     267             : static void
     268             : dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );
     269             : 
     270             : static void
     271             : dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );
     272             : 
     273             : FD_WARN_UNUSED static int
     274             : maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block );
     275             : 
     276             : static void
     277             : try_activate_block( fd_sched_t * sched );
     278             : 
     279             : static void
     280             : check_or_set_active_block( fd_sched_t * sched );
     281             : 
     282             : static void
     283             : subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block );
     284             : 
     285             : static void
     286             : subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx );
     287             : 
     288             : static void
     289             : maybe_switch_block( fd_sched_t * sched, ulong bank_idx );
     290             : 
     291             : FD_FN_UNUSED static ulong
     292             : find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx );
     293             : 
     294             : static ulong
     295             : compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx );
     296             : 
     297             : static ulong
     298             : stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx );
     299             : 
     300             : static int
     301             : lane_is_demotable( fd_sched_t * sched, int lane_idx );
     302             : 
     303             : static ulong
     304             : demote_lane( fd_sched_t * sched, int lane_idx );
     305             : 
     306             : static inline fd_sched_block_t *
     307           0 : block_pool_ele( fd_sched_t * sched, ulong idx ) {
     308           0 :   FD_TEST( idx<sched->block_cnt_max || idx==ULONG_MAX );
     309           0 :   return idx==ULONG_MAX ? NULL : sched->block_pool+idx;
     310           0 : }
     311             : 
     312             : FD_FN_UNUSED static inline int
     313           0 : block_is_void( fd_sched_block_t * block ) {
     314           0 :   /* We've seen everything in the block and no transaction got parsed
     315           0 :      out. */
     316           0 :   return block->fec_eos && block->txn_parsed_cnt==0;
     317           0 : }
     318             : 
     319             : static inline int
     320           0 : block_should_signal_end( fd_sched_block_t * block ) {
     321             :   /* Under the current policy of eager synchronous PoH mixin, hashing
     322             :      done plus fec_eos imply that all mixins have been done. */
     323           0 :   if( FD_UNLIKELY( !( !block->fec_eos || ((block->mblk_cnt==block->poh_hashing_done_cnt&&block->mblk_cnt==block->poh_hash_cmp_done_cnt)||block->mblk_cnt!=block->poh_hashing_done_cnt) ) ) ) FD_LOG_CRIT(( "invariant violation: slot %lu fec_eos %d mblk_cnt %u poh_hashing_done_cnt %u poh_hash_cmp_done_cnt %u", block->slot, block->fec_eos, block->mblk_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt ));
     324           0 :   return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hashing_done_cnt && block->block_start_done && !block->block_end_signaled;
     325           0 : }
     326             : 
     327             : static inline int
     328           0 : block_will_signal_end( fd_sched_block_t * block ) {
     329           0 :   return block->fec_eos && !block->block_end_signaled;
     330           0 : }
     331             : 
     332             : /* Is there something known to be dispatchable in the block?  This is an
     333             :    important liveness property.  A block that doesn't contain any known
     334             :    dispatchable tasks will be deactivated or demoted. */
     335             : static inline int
     336           0 : block_is_dispatchable( fd_sched_block_t * block ) {
     337           0 :   ulong exec_queued_cnt      = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
     338           0 :   ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
     339           0 :   ulong poh_queued_cnt       = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
     340           0 :   return exec_queued_cnt>0UL ||
     341           0 :          sigverify_queued_cnt>0UL ||
     342           0 :          poh_queued_cnt>0UL ||
     343           0 :          !block->block_start_signaled ||
     344           0 :          block_will_signal_end( block );
     345           0 : }
     346             : 
     347             : static inline int
     348           0 : block_is_in_flight( fd_sched_block_t * block ) {
     349           0 :   return block->txn_exec_in_flight_cnt || block->txn_sigverify_in_flight_cnt || block->poh_hashing_in_flight_cnt || (block->block_end_signaled && !block->block_end_done);
     350           0 : }
     351             : 
     352             : static inline int
     353           0 : block_is_done( fd_sched_block_t * block ) {
     354           0 :   return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hash_cmp_done_cnt && block->block_start_done && block->block_end_done;
     355           0 : }
     356             : 
     357             : static inline int
     358           0 : block_is_stageable( fd_sched_block_t * block ) {
     359           0 :   int rv = !block_is_done( block ) && !block->dying;
     360           0 :   if( FD_UNLIKELY( rv && !block->in_rdisp ) ) {
     361             :     /* Invariant: stageable blocks may be currently staged or unstaged,
     362             :        but must be in the dispatcher either way.  When a block
     363             :        transitions to DONE, it will be immediately removed from the
     364             :        dispatcher.  When a block transitions to DYING, it will be
     365             :        eventually abandoned from the dispatcher. */
      366           0 :     FD_LOG_CRIT(( "invariant violation: stageable block->in_rdisp==0, txn_parsed_cnt %u, txn_done_cnt %u, fec_eos %u, slot %lu, parent slot %lu",
     367           0 :                   block->txn_parsed_cnt, block->txn_done_cnt, (uint)block->fec_eos, block->slot, block->parent_slot ));
     368           0 :   }
     369           0 :   return rv;
     370           0 : }
     371             : 
     372             : static inline int
     373           0 : block_is_promotable( fd_sched_block_t * block ) {
     374           0 :   return block_is_stageable( block ) && block_is_dispatchable( block ) && !block->staged;
     375           0 : }
     376             : 
     377             : static inline int
     378           0 : block_is_demotable( fd_sched_block_t * block ) {
     379             :   /* A block can only be demoted from rdisp if it is empty, meaning no
     380             :      PENDING, READY, or DISPATCHED transactions.  This is equivalent to
     381             :      having no in-flight transactions (DISPATCHED) and no queued
     382             :      transactions (PENDING or READY).  This function actually implements
     383             :      a stronger requirement.  We consider a block demotable only if
     384             :      there are no in-flight or queued tasks of any kind. */
     385           0 :   return !block_is_in_flight( block ) && !block_is_dispatchable( block ) && block->staged;
     386           0 : }
     387             : 
     388             : static inline int
     389           0 : block_is_activatable( fd_sched_block_t * block ) {
     390           0 :   return block_is_stageable( block ) && block_is_dispatchable( block ) && block->staged;
     391           0 : }
     392             : 
     393             : static inline int
     394           0 : block_should_deactivate( fd_sched_block_t * block ) {
     395             :   /* We allow a grace period, during which a block has nothing to
     396             :      dispatch, but has something in-flight.  The block is allowed to
     397             :      stay activated and ingest FEC sets during this time.  The block
     398             :      will be deactivated if there's still nothing to dispatch by the
     399             :      time all in-flight tasks are completed. */
     400           0 :   return !block_is_activatable( block ) && !block_is_in_flight( block );
     401           0 : }
     402             : 
     403             : static inline int
     404           0 : block_is_prunable( fd_sched_block_t * block ) {
     405           0 :   return !block->in_rdisp && !block_is_in_flight( block );
     406           0 : }
     407             : 
     408             : static inline ulong
     409           0 : block_to_idx( fd_sched_t * sched, fd_sched_block_t * block ) { return (ulong)(block-sched->block_pool); }
     410             : 
     411             : __attribute__((format(printf,2,3)))
     412             : static void
     413             : fd_sched_printf( fd_sched_t * sched,
     414             :                  char const * fmt,
     415           0 :                  ... ) {
     416           0 :   va_list ap;
     417           0 :   ulong len;
     418           0 :   va_start( ap, fmt );
     419           0 :   int ret = vsnprintf( sched->print_buf+sched->print_buf_sz,
     420           0 :                        FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz,
     421           0 :                        fmt, ap );
     422           0 :   va_end( ap );
     423           0 :   len = fd_ulong_if( ret<0, 0UL, fd_ulong_min( (ulong)ret, FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz-1UL ) );
     424           0 :   sched->print_buf[ sched->print_buf_sz+len ] = '\0';
     425           0 :   sched->print_buf_sz += len;
     426           0 : }
     427             : 
     428             : FD_FN_UNUSED static void
     429           0 : log_block_txns( fd_sched_t * sched, fd_sched_block_t * block ) {
     430           0 :   for( ulong i=0UL; i<block->txn_parsed_cnt; i++ ) {
     431           0 :     sched->print_buf_sz = 0UL;
     432           0 :     FD_BASE58_ENCODE_64_BYTES( block->txn_sigs[ i ], sig_str );
     433           0 :     long disp_tick = block->txn_disp_ticks[ i ];
     434           0 :     long done_tick = block->txn_done_ticks[ i ];
     435           0 :     if( FD_LIKELY( disp_tick!=LONG_MAX && done_tick!=LONG_MAX ) ) fd_sched_printf( sched, "['%s',%ld,%ld],", sig_str, disp_tick, done_tick );
     436           0 :     else if( FD_LIKELY( disp_tick!=LONG_MAX ) ) fd_sched_printf( sched, "['%s',%ld,None],", sig_str, disp_tick );
     437           0 :     else fd_sched_printf( sched, "['%s',None,None],", sig_str );
     438           0 :     FD_LOG_DEBUG(( "%s", sched->print_buf ));
     439           0 :   }
     440           0 : }
     441             : 
     442             : FD_FN_UNUSED static void
     443           0 : print_histogram( fd_sched_t * sched, fd_histf_t * hist, ulong converter, char * title ) {
     444           0 :   fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
     445           0 :   fd_sched_printf( sched, " | %-19s |                      | Count        |\n", title );
     446           0 :   fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
     447           0 : 
     448           0 :   ulong total_count = 0;
     449           0 :   for( ulong i=0UL; i<fd_histf_bucket_cnt( hist ); i++ ) {
     450           0 :     total_count += fd_histf_cnt( hist, i );
     451           0 :   }
     452           0 : 
      453           0 :   for( ulong i=0UL; i<fd_histf_bucket_cnt( hist ); i++ ) {
     454           0 :     ulong bucket_count = fd_histf_cnt( hist, i );
     455           0 : 
     456           0 :     char * lt_str;
     457           0 :     char lt_buf[ 64 ];
     458           0 :     if( FD_UNLIKELY( i==fd_histf_bucket_cnt( hist )-1UL ) ) {
     459           0 :       lt_str = "+Inf";
     460           0 :     } else {
     461           0 :       ulong edge = fd_histf_right( hist, i );
     462           0 :       if( converter==FD_METRICS_CONVERTER_NANOSECONDS ) {
     463           0 :         edge = fd_metrics_convert_ticks_to_nanoseconds( edge-1UL );
     464           0 :         FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu nanos", edge ) );
     465           0 :       } else if( converter==FD_METRICS_CONVERTER_NONE ) {
     466           0 :         FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu", edge-1UL ) );
     467           0 :       }
     468           0 :       lt_str = lt_buf;
     469           0 :     }
     470           0 : 
     471           0 :     /* Create visual bar - scale to max 20 characters. */
     472           0 :     char bar_buf[ 22 ];
     473           0 :     if( bucket_count>0UL && total_count>0UL ) {
     474           0 :       ulong bar_length = (bucket_count*20UL)/total_count;
     475           0 :       if( !bar_length ) bar_length = 1;
     476           0 :       for( ulong j=0UL; j<bar_length; j++ ) { bar_buf[ j ] = '*'; }
     477           0 :       bar_buf[ bar_length ] = '\0';
     478           0 :     } else {
     479           0 :       bar_buf[ 0 ] = '\0';
     480           0 :     }
     481           0 : 
     482           0 :     fd_sched_printf( sched, " | %19s | %-20s | %12lu |\n", lt_str, bar_buf, bucket_count );
     483           0 :   }
     484           0 : }
     485             : 
     486             : FD_FN_UNUSED static void
     487           0 : print_block_metrics( fd_sched_t * sched, fd_sched_block_t * block ) {
     488           0 :   fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, fec_eos %d, rooted %d, txn_parsed_cnt %u, txn_exec_done_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, fec_cnt %u, mblk_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
     489           0 :                    block_to_idx( sched, block ), block->slot, block->parent_slot, block->fec_eos, block->rooted, block->txn_parsed_cnt, block->txn_exec_done_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->fec_cnt, block->mblk_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
     490           0 : }
     491             : 
     492             : FD_FN_UNUSED static void
     493           0 : print_block_debug( fd_sched_t * sched, fd_sched_block_t * block ) {
     494           0 :   fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, staged %d (lane %lu), dying %d, in_rdisp %d, fec_eos %d, rooted %d, block_start_signaled %d, block_end_signaled %d, block_start_done %d, block_end_done %d, txn_parsed_cnt %u, txn_exec_in_flight_cnt %u, txn_exec_done_cnt %u, txn_sigverify_in_flight_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_in_flight_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, fec_cnt %u, mblk_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, prev_tick_hashcnt %lu, curr_tick_hashcnt %lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
     495           0 :                    block_to_idx( sched, block ), block->slot, block->parent_slot, block->staged, block->staging_lane, block->dying, block->in_rdisp, block->fec_eos, block->rooted, block->block_start_signaled, block->block_end_signaled, block->block_start_done, block->block_end_done, block->txn_parsed_cnt, block->txn_exec_in_flight_cnt, block->txn_exec_done_cnt, block->txn_sigverify_in_flight_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_in_flight_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->fec_cnt, block->mblk_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->prev_tick_hashcnt, block->curr_tick_hashcnt, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
     496           0 : }
     497             : 
     498             : FD_FN_UNUSED static void
     499           0 : print_block_and_parent( fd_sched_t * sched, fd_sched_block_t * block ) {
     500           0 :   print_block_debug( sched, block );
     501           0 :   fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
     502           0 :   if( FD_LIKELY( parent ) ) print_block_debug( sched, parent );
     503           0 : }
     504             : 
     505             : FD_FN_UNUSED static void
     506           0 : print_metrics( fd_sched_t * sched ) {
     507           0 :     fd_sched_printf( sched, "metrics: block_added_cnt %u, block_added_staged_cnt %u, block_added_unstaged_cnt %u, block_added_dead_ood_cnt %u, block_removed_cnt %u, block_abandoned_cnt %u, block_bad_cnt %u, block_promoted_cnt %u, block_demoted_cnt %u, deactivate_no_child_cnt %u, deactivate_no_txn_cnt %u, deactivate_pruned_cnt %u, deactivate_abandoned_cnt %u, lane_switch_cnt %u, lane_promoted_cnt %u, lane_demoted_cnt %u, fork_observed_cnt %u, alut_success_cnt %u, alut_serializing_cnt %u, txn_abandoned_parsed_cnt %u, txn_abandoned_exec_done_cnt %u, txn_abandoned_done_cnt %u, txn_max_in_flight_cnt %u, txn_weighted_in_flight_cnt %lu, txn_weighted_in_flight_tickcount %lu, txn_none_in_flight_tickcount %lu, txn_parsed_cnt %lu, txn_exec_done_cnt %lu, txn_sigverify_done_cnt %lu, txn_done_cnt %lu, bytes_ingested_cnt %lu, bytes_ingested_unparsed_cnt %lu, bytes_dropped_cnt %lu, fec_cnt %lu\n",
     508           0 :                      sched->metrics->block_added_cnt, sched->metrics->block_added_staged_cnt, sched->metrics->block_added_unstaged_cnt, sched->metrics->block_added_dead_ood_cnt, sched->metrics->block_removed_cnt, sched->metrics->block_abandoned_cnt, sched->metrics->block_bad_cnt, sched->metrics->block_promoted_cnt, sched->metrics->block_demoted_cnt, sched->metrics->deactivate_no_child_cnt, sched->metrics->deactivate_no_txn_cnt, sched->metrics->deactivate_pruned_cnt, sched->metrics->deactivate_abandoned_cnt, sched->metrics->lane_switch_cnt, sched->metrics->lane_promoted_cnt, sched->metrics->lane_demoted_cnt, sched->metrics->fork_observed_cnt, sched->metrics->alut_success_cnt, sched->metrics->alut_serializing_cnt, sched->metrics->txn_abandoned_parsed_cnt, sched->metrics->txn_abandoned_exec_done_cnt, sched->metrics->txn_abandoned_done_cnt, sched->metrics->txn_max_in_flight_cnt, sched->metrics->txn_weighted_in_flight_cnt, sched->metrics->txn_weighted_in_flight_tickcount, sched->metrics->txn_none_in_flight_tickcount, sched->metrics->txn_parsed_cnt, sched->metrics->txn_exec_done_cnt, sched->metrics->txn_sigverify_done_cnt, sched->metrics->txn_done_cnt, sched->metrics->bytes_ingested_cnt, sched->metrics->bytes_ingested_unparsed_cnt, sched->metrics->bytes_dropped_cnt, sched->metrics->fec_cnt );
     509             : 
     510           0 : }
     511             : 
     512             : FD_FN_UNUSED static void
     513           0 : print_sched( fd_sched_t * sched ) {
     514           0 :   fd_sched_printf( sched, "sched canary 0x%lx, exec_cnt %lu, root_idx %lu, txn_exec_ready_bitset[ 0 ] 0x%lx, sigverify_ready_bitset[ 0 ] 0x%lx, poh_ready_bitset[ 0 ] 0x%lx, active_idx %lu, staged_bitset %lu, staged_head_idx[0] %lu, staged_head_idx[1] %lu, staged_head_idx[2] %lu, staged_head_idx[3] %lu, txn_pool_free_cnt %lu/%lu, block_pool_popcnt %lu/%lu\n",
     515           0 :                    sched->canary, sched->exec_cnt, sched->root_idx, sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ], sched->active_bank_idx, sched->staged_bitset, sched->staged_head_bank_idx[ 0 ], sched->staged_head_bank_idx[ 1 ], sched->staged_head_bank_idx[ 2 ], sched->staged_head_bank_idx[ 3 ], sched->txn_pool_free_cnt, sched->depth, sched->block_pool_popcnt, sched->block_cnt_max );
     516           0 :   fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
     517           0 :   if( active_block ) print_block_debug( sched, active_block );
     518           0 :   for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
     519           0 :     if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
     520           0 :       fd_sched_block_t * block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
     521           0 :       print_block_debug( sched, block );
     522           0 :     }
     523           0 :   }
     524           0 : }
     525             : 
     526             : FD_FN_UNUSED static void
     527           0 : print_all( fd_sched_t * sched, fd_sched_block_t * block ) {
     528           0 :   print_metrics( sched );
     529           0 :   print_sched( sched );
     530           0 :   print_block_and_parent( sched, block );
     531           0 : }
     532             : 
     533             : static void
     534           0 : handle_bad_block( fd_sched_t * sched, fd_sched_block_t * block ) {
     535           0 :   sched->print_buf_sz = 0UL;
     536           0 :   print_all( sched, block );
     537           0 :   FD_LOG_DEBUG(( "%s", sched->print_buf ));
     538           0 :   subtree_abandon( sched, block );
     539           0 :   sched->metrics->block_bad_cnt++;
     540           0 :   check_or_set_active_block( sched );
     541           0 : }
     542             : 
     543             : 
     544             : /* Public functions. */
     545             : 
     546             : ulong
     547           0 : fd_sched_align( void ) {
     548           0 :   return fd_ulong_max( alignof(fd_sched_t),
     549           0 :          fd_ulong_max( fd_rdisp_align(),
     550           0 :          fd_ulong_max( alignof(fd_sched_block_t), 64UL ))); /* Minimally cache line aligned. */
     551           0 : }
     552             : 
     553             : ulong
     554             : fd_sched_footprint( ulong depth,
     555           0 :                     ulong block_cnt_max ) {
     556           0 :   if( FD_UNLIKELY( depth<FD_SCHED_MIN_DEPTH || depth>FD_SCHED_MAX_DEPTH ) ) return 0UL; /* bad depth */
     557           0 :   if( FD_UNLIKELY( !block_cnt_max ) ) return 0UL; /* bad block_cnt_max */
     558           0 :   ulong l = FD_LAYOUT_INIT;
     559           0 :   l = FD_LAYOUT_APPEND( l, fd_sched_align(),          sizeof(fd_sched_t)                         );
     560           0 :   l = FD_LAYOUT_APPEND( l, fd_rdisp_align(),          fd_rdisp_footprint( depth, block_cnt_max ) ); /* dispatcher */
     561           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t)     ); /* block pool */
     562           0 :   l = FD_LAYOUT_APPEND( l, ref_q_align(),             ref_q_footprint( block_cnt_max )           );
     563           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_txn_p_t),       depth*sizeof(fd_txn_p_t)                   ); /* txn_pool */
     564           0 :   l = FD_LAYOUT_APPEND( l, sizeof(uint),              depth*sizeof(uint)                         ); /* txn_idx_to_parse_idx */
     565           0 :   return FD_LAYOUT_FINI( l, fd_sched_align() );
     566           0 : }
     567             : 
     568             : void *
     569             : fd_sched_new( void * mem,
     570             :               ulong  depth,
     571             :               ulong  block_cnt_max,
     572           0 :               ulong  exec_cnt ) {
     573             : 
     574           0 :   if( FD_UNLIKELY( !mem ) ) {
     575           0 :     FD_LOG_WARNING(( "NULL mem" ));
     576           0 :     return NULL;
     577           0 :   }
     578             : 
     579           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, fd_sched_align() ) ) ) {
     580           0 :     FD_LOG_WARNING(( "misaligned mem (%p)", mem ));
     581           0 :     return NULL;
     582           0 :   }
     583             : 
     584           0 :   if( FD_UNLIKELY( depth<32UL || depth>FD_SCHED_MAX_DEPTH ) ) {
     585           0 :     FD_LOG_WARNING(( "bad depth (%lu)", depth ));
     586           0 :     return NULL;
     587           0 :   }
     588             : 
     589           0 :   if( FD_UNLIKELY( !block_cnt_max ) ) {
     590           0 :     FD_LOG_WARNING(( "bad block_cnt_max (%lu)", block_cnt_max ));
     591           0 :     return NULL;
     592           0 :   }
     593             : 
     594           0 :   if( FD_UNLIKELY( !exec_cnt || exec_cnt>FD_SCHED_MAX_EXEC_TILE_CNT ) ) {
     595           0 :     FD_LOG_WARNING(( "bad exec_cnt (%lu)", exec_cnt ));
     596           0 :     return NULL;
     597           0 :   }
     598             : 
     599           0 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     600           0 :   fd_sched_t * sched    = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(),          sizeof(fd_sched_t)                         );
     601           0 :   void *       _rdisp   = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),          fd_rdisp_footprint( depth, block_cnt_max ) );
     602           0 :   void *       _bpool   = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t)     );
     603           0 :   void *       _ref_q   = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(),             ref_q_footprint( block_cnt_max )           );
     604           0 :   fd_txn_p_t * txn_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_txn_p_t),       depth*sizeof(fd_txn_p_t)                   );
     605           0 :   uint * txn_idx_to_parse_idx = FD_SCRATCH_ALLOC_APPEND( l, sizeof(uint),        depth*sizeof(uint)                         );
     606           0 :   FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );
     607             : 
     608           0 :   sched->txn_pool             = txn_pool;
     609           0 :   sched->txn_idx_to_parse_idx = txn_idx_to_parse_idx;
     610             : 
     611           0 :   ulong seed = ((ulong)fd_tickcount()) ^ FD_SCHED_MAGIC;
     612           0 :   fd_rdisp_new( _rdisp, depth, block_cnt_max, seed );
     613             : 
     614           0 :   fd_sched_block_t * bpool = (fd_sched_block_t *)_bpool;
     615           0 :   for( ulong i=0; i<block_cnt_max; i++ ) {
     616           0 :     bpool[ i ].in_sched = 0;
     617           0 :     mblk_in_progress_slist_new( bpool[ i ].mblks_hashing_in_progress );
     618           0 :     mblk_in_progress_slist_new( bpool[ i ].mblks_mixin_in_progress );
     619           0 :     mblk_in_progress_bitset_new( bpool[ i ].mblk_in_progress_pool_free_bitset );
     620           0 :   }
     621             : 
     622           0 :   fd_memset( sched->metrics, 0, sizeof(fd_sched_metrics_t) );
     623           0 :   sched->txn_in_flight_last_tick = LONG_MAX;
     624             : 
     625           0 :   sched->canary               = FD_SCHED_MAGIC;
     626           0 :   sched->depth                = depth;
     627           0 :   sched->block_cnt_max        = block_cnt_max;
     628           0 :   sched->exec_cnt             = exec_cnt;
     629           0 :   sched->root_idx             = ULONG_MAX;
     630           0 :   sched->active_bank_idx      = ULONG_MAX;
     631           0 :   sched->last_active_bank_idx = ULONG_MAX;
     632           0 :   sched->staged_bitset        = 0UL;
     633             : 
     634           0 :   sched->txn_exec_ready_bitset[ 0 ]  = fd_ulong_mask_lsb( (int)exec_cnt );
     635           0 :   sched->sigverify_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt );
     636           0 :   sched->poh_ready_bitset[ 0 ]       = fd_ulong_mask_lsb( (int)exec_cnt );
     637             : 
     638           0 :   sched->txn_pool_free_cnt = depth-1UL; /* -1 because index 0 is unusable as a sentinel reserved by the dispatcher */
     639             : 
     640           0 :   txn_bitset_new( sched->exec_done_set );
     641           0 :   txn_bitset_new( sched->sigverify_done_set );
     642           0 :   txn_bitset_new( sched->poh_mixin_done_set );
     643             : 
     644           0 :   sched->block_pool_popcnt = 0UL;
     645             : 
     646           0 :   ref_q_new( _ref_q, block_cnt_max );
     647             : 
     648           0 :   return sched;
     649           0 : }
     650             : 
     651             : fd_sched_t *
     652           0 : fd_sched_join( void * mem ) {
     653             : 
     654           0 :   if( FD_UNLIKELY( !mem ) ) {
     655           0 :     FD_LOG_WARNING(( "NULL mem" ));
     656           0 :     return NULL;
     657           0 :   }
     658             : 
     659           0 :   fd_sched_t * sched         = (fd_sched_t *)mem;
     660           0 :   ulong        depth         = sched->depth;
     661           0 :   ulong        block_cnt_max = sched->block_cnt_max;
     662             : 
     663           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     664             : 
     665           0 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     666           0 :   /*           */ FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(),          sizeof(fd_sched_t)                         );
     667           0 :   void * _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),          fd_rdisp_footprint( depth, block_cnt_max ) );
     668           0 :   void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t)     );
     669           0 :   void * _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(),             ref_q_footprint( block_cnt_max )           );
     670           0 :   FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );
     671             : 
     672           0 :   sched->rdisp      = fd_rdisp_join( _rdisp );
     673           0 :   sched->ref_q      = ref_q_join( _ref_q );
     674           0 :   sched->block_pool = _bpool;
     675             : 
     676           0 :   for( ulong i=0; i<block_cnt_max; i++ ) {
     677           0 :     mblk_in_progress_slist_join( sched->block_pool[ i ].mblks_hashing_in_progress );
     678           0 :     mblk_in_progress_slist_join( sched->block_pool[ i ].mblks_mixin_in_progress );
     679           0 :     mblk_in_progress_bitset_join( sched->block_pool[ i ].mblk_in_progress_pool_free_bitset );
     680           0 :   }
     681             : 
     682           0 :   txn_bitset_join( sched->exec_done_set );
     683           0 :   txn_bitset_join( sched->sigverify_done_set );
     684           0 :   txn_bitset_join( sched->poh_mixin_done_set );
     685             : 
     686           0 :   return sched;
     687           0 : }
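
/* Construction composes in the usual align/footprint/new/join sequence.
   A minimal usage sketch; the sizing constants and the aligned_alloc
   backing store are illustrative placeholders (a real deployment would
   carve the region out of a workspace).  fd_sched_footprint() pads its
   result to a multiple of fd_sched_align(), so the aligned_alloc size
   requirement is met:

     ulong depth         = 4096UL; // illustrative; in [FD_SCHED_MIN_DEPTH, FD_SCHED_MAX_DEPTH]
     ulong block_cnt_max = 64UL;   // illustrative
     ulong exec_cnt      = 8UL;    // illustrative; at most FD_SCHED_MAX_EXEC_TILE_CNT
     void *       mem   = aligned_alloc( fd_sched_align(), fd_sched_footprint( depth, block_cnt_max ) );
     fd_sched_t * sched = fd_sched_join( fd_sched_new( mem, depth, block_cnt_max, exec_cnt ) );
*/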
     688             : 
     689             : int
     690           0 : fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec ) {
     691           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     692           0 :   FD_TEST( fec->bank_idx<sched->block_cnt_max );
     693           0 :   FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );
     694             : 
     695           0 :   if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
     696           0 :     sched->print_buf_sz = 0UL;
     697           0 :     print_metrics( sched );
     698           0 :     print_sched( sched );
     699           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     700           0 :     FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
     701           0 :   }
     702             : 
     703           0 :   ulong fec_buf_sz = 0UL;
     704           0 :   fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );
     705           0 :   if( FD_LIKELY( !fec->is_first_in_block ) ) {
     706           0 :     fec_buf_sz += block->fec_buf_sz-block->fec_buf_soff;
     707           0 :   } else {
     708             :     /* No residual data as this is a fresh new block. */
     709           0 :   }
     710             :   /* Addition is safe and won't overflow because we checked the FEC set
     711             :      size above. */
     712           0 :   fec_buf_sz += fec->fec->data_sz;
     713             :   /* Assuming every transaction is min size, do we have enough free
     714             :      entries in the txn pool?  For a more precise txn count, we would
     715             :      have to do some parsing. */
     716           0 :   return sched->txn_pool_free_cnt>=fec_buf_sz/FD_TXN_MIN_SERIALIZED_SZ;
     717           0 : }
     718             : 
     719             : int
     720             : fd_sched_can_ingest( fd_sched_t * sched,
     721           0 :                      ulong        fec_cnt ) {
     722           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     723             :   /* Worst case, we need one byte from the incoming data to extract a
     724             :      transaction out of the residual data, and the rest of the incoming
     725             :      data contributes toward min sized transactions. */
     726           0 :   return sched->txn_pool_free_cnt>=(FD_SCHED_MAX_TXN_PER_FEC*fec_cnt);
     727           0 : }
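
/* The bound above works out as follows: in the worst case, a single
   byte of incoming data completes a transaction whose remaining bytes
   were residue from the previous FEC set, and every other incoming byte
   goes toward minimum-size transactions.  That is exactly the ceiling
   encoded by FD_SCHED_MAX_TXN_PER_FEC; an illustrative helper (not part
   of this file): */
FD_FN_UNUSED static ulong
worst_case_txn_per_fec( ulong data_sz,       /* bytes in one FEC set, >=1 */
                        ulong min_txn_sz ) { /* FD_TXN_MIN_SERIALIZED_SZ  */
  return 1UL + (data_sz-1UL)/min_txn_sz;     /* == ceil( data_sz/min_txn_sz ) */
}
/* fd_sched_can_ingest then requires txn_pool_free_cnt >=
   worst_case_txn_per_fec( FD_SCHED_MAX_PAYLOAD_PER_FEC, FD_TXN_MIN_SERIALIZED_SZ )*fec_cnt,
   i.e. FD_SCHED_MAX_TXN_PER_FEC*fec_cnt. */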
     728             : 
     729             : FD_WARN_UNUSED int
     730             : fd_sched_fec_ingest( fd_sched_t *     sched,
     731           0 :                      fd_sched_fec_t * fec ) {
     732           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     733           0 :   FD_TEST( fec->bank_idx<sched->block_cnt_max );
     734           0 :   FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );
     735           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
     736             : 
     737           0 :   fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );
     738             : 
     739           0 :   if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
     740           0 :     sched->print_buf_sz = 0UL;
     741           0 :     print_all( sched, block );
     742           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     743           0 :     FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
     744           0 :   }
     745             : 
     746           0 :   if( FD_UNLIKELY( fec->is_first_in_block ) ) {
     747             :     /* This is a new block. */
     748           0 :     add_block( sched, fec->bank_idx, fec->parent_bank_idx );
     749           0 :     block->slot        = fec->slot;
     750           0 :     block->parent_slot = fec->parent_slot;
     751             : 
     752           0 :     if( FD_UNLIKELY( block->dying ) ) {
     753             :       /* The child of a dead block is also dead.  We added it to our
     754             :          fork tree just so we could track an entire lineage of dead
     755             :          children and propagate the dead property to the entire lineage,
     756             :          in case there were frags for more than one dead children
      757             :          in case there were frags for more than one dead child
     758             :          said, we shouldn't need to add the dead child to the
     759             :          dispatcher. */
     760           0 :       sched->metrics->block_added_dead_ood_cnt++;
     761             : 
     762             :       /* Ignore the FEC set for a dead block. */
     763           0 :       sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     764           0 :       return 1;
     765           0 :     }
     766             : 
     767             :     /* Try to find a staging lane for this block. */
     768           0 :     int alloc_lane = 0;
     769           0 :     fd_sched_block_t * parent_block = block_pool_ele( sched, fec->parent_bank_idx );
     770           0 :     if( FD_LIKELY( parent_block->staged ) ) {
     771             :       /* Parent is staged.  So see if we can continue down the same
     772             :          staging lane. */
     773           0 :       ulong staging_lane = parent_block->staging_lane;
     774           0 :       ulong child_idx    = parent_block->child_idx;
     775           0 :       while( child_idx!=ULONG_MAX ) {
     776           0 :         fd_sched_block_t * child = block_pool_ele( sched, child_idx );
     777           0 :         if( child->staged && child->staging_lane==staging_lane ) {
      778             :           /* Another child already occupies the parent's lane; can't reuse it. */
     779           0 :           staging_lane = FD_RDISP_UNSTAGED;
     780           0 :           break;
     781           0 :         }
     782           0 :         child_idx = child->sibling_idx;
     783           0 :       }
     784             :       /* No child is staged on the same lane as the parent.  So stage
     785             :          this block.  This is the common case. */
     786           0 :       if( FD_LIKELY( staging_lane!=FD_RDISP_UNSTAGED ) ) {
     787           0 :         block->in_rdisp     = 1;
     788           0 :         block->staged       = 1;
     789           0 :         block->staging_lane = staging_lane;
     790           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, staging_lane );
     791           0 :         sched->metrics->block_added_cnt++;
     792           0 :         sched->metrics->block_added_staged_cnt++;
     793           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, staging_lane ));
     794           0 :       } else {
     795           0 :         alloc_lane = 1;
     796           0 :       }
     797           0 :     } else {
     798           0 :       if( block_is_stageable( parent_block ) ) {
     799             :         /* Parent is unstaged but stageable.  So let's be unstaged too.
     800             :            This is not only a policy decision to be lazy and not promote
     801             :            the parent at the moment, but also an important invariant
      802             :            that we maintain for deadlock-freedom in the face of staging
     803             :            lane shortage.  See the comments in lane eviction for how
     804             :            this invariant is relevant. */
     805           0 :         block->in_rdisp = 1;
     806           0 :         block->staged   = 0;
     807           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
     808           0 :         sched->metrics->block_added_cnt++;
     809           0 :         sched->metrics->block_added_unstaged_cnt++;
     810           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
     811           0 :       } else {
     812           0 :         alloc_lane = 1;
     813           0 :       }
     814           0 :     }
     815           0 :     if( FD_UNLIKELY( alloc_lane ) ) {
     816             :       /* We weren't able to inherit the parent's staging lane.  So try
     817             :          to find a new staging lane. */
     818           0 :       if( FD_LIKELY( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) { /* Optimize for lane available. */
     819           0 :         int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset );
     820           0 :         if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
     821           0 :           FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
     822           0 :                         lane_idx, sched->staged_bitset ));
     823           0 :         }
     824           0 :         sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
     825           0 :         sched->staged_head_bank_idx[ lane_idx ] = fec->bank_idx;
     826           0 :         block->in_rdisp     = 1;
     827           0 :         block->staged       = 1;
     828           0 :         block->staging_lane = (ulong)lane_idx;
     829           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, block->staging_lane );
     830           0 :         sched->metrics->block_added_cnt++;
     831           0 :         sched->metrics->block_added_staged_cnt++;
     832           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, block->staging_lane ));
     833           0 :       } else {
     834             :         /* No lanes available. */
     835           0 :         block->in_rdisp = 1;
     836           0 :         block->staged   = 0;
     837           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
     838           0 :         sched->metrics->block_added_cnt++;
     839           0 :         sched->metrics->block_added_unstaged_cnt++;
     840           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
     841           0 :       }
     842           0 :     }
     843           0 :   }
     844             : 
     845           0 :   block->txn_pool_max_popcnt   = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt );
     846           0 :   block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );
     847             : 
     848           0 :   if( FD_UNLIKELY( block->dying ) ) {
     849             :     /* Ignore the FEC set for a dead block. */
     850           0 :     sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     851           0 :     return 1;
     852           0 :   }
     853             : 
     854           0 :   if( FD_UNLIKELY( !block->in_rdisp ) ) {
     855             :     /* Invariant: block must be in the dispatcher at this point. */
     856           0 :     sched->print_buf_sz = 0UL;
     857           0 :     print_all( sched, block );
     858           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     859           0 :     FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
     860           0 :                   block->slot, block->parent_slot ));
     861           0 :   }
     862             : 
     863           0 :   if( FD_UNLIKELY( block->fec_eos ) ) {
     864             :     /* This means something is wrong upstream.  We're getting more FEC
     865             :        sets for a block that has already ended, or so we were told. */
     866           0 :     sched->print_buf_sz = 0UL;
     867           0 :     print_all( sched, block );
     868           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     869           0 :     FD_LOG_CRIT(( "invariant violation: block->fec_eos set but getting more FEC sets, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
     870           0 :   }
     871           0 :   if( FD_UNLIKELY( block->fec_eob && fec->is_last_in_batch ) ) {
      872             :     /* If the previous FEC set had been ingested and parsed successfully,
      873             :        block->fec_eob would have been cleared.  The fact that fec_eob is set
     874             :        means that the previous batch didn't parse properly.  So this is
     875             :        a bad block.  We should refuse to replay down the fork. */
     876           0 :     FD_LOG_INFO(( "bad block: failed to parse, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
     877           0 :     handle_bad_block( sched, block );
     878           0 :     sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     879           0 :     return 0;
     880           0 :   }
     881           0 :   if( FD_UNLIKELY( block->child_idx!=ULONG_MAX ) ) {
     882             :     /* This means something is wrong upstream.  FEC sets are not being
     883             :        delivered in replay order.  We got a child block FEC set before
     884             :        this block was completely delivered. */
     885           0 :     sched->print_buf_sz = 0UL;
     886           0 :     print_all( sched, block );
     887           0 :     fd_sched_block_t * child_block = block_pool_ele( sched, block->child_idx );
     888           0 :     print_block_debug( sched, child_block );
     889           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     890           0 :     FD_LOG_CRIT(( "invariant violation: block->child_idx %lu, slot %lu, parent slot %lu", block->child_idx, fec->slot, fec->parent_slot ));
     891           0 :   }
     892             : 
     893           0 :   FD_TEST( block->fec_buf_sz>=block->fec_buf_soff );
     894           0 :   if( FD_LIKELY( block->fec_buf_sz>block->fec_buf_soff ) ) {
     895             :     /* If there is residual data from the previous FEC set within the
     896             :        same batch, we move it to the beginning of the buffer and append
     897             :        the new FEC set. */
     898           0 :     memmove( block->fec_buf, block->fec_buf+block->fec_buf_soff, block->fec_buf_sz-block->fec_buf_soff );
     899           0 :   }
     900           0 :   block->fec_buf_boff += block->fec_buf_soff;
     901           0 :   block->fec_buf_sz   -= block->fec_buf_soff;
     902           0 :   block->fec_buf_soff  = 0;
     903             :   /* Addition is safe and won't overflow because we checked the FEC
     904             :      set size above. */
     905           0 :   if( FD_UNLIKELY( block->fec_buf_sz+fec->fec->data_sz>FD_SCHED_MAX_FEC_BUF_SZ ) ) {
     906             :     /* In a conformant block, there shouldn't be more than a
     907             :        transaction's worth of residual data left over from the previous
     908             :        FEC set within the same batch.  So if this condition doesn't
     909             :        hold, it's a bad block.  Instead of crashing, we should refuse to
     910             :        replay down the fork. */
     911           0 :     FD_LOG_INFO(( "bad block: fec_buf_sz %u, fec->data_sz %lu, slot %lu, parent slot %lu", block->fec_buf_sz, fec->fec->data_sz, fec->slot, fec->parent_slot ));
     912           0 :     handle_bad_block( sched, block );
     913           0 :     sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     914           0 :     return 0;
     915           0 :   }
     916             : 
     917             :   /* Append the new FEC set to the end of the buffer. */
     918           0 :   fd_memcpy( block->fec_buf+block->fec_buf_sz, fec->fec->data, fec->fec->data_sz );
     919           0 :   block->fec_buf_sz += (uint)fec->fec->data_sz;
     920           0 :   sched->metrics->bytes_ingested_cnt += fec->fec->data_sz;
     921             : 
     922           0 :   block->fec_eob = fec->is_last_in_batch;
     923           0 :   block->fec_eos = fec->is_last_in_block;
     924             : 
     925           0 :   ulong block_sz = block->shred_cnt>0 ? block->shred_blk_offs[ block->shred_cnt-1 ] : 0UL;
     926           0 :   for( ulong i=0; i<fec->shred_cnt; i++ ) {
     927           0 :     if( FD_LIKELY( i<32UL ) ) {
     928           0 :       block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->block_offs[ i ];
     929           0 :     } else if( FD_UNLIKELY( i!=fec->shred_cnt-1UL ) ) {
      930             :       /* We don't track shred boundaries after 32 shreds; assume the rest
      931             :          are sized uniformly (example after this loop). */
     932           0 :       ulong num_overflow_shreds = fec->shred_cnt-32UL;
     933           0 :       ulong overflow_idx        = i-32UL;
     934           0 :       ulong overflow_data_sz    = fec->fec->data_sz-fec->fec->block_offs[ 31 ];
     935           0 :       block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->block_offs[ 31 ] + (uint)(overflow_data_sz / num_overflow_shreds * (overflow_idx + 1UL));
     936           0 :     } else {
     937           0 :       block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + (uint)fec->fec->data_sz;
     938           0 :     }
     939           0 :   }
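                      : 
                      : /* Example of the uniform-sizing assumption above: for a FEC set with
                      :    40 shreds, shreds 32..38 get offsets spaced at uniform 1/8
                      :    increments of the payload past shred 31's recorded offset, and the
                      :    final shred always ends exactly at data_sz. */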
     940             : 
     941           0 :   int err = fd_sched_parse( sched, block, fec->alut_ctx );
     942             : 
     943           0 :   block->fec_cnt++;
     944           0 :   sched->metrics->fec_cnt++;
     945             : 
     946           0 :   if( FD_UNLIKELY( err==FD_SCHED_BAD_BLOCK ) ) {
     947           0 :     handle_bad_block( sched, block );
     948           0 :     sched->metrics->bytes_dropped_cnt += block->fec_buf_sz-block->fec_buf_soff;
     949           0 :     return 0;
     950           0 :   }
     951             : 
     952             :   /* We just received a FEC set, which may have made all transactions in
     953             :      a partially parsed microblock available.  If this were a malformed
      954             :      block that ends in a non-tick microblock, there would be no
      955             :      hashing task from the missing ending tick to drain the mixin queue.
      956             :      So we try to drain the mixin queue right here.  Another option
      957             :      would be to drain it at dispatch time, when we are about to
      958             :      dispatch the end-of-block signal, right before the check for
      959             :      whether the block should end. */
     960           0 :   int mixin_res;
     961           0 :   while( (mixin_res=maybe_mixin( sched, block )) ) {
     962           0 :     if( FD_UNLIKELY( mixin_res==-1 ) ) {
     963           0 :       handle_bad_block( sched, block );
     964           0 :       return 0;
     965           0 :     }
     966           0 :     FD_TEST( mixin_res==1||mixin_res==2 );
     967           0 :   }
     968             : 
     969             :   /* Check if we need to set the active block. */
     970           0 :   check_or_set_active_block( sched );
     971             : 
     972           0 :   return 1;
     973           0 : }
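                      : 
                      : /* Illustrative sketch (an assumption about the caller, not part of the
                      :    scheduler): the admission-control pattern around
                      :    fd_sched_can_ingest() and fd_sched_fec_ingest().  The function name
                      :    below is hypothetical. */
                      : __attribute__((unused)) static void
                      : example_try_ingest_one( fd_sched_t *     sched,
                      :                         fd_sched_fec_t * fec ) {
                      :   if( FD_UNLIKELY( !fd_sched_can_ingest( sched, 1UL ) ) ) {
                      :     /* Txn pool can't absorb the worst case: apply back-pressure and
                      :        retry this FEC set later. */
                      :     return;
                      :   }
                      :   if( FD_UNLIKELY( !fd_sched_fec_ingest( sched, fec ) ) ) {
                      :     /* Bad block: the scheduler refused the data; stop replaying down
                      :        this fork. */
                      :     return;
                      :   }
                      :   /* Ingested.  The data may still have been dropped internally if the
                      :      block was already marked dying. */
                      : }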
     974             : 
     975             : ulong
     976           0 : fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out ) {
     977           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     978           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
     979             : 
     980           0 :   ulong exec_ready_bitset0 = sched->txn_exec_ready_bitset[ 0 ];
     981           0 :   ulong exec_fully_ready_bitset = sched->sigverify_ready_bitset[ 0 ] & sched->poh_ready_bitset[ 0 ] & exec_ready_bitset0;
     982           0 :   if( FD_UNLIKELY( !exec_fully_ready_bitset ) ) {
     983             :     /* Early exit if no exec tiles available. */
     984           0 :     return 0UL;
     985           0 :   }
     986             : 
     987           0 :   if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
     988             :     /* No need to try activating a block.  If we're in this state,
     989             :        there's truly nothing to execute.  We will activate something
     990             :        when we ingest a FEC set with transactions. */
     991           0 :     return 0UL;
     992           0 :   }
     993             : 
     994           0 :   out->task_type = FD_SCHED_TT_NULL;
     995             : 
     996             :   /* We could in theory reevaluate staging lane allocation here and do
     997             :      promotion/demotion as needed.  It's a policy decision to minimize
     998             :      fork churn for now and just execute down the same active fork. */
     999             : 
    1000           0 :   ulong bank_idx = sched->active_bank_idx;
    1001           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1002           0 :   if( FD_UNLIKELY( block_should_deactivate( block ) ) ) {
    1003           0 :     sched->print_buf_sz = 0UL;
    1004           0 :     print_all( sched, block );
    1005           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1006           0 :     FD_LOG_CRIT(( "invariant violation: active_bank_idx %lu is not activatable nor has anything in-flight", sched->active_bank_idx ));
    1007           0 :   }
    1008             : 
    1009           0 :   block->txn_pool_max_popcnt   = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt );
    1010           0 :   block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );
    1011             : 
    1012           0 :   if( FD_UNLIKELY( !block->block_start_signaled ) ) {
    1013           0 :     out->task_type = FD_SCHED_TT_BLOCK_START;
    1014           0 :     out->block_start->bank_idx        = bank_idx;
    1015           0 :     out->block_start->parent_bank_idx = block->parent_idx;
    1016           0 :     out->block_start->slot            = block->slot;
    1017           0 :     block->block_start_signaled = 1;
    1018           0 :     return 1UL;
    1019           0 :   }
    1020             : 
    1021           0 :   ulong exec_tile_idx0 = fd_ulong_if( !!exec_fully_ready_bitset, (ulong)fd_ulong_find_lsb( exec_fully_ready_bitset ), ULONG_MAX );
    1022           0 :   ulong exec_queued_cnt = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
    1023           0 :   if( FD_LIKELY( exec_queued_cnt>0UL && fd_ulong_popcnt( exec_fully_ready_bitset ) ) ) { /* Optimize for no fork switching. */
    1024             :     /* Transaction execution has the highest priority.  Current mainnet
    1025             :        block times are very much dominated by critical path transaction
    1026             :        execution.  To achieve the fastest block replay speed, we can't
    1027             :        afford to make any mistake in critical path dispatching.  Any
    1028             :        deviation from perfect critical path dispatching is basically
    1029             :        irrecoverable.  As such, we try to keep all the exec tiles busy
    1030             :        with transaction execution, but we allow at most one transaction
    1031             :        to be in-flight per exec tile.  This is to ensure that whenever a
    1032             :        critical path transaction completes, we have at least one exec
    1033             :        tile, e.g. the one that just completed said transaction, readily
    1034             :        available to continue executing down the critical path. */
    1035           0 :     out->txn_exec->txn_idx = fd_rdisp_get_next_ready( sched->rdisp, bank_idx );
    1036           0 :     if( FD_UNLIKELY( out->txn_exec->txn_idx==0UL ) ) {
    1037             :       /* There are transactions queued but none ready for execution.
    1038             :          This implies that there must be in-flight transactions on whose
    1039             :          completion the queued transactions depend. So we return and
    1040             :          wait for those in-flight transactions to retire.  This is a
    1041             :          policy decision to execute as much as we can down the current
    1042             :          fork. */
    1043           0 :       if( FD_UNLIKELY( !block->txn_exec_in_flight_cnt ) ) {
    1044           0 :         sched->print_buf_sz = 0UL;
    1045           0 :         print_all( sched, block );
    1046           0 :         FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1047           0 :         FD_LOG_CRIT(( "invariant violation: no ready transaction found but block->txn_exec_in_flight_cnt==0" ));
    1048           0 :       }
    1049             : 
     1050             :       /* Next up are PoH tasks.  Same dispatching policy as the
     1051             :          sigverify tasks below. */
    1052           0 :       ulong poh_ready_bitset = exec_fully_ready_bitset;
    1053           0 :       ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
    1054           0 :       if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
    1055           0 :         dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
    1056           0 :         return 1UL;
    1057           0 :       }
    1058             : 
    1059             :       /* Dispatch more sigverify tasks only if at least one exec tile is
    1060             :          executing transactions or completely idle.  Allow at most one
    1061             :          sigverify task in-flight per tile, and only dispatch to
    1062             :          completely idle tiles. */
    1063           0 :       ulong sigverify_ready_bitset = exec_fully_ready_bitset;
    1064           0 :       ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
    1065           0 :       if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
    1066           0 :         dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
    1067           0 :         return 1UL;
    1068           0 :       }
    1069           0 :       return 0UL;
    1070           0 :     }
    1071           0 :     out->task_type = FD_SCHED_TT_TXN_EXEC;
    1072           0 :     out->txn_exec->bank_idx = bank_idx;
    1073           0 :     out->txn_exec->slot     = block->slot;
    1074           0 :     out->txn_exec->exec_idx = exec_tile_idx0;
    1075           0 :     FD_TEST( out->txn_exec->exec_idx!=ULONG_MAX );
    1076             : 
    1077           0 :     long now = fd_tickcount();
    1078           0 :     ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
    1079           0 :     ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( exec_ready_bitset0 );
    1080           0 :     sched->metrics->txn_none_in_flight_tickcount     += fd_ulong_if( txn_exec_busy_cnt==0UL && sched->txn_in_flight_last_tick!=LONG_MAX, delta, 0UL );
    1081           0 :     sched->metrics->txn_weighted_in_flight_tickcount += fd_ulong_if( txn_exec_busy_cnt!=0UL, delta, 0UL );
    1082           0 :     sched->metrics->txn_weighted_in_flight_cnt       += delta*txn_exec_busy_cnt;
    1083           0 :     sched->txn_in_flight_last_tick = now;
    1084             : 
    1085           0 :     block->txn_disp_ticks[ sched->txn_idx_to_parse_idx[ out->txn_exec->txn_idx ] ] = now;
    1086             : 
    1087           0 :     sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_clear_bit( exec_ready_bitset0, (int)exec_tile_idx0);
    1088           0 :     sched->tile_to_bank_idx[ exec_tile_idx0 ] = bank_idx;
    1089             : 
    1090           0 :     block->txn_exec_in_flight_cnt++;
    1091           0 :     sched->metrics->txn_max_in_flight_cnt = fd_uint_max( sched->metrics->txn_max_in_flight_cnt, block->txn_exec_in_flight_cnt );
    1092             : 
    1093           0 :     ulong total_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ]&sched->sigverify_ready_bitset[ 0 ]&sched->poh_ready_bitset[ 0 ] );
    1094           0 :     if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
    1095           0 :     if( FD_UNLIKELY( block->txn_exec_in_flight_cnt+block->txn_sigverify_in_flight_cnt+block->poh_hashing_in_flight_cnt!=total_exec_busy_cnt ) ) {
    1096             :       /* Ideally we'd simply assert that the two sides of the equation
    1097             :          are equal.  But abandoned blocks throw a wrench into this.  We
    1098             :          allow abandoned blocks to have in-flight transactions that are
    1099             :          naturally drained while we try to dispatch from another block.
    1100             :          In such cases, the total number of in-flight transactions
    1101             :          should include the abandoned blocks too.  The contract is that
    1102             :          blocks with in-flight transactions cannot be abandoned or
    1103             :          demoted from rdisp.  So a dying block has to be the head of one
    1104             :          of the staging lanes. */
     1105             :       // FIXME This contract will no longer hold if we implement immediate
    1106             :       // demotion of abandoned blocks.
    1107           0 :       ulong total_in_flight = 0UL;
    1108           0 :       for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
    1109           0 :         if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
    1110           0 :           fd_sched_block_t * staged_block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
    1111           0 :           if( FD_UNLIKELY( block_is_in_flight( staged_block )&&!(staged_block==block||staged_block->dying) ) ) {
    1112           0 :             sched->print_buf_sz = 0UL;
    1113           0 :             print_all( sched, staged_block );
    1114           0 :             FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1115           0 :             FD_LOG_CRIT(( "invariant violation: in-flight block is neither active nor dying" ));
    1116           0 :           }
    1117           0 :           total_in_flight += staged_block->txn_exec_in_flight_cnt;
    1118           0 :           total_in_flight += staged_block->txn_sigverify_in_flight_cnt;
    1119           0 :           total_in_flight += staged_block->poh_hashing_in_flight_cnt;
    1120           0 :         }
    1121           0 :       }
    1122           0 :       if( FD_UNLIKELY( total_in_flight!=total_exec_busy_cnt ) ) {
    1123           0 :         sched->print_buf_sz = 0UL;
    1124           0 :         print_all( sched, block );
    1125           0 :         FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1126           0 :         FD_LOG_CRIT(( "invariant violation: total_in_flight %lu != total_exec_busy_cnt %lu", total_in_flight, total_exec_busy_cnt ));
    1127           0 :       }
    1128           0 :       FD_LOG_DEBUG(( "exec_busy_cnt %lu checks out", total_exec_busy_cnt ));
    1129           0 :     }
    1130           0 :     return 1UL;
    1131           0 :   }
    1132             : 
     1133             :   /* At this point exec_queued_cnt==0. */
    1134             : 
    1135             :   /* Next up are PoH tasks.  Same dispatching policy as sigverify. */
    1136           0 :   ulong poh_ready_bitset = exec_fully_ready_bitset;
    1137           0 :   ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
    1138           0 :   if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
    1139           0 :     dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
    1140           0 :     return 1UL;
    1141           0 :   }
    1142             : 
    1143             :   /* Try to dispatch a sigverify task, but leave one exec tile idle for
     1144             :      critical path execution, unless there are not going to be any more
    1145             :      real transactions for the critical path.  In the degenerate case of
    1146             :      only one exec tile, keep it busy. */
    1147           0 :   ulong sigverify_ready_bitset = exec_fully_ready_bitset;
    1148           0 :   ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
    1149           0 :   if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
    1150           0 :     dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
    1151           0 :     return 1UL;
    1152           0 :   }
    1153             : 
    1154           0 :   if( FD_UNLIKELY( block_should_signal_end( block ) ) ) {
    1155           0 :     FD_TEST( block->block_start_signaled );
    1156           0 :     if( FD_UNLIKELY( verify_ticks( block ) ) ) {
    1157             :       /* Tick verification can't be done at parse time, because we may
    1158             :          not know the expected number of hashes yet.  It can't be driven
    1159             :          by transaction dispatch/completion, because the block may be
     1160             :          empty.  Similarly, it can't be driven by PoH hashing, because a
    1161             :          bad block may simply not have any microblocks. */
    1162           0 :       handle_bad_block( sched, block );
    1163           0 :       out->task_type = FD_SCHED_TT_MARK_DEAD;
    1164           0 :       out->mark_dead->bank_idx = bank_idx;
    1165           0 :       return 1UL;
    1166           0 :     }
    1167           0 :     out->task_type = FD_SCHED_TT_BLOCK_END;
    1168           0 :     out->block_end->bank_idx = bank_idx;
    1169           0 :     block->block_end_signaled = 1;
    1170           0 :     FD_TEST( block->refcnt );
    1171           0 :     block->refcnt = 0;
    1172           0 :     if( FD_UNLIKELY( !ref_q_avail( sched->ref_q ) ) ) FD_LOG_CRIT(( "ref_q full" ));
    1173           0 :     ref_q_push_tail( sched->ref_q, bank_idx );
    1174           0 :     return 1UL;
    1175           0 :   }
    1176             : 
    1177             :   /* Nothing queued for the active block.  If we haven't received all
    1178             :      the FEC sets for it, then return and wait for more FEC sets, while
    1179             :      there are in-flight transactions.  This is a policy decision to
    1180             :      minimize fork churn and allow for executing down the current fork
    1181             :      as much as we can.  If we have received all the FEC sets for it,
    1182             :      then we'd still like to return and wait for the in-flight
    1183             :      transactions to retire, before switching to a different block.
    1184             : 
    1185             :      Either way, there should be in-flight transactions.  We deactivate
     1186             :      the active block the moment we have exhausted transactions from it. */
    1187           0 :   if( FD_UNLIKELY( !block_is_in_flight( block ) ) ) {
    1188           0 :     sched->print_buf_sz = 0UL;
    1189           0 :     print_all( sched, block );
    1190           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1191           0 :     FD_LOG_CRIT(( "invariant violation: expected in-flight transactions but none" ));
    1192           0 :   }
    1193             : 
    1194           0 :   return 0UL;
    1195           0 : }
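                      : 
                      : /* Illustrative sketch (an assumption about the caller, not part of the
                      :    scheduler): the shape of a replay-side polling loop around
                      :    fd_sched_task_next_ready() and fd_sched_task_done().  The helper
                      :    example_send_to_exec_tile() is a hypothetical stand-in for the real
                      :    exec tile plumbing; in practice fd_sched_task_done() for exec and
                      :    sigverify tasks is invoked later, when the exec tile's completion
                      :    message comes back, with the same exec_idx the scheduler picked. */
                      : __attribute__((unused)) static void
                      : example_send_to_exec_tile( ulong task_type, ulong exec_idx, ulong txn_idx ) {
                      :   /* Hypothetical: forward the task to exec tile exec_idx here. */
                      :   (void)task_type; (void)exec_idx; (void)txn_idx;
                      : }
                      : 
                      : __attribute__((unused)) static void
                      : example_replay_poll( fd_sched_t * sched ) {
                      :   fd_sched_task_t task[1];
                      :   while( fd_sched_task_next_ready( sched, task ) ) {
                      :     switch( task->task_type ) {
                      :       case FD_SCHED_TT_BLOCK_START:
                      :       case FD_SCHED_TT_BLOCK_END:
                      :         /* Block-level work (bank creation/finalization) elided; ack so
                      :            the scheduler keeps driving state transitions. */
                      :         fd_sched_task_done( sched, task->task_type, 0UL, 0UL, NULL );
                      :         break;
                      :       case FD_SCHED_TT_TXN_EXEC:
                      :         example_send_to_exec_tile( task->task_type, task->txn_exec->exec_idx, task->txn_exec->txn_idx );
                      :         break;
                      :       case FD_SCHED_TT_TXN_SIGVERIFY:
                      :       case FD_SCHED_TT_POH_HASH:
                      :         /* Analogous: forward to the exec tile picked by the scheduler
                      :            (task fields elided here).  For FD_SCHED_TT_POH_HASH the later
                      :            fd_sched_task_done() call also carries the hash-done message. */
                      :         break;
                      :       default:
                      :         /* e.g. FD_SCHED_TT_MARK_DEAD: handled by the caller without a
                      :            fd_sched_task_done() ack. */
                      :         break;
                      :     }
                      :   }
                      : }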
    1196             : 
    1197             : int
    1198           0 : fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx, void * data ) {
    1199           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1200             : 
    1201           0 :   ulong bank_idx = ULONG_MAX;
    1202           0 :   switch( task_type ) {
    1203           0 :     case FD_SCHED_TT_BLOCK_START:
    1204           0 :     case FD_SCHED_TT_BLOCK_END: {
    1205           0 :       (void)txn_idx;
    1206           0 :       (void)data;
    1207           0 :       bank_idx = sched->active_bank_idx;
    1208           0 :       break;
    1209           0 :     }
    1210           0 :     case FD_SCHED_TT_TXN_EXEC:
    1211           0 :     case FD_SCHED_TT_TXN_SIGVERIFY: {
    1212           0 :       (void)data;
    1213           0 :       FD_TEST( txn_idx < sched->depth );
    1214           0 :       bank_idx = sched->tile_to_bank_idx[ exec_idx ];
    1215           0 :       break;
    1216           0 :     }
    1217           0 :     case FD_SCHED_TT_POH_HASH: {
    1218           0 :       (void)txn_idx;
    1219           0 :       bank_idx = sched->tile_to_bank_idx[ exec_idx ];
    1220           0 :       break;
    1221           0 :     }
    1222           0 :     default: FD_LOG_CRIT(( "unsupported task_type %lu", task_type ));
    1223           0 :   }
    1224           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1225             : 
    1226           0 :   if( FD_UNLIKELY( !block->in_sched ) ) {
    1227           0 :     FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
    1228           0 :                   block->slot, block->parent_slot, bank_idx ));
    1229           0 :   }
    1230           0 :   if( FD_UNLIKELY( !block->staged ) ) {
    1231             :     /* Invariant: only staged blocks can have in-flight transactions. */
    1232           0 :     FD_LOG_CRIT(( "invariant violation: block->staged==0, slot %lu, parent slot %lu",
    1233           0 :                   block->slot, block->parent_slot ));
    1234           0 :   }
    1235           0 :   if( FD_UNLIKELY( !block->in_rdisp ) ) {
    1236             :     /* Invariant: staged blocks must be in the dispatcher. */
    1237           0 :     FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
    1238           0 :                   block->slot, block->parent_slot ));
    1239           0 :   }
    1240             : 
    1241           0 :   block->txn_pool_max_popcnt   = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt );
    1242           0 :   block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );
    1243             : 
    1244           0 :   int exec_tile_idx = (int)exec_idx;
    1245             : 
    1246           0 :   switch( task_type ) {
    1247           0 :     case FD_SCHED_TT_BLOCK_START: {
    1248           0 :       FD_TEST( !block->block_start_done );
    1249           0 :       block->block_start_done = 1;
    1250           0 :       break;
    1251           0 :     }
    1252           0 :     case FD_SCHED_TT_BLOCK_END: {
    1253             :       /* It may seem redundant to be invoking task_done() on these
    1254             :          somewhat fake tasks.  But these are necessary to drive state
     1255             :          transitions for empty blocks or slow blocks. */
    1256           0 :       FD_TEST( !block->block_end_done );
    1257           0 :       block->block_end_done = 1;
    1258           0 :       sched->print_buf_sz = 0UL;
    1259           0 :       print_block_metrics( sched, block );
    1260           0 :       FD_LOG_DEBUG(( "block %lu:%lu replayed fully: %s", block->slot, bank_idx, sched->print_buf ));
    1261           0 :       break;
    1262           0 :     }
    1263           0 :     case FD_SCHED_TT_TXN_EXEC: {
    1264           0 :       long now = fd_tickcount();
    1265           0 :       ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
    1266           0 :       ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ] );
    1267           0 :       sched->metrics->txn_weighted_in_flight_tickcount += delta;
    1268           0 :       sched->metrics->txn_weighted_in_flight_cnt       += delta*txn_exec_busy_cnt;
    1269           0 :       sched->txn_in_flight_last_tick = now;
    1270             : 
    1271           0 :       block->txn_done_ticks[ sched->txn_idx_to_parse_idx[ txn_idx ] ] = now;
    1272             : 
    1273           0 :       block->txn_exec_done_cnt++;
    1274           0 :       block->txn_exec_in_flight_cnt--;
    1275           0 :       FD_TEST( !fd_ulong_extract_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx ) );
    1276           0 :       sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx );
    1277           0 :       sched->metrics->txn_exec_done_cnt++;
    1278           0 :       txn_bitset_insert( sched->exec_done_set, txn_idx );
    1279           0 :       if( txn_bitset_test( sched->sigverify_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
    1280             :         /* Release the txn_idx if all tasks on it are done.  This is
    1281             :            guaranteed to only happen once per transaction because
    1282             :            whichever one completed first would not release. */
    1283           0 :         fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
    1284           0 :         sched->txn_pool_free_cnt++;
    1285           0 :         block->txn_done_cnt++;
    1286           0 :         sched->metrics->txn_done_cnt++;
    1287           0 :       } else {
    1288           0 :         fd_rdisp_complete_txn( sched->rdisp, txn_idx, 0 );
    1289           0 :       }
    1290           0 :       break;
    1291           0 :     }
    1292           0 :     case FD_SCHED_TT_TXN_SIGVERIFY: {
    1293           0 :       block->txn_sigverify_done_cnt++;
    1294           0 :       block->txn_sigverify_in_flight_cnt--;
    1295           0 :       FD_TEST( !fd_ulong_extract_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx ) );
    1296           0 :       sched->sigverify_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx );
    1297           0 :       sched->metrics->txn_sigverify_done_cnt++;
    1298           0 :       txn_bitset_insert( sched->sigverify_done_set, txn_idx );
    1299           0 :       if( txn_bitset_test( sched->exec_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
    1300             :         /* Release the txn_idx if all tasks on it are done.  This is
    1301             :            guaranteed to only happen once per transaction because
    1302             :            whichever one completed first would not release. */
    1303           0 :         fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
    1304           0 :         sched->txn_pool_free_cnt++;
    1305           0 :         block->txn_done_cnt++;
    1306           0 :         sched->metrics->txn_done_cnt++;
    1307           0 :       }
    1308           0 :       break;
    1309           0 :     }
    1310           0 :     case FD_SCHED_TT_POH_HASH: {
    1311           0 :       block->poh_hashing_in_flight_cnt--;
    1312           0 :       FD_TEST( !fd_ulong_extract_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx ) );
    1313           0 :       sched->poh_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
    1314           0 :       fd_execrp_poh_hash_done_msg_t * msg = fd_type_pun( data );
    1315           0 :       fd_sched_mblk_in_progress_t * mblk = block->mblk_in_progress_pool+msg->mblk_idx;
    1316           0 :       mblk->curr_hashcnt += msg->hashcnt;
    1317           0 :       memcpy( mblk->curr_hash, msg->hash, sizeof(fd_hash_t) );
    1318           0 :       fd_sched_mblk_t * mblk_desc = block->mblk_pool+mblk->mblk_idx;
    1319           0 :       ulong hashcnt_todo = mblk_desc->hashcnt-mblk->curr_hashcnt;
    1320           0 :       if( !hashcnt_todo ) {
    1321           0 :         block->poh_hashing_done_cnt++;
    1322           0 :         if( FD_LIKELY( !mblk->is_tick ) ) {
    1323             :           /* This is not a tick.  Enqueue for mixin. */
    1324           0 :           mblk_in_progress_slist_idx_push_tail( block->mblks_mixin_in_progress, msg->mblk_idx, block->mblk_in_progress_pool );
    1325           0 :         } else {
    1326             :           /* This is a tick.  No need to mixin.  Check the hash value
    1327             :              right away. */
    1328           0 :           if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk_desc->end_hash, sizeof(fd_hash_t) ) ) ) {
    1329           0 :             FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
    1330           0 :             FD_BASE58_ENCODE_32_BYTES( mblk_desc->end_hash->hash, ref_str );
    1331           0 :             FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %u, ours %s, claimed %s, hashcnt %lu, is_tick, slot %lu, parent slot %lu", mblk->mblk_idx, our_str, ref_str, mblk_desc->hashcnt, block->slot, block->parent_slot ));
    1332           0 :             handle_bad_block( sched, block );
    1333           0 :             return -1;
    1334           0 :           }
    1335           0 :           block->poh_hash_cmp_done_cnt++;
    1336           0 :           mblk_in_progress_bitset_insert( block->mblk_in_progress_pool_free_bitset, msg->mblk_idx );
    1337           0 :         }
    1338             :         /* Try to drain the mixin queue. */
    1339           0 :         int mixin_res;
    1340           0 :         while( (mixin_res=maybe_mixin( sched, block )) ) {
    1341           0 :           if( FD_UNLIKELY( mixin_res==-1 ) ) {
    1342           0 :             handle_bad_block( sched, block );
    1343           0 :             return -1;
    1344           0 :           }
    1345           0 :           FD_TEST( mixin_res==1||mixin_res==2 );
    1346           0 :         }
    1347           0 :       } else {
    1348           0 :         mblk_in_progress_slist_idx_push_tail( block->mblks_hashing_in_progress, msg->mblk_idx, block->mblk_in_progress_pool );
    1349           0 :       }
    1350           0 :       FD_TEST( block->hashes_per_tick!=ULONG_MAX );
    1351           0 :       if( FD_UNLIKELY( block->curr_tick_hashcnt>block->hashes_per_tick && block->hashes_per_tick>1UL ) ) { /* >1 to ignore low power hashing or no hashing cases */
    1352             :         /* We couldn't really check this at parse time because we may
    1353             :            not have the expected hashes per tick value yet.  We couldn't
    1354             :            delay this till after all PoH hashing is done, because this
    1355             :            would be a DoS vector.  This is a good place to do the check.
    1356             :            Note that checking the hashcnt between ticks transitively
    1357             :            places an upper bound on the hashcnt of individual
    1358             :            microblocks, thus mitigating the DoS vector. */
    1359           0 :         FD_LOG_INFO(( "bad block: observed cumulative tick_hashcnt %lu, expected %lu, slot %lu, parent slot %lu", block->curr_tick_hashcnt, block->hashes_per_tick, block->slot, block->parent_slot ));
    1360           0 :         handle_bad_block( sched, block );
    1361           0 :         return -1;
    1362           0 :       }
    1363           0 :       break;
    1364           0 :     }
    1365           0 :   }
    1366             : 
    1367           0 :   if( FD_UNLIKELY( block->dying && !block_is_in_flight( block ) ) ) {
    1368           0 :     if( FD_UNLIKELY( sched->active_bank_idx==bank_idx ) ) {
    1369           0 :       FD_LOG_CRIT(( "invariant violation: active block shouldn't be dying, bank_idx %lu, slot %lu, parent slot %lu",
    1370           0 :                     bank_idx, block->slot, block->parent_slot ));
    1371           0 :     }
    1372           0 :     FD_LOG_DEBUG(( "dying block %lu drained", block->slot ));
    1373           0 :     subtree_abandon( sched, block );
    1374           0 :     return 0;
    1375           0 :   }
    1376             : 
    1377           0 :   if( FD_UNLIKELY( !block->dying && sched->active_bank_idx!=bank_idx ) ) {
    1378             :     /* Block is not dead.  So we should be actively replaying it. */
    1379           0 :     fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
    1380           0 :     FD_LOG_CRIT(( "invariant violation: sched->active_bank_idx %lu, slot %lu, parent slot %lu, bank_idx %lu, slot %lu, parent slot %lu",
    1381           0 :                   sched->active_bank_idx, active_block->slot, active_block->parent_slot,
    1382           0 :                   bank_idx, block->slot, block->parent_slot ));
    1383           0 :   }
    1384             : 
    1385           0 :   maybe_switch_block( sched, bank_idx );
    1386             : 
    1387           0 :   return 0;
    1388           0 : }
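                      : 
                      : /* Note on the release path above: a txn_idx is returned to the txn
                      :    pool only once all three of its completions have been observed --
                      :    execution, sigverify, and PoH mixin.  Whichever completion lands
                      :    last performs the release; earlier completions only record
                      :    themselves in the done bitsets (execution additionally completes
                      :    the transaction in rdisp without freeing it). */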
    1389             : 
    1390             : void
    1391           0 : fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx ) {
    1392           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1393           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1394             : 
    1395           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1396           0 :   if( FD_UNLIKELY( !block->in_sched ) ) {
    1397           0 :     FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
    1398           0 :                   block->slot, block->parent_slot, bank_idx ));
    1399           0 :   }
    1400             : 
    1401           0 :   FD_LOG_INFO(( "abandoning block %lu slot %lu", bank_idx, block->slot ));
    1402           0 :   sched->print_buf_sz = 0UL;
    1403           0 :   print_all( sched, block );
    1404           0 :   FD_LOG_DEBUG(( "%s", sched->print_buf ));
    1405           0 :   log_block_txns( sched, block );
    1406             : 
    1407           0 :   subtree_abandon( sched, block );
    1408           0 :   try_activate_block( sched );
    1409           0 : }
    1410             : 
    1411             : void
    1412           0 : fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx, ulong slot ) {
    1413           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1414           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1415             : 
    1416           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1417           0 :   add_block( sched, bank_idx, parent_bank_idx );
    1418           0 :   block->slot                   = slot;
    1419           0 :   block->txn_parsed_cnt         = UINT_MAX;
    1420           0 :   block->txn_exec_done_cnt      = UINT_MAX;
    1421           0 :   block->txn_sigverify_done_cnt = UINT_MAX;
    1422           0 :   block->poh_hashing_done_cnt   = UINT_MAX;
    1423           0 :   block->poh_hash_cmp_done_cnt  = UINT_MAX;
    1424           0 :   block->txn_done_cnt           = UINT_MAX;
    1425           0 :   block->mblk_cnt               = UINT_MAX;
    1426           0 :   block->fec_eos                = 1;
    1427           0 :   block->block_start_signaled   = 1;
    1428           0 :   block->block_end_signaled     = 1;
    1429           0 :   block->block_start_done       = 1;
    1430           0 :   block->block_end_done         = 1;
    1431           0 :   block->refcnt                 = 0;
    1432           0 :   if( FD_LIKELY( parent_bank_idx!=ULONG_MAX ) ) {
    1433           0 :     fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
    1434           0 :     block->parent_slot = parent_block->slot;
    1435           0 :   }
    1436           0 :   if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) {
    1437             :     /* Assumes that a NULL parent implies the snapshot slot. */
    1438           0 :     block->parent_slot = ULONG_MAX;
    1439           0 :     block->rooted      = 1;
    1440           0 :     sched->root_idx    = bank_idx;
    1441           0 :   }
    1442           0 : }
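                      : 
                      : /* Illustrative usage (an assumption about the caller): the snapshot
                      :    slot would be registered as the initial root with
                      :      fd_sched_block_add_done( sched, snapshot_bank_idx, ULONG_MAX, snapshot_slot );
                      :    where snapshot_bank_idx and snapshot_slot are caller-supplied.  A
                      :    ULONG_MAX parent marks the block rooted and makes it the tree root;
                      :    a real parent index attaches an already-replayed block under that
                      :    parent without it ever entering the dispatcher. */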
    1443             : 
    1444             : void
    1445           0 : fd_sched_advance_root( fd_sched_t * sched, ulong root_idx ) {
    1446           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1447           0 :   FD_TEST( root_idx<sched->block_cnt_max );
    1448           0 :   FD_TEST( sched->root_idx<sched->block_cnt_max );
    1449           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
    1450             : 
    1451           0 :   fd_sched_block_t * new_root = block_pool_ele( sched, root_idx );
    1452           0 :   fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
    1453           0 :   if( FD_UNLIKELY( !old_root->rooted ) ) {
    1454           0 :     FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
    1455           0 :                   old_root->slot, old_root->parent_slot ));
    1456           0 :   }
    1457             : 
    1458             :   /* Early exit if the new root is the same as the old root. */
    1459           0 :   if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
    1460           0 :     FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
    1461           0 :                   new_root->slot, new_root->parent_slot ));
    1462           0 :     return;
    1463           0 :   }
    1464             : 
    1465           0 :   subtree_prune( sched, sched->root_idx, root_idx );
    1466             : 
    1467           0 :   new_root->parent_idx = ULONG_MAX;
    1468           0 :   sched->root_idx = root_idx;
    1469           0 : }
    1470             : 
    1471             : void
    1472           0 : fd_sched_root_notify( fd_sched_t * sched, ulong root_idx ) {
    1473           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1474           0 :   FD_TEST( root_idx<sched->block_cnt_max );
    1475           0 :   FD_TEST( sched->root_idx<sched->block_cnt_max );
    1476           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
    1477             : 
    1478           0 :   fd_sched_block_t * block    = block_pool_ele( sched, root_idx );
    1479           0 :   fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
    1480           0 :   if( FD_UNLIKELY( !old_root->rooted ) ) {
    1481           0 :     FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
    1482           0 :                   old_root->slot, old_root->parent_slot ));
    1483           0 :   }
    1484             : 
    1485             :   /* Early exit if the new root is the same as the old root. */
    1486           0 :   if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
    1487           0 :     FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
    1488           0 :                   block->slot, block->parent_slot ));
    1489           0 :     return;
    1490           0 :   }
    1491             : 
    1492             :   /* Mark every node from the new root up through its parents to the
    1493             :      old root as being rooted. */
    1494           0 :   fd_sched_block_t * curr = block;
    1495           0 :   fd_sched_block_t * prev = NULL;
    1496           0 :   while( curr ) {
    1497           0 :     if( FD_UNLIKELY( !block_is_done( curr ) ) ) {
    1498           0 :       FD_LOG_CRIT(( "invariant violation: rooting a block that is not done, slot %lu, parent slot %lu",
    1499           0 :                     curr->slot, curr->parent_slot ));
    1500           0 :     }
    1501           0 :     if( FD_UNLIKELY( curr->dying ) ) {
    1502           0 :       FD_LOG_CRIT(( "invariant violation: rooting a block that is dying, slot %lu, parent slot %lu",
    1503           0 :                     curr->slot, curr->parent_slot ));
    1504           0 :     }
    1505           0 :     if( FD_UNLIKELY( curr->staged ) ) {
    1506           0 :       FD_LOG_CRIT(( "invariant violation: rooting a block that is staged, slot %lu, parent slot %lu",
    1507           0 :                     curr->slot, curr->parent_slot ));
    1508           0 :     }
    1509           0 :     if( FD_UNLIKELY( curr->in_rdisp ) ) {
    1510           0 :       FD_LOG_CRIT(( "invariant violation: rooting a block that is in the dispatcher, slot %lu, parent slot %lu",
    1511           0 :                     curr->slot, curr->parent_slot ));
    1512           0 :     }
    1513           0 :     curr->rooted = 1;
    1514           0 :     prev = curr;
    1515           0 :     curr = block_pool_ele( sched, curr->parent_idx );
    1516           0 :   }
    1517             : 
    1518             :   /* If we didn't reach the old root, the new root is not a descendant. */
    1519           0 :   if( FD_UNLIKELY( prev!=old_root ) ) {
    1520           0 :     FD_LOG_CRIT(( "invariant violation: new root is not a descendant of old root, new root slot %lu, parent slot %lu, old root slot %lu, parent slot %lu",
    1521           0 :                   block->slot, block->parent_slot, old_root->slot, old_root->parent_slot ));
    1522           0 :   }
    1523             : 
    1524           0 :   ulong old_active_bank_idx = sched->active_bank_idx;
    1525             : 
    1526             :   /* Now traverse from old root towards new root, and abandon all
    1527             :      minority forks. */
    1528           0 :   curr = old_root;
    1529           0 :   while( curr && curr->rooted && curr!=block ) { /* curr!=block to avoid abandoning good forks. */
    1530           0 :     fd_sched_block_t * rooted_child_block = NULL;
    1531           0 :     ulong              child_idx          = curr->child_idx;
    1532           0 :     while( child_idx!=ULONG_MAX ) {
    1533           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    1534           0 :       if( child->rooted ) {
    1535           0 :         rooted_child_block = child;
    1536           0 :       } else {
    1537             :         /* This is a minority fork. */
    1538           0 :         ulong abandoned_cnt = sched->metrics->block_abandoned_cnt;
    1539           0 :         subtree_abandon( sched, child );
    1540           0 :         abandoned_cnt = sched->metrics->block_abandoned_cnt-abandoned_cnt;
    1541           0 :         if( FD_UNLIKELY( abandoned_cnt ) ) FD_LOG_DEBUG(( "abandoned %lu blocks on minority fork starting at block %lu:%lu", abandoned_cnt, child->slot, child_idx ));
    1542           0 :       }
    1543           0 :       child_idx = child->sibling_idx;
    1544           0 :     }
    1545           0 :     curr = rooted_child_block;
    1546           0 :   }
    1547             : 
    1548             :   /* If the active block got abandoned, we need to reset it. */
    1549           0 :   if( sched->active_bank_idx==ULONG_MAX ) {
    1550           0 :     sched->metrics->deactivate_pruned_cnt += fd_uint_if( old_active_bank_idx!=ULONG_MAX, 1U, 0U );
    1551           0 :     try_activate_block( sched );
    1552           0 :   }
    1553           0 : }
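                      : 
                      : /* Note on the two rooting entry points: fd_sched_root_notify() above
                      :    marks the old-root-to-new-root path as rooted and abandons minority
                      :    forks hanging off of it, while fd_sched_advance_root() prunes the
                      :    now-obsolete part of the tree (via subtree_prune()) and re-homes the
                      :    tree at the new root.  Both require the current root to be rooted
                      :    and are effectively no-ops when the new root equals the old root. */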
    1554             : 
    1555             : ulong
    1556           0 : fd_sched_pruned_block_next( fd_sched_t * sched ) {
    1557           0 :   if( !ref_q_empty( sched->ref_q ) ) {
    1558           0 :     ulong bank_idx = ref_q_pop_head( sched->ref_q );
    1559           0 :     return bank_idx;
    1560           0 :   }
    1561           0 :   return ULONG_MAX;
    1562           0 : }
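                     : 
                     : /* Hedged usage sketch (hypothetical caller, not part of fd_sched):
                     :    fd_sched_pruned_block_next() above hands back, one at a time, the
                     :    bank indices of pruned blocks that still held a reference,
                     :    returning ULONG_MAX once the queue is drained.  A caller might
                     :    drain it like so before reusing those bank indices. */
                     : static inline void
                     : example_drain_pruned_blocks( fd_sched_t * sched ) {
                     :   for( ulong bank_idx=fd_sched_pruned_block_next( sched );
                     :        bank_idx!=ULONG_MAX;
                     :        bank_idx=fd_sched_pruned_block_next( sched ) ) {
                     :     /* Release any per-bank resources still tied to bank_idx here. */
                     :   }
                     : }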
    1563             : 
    1564             : void
    1565           0 : fd_sched_set_poh_params( fd_sched_t * sched, ulong bank_idx, ulong tick_height, ulong max_tick_height, ulong hashes_per_tick, fd_hash_t const * start_poh ) {
    1566           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1567           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1568           0 :   FD_TEST( max_tick_height>tick_height );
    1569           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1570           0 :   block->tick_height = tick_height;
    1571           0 :   block->max_tick_height = max_tick_height;
    1572           0 :   block->hashes_per_tick = hashes_per_tick;
    1573           0 :   memcpy( block->start_hash, start_poh, sizeof(block->start_hash[0]) );
    1574           0 : }
    1575             : 
    1576             : fd_txn_p_t *
    1577           0 : fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx ) {
    1578           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1579           0 :   if( FD_UNLIKELY( txn_idx>=FD_SCHED_MAX_DEPTH ) ) {
    1580           0 :     return NULL;
    1581           0 :   }
    1582           0 :   return sched->txn_pool+txn_idx;
    1583           0 : }
    1584             : 
    1585             : fd_hash_t *
    1586           0 : fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx ) {
    1587           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1588           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1589           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1590           0 :   FD_TEST( block->fec_eos );
    1591           0 :   FD_TEST( block->mblk_cnt );
    1592           0 :   return block->mblk_pool[ block->mblk_cnt-1 ].end_hash;
    1593           0 : }
    1594             : 
    1595             : uint
    1596           0 : fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx ) {
    1597           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1598           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1599           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1600           0 :   return block->shred_cnt;
    1601           0 : }
    1602             : 
    1603             : char *
    1604           0 : fd_sched_get_state_cstr( fd_sched_t * sched ) {
    1605           0 :   sched->print_buf_sz = 0UL;
    1606           0 :   print_metrics( sched );
    1607           0 :   print_sched( sched );
    1608           0 :   return sched->print_buf;
    1609           0 : }
    1610             : 
    1611           0 : void * fd_sched_leave ( fd_sched_t * sched ) { return sched; }
    1612           0 : void * fd_sched_delete( void * mem         ) { return   mem; }
    1613             : 
    1614             : 
    1615             : /* Internal helpers. */
    1616             : 
    1617             : static void
    1618             : add_block( fd_sched_t * sched,
    1619             :            ulong        bank_idx,
    1620           0 :            ulong        parent_bank_idx ) {
    1621           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1622           0 :   FD_TEST( !block->in_sched );
    1623           0 :   sched->block_pool_popcnt++;
    1624             : 
    1625           0 :   block->txn_parsed_cnt              = 0U;
    1626           0 :   block->txn_exec_in_flight_cnt      = 0U;
    1627           0 :   block->txn_exec_done_cnt           = 0U;
    1628           0 :   block->txn_sigverify_in_flight_cnt = 0U;
    1629           0 :   block->txn_sigverify_done_cnt      = 0U;
    1630           0 :   block->poh_hashing_in_flight_cnt   = 0U;
    1631           0 :   block->poh_hashing_done_cnt        = 0U;
    1632           0 :   block->poh_hash_cmp_done_cnt       = 0U;
    1633           0 :   block->txn_done_cnt                = 0U;
    1634           0 :   block->shred_cnt                   = 0U;
    1635           0 :   block->fec_cnt                     = 0U;
    1636           0 :   block->mblk_cnt                    = 0U;
    1637           0 :   block->mblk_tick_cnt               = 0U;
    1638           0 :   block->mblk_unhashed_cnt           = 0U;
    1639           0 :   block->hashcnt                     = 0UL;
    1640           0 :   block->txn_pool_max_popcnt         = sched->depth - sched->txn_pool_free_cnt;
    1641           0 :   block->block_pool_max_popcnt       = sched->block_pool_popcnt;
    1642             : 
    1643           0 :   mblk_in_progress_bitset_full( block->mblk_in_progress_pool_free_bitset );
    1644           0 :   mblk_in_progress_slist_remove_all( block->mblks_hashing_in_progress, block->mblk_in_progress_pool );
    1645           0 :   mblk_in_progress_slist_remove_all( block->mblks_mixin_in_progress, block->mblk_in_progress_pool );
    1646             : 
    1647           0 :   block->prev_tick_hashcnt = ULONG_MAX;
    1648           0 :   block->curr_tick_hashcnt = 0UL;
    1649           0 :   block->tick_height       = ULONG_MAX;
    1650           0 :   block->max_tick_height   = ULONG_MAX;
    1651           0 :   block->hashes_per_tick   = ULONG_MAX;
    1652           0 :   block->inconsistent_hashes_per_tick = 0;
    1653             : 
    1654           0 :   block->mblks_rem    = 0UL;
    1655           0 :   block->txns_rem     = 0UL;
    1656           0 :   block->fec_buf_sz   = 0U;
    1657           0 :   block->fec_buf_boff = 0U;
    1658           0 :   block->fec_buf_soff = 0U;
    1659           0 :   block->fec_eob      = 0;
    1660           0 :   block->fec_sob      = 1;
    1661             : 
    1662           0 :   block->fec_eos              = 0;
    1663           0 :   block->rooted               = 0;
    1664           0 :   block->dying                = 0;
    1665           0 :   block->refcnt               = 1;
    1666           0 :   block->in_sched             = 1;
    1667           0 :   block->in_rdisp             = 0;
    1668           0 :   block->block_start_signaled = 0;
    1669           0 :   block->block_end_signaled   = 0;
    1670           0 :   block->block_start_done     = 0;
    1671           0 :   block->block_end_done       = 0;
    1672           0 :   block->staged               = 0;
    1673             : 
    1674           0 :   block->luf_depth = 0UL;
    1675             : 
    1676             :   /* New leaf node, no child, no sibling. */
    1677           0 :   block->child_idx   = ULONG_MAX;
    1678           0 :   block->sibling_idx = ULONG_MAX;
    1679           0 :   block->parent_idx  = ULONG_MAX;
    1680             : 
    1681           0 :   if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) {
    1682           0 :     return;
    1683           0 :   }
    1684             : 
    1685             :   /* node->parent link */
    1686           0 :   fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
    1687           0 :   block->parent_idx = parent_bank_idx;
    1688             : 
    1689             :   /* parent->node and sibling->node links */
    1690           0 :   ulong child_idx = bank_idx;
    1691           0 :   if( FD_LIKELY( parent_block->child_idx==ULONG_MAX ) ) { /* Optimize for no forking. */
    1692           0 :     parent_block->child_idx = child_idx;
    1693           0 :   } else {
    1694           0 :     fd_sched_block_t * curr_block = block_pool_ele( sched, parent_block->child_idx );
    1695           0 :     while( curr_block->sibling_idx!=ULONG_MAX ) {
    1696           0 :       curr_block = block_pool_ele( sched, curr_block->sibling_idx );
    1697           0 :     }
    1698           0 :     curr_block->sibling_idx = child_idx;
    1699           0 :     sched->metrics->fork_observed_cnt++;
    1700           0 :   }
    1701             : 
    1702           0 :   if( FD_UNLIKELY( parent_block->dying ) ) {
    1703           0 :     block->dying = 1;
    1704           0 :   }
    1705           0 : }
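                     : 
                     : /* Hedged illustration (hypothetical helper, not used by the
                     :    scheduler): add_block() above encodes the fork tree with
                     :    child/sibling links.  A parent whose children were added in the
                     :    order A, B, C ends up with parent->child_idx==A,
                     :    A->sibling_idx==B, B->sibling_idx==C and
                     :    C->sibling_idx==ULONG_MAX.  Counting a block's children is then
                     :    a walk down that sibling list. */
                     : static inline ulong
                     : example_child_cnt( fd_sched_t * sched, fd_sched_block_t * parent ) {
                     :   ulong cnt = 0UL;
                     :   for( ulong idx=parent->child_idx; idx!=ULONG_MAX; idx=block_pool_ele( sched, idx )->sibling_idx ) {
                     :     cnt++;
                     :   }
                     :   return cnt;
                     : }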
    1706             : 
    1707             : /* https://github.com/anza-xyz/agave/blob/v3.0.6/ledger/src/blockstore_processor.rs#L1057
    1708             : 
    1709             :    Returns 0 on success. */
    1710             : static int
    1711           0 : verify_ticks( fd_sched_block_t * block ) {
    1712           0 :   FD_TEST( block->fec_eos );
    1713           0 :   if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height>block->max_tick_height ) ) {
    1714           0 :     FD_LOG_INFO(( "bad block: TOO_MANY_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
    1715           0 :     return -1;
    1716           0 :   }
    1717           0 :   if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height<block->max_tick_height ) ) {
    1718           0 :     FD_LOG_INFO(( "bad block: TOO_FEW_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
    1719           0 :     return -1;
    1720           0 :   }
    1721             :   /* Number of ticks > 0 implies that number of microblocks > 0, so the
    1722             :      subtraction won't underflow. */
    1723           0 :   ulong last_mblk_idx = block->mblk_cnt-1UL;
    1724           0 :   ulong start_txn_idx = last_mblk_idx==0UL ? 0UL : block->mblk_pool[ last_mblk_idx-1UL ].end_txn_idx;
    1725           0 :   ulong end_txn_idx = block->mblk_pool[ last_mblk_idx ].end_txn_idx;
    1726           0 :   if( FD_UNLIKELY( start_txn_idx!=end_txn_idx ) ) {
    1727             :     /* The last microblock should be a tick (0 transactions). */
    1728           0 :     FD_LOG_INFO(( "bad block: TRAILING_ENTRY, slot %lu, parent slot %lu", block->slot, block->parent_slot ));
    1729           0 :     return -1;
    1730           0 :   }
    1731           0 :   if( FD_UNLIKELY( block->hashes_per_tick>1UL && (block->hashes_per_tick!=block->prev_tick_hashcnt||block->inconsistent_hashes_per_tick) ) ) {
    1732             :     /* >1 since we don't care about checking low power hashing or
    1733             :        hashing disabled. */
    1734           0 :     FD_LOG_INFO(( "bad block: INVALID_TICK_HASH_COUNT, slot %lu, parent slot %lu, expected %lu, got %lu", block->slot, block->parent_slot, block->hashes_per_tick, block->prev_tick_hashcnt ));
    1735           0 :     return -1;
    1736           0 :   }
    1737             : 
    1738           0 :   return 0;
    1739           0 : }
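                     : 
                     : /* Hedged sketch (hypothetical helper, not called by the scheduler):
                     :    the core tick accounting enforced by verify_ticks() above.  For
                     :    example, if the parent left off at tick_height 128 and
                     :    max_tick_height is 192, a valid block must contain exactly 64
                     :    tick microblocks, and its last microblock must itself be a tick. */
                     : static inline int
                     : example_tick_count_ok( ulong tick_height, ulong max_tick_height, ulong tick_cnt ) {
                     :   if( tick_height+tick_cnt>max_tick_height ) return 0; /* TOO_MANY_TICKS */
                     :   if( tick_height+tick_cnt<max_tick_height ) return 0; /* TOO_FEW_TICKS  */
                     :   return 1;
                     : }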
    1740             : 
    1741           0 : #define CHECK( cond )  do {             \
    1742           0 :   if( FD_UNLIKELY( !(cond) ) ) {        \
    1743           0 :     return FD_SCHED_AGAIN_LATER;        \
    1744           0 :   }                                     \
    1745           0 : } while( 0 )
    1746             : 
    1747             : /* CHECK that it is safe to read at least n more bytes. */
    1748           0 : #define CHECK_LEFT( n ) CHECK( (n)<=(block->fec_buf_sz-block->fec_buf_soff) )
    1749             : 
    1750             : /* Consume as much as possible from the buffer.  By the end of this
    1751             :    function, we will either have residual data that is unparseable only
    1752             :    because it is a batch that straddles FEC set boundaries, or we will
    1753             :    have reached the end of a batch.  In the former case, any remaining
    1754             :    bytes should be concatenated with the next FEC set for further
    1755             :    parsing.  In the latter case, any remaining bytes should be thrown
    1756             :    away. */
    1757             : FD_WARN_UNUSED static int
    1758           0 : fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
    1759           0 :   while( 1 ) {
    1760           0 :     while( block->txns_rem>0UL ) {
    1761           0 :       int err;
    1762           0 :       if( FD_UNLIKELY( (err=fd_sched_parse_txn( sched, block, alut_ctx ))!=FD_SCHED_OK ) ) {
    1763           0 :         return err;
    1764           0 :       }
    1765           0 :     }
    1766           0 :     if( block->txns_rem==0UL && block->mblks_rem>0UL ) {
    1767           0 :       if( FD_UNLIKELY( block->mblk_cnt>=FD_SCHED_MAX_MBLK_PER_SLOT ) ) {
    1768             :         /* A valid block shouldn't contain more than this many
    1769             :            microblocks. */
    1770           0 :         FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, mblk_cnt %u (%u ticks) >= %lu", block->slot, block->parent_slot, block->mblk_cnt, block->mblk_tick_cnt, FD_SCHED_MAX_MBLK_PER_SLOT ));
    1771           0 :         return FD_SCHED_BAD_BLOCK;
    1772           0 :       }
    1773             : 
    1774           0 :       CHECK_LEFT( sizeof(fd_microblock_hdr_t) );
    1775           0 :       fd_microblock_hdr_t * hdr = (fd_microblock_hdr_t *)fd_type_pun( block->fec_buf+block->fec_buf_soff );
    1776           0 :       block->fec_buf_soff      += (uint)sizeof(fd_microblock_hdr_t);
    1777             : 
    1778           0 :       block->mblks_rem--;
    1779           0 :       block->txns_rem = hdr->txn_cnt;
    1780           0 :       fd_sched_mblk_t * mblk = block->mblk_pool+block->mblk_cnt;
    1781           0 :       mblk->end_txn_idx = ( block->mblk_cnt ? block->mblk_pool[ block->mblk_cnt-1 ].end_txn_idx : 0U )+hdr->txn_cnt;
    1782             :       /* One might think that every microblock needs to have at least
    1783             :          one hash, otherwise the block should be considered invalid.  A
    1784             :          vanilla validator certainly produces microblocks that conform
    1785             :          to this.  But a modded validator could in theory produce zero
    1786             :          hash microblocks.  Agave's replay stage will happily take those
    1787             :          microblocks.  The Agave implementation-defined way of doing PoH
    1788             :          verify is as follows:
    1789             : 
    1790             :          For a tick microblock, do the same number of hashes as
    1791             :          specified by the microblock.  Zero hashes are allowed, in which
    1792             :          case this tick would have the same ending hash value as the
    1793             :          previous microblock.
    1794             : 
    1795             :          For a transaction microblock, if the number of hashes specified
    1796             :          by the microblock is <= 1, then do zero pure hashes, and simply
    1797             :          do a mixin/record.  Otherwise, do (number of hashes-1) pure
    1798             :          hashes, and then do a mixin.  However, note that for
    1799             :          the purposes of tick_verify, the number of hashes specified by
    1800             :          the microblock is taken verbatim.
    1801             : 
    1802             :          https://github.com/anza-xyz/agave/blob/v3.0.6/entry/src/entry.rs#L232
    1803             : 
    1804             :          We implement the above for consensus. */
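                     :       /* Worked example of the rule above (illustrative numbers): a
                     :          transaction microblock advertising hash_cnt=12 contributes 11
                     :          pure hashes plus one mixin; one advertising hash_cnt<=1
                     :          contributes zero pure hashes plus one mixin; a tick
                     :          advertising hash_cnt=12 contributes 12 pure hashes and no
                     :          mixin, and a zero-hash tick leaves the running hash
                     :          unchanged. */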
    1805           0 :       mblk->hashcnt = fd_ulong_sat_sub( hdr->hash_cnt, fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL ) ); /* For pure hashing, implement the above. */
    1806           0 :       block->curr_tick_hashcnt += hdr->hash_cnt; /* For tick_verify, take the number of hashes verbatim. */
    1807           0 :       block->hashcnt += mblk->hashcnt+fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL );
    1808           0 :       memcpy( mblk->end_hash, hdr->hash, sizeof(fd_hash_t) );
    1809           0 :       block->mblk_cnt++;
    1810           0 :       block->mblk_unhashed_cnt++;
    1811           0 :       if( FD_UNLIKELY( !hdr->txn_cnt ) ) {
    1812             :         /* This is a tick microblock. */
    1813           0 :         if( FD_UNLIKELY( block->prev_tick_hashcnt!=ULONG_MAX && block->prev_tick_hashcnt!=block->curr_tick_hashcnt ) ) {
    1814           0 :           block->inconsistent_hashes_per_tick = 1;
    1815           0 :           if( FD_LIKELY( block->hashes_per_tick!=ULONG_MAX && block->hashes_per_tick>1UL ) ) {
    1816             :             /* >1 to ignore low power hashing or hashing disabled */
    1817           0 :             FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, tick idx %u, prev hashcnt %lu, curr hashcnt %lu, hashes_per_tick %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->prev_tick_hashcnt, block->curr_tick_hashcnt, block->hashes_per_tick ));
    1818           0 :             return FD_SCHED_BAD_BLOCK;
    1819           0 :           }
    1820           0 :         }
    1821           0 :         block->prev_tick_hashcnt = block->curr_tick_hashcnt;
    1822           0 :         block->curr_tick_hashcnt = 0UL;
    1823           0 :         block->mblk_tick_cnt++;
    1824           0 :       }
    1825             :       #if FD_SCHED_SKIP_POH
    1826             :       block->mblk_unhashed_cnt--;
    1827             :       block->poh_hashing_done_cnt++;
    1828             :       block->poh_hash_cmp_done_cnt++;
    1829             :       #endif
    1830           0 :       continue;
    1831           0 :     }
    1832           0 :     if( block->txns_rem==0UL && block->mblks_rem==0UL && block->fec_sob ) {
    1833           0 :       CHECK_LEFT( sizeof(ulong) );
    1834           0 :       FD_TEST( block->fec_buf_soff==0U );
    1835           0 :       block->mblks_rem     = FD_LOAD( ulong, block->fec_buf );
    1836           0 :       block->fec_buf_soff += (uint)sizeof(ulong);
    1837             : 
    1838           0 :       block->fec_sob = 0;
    1839           0 :       continue;
    1840           0 :     }
    1841           0 :     if( block->txns_rem==0UL && block->mblks_rem==0UL ) {
    1842           0 :       break;
    1843           0 :     }
    1844           0 :   }
    1845           0 :   if( block->fec_eob ) {
    1846             :     /* Ignore trailing bytes at the end of a batch. */
    1847           0 :     sched->metrics->bytes_ingested_unparsed_cnt += block->fec_buf_sz-block->fec_buf_soff;
    1848           0 :     block->fec_buf_boff += block->fec_buf_sz;
    1849           0 :     block->fec_buf_soff = 0U;
    1850           0 :     block->fec_buf_sz   = 0U;
    1851           0 :     block->fec_sob      = 1;
    1852           0 :     block->fec_eob      = 0;
    1853           0 :   }
    1854           0 :   return FD_SCHED_OK;
    1855           0 : }
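                     : 
                     : /* For orientation, a hedged sketch of the batch layout that
                     :    fd_sched_parse() above walks, as implied by the loads in its loop
                     :    (illustrative, not a normative wire-format definition):
                     : 
                     :      batch := [ulong microblock_cnt]
                     :               microblock_cnt x ( [fd_microblock_hdr_t]       hash_cnt, txn_cnt, hash
                     :                                  txn_cnt x [serialized txn]  parsed via fd_txn_parse_core() )
                     : 
                     :    Because a transaction, a microblock header, or the microblock
                     :    count can straddle FEC set boundaries, CHECK_LEFT() bails out
                     :    with FD_SCHED_AGAIN_LATER and the residual bytes are concatenated
                     :    with the next FEC set before parsing resumes. */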
    1856             : 
    1857             : FD_WARN_UNUSED static int
    1858           0 : fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
    1859           0 :   fd_txn_t * txn = fd_type_pun( block->txn );
    1860             : 
    1861             :   /* FIXME: For the replay pipeline, we allow up to 128 instructions per
    1862             :      transaction.  Note that we are not concomitantly bumping the size
    1863             :      of fd_txn_t.  We allow this because transactions like that do get
    1864             :      packed by other validators, so we have to replay them.  Those
    1865             :      transactions will eventually fail in the runtime, which imposes a
    1866             :      limit of 64 instructions, but unfortunately they are not tossed out
    1867             :      at parse time and they land on chain.  static_instruction_limit is
    1868             :      going to enforece this limit at parse time, and transactions like
    1869             :      going to enforce this limit at parse time, and transactions like
    1870             :      be rolled back. */
    1871           0 :   ulong pay_sz = 0UL;
    1872           0 :   ulong txn_sz = fd_txn_parse_core( block->fec_buf+block->fec_buf_soff,
    1873           0 :                                     fd_ulong_min( FD_TXN_MTU, block->fec_buf_sz-block->fec_buf_soff ),
    1874           0 :                                     txn,
    1875           0 :                                     NULL,
    1876           0 :                                     &pay_sz,
    1877           0 :                                     ULONG_MAX );
    1878             : 
    1879           0 :   if( FD_UNLIKELY( !pay_sz || !txn_sz ) ) {
    1880             :     /* Can't parse out a full transaction. */
    1881           0 :     return FD_SCHED_AGAIN_LATER;
    1882           0 :   }
    1883             : 
    1884             :   /* FIXME: remove after static_instruction_limit */
    1885           0 :   if( FD_UNLIKELY( txn->instr_cnt>FD_TXN_INSTR_MAX*2UL ) ) FD_LOG_CRIT(( "slot %lu txn idx %u has %u instructions, static_instruction_limit will resolve this", block->slot, block->txn_parsed_cnt, txn->instr_cnt ));
    1886             : 
    1887           0 :   if( FD_UNLIKELY( block->txn_parsed_cnt>=FD_MAX_TXN_PER_SLOT ) ) {
    1888             :     /* The block contains more transactions than a valid block would.
    1889             :        Mark the block dead instead of continuing to process it. */
    1890           0 :     FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, txn_parsed_cnt %u", block->slot, block->parent_slot, block->txn_parsed_cnt ));
    1891           0 :     return FD_SCHED_BAD_BLOCK;
    1892           0 :   }
    1893             : 
    1894             :   /* Try to expand ALUTs. */
    1895           0 :   int has_aluts   = txn->transaction_version==FD_TXN_V0 && txn->addr_table_adtl_cnt>0;
    1896           0 :   int serializing = 0;
    1897           0 :   if( has_aluts ) {
    1898           0 :     uchar __attribute__((aligned(FD_SLOT_HASHES_GLOBAL_ALIGN))) slot_hashes_mem[ FD_SYSVAR_SLOT_HASHES_FOOTPRINT ];
    1899           0 :     fd_slot_hashes_global_t const * slot_hashes_global = fd_sysvar_slot_hashes_read( alut_ctx->accdb, alut_ctx->xid, slot_hashes_mem );
    1900           0 :     if( FD_LIKELY( slot_hashes_global ) ) {
    1901           0 :       fd_slot_hash_t * slot_hash = deq_fd_slot_hash_t_join( (uchar *)slot_hashes_global + slot_hashes_global->hashes_offset );
    1902           0 :       serializing = !!fd_runtime_load_txn_address_lookup_tables( txn, block->fec_buf+block->fec_buf_soff, alut_ctx->accdb, alut_ctx->xid, alut_ctx->els, slot_hash, block->aluts );
    1903           0 :       sched->metrics->alut_success_cnt += (uint)!serializing;
    1904           0 :     } else {
    1905           0 :       serializing = 1;
    1906           0 :     }
    1907           0 :   }
    1908             : 
    1909           0 :   ulong bank_idx = (ulong)(block-sched->block_pool);
    1910           0 :   ulong txn_idx   = fd_rdisp_add_txn( sched->rdisp, bank_idx, txn, block->fec_buf+block->fec_buf_soff, serializing ? NULL : block->aluts, serializing );
    1911           0 :   FD_TEST( txn_idx!=0UL );
    1912           0 :   sched->metrics->txn_parsed_cnt++;
    1913           0 :   sched->metrics->alut_serializing_cnt += (uint)serializing;
    1914           0 :   sched->txn_pool_free_cnt--;
    1915           0 :   fd_txn_p_t * txn_p = sched->txn_pool + txn_idx;
    1916           0 :   txn_p->payload_sz  = pay_sz;
    1917             : 
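                     :   /* Record which shreds this transaction's payload spans: search the
                     :      sorted per-shred byte offsets in shred_blk_offs for the payload's
                     :      start and end offsets within the block to get the covering shred
                     :      index range. */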
    1918           0 :   txn_p->start_shred_idx = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff );
    1919           0 :   txn_p->start_shred_idx = fd_ushort_if( txn_p->start_shred_idx>0U, (ushort)(txn_p->start_shred_idx-1U), txn_p->start_shred_idx );
    1920           0 :   txn_p->end_shred_idx = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff+(uint)pay_sz );
    1921             : 
    1922           0 :   fd_memcpy( txn_p->payload, block->fec_buf+block->fec_buf_soff, pay_sz );
    1923           0 :   fd_memcpy( TXN(txn_p),     txn,                                txn_sz );
    1924           0 :   txn_bitset_remove( sched->exec_done_set, txn_idx );
    1925           0 :   txn_bitset_remove( sched->sigverify_done_set, txn_idx );
    1926           0 :   txn_bitset_remove( sched->poh_mixin_done_set, txn_idx );
    1927           0 :   sched->txn_idx_to_parse_idx[ txn_idx ] = block->txn_parsed_cnt;
    1928           0 :   memcpy( block->txn_sigs[ block->txn_parsed_cnt ], fd_txn_get_signatures( TXN(txn_p), txn_p->payload ), FD_TXN_SIGNATURE_SZ );
    1929           0 :   block->txn_idx[ block->txn_parsed_cnt ] = txn_idx;
    1930           0 :   block->txn_disp_ticks[ block->txn_parsed_cnt ] = LONG_MAX;
    1931           0 :   block->txn_done_ticks[ block->txn_parsed_cnt ] = LONG_MAX;
    1932           0 :   block->fec_buf_soff += (uint)pay_sz;
    1933           0 :   block->txn_parsed_cnt++;
    1934             : #if FD_SCHED_SKIP_SIGVERIFY
    1935             :   txn_bitset_insert( sched->sigverify_done_set, txn_idx );
    1936             :   block->txn_sigverify_done_cnt++;
    1937             : #endif
    1938             : #if FD_SCHED_SKIP_POH
    1939             :   txn_bitset_insert( sched->poh_mixin_done_set, txn_idx );
    1940             : #endif
    1941           0 :   block->txns_rem--;
    1942           0 :   return FD_SCHED_OK;
    1943           0 : }
    1944             : 
    1945             : #undef CHECK
    1946             : #undef CHECK_LEFT
    1947             : 
    1948             : static void
    1949           0 : dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
    1950             :   /* Dispatch transactions for sigverify in parse order. */
    1951           0 :   out->task_type = FD_SCHED_TT_TXN_SIGVERIFY;
    1952           0 :   out->txn_sigverify->bank_idx = bank_idx;
    1953           0 :   out->txn_sigverify->txn_idx  = block->txn_idx[ block->txn_sigverify_done_cnt+block->txn_sigverify_in_flight_cnt ];
    1954           0 :   out->txn_sigverify->exec_idx = (ulong)exec_tile_idx;
    1955           0 :   sched->sigverify_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx );
    1956           0 :   sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
    1957           0 :   block->txn_sigverify_in_flight_cnt++;
    1958           0 :   if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
    1959           0 : }
    1960             : 
    1961             : /* Assumes there is a PoH task available for dispatching. */
    1962             : static void
    1963           0 : dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
    1964           0 :   fd_sched_mblk_in_progress_t * mblk = NULL;
    1965           0 :   ulong mblk_idx = ULONG_MAX;
    1966           0 :   fd_sched_mblk_t * mblk_desc = NULL;
    1967           0 :   ulong mblk_desc_idx = ULONG_MAX;
    1968           0 :   if( FD_LIKELY( !mblk_in_progress_slist_is_empty( block->mblks_hashing_in_progress, block->mblk_in_progress_pool ) ) ) {
    1969             :     /* There's a PoH task in progress, just continue working on that. */
    1970           0 :     mblk_idx = mblk_in_progress_slist_idx_pop_head( block->mblks_hashing_in_progress, block->mblk_in_progress_pool );
    1971           0 :     FD_TEST( !mblk_in_progress_slist_private_idx_is_null( mblk_idx ) );
    1972           0 :     mblk = block->mblk_in_progress_pool+mblk_idx;
    1973           0 :     mblk_desc_idx = mblk->mblk_idx;
    1974           0 :     mblk_desc = block->mblk_pool+mblk_desc_idx;
    1975           0 :   } else {
    1976             :     /* No in progress PoH task, so start a new one. */
    1977           0 :     FD_TEST( block->mblk_unhashed_cnt );
    1978           0 :     mblk_desc_idx = block->mblk_cnt-block->mblk_unhashed_cnt;
    1979           0 :     mblk_desc = block->mblk_pool+mblk_desc_idx;
    1980           0 :     block->mblk_unhashed_cnt--;
    1981           0 :     mblk_idx = mblk_in_progress_bitset_first( block->mblk_in_progress_pool_free_bitset );
    1982           0 :     FD_TEST( mblk_idx<FD_SCHED_MAX_MBLK_IN_PROGRESS_PER_SLOT );
    1983           0 :     mblk_in_progress_bitset_remove( block->mblk_in_progress_pool_free_bitset, mblk_idx );
    1984           0 :     mblk = block->mblk_in_progress_pool+mblk_idx;
    1985           0 :     ulong start_txn_idx = mblk_desc_idx==0UL ? 0UL : (mblk_desc-1)->end_txn_idx;
    1986           0 :     mblk->curr_txn_idx = start_txn_idx;
    1987           0 :     mblk->curr_hashcnt = 0UL;
    1988           0 :     mblk->curr_sig_cnt = 0U;
    1989           0 :     memcpy( mblk->curr_hash, mblk_desc_idx==0UL ? block->start_hash : (mblk_desc-1)->end_hash, sizeof(fd_hash_t) );
    1990           0 :     mblk->mblk_idx = (uint)mblk_desc_idx;
    1991           0 :     mblk->is_tick = start_txn_idx==mblk_desc->end_txn_idx;
    1992           0 :   }
    1993           0 :   FD_TEST( mblk );
    1994           0 :   FD_TEST( mblk_desc );
    1995           0 :   out->task_type = FD_SCHED_TT_POH_HASH;
    1996           0 :   out->poh_hash->bank_idx = bank_idx;
    1997           0 :   out->poh_hash->mblk_idx = mblk_idx;
    1998           0 :   out->poh_hash->exec_idx = (ulong)exec_tile_idx;
    1999           0 :   ulong hashcnt_todo = mblk_desc->hashcnt-mblk->curr_hashcnt;
    2000           0 :   out->poh_hash->hashcnt  = fd_ulong_min( hashcnt_todo, FD_SCHED_MAX_POH_HASHES_PER_TASK );
    2001           0 :   memcpy( out->poh_hash->hash, mblk->curr_hash, sizeof(fd_hash_t) );
    2002           0 :   sched->poh_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
    2003           0 :   sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
    2004           0 :   block->poh_hashing_in_flight_cnt++;
    2005           0 :   if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
    2006           0 : }
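                     : 
                     : /* Note on the above: a dispatched PoH task covers at most
                     :    FD_SCHED_MAX_POH_HASHES_PER_TASK hashes of the microblock's
                     :    remaining hashcnt, resuming from the running state carried in
                     :    mblk->curr_hash, so a hash-heavy microblock is completed across
                     :    several tasks via the "in progress" branch above. */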
    2007             : 
    2008             : /* Does up to one transaction mixin.  Returns 1 if one mixin was done, 2
    2009             :    if that mixin also completed a microblock, 0 if no transaction mixin
    2010             :    was available, -1 if there is a PoH verify error. */
    2011             : FD_WARN_UNUSED static int
    2012           0 : maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block ) {
    2013           0 :   if( FD_UNLIKELY( mblk_in_progress_slist_is_empty( block->mblks_mixin_in_progress, block->mblk_in_progress_pool ) ) ) return 0;
    2014           0 :   FD_TEST( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt>0 );
    2015             : 
    2016             :   /* The microblock we would like to do mixin on is at the head of the
    2017             :      queue. */
    2018           0 :   ulong in_progress_idx = mblk_in_progress_slist_idx_pop_head( block->mblks_mixin_in_progress, block->mblk_in_progress_pool );
    2019           0 :   fd_sched_mblk_in_progress_t * mblk = block->mblk_in_progress_pool+in_progress_idx;
    2020           0 :   fd_sched_mblk_t * mblk_desc = block->mblk_pool+mblk->mblk_idx;
    2021           0 :   ulong start_txn_idx = mblk->mblk_idx==0 ? 0UL : (mblk_desc-1)->end_txn_idx;
    2022             : 
    2023           0 :   if( FD_UNLIKELY( mblk_desc->end_txn_idx>block->txn_parsed_cnt ) ) {
    2024             :     /* Very rarely, we've finished hashing, but not all transactions in
    2025             :        the microblock have been parsed out.  This can happen if we
    2026             :        haven't received all the FEC sets for this microblock.  We can't
    2027             :        yet fully mixin the microblock.  So we'll stick it back into the
    2028             :        end of the queue. */
    2029           0 :     mblk_in_progress_slist_idx_push_tail( block->mblks_mixin_in_progress, in_progress_idx, block->mblk_in_progress_pool );
    2030             :     /* Invariant: we wouldn't start to mixin unless the microblock is
    2031             :        fully parsed. */
    2032           0 :     FD_TEST( mblk->curr_txn_idx==start_txn_idx );
    2033             : 
    2034             :     /* No other microblock in the mixin queue. */
    2035           0 :     if( FD_UNLIKELY( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt==1 ) ) return 0;
    2036             : 
    2037             :     /* At this point, there's at least one more microblock in the mixin
    2038             :        queue we could try. */
    2039           0 :     in_progress_idx = mblk_in_progress_slist_idx_pop_head( block->mblks_mixin_in_progress, block->mblk_in_progress_pool );
    2040           0 :     mblk = block->mblk_in_progress_pool+in_progress_idx;
    2041           0 :     mblk_desc = block->mblk_pool+mblk->mblk_idx;
    2042           0 :     start_txn_idx = mblk->mblk_idx==0 ? 0UL : (mblk_desc-1)->end_txn_idx;
    2043             :     /* Invariant: at any given point in time, there can be at most one
    2044             :        microblock that hasn't been fully parsed yet, due to the nature
    2045             :        of sequential parsing.  So this microblock has to be fully
    2046             :        parsed. */
    2047           0 :     FD_TEST( mblk_desc->end_txn_idx<=block->txn_parsed_cnt );
    2048           0 :   }
    2049             : 
    2050             :   /* Now mixin. */
    2051           0 :   if( FD_LIKELY( mblk->curr_txn_idx==start_txn_idx ) ) block->bmtree = fd_bmtree_commit_init( block->bmtree_mem, 32UL, 1UL, 0UL ); /* Optimize for single-transaction microblocks, which are the majority. */
    2052             : 
    2053           0 :   ulong txn_gidx = block->txn_idx[ mblk->curr_txn_idx ];
    2054           0 :   fd_txn_p_t * _txn = sched->txn_pool+txn_gidx;
    2055           0 :   fd_txn_t * txn = TXN(_txn);
    2056           0 :   for( ulong j=0; j<txn->signature_cnt; j++ ) {
    2057           0 :     fd_bmtree_node_t node[ 1 ];
    2058           0 :     fd_bmtree_hash_leaf( node, _txn->payload+txn->signature_off+FD_TXN_SIGNATURE_SZ*j, 64UL, 1UL );
    2059           0 :     fd_bmtree_commit_append( block->bmtree, node, 1UL );
    2060           0 :     mblk->curr_sig_cnt++;
    2061           0 :   }
    2062             : 
    2063             :   /* Release the txn_idx. */
    2064           0 :   txn_bitset_insert( sched->poh_mixin_done_set, txn_gidx );
    2065           0 :   if( txn_bitset_test( sched->exec_done_set, txn_gidx ) && txn_bitset_test( sched->sigverify_done_set, txn_gidx ) ) {
    2066           0 :     fd_rdisp_complete_txn( sched->rdisp, txn_gidx, 1 );
    2067           0 :     sched->txn_pool_free_cnt++;
    2068           0 :     block->txn_done_cnt++;
    2069           0 :     sched->metrics->txn_done_cnt++;
    2070           0 :   }
    2071             : 
    2072           0 :   mblk->curr_txn_idx++;
    2073           0 :   int rv = 2;
    2074           0 :   if( FD_LIKELY( mblk->curr_txn_idx==mblk_desc->end_txn_idx ) ) {
    2075             :     /* Ready to compute the final hash for this microblock. */
    2076           0 :     uchar * root = fd_bmtree_commit_fini( block->bmtree );
    2077           0 :     uchar mixin_buf[ 64 ];
    2078           0 :     fd_memcpy( mixin_buf, mblk->curr_hash, 32UL );
    2079           0 :     fd_memcpy( mixin_buf+32UL, root, 32UL );
    2080           0 :     fd_sha256_hash( mixin_buf, 64UL, mblk->curr_hash );
    2081           0 :     if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk_desc->end_hash, sizeof(fd_hash_t) ) ) ) {
    2082           0 :       FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
    2083           0 :       FD_BASE58_ENCODE_32_BYTES( mblk_desc->end_hash->hash, ref_str );
    2084           0 :       FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %u, ours %s, claimed %s, hashcnt %lu, txns [%lu,%lu), %u sigs, slot %lu, parent slot %lu", mblk->mblk_idx, our_str, ref_str, mblk_desc->hashcnt, start_txn_idx, mblk_desc->end_txn_idx, mblk->curr_sig_cnt, block->slot, block->parent_slot ));
    2085           0 :       return -1;
    2086           0 :     }
    2087           0 :     block->poh_hash_cmp_done_cnt++;
    2088           0 :     mblk_in_progress_bitset_insert( block->mblk_in_progress_pool_free_bitset, in_progress_idx );
    2089           0 :   } else {
    2090             :     /* There are more transactions to mixin in this microblock. */
    2091           0 :     mblk_in_progress_slist_idx_push_head( block->mblks_mixin_in_progress, in_progress_idx, block->mblk_in_progress_pool );
    2092           0 :     rv = 1;
    2093           0 :   }
    2094             : 
    2095           0 :   return rv;
    2096           0 : }
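                     : 
                     : /* Hedged usage sketch (hypothetical caller, not the scheduler's
                     :    actual completion path): one way the return codes of
                     :    maybe_mixin() above could be consumed -- keep mixing while
                     :    progress is being made, stop when nothing is currently mixable,
                     :    and surface a PoH mismatch to the caller's bad-block handling. */
                     : static inline int
                     : example_drain_mixins( fd_sched_t * sched, fd_sched_block_t * block ) {
                     :   for(;;) {
                     :     int rv = maybe_mixin( sched, block );
                     :     if( rv==0  ) return 0;  /* nothing mixable right now           */
                     :     if( rv==-1 ) return -1; /* PoH verify failed: the block is bad */
                     :     /* rv==1: mixed in one txn; rv==2: also completed a microblock. */
                     :   }
                     : }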
    2097             : 
    2098             : static void
    2099           0 : try_activate_block( fd_sched_t * sched ) {
    2100             : 
    2101             :   /* See if there are any allocated staging lanes that we can activate
    2102             :      for scheduling ... */
    2103           0 :   ulong staged_bitset = sched->staged_bitset;
    2104           0 :   while( staged_bitset ) {
    2105           0 :     int lane_idx  = fd_ulong_find_lsb( staged_bitset );
    2106           0 :     staged_bitset = fd_ulong_pop_lsb( staged_bitset );
    2107             : 
    2108           0 :     ulong              head_idx     = sched->staged_head_bank_idx[ lane_idx ];
    2109           0 :     fd_sched_block_t * head_block   = block_pool_ele( sched, head_idx );
    2110           0 :     fd_sched_block_t * parent_block = block_pool_ele( sched, head_block->parent_idx );
    2111           0 :     if( FD_UNLIKELY( parent_block->dying ) ) {
    2112             :       /* Invariant: no child of a dying block should be staged. */
    2113           0 :       FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, parent slot %lu on lane %d has parent_block->dying set, slot %lu, parent slot %lu",
    2114           0 :                     head_idx, head_block->slot, head_block->parent_slot, lane_idx, parent_block->slot, parent_block->parent_slot ));
    2115           0 :     }
    2116             :     //FIXME: restore this invariant check when we have immediate demotion of dying blocks
    2117             :     // if( FD_UNLIKELY( head_block->dying ) ) {
    2118             :     //   /* Invariant: no dying block should be staged. */
    2119             :     //   FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, prime %lu on lane %u has head_block->dying set",
    2120             :     //                 head_idx, (ulong)head_block->block_id.slot, (ulong)head_block->block_id.prime, lane_idx ));
    2121             :     // }
    2122           0 :     if( block_is_done( parent_block ) && block_is_activatable( head_block ) ) {
    2123             :       /* ... Yes, on this staging lane the parent block is done.  So we
    2124             :          can activate the staged child. */
    2125           0 :       if( FD_UNLIKELY( head_idx!=sched->last_active_bank_idx ) ) { /* Unlikely because lane switching only possible under forking. */
    2126           0 :         FD_LOG_DEBUG(( "activating block %lu:%lu: lane switch to %d", head_block->slot, head_idx, lane_idx ));
    2127           0 :         sched->metrics->lane_switch_cnt++;
    2128           0 :       }
    2129           0 :       sched->active_bank_idx = head_idx;
    2130           0 :       return;
    2131           0 :     }
    2132           0 :   }
    2133             : 
    2134             :   /* ... No, promote unstaged blocks. */
    2135           0 :   ulong root_idx = sched->root_idx;
    2136           0 :   if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
    2137           0 :     FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
    2138           0 :   }
    2139             :   /* Find and stage the longest stageable unstaged fork.  This is a
    2140             :      policy decision. */
    2141           0 :   ulong depth = compute_longest_unstaged_fork( sched, root_idx );
    2142           0 :   if( FD_LIKELY( depth>0UL ) ) {
    2143           0 :     if( FD_UNLIKELY( sched->staged_bitset==fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) {
    2144             :       /* No more staging lanes available.  All of them are occupied by
    2145             :          slow squatters.  Only empty blocks can be demoted, and so
    2146             :          blocks with in-flight transactions, including dying in-flight
    2147             :          blocks, shouldn't be demoted.  We demote all demotable lanes.
    2148             :          Demotion isn't all that expensive, since demotable blocks have
    2149             :          no transactions in them.  If a demoted block proves to be
    2150             :          active still, it'll naturally promote back into a staging lane.
    2151             : 
    2152             :          In fact, all lanes should be demotable at this point.  None of
    2153             :          the lanes have anything dispatchable, otherwise we would have
    2154             :          simply activated one of the dispatchable lanes.  None of the
    2155             :          lanes have anything in-flight either, as we allow for a grace
    2156             :          period while something is in-flight, before we deactivate any
    2157             :          block.  In principle, we could get rid of the grace period and
    2158             :          deactivate right away.  In that case, it's okay if nothing is
    2159             :          demotable at the moment, as that simply implies that all lanes
    2160             :          have in-flight tasks.  We would get another chance to try to
    2161             :          demote when the last in-flight task on any lane completes.
    2162             : 
    2163             :          Another interesting side effect of the current dispatching and
    2164             :          lane switching policy is that each lane should have exactly one
    2165             :          block in it at this point.  A parent block by definition can't
    2166             :          be partially ingested.  Any parent block that is fully ingested
    2167             :          and dispatchable would have made the lane dispatchable, and we
    2168             :          wouldn't be here.  Any parent that is fully ingested and fully
    2169             :          dispatched would be fully done after the grace period.  So
    2170             :          there could only be one block per lane, and it is
    2171             :          simultaneously the head and the tail of the lane.
    2172             : 
    2173             :          A note on why this whole thing does not deadlock:
    2174             : 
    2175             :          One might reasonably wonder what happens if all the lanes are
    2176             :          non-empty, non-dead, but for some reason couldn't be activated
    2177             :          for dispatching.  We would deadlock in this case, as no lane
    2178             :          dispatches to the point of being demotable, and no unstaged
    2179             :          block can be promoted.  Such is not in fact possible.  The only
    2180             :          way a dispatchable lane can be ineligible for activation is if
    2181             :          it has a parent block that isn't done yet.  So a deadlock
    2182             :          happens when this parent block, or any of its dispatchable
    2183             :          ancestors, is unstaged.  An important invariant we maintain is
    2184             :          that a staged block can't have an unstaged stageable parent.
    2185             :          This invariant, by induction, gives us the guarantee that at
    2186             :          least one of the lanes can be activated. */
    2187           0 :       for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
    2188           0 :         if( FD_UNLIKELY( !lane_is_demotable( sched, l ) ) ) {
    2189           0 :           FD_LOG_CRIT(( "invariant violation: lane %d is not demotable", l ));
    2190           0 :         }
    2191           0 :         ulong demoted_cnt = demote_lane( sched, l );
    2192           0 :         if( FD_UNLIKELY( demoted_cnt!=1UL ) ) {
    2193           0 :           FD_LOG_CRIT(( "invariant violation: %lu blocks demoted from lane %d, expected 1 demotion", demoted_cnt, l ));
    2194           0 :         }
    2195           0 :         sched->metrics->lane_demoted_cnt++;
    2196           0 :       }
    2197           0 :     }
    2198           0 :     FD_TEST( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) );
    2199           0 :     int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset );
    2200           0 :     if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
    2201           0 :       FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
    2202           0 :                     lane_idx, sched->staged_bitset ));
    2203           0 :     }
    2204           0 :     ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
    2205           0 :     if( FD_UNLIKELY( head_bank_idx==ULONG_MAX ) ) {
    2206             :       /* We found a promotable fork with depth>0.  This should not happen. */
    2207           0 :       FD_LOG_CRIT(( "invariant violation: head_bank_idx==ULONG_MAX" ));
    2208           0 :     }
    2209             :     /* We don't bother with promotion unless the block is immediately
    2210             :        dispatchable.  So it's okay to set the active block here.  This
    2211             :        doesn't cause out-of-order block replay because any parent block
    2212             :        must be fully done.  If the parent block were dead, this fork
    2213             :        would be marked dead too and ineligible for promotion.  If the
    2214             :        parent block were not dead and not done and staged, we wouldn't
    2215             :        be trying to promote an unstaged fork.  If the parent block were
    2216             :        not dead and not done and unstaged, it would've been part of this
    2217             :        unstaged fork. */
    2218           0 :     fd_sched_block_t * head_block = block_pool_ele( sched, head_bank_idx );
    2219           0 :     FD_LOG_DEBUG(( "activating block %lu:%lu: unstaged promotion to lane %d", head_block->slot, head_bank_idx, lane_idx ));
    2220           0 :     sched->active_bank_idx = head_bank_idx;
    2221           0 :     return;
    2222           0 :   }
    2223             :   /* No unstaged blocks to promote.  So we're done.  Yay. */
    2224           0 : }
    2225             : 
    2226             : static void
    2227           0 : check_or_set_active_block( fd_sched_t * sched ) {
    2228           0 :   if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
    2229           0 :     try_activate_block( sched );
    2230           0 :   } else {
    2231           0 :     fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
    2232           0 :     if( FD_UNLIKELY( block_should_deactivate( active_block ) ) ) {
    2233           0 :       sched->print_buf_sz = 0UL;
    2234           0 :       print_all( sched, active_block );
    2235           0 :       FD_LOG_NOTICE(( "%s", sched->print_buf ));
    2236           0 :       FD_LOG_CRIT(( "invariant violation: should have been deactivated" ));
    2237           0 :     }
    2238           0 :   }
    2239           0 : }
    2240             : 
    2241             : /* This function has two main jobs:
    2242             :    - Mark everything on the fork tree dying.
    2243             :    - Take blocks out of rdisp if possible. */
    2244             : static void
    2245           0 : subtree_mark_and_maybe_prune_rdisp( fd_sched_t * sched, fd_sched_block_t * block ) {
    2246           0 :   if( FD_UNLIKELY( block->rooted ) ) {
    2247           0 :     FD_LOG_CRIT(( "invariant violation: rooted block should not be abandoned, slot %lu, parent slot %lu",
    2248           0 :                   block->slot, block->parent_slot ));
    2249           0 :   }
    2250             :   /* All minority fork nodes pass through this function eventually.  So
    2251             :      this is a good point to check per-node invariants for minority
    2252             :      forks. */
    2253           0 :   if( FD_UNLIKELY( block->staged && !block->in_rdisp ) ) {
    2254           0 :     FD_LOG_CRIT(( "invariant violation: staged block is not in the dispatcher, slot %lu, parent slot %lu",
    2255           0 :                   block->slot, block->parent_slot ));
    2256           0 :   }
    2257             : 
    2258             :   /* Setting the flag is non-optional and can happen more than once. */
    2259           0 :   block->dying = 1;
    2260             : 
    2261             :   /* Removal from dispatcher should only happen once. */
    2262           0 :   if( block->in_rdisp ) {
    2263           0 :     fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
    2264           0 :     if( FD_UNLIKELY( !parent ) ) {
    2265             :       /* Only the root has no parent.  Abandon should never be called on
    2266             :          the root.  So any block we are trying to abandon should have a
    2267             :          parent. */
    2268           0 :       FD_LOG_CRIT(( "invariant violation: parent not found slot %lu, parent slot %lu",
    2269           0 :                     block->slot, block->parent_slot ));
    2270           0 :     }
    2271             : 
    2272             :     /* The dispatcher expects blocks to be abandoned in the same order
    2273             :        that they were added on each lane.  There are no requirements on
    2274             :        the order of abandoning if two blocks are not on the same lane,
    2275             :        or if a block is unstaged.  This means we shouldn't abandon a
    2276             :        child block before its parent has been abandoned when, and only
    2277             :        when, they are on the same lane.  So wait
    2278             :        until we can abandon the parent, and then descend down the fork
    2279             :        tree to ensure orderly abandoning. */
    2280           0 :     int in_order = !parent->in_rdisp || /* parent is not in the dispatcher */
    2281           0 :                    !parent->staged   || /* parent is in the dispatcher but not staged */
    2282           0 :                    !block->staged    || /* parent is in the dispatcher and staged but this block is unstaged */
    2283           0 :                    block->staging_lane!=parent->staging_lane; /* this block is on a different staging lane than its parent */
    2284             : 
    2285           0 :     if( FD_UNLIKELY( in_order && block->staged && sched->active_bank_idx==sched->staged_head_bank_idx[ block->staging_lane ] && sched->active_bank_idx!=ULONG_MAX ) ) {
    2286           0 :       FD_TEST( block_pool_ele( sched, sched->active_bank_idx )==block );
    2287           0 :       FD_LOG_DEBUG(( "reset active_bank_idx %lu: abandon", sched->active_bank_idx ));
    2288           0 :       sched->last_active_bank_idx = sched->active_bank_idx;
    2289           0 :       sched->active_bank_idx = ULONG_MAX;
    2290           0 :       sched->metrics->deactivate_abandoned_cnt++;
    2291           0 :     }
    2292             : 
    2293             :     /* We inform the dispatcher of an abandon only when there are no
    2294             :        more in-flight transactions.  Otherwise, the dispatcher could
    2295             :        recycle the same txn_id that was just abandoned, and we could
    2296             :        then receive the completion of an in-flight transaction whose
    2297             :        txn_id has already been recycled. */
    2298             :     // FIXME The recycling might be fine now that we no longer use
    2299             :     // txn_id to index into anything.  We might be able to just drop
    2300             :     // txn_id on abandoned blocks.  Though would this leak transaction
    2301             :     // content if the txn_id is recycled?
    2302             :     // Note that subtree pruning from sched isn't dependent on the
    2303             :     // in-flight check being present here, as is_prunable already checks
    2304             :     // for in-flight==0.
    2305           0 :     int abandon = in_order && !block_is_in_flight( block );
    2306             : 
    2307           0 :     if( abandon ) {
    2308           0 :       block->in_rdisp = 0;
    2309           0 :       fd_rdisp_abandon_block( sched->rdisp, (ulong)(block-sched->block_pool) );
    2310           0 :       sched->txn_pool_free_cnt += block->txn_parsed_cnt-block->txn_done_cnt; /* in_flight_cnt==0 */
    2311           0 :       sched->metrics->block_abandoned_cnt++;
    2312           0 :       sched->metrics->txn_abandoned_parsed_cnt    += block->txn_parsed_cnt;
    2313           0 :       sched->metrics->txn_abandoned_exec_done_cnt += block->txn_exec_done_cnt;
    2314           0 :       sched->metrics->txn_abandoned_done_cnt      += block->txn_done_cnt;
    2315             : 
    2316             :       //FIXME when demote supports non-empty blocks, we should demote
    2317             :       //the block from the lane unconditionally and immediately,
    2318             :       //regardless of whether it's safe to abandon or not.  So a block
    2319             :       //would go immediately from staged to unstaged and eventually to
    2320             :       //abandoned.
    2321           0 :       if( FD_LIKELY( block->staged ) ) {
    2322           0 :         FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: abandon", block->slot, block_to_idx( sched, block ), block->staging_lane ));
    2323           0 :         block->staged = 0;
    2324             :         /* Now release the staging lane.  This will release the lane as
    2325             :            soon as we abandon the head block on a lane.  Technically a
    2326             :            release should only happen when we remove the tail block on a
    2327             :            lane.  This is fine though.  The way we abandon guarantees by
    2328             :            induction that an entire lane will be abandoned.  Only the
    2329             :            head block on a lane can possibly have in-flight
    2330             :            transactions, and so once a head block becomes eligible for
    2331             :            abandoning, the entire lane, all the way to the tail block,
    2332             :            will be eligible. */
    2333           0 :         sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
    2334           0 :         sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
    2335           0 :       }
    2336           0 :     }
    2337           0 :   }
    2338             : 
    2339             :   /* Abandon the entire fork chaining off of this block. */
    2340           0 :   ulong child_idx = block->child_idx;
    2341           0 :   while( child_idx!=ULONG_MAX ) {
    2342           0 :     fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2343           0 :     subtree_mark_and_maybe_prune_rdisp( sched, child );
    2344           0 :     child_idx = child->sibling_idx;
    2345           0 :   }
    2346           0 : }
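
The lane release above leans on an induction argument: only the head block of a staging lane can have in-flight transactions, so once the head becomes abandonable the whole lane is.  The sketch below is a hypothetical debug-only check of that invariant, written against the fd_sched_t / fd_sched_block_t helpers already used in this file (block_pool_ele, block_is_in_flight); the function name lane_check_only_head_in_flight is invented for illustration and is not part of the actual source.

/* Hypothetical debug helper: verify that, on a given staging lane, only
   the head block can have in-flight transactions.  Assumes the
   fd_sched_t / fd_sched_block_t definitions from this file. */
static void
lane_check_only_head_in_flight( fd_sched_t * sched, ulong lane_idx ) {
  ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
  int   is_head  = 1;
  while( bank_idx!=ULONG_MAX ) {
    fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    if( !is_head ) FD_TEST( !block_is_in_flight( block ) );
    is_head = 0;
    /* Walk to the child on the same lane, if any. */
    ulong next_idx  = ULONG_MAX;
    ulong child_idx = block->child_idx;
    while( child_idx!=ULONG_MAX ) {
      fd_sched_block_t * child = block_pool_ele( sched, child_idx );
      if( child->staged && child->staging_lane==lane_idx ) { next_idx = child_idx; break; }
      child_idx = child->sibling_idx;
    }
    bank_idx = next_idx;
  }
}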
    2347             : 
    2348             : /* It's safe to call this function more than once on the same block.
    2349             :    The final call is when there are no more in-flight tasks for this
    2350             :    block, at which point the block will be pruned from sched. */
    2351             : static void
    2352           0 : subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block ) {
    2353           0 :   subtree_mark_and_maybe_prune_rdisp( sched, block );
    2354           0 :   if( block_is_prunable( block ) ) {
    2355           0 :     fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
    2356           0 :     if( FD_LIKELY( parent ) ) {
    2357             :       /* Splice the block out of its parent's children list. */
    2358           0 :       ulong block_idx = block_to_idx( sched, block );
    2359           0 :       ulong * idx_p = &parent->child_idx;
    2360           0 :       while( *idx_p!=block_idx ) {
    2361           0 :         idx_p = &(block_pool_ele( sched, *idx_p )->sibling_idx);
    2362           0 :       }
    2363           0 :       *idx_p = block->sibling_idx;
    2364           0 :     }
    2365           0 :     subtree_prune( sched, block_to_idx( sched, block ), ULONG_MAX );
    2366           0 :   }
    2367           0 : }
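
subtree_abandon unlinks the block from its parent's children list with the pointer-to-pointer idiom, so the head-of-list and interior cases need no special-casing.  Below is a self-contained toy version of the same idiom over plain pointers rather than pool indices; the node_t type and list_unlink name are invented for the example.

#include <stddef.h>

typedef struct node node_t;
struct node { node_t * next; int id; };

/* Unlink target from the singly-linked list headed at *head_p.  The
   pointer-to-pointer walk handles the head and interior cases
   identically, which is the same trick subtree_abandon uses with
   parent->child_idx and sibling_idx (just with pool indices instead of
   pointers). */
static void
list_unlink( node_t ** head_p, node_t * target ) {
  node_t ** p = head_p;
  while( *p!=target ) p = &(*p)->next;  /* assumes target is present */
  *p = target->next;
}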
    2368             : 
    2369             : static void
    2370           0 : subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx ) {
    2371           0 :   fd_sched_block_t * head = block_pool_ele( sched, bank_idx );
    2372           0 :   head->parent_idx        = ULONG_MAX;
    2373           0 :   fd_sched_block_t * tail = head;
    2374             : 
    2375           0 :   while( head ) {
    2376           0 :     FD_TEST( head->in_sched );
    2377           0 :     head->in_sched = 0;
    2378           0 :     if( head->refcnt ) {
    2379           0 :       FD_TEST( !head->block_end_done );
    2380           0 :       head->refcnt = 0;
    2381           0 :       if( FD_UNLIKELY( !ref_q_avail( sched->ref_q ) ) ) FD_LOG_CRIT(( "ref_q full" ));
    2382           0 :       ref_q_push_tail( sched->ref_q, block_to_idx( sched, head ) );
    2383           0 :     }
    2384             : 
    2385           0 :     if( FD_UNLIKELY( !head->block_end_done ) ) {
    2386           0 :       sched->print_buf_sz = 0UL;
    2387           0 :       print_block_metrics( sched, head );
    2388           0 :       if( FD_LIKELY( head->block_start_done ) ) FD_LOG_DEBUG(( "block %lu:%lu replayed partially, pruning without full replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
    2389           0 :       else FD_LOG_DEBUG(( "block %lu:%lu replayed nothing, pruning without any replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
    2390           0 :     }
    2391             : 
    2392           0 :     ulong child_idx = head->child_idx;
    2393           0 :     while( child_idx!=ULONG_MAX ) {
    2394           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2395             :       /* Add children to be visited.  We abuse the parent_idx field to
    2396             :          link up the next block to visit. */
    2397           0 :       if( child_idx!=except_idx ) {
    2398           0 :         tail->parent_idx = child_idx;
    2399           0 :         tail             = child;
    2400           0 :         tail->parent_idx = ULONG_MAX;
    2401           0 :       }
    2402           0 :       child_idx = child->sibling_idx;
    2403           0 :     }
    2404             : 
    2405             :     /* Prune the current block.  We will never publish halfway into a
    2406             :        staging lane, because anything on the rooted fork should have
    2407             :        finished replaying gracefully and be out of the dispatcher.  In
    2408             :        fact, anything that we are publishing away should be out of the
    2409             :        dispatcher at this point.  And there should be no more in-flight
    2410             :        transactions. */
    2411           0 :     if( FD_UNLIKELY( block_is_in_flight( head ) ) ) {
    2412           0 :       FD_LOG_CRIT(( "invariant violation: block has transactions in flight (%u exec %u sigverify %u poh), slot %lu, parent slot %lu",
    2413           0 :                     head->txn_exec_in_flight_cnt, head->txn_sigverify_in_flight_cnt, head->poh_hashing_in_flight_cnt, head->slot, head->parent_slot ));
    2414           0 :     }
    2415           0 :     if( FD_UNLIKELY( head->in_rdisp ) ) {
    2416             :       /* We should have removed it from the dispatcher when we were
    2417             :          notified of the new root, or when in-flight transactions were
    2418             :          drained. */
    2419           0 :       FD_LOG_CRIT(( "invariant violation: block is in the dispatcher, slot %lu, parent slot %lu",
    2420           0 :                     head->slot, head->parent_slot ));
    2421           0 :     }
    2422           0 :     sched->block_pool_popcnt--;
    2423             : 
    2424           0 :     fd_sched_block_t * next = block_pool_ele( sched, head->parent_idx );
    2425             : 
    2426             :     /* We don't have to clear the indices here since no one should be
    2427             :        accessing them.  Defensive programming. */
    2428           0 :     head->parent_idx  = ULONG_MAX;
    2429           0 :     head->child_idx   = ULONG_MAX;
    2430           0 :     head->sibling_idx = ULONG_MAX;
    2431             : 
    2432           0 :     head = next;
    2433           0 :   }
    2434           0 : }
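
subtree_prune walks the whole subtree iteratively without any auxiliary queue: blocks that are about to be destroyed have their parent_idx field repurposed as a "next to visit" link.  The standalone sketch below illustrates the same trick on a toy tree with plain pointers; the tnode_t type and subtree_visit name are invented, and the per-node work is reduced to a printf.

#include <stdio.h>

typedef struct tnode tnode_t;
struct tnode {
  tnode_t * child;    /* first child                      */
  tnode_t * sibling;  /* next sibling                     */
  tnode_t * parent;   /* repurposed as the work-list link */
  int       id;
};

/* Visit every node in the subtree rooted at root.  Appending a child
   sets tail->parent, so when head==tail the link to the next node to
   visit is written into head->parent, exactly as subtree_prune does
   with parent_idx. */
static void
subtree_visit( tnode_t * root ) {
  tnode_t * head = root; head->parent = NULL;
  tnode_t * tail = head;
  while( head ) {
    for( tnode_t * c = head->child; c; c = c->sibling ) {
      tail->parent = c;   /* append c to the work list */
      tail         = c;
      tail->parent = NULL;
    }
    printf( "visiting node %d\n", head->id );   /* "prune" the node */
    tnode_t * next = head->parent;
    head->parent   = NULL;                      /* defensive clear, as in the original */
    head = next;
  }
}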
    2435             : 
    2436             : static void
    2437           0 : maybe_switch_block( fd_sched_t * sched, ulong bank_idx ) {
    2438             :   /* This happens only rarely, when there are dying in-flight blocks.
    2439             :      Early exit and don't let dying blocks affect replay. */
    2440           0 :   if( FD_UNLIKELY( bank_idx!=sched->active_bank_idx ) ) return;
    2441             : 
    2442           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2443           0 :   if( FD_UNLIKELY( block_is_done( block ) ) ) {
    2444           0 :     fd_rdisp_remove_block( sched->rdisp, bank_idx );
    2445           0 :     FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: remove", block->slot, bank_idx, block->staging_lane ));
    2446           0 :     block->in_rdisp = 0;
    2447           0 :     block->staged   = 0;
    2448           0 :     sched->metrics->block_removed_cnt++;
    2449           0 :     FD_LOG_DEBUG(( "reset active_bank_idx %lu: remove", sched->active_bank_idx ));
    2450           0 :     sched->last_active_bank_idx = sched->active_bank_idx;
    2451           0 :     sched->active_bank_idx = ULONG_MAX;
    2452             : 
    2453             :     /* See if there is a child block down the same staging lane.  This
    2454             :        is a policy decision to minimize fork churn.  We could in theory
    2455             :        reevaluate staging lane allocation here and do promotion/demotion
    2456             :        as needed. */
    2457           0 :     ulong child_idx = block->child_idx;
    2458           0 :     while( child_idx!=ULONG_MAX ) {
    2459           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2460           0 :       if( FD_LIKELY( child->staged && child->staging_lane==block->staging_lane ) ) {
    2461             :         /* There is a child block down the same staging lane ... */
    2462           0 :         if( FD_LIKELY( !child->dying ) ) {
    2463             :           /* ... and the child isn't dead */
    2464           0 :           if( FD_UNLIKELY( !block_is_activatable( child ) ) ) {
    2465             :             /* ... but the child is not activatable, likely because
    2466             :                there are no transactions available yet. */
    2467           0 :             sched->metrics->deactivate_no_txn_cnt++;
    2468           0 :             try_activate_block( sched );
    2469           0 :             return;
    2470           0 :           }
    2471             :           /* ... and it's immediately dispatchable, so switch the active
    2472             :              block to it, and have the child inherit the head status of
    2473             :              the lane.  This is the common case. */
    2474           0 :           FD_LOG_DEBUG(( "activating block %lu:%lu: child inheritance on lane %lu", child->slot, child_idx, child->staging_lane ));
    2475           0 :           sched->active_bank_idx = child_idx;
    2476           0 :           sched->staged_head_bank_idx[ block->staging_lane ] = child_idx;
    2477           0 :           if( FD_UNLIKELY( !fd_ulong_extract_bit( sched->staged_bitset, (int)block->staging_lane ) ) ) {
    2478           0 :             FD_LOG_CRIT(( "invariant violation: staged_bitset 0x%lx bit %lu is not set, slot %lu, parent slot %lu, child slot %lu, child parent slot %lu",
    2479           0 :                           sched->staged_bitset, block->staging_lane, block->slot, block->parent_slot, child->slot, child->parent_slot ));
    2480           0 :           }
    2481           0 :           return;
    2482           0 :         } else {
    2483             :           /* ... but the child block is considered dead, likely because
    2484             :              the parser considers it invalid. */
    2485           0 :           FD_LOG_INFO(( "child block %lu is already dead", child->slot ));
    2486           0 :           subtree_abandon( sched, child );
    2487           0 :           break;
    2488           0 :         }
    2489           0 :       }
    2490           0 :       child_idx = child->sibling_idx;
    2491           0 :     }
    2492             :     /* There isn't a child block down the same staging lane.  This is
    2493             :        the last block in the staging lane.  Release the staging lane. */
    2494           0 :     sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
    2495           0 :     sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
    2496           0 :     sched->metrics->deactivate_no_child_cnt++;
    2497           0 :     try_activate_block( sched );
    2498           0 :   } else if( block_should_deactivate( block ) ) {
    2499             :     /* We exhausted the active block, but it's not fully done yet.  We
    2500             :        are just not getting FEC sets for it fast enough.  This could
    2501             :        happen when the network path is congested, or when the leader
    2502             :        simply went down.  Reset the active block. */
    2503           0 :     sched->last_active_bank_idx = sched->active_bank_idx;
    2504           0 :     sched->active_bank_idx = ULONG_MAX;
    2505           0 :     sched->metrics->deactivate_no_txn_cnt++;
    2506           0 :     try_activate_block( sched );
    2507           0 :   }
    2508           0 : }
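
Throughout maybe_switch_block, staged_bitset and staged_head_bank_idx are kept in lock step: bit i of the bitset is set iff lane i currently has a head block, and the head index is reset to ULONG_MAX whenever the bit is cleared.  The self-contained sketch below restates that bookkeeping with plain bit operations; the lanes_t type and the lane_claim / lane_release names are invented, and the real code uses the fd_ulong_set_bit / fd_ulong_clear_bit helpers instead.

#include <assert.h>

#define MAX_LANES 4
#define IDX_NULL  (~0UL)

typedef struct {
  unsigned long occupied_bitset;        /* bit i set iff lane i has a head block */
  unsigned long head_idx[ MAX_LANES ];  /* head block idx, IDX_NULL if lane free */
} lanes_t;

/* Claim lane i for a fork whose head block is head. */
static void
lane_claim( lanes_t * l, int i, unsigned long head ) {
  assert( !( l->occupied_bitset & (1UL<<i) ) );  /* lane must be free */
  l->occupied_bitset |= 1UL<<i;
  l->head_idx[ i ]    = head;
}

/* Release lane i, e.g. when its last block is removed or its head block
   is abandoned. */
static void
lane_release( lanes_t * l, int i ) {
  l->occupied_bitset &= ~(1UL<<i);
  l->head_idx[ i ]    = IDX_NULL;
}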
    2509             : 
    2510             : FD_FN_UNUSED static ulong
    2511           0 : find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx ) {
    2512           0 :   ulong root_idx = sched->root_idx;
    2513           0 : 
    2514           0 :   if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
    2515           0 :     FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
    2516           0 :   }
    2517           0 : 
    2518           0 :   /* First pass: compute the longest unstaged fork depth for each node
    2519           0 :      in the fork tree. */
    2520           0 :   ulong depth = compute_longest_unstaged_fork( sched, root_idx );
    2521           0 : 
    2522           0 :   /* Second pass: stage blocks on the longest unstaged fork. */
    2523           0 :   ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
    2524           0 : 
    2525           0 :   if( FD_UNLIKELY( (depth>0UL && head_bank_idx==ULONG_MAX) || (depth==0UL && head_bank_idx!=ULONG_MAX) ) ) {
    2526           0 :     FD_LOG_CRIT(( "invariant violation: depth %lu, head_bank_idx %lu",
    2527           0 :                   depth, head_bank_idx ));
    2528           0 :   }
    2529           0 : 
    2530           0 :   return head_bank_idx;
    2531           0 : }
    2532             : 
    2533             : /* Returns the length of the longest stageable unstaged fork rooted at
    2534             :    bank_idx, if there is one, and 0 otherwise. */
    2535             : static ulong
    2536           0 : compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx ) {
    2537           0 :   if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
    2538           0 :     FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
    2539           0 :   }
    2540             : 
    2541           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2542             : 
    2543           0 :   ulong max_child_depth = 0UL;
    2544           0 :   ulong child_idx       = block->child_idx;
    2545           0 :   while( child_idx!=ULONG_MAX ) {
    2546           0 :     ulong child_depth = compute_longest_unstaged_fork( sched, child_idx );
    2547           0 :     if( child_depth > max_child_depth ) {
    2548           0 :       max_child_depth = child_depth;
    2549           0 :     }
    2550           0 :     fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2551           0 :     child_idx = child->sibling_idx;
    2552           0 :   }
    2553             : 
    2554           0 :   block->luf_depth = max_child_depth + fd_ulong_if( block_is_promotable( block ), 1UL, 0UL );
    2555           0 :   return block->luf_depth;
    2556           0 : }
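
As a worked example of the depth computation (a hypothetical fork tree, with every block shown assumed promotable), the comment below traces the post-order values of luf_depth and which chain the second pass would then stage.

/* Hypothetical fork tree, every block assumed promotable:
   
          A
         / \
        B   C
        |
        D
   
   Post-order: luf_depth(D) = 1
               luf_depth(B) = max(1)   + 1 = 2
               luf_depth(C) = 1
               luf_depth(A) = max(2,1) + 1 = 3
   
   The second pass then follows the deepest-child links A -> B -> D,
   promotes those three blocks onto the lane, and returns A as the
   lane's head block. */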
    2557             : 
    2558             : static ulong
    2559           0 : stage_longest_unstaged_fork_helper( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
    2560           0 :   if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
    2561           0 :     FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
    2562           0 :   }
    2563             : 
    2564           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2565             : 
    2566           0 :   int   stage_it = fd_int_if( block_is_promotable( block ), 1, 0 );
    2567           0 :   ulong rv       = fd_ulong_if( stage_it, bank_idx, ULONG_MAX );
    2568           0 :   if( FD_LIKELY( stage_it ) ) {
    2569           0 :     block->staged = 1;
    2570           0 :     block->staging_lane = (ulong)lane_idx;
    2571           0 :     fd_rdisp_promote_block( sched->rdisp, bank_idx, block->staging_lane );
    2572           0 :     sched->metrics->block_promoted_cnt++;
    2573           0 :     FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: promote", block->slot, bank_idx, block->staging_lane ));
    2574           0 :   }
    2575             : 
    2576             :   /* Base case: leaf node. */
    2577           0 :   if( block->child_idx==ULONG_MAX ) return rv;
    2578             : 
    2579           0 :   ulong max_depth      = 0UL;
    2580           0 :   ulong best_child_idx = ULONG_MAX;
    2581           0 :   ulong child_idx      = block->child_idx;
    2582           0 :   while( child_idx!=ULONG_MAX ) {
    2583           0 :     fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2584           0 :     if( child->luf_depth>max_depth ) {
    2585           0 :       max_depth      = child->luf_depth;
    2586           0 :       best_child_idx = child_idx;
    2587           0 :     }
    2588           0 :     child_idx = child->sibling_idx;
    2589           0 :   }
    2590             : 
    2591             :   /* Recursively stage descendants. */
    2592           0 :   if( best_child_idx!=ULONG_MAX ) {
    2593           0 :     ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, best_child_idx, lane_idx );
    2594           0 :     rv = fd_ulong_if( rv!=ULONG_MAX, rv, head_bank_idx );
    2595           0 :   }
    2596             : 
    2597           0 :   return rv;
    2598           0 : }
    2599             : 
    2600             : /* Returns the idx of the head block of the newly staged lane on
    2601             :    success, ULONG_MAX otherwise. */
    2602             : static ulong
    2603           0 : stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
    2604           0 :   ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, bank_idx, lane_idx );
    2605           0 :   if( FD_LIKELY( head_bank_idx!=ULONG_MAX ) ) {
    2606           0 :     sched->metrics->lane_promoted_cnt++;
    2607           0 :     sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
    2608           0 :     sched->staged_head_bank_idx[ lane_idx ] = head_bank_idx;
    2609           0 :   }
    2610           0 :   return head_bank_idx;
    2611           0 : }
    2612             : 
    2613             : /* Check if an entire staging lane can be demoted.  Returns 1 if all
    2614             :    blocks in the lane are demotable, 0 otherwise. */
    2615             : static int
    2616           0 : lane_is_demotable( fd_sched_t * sched, int lane_idx ) {
    2617           0 :   ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
    2618             : 
    2619           0 :   while( bank_idx!=ULONG_MAX ) {
    2620           0 :     fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2621           0 :     FD_TEST( block->staged );
    2622           0 :     FD_TEST( block->staging_lane==(ulong)lane_idx );
    2623             : 
    2624           0 :     if( FD_UNLIKELY( !block_is_demotable( block ) ) ) {
    2625             :       /* Found a non-demotable block.  Early exit. */
    2626           0 :       return 0;
    2627           0 :     }
    2628             : 
    2629             :     /* Find the child in the same staging lane. */
    2630           0 :     ulong child_idx = block->child_idx;
    2631           0 :     ulong next_bank_idx = ULONG_MAX;
    2632           0 :     while( child_idx!=ULONG_MAX ) {
    2633           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2634           0 :       if( child->staged && child->staging_lane==(ulong)lane_idx ) {
    2635           0 :         next_bank_idx = child_idx;
    2636           0 :         break;
    2637           0 :       }
    2638           0 :       child_idx = child->sibling_idx;
    2639           0 :     }
    2640           0 :     bank_idx = next_bank_idx;
    2641           0 :   }
    2642             : 
    2643           0 :   return 1;
    2644           0 : }
    2645             : 
    2646             : /* Demote all blocks in a staging lane.  Assumes that all blocks in the
    2647             :    lane are demotable.  Returns the number of blocks demoted. */
    2648             : static ulong
    2649           0 : demote_lane( fd_sched_t * sched, int lane_idx ) {
    2650           0 :   ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
    2651           0 :   uint  demoted_cnt = 0U;
    2652             : 
    2653           0 :   while( bank_idx!=ULONG_MAX ) {
    2654           0 :     fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2655           0 :     FD_TEST( block->staged );
    2656           0 :     FD_TEST( block->staging_lane==(ulong)lane_idx );
    2657             : 
    2658           0 :     int ret = fd_rdisp_demote_block( sched->rdisp, bank_idx );
    2659           0 :     if( FD_UNLIKELY( ret!=0 ) ) {
    2660           0 :       FD_LOG_CRIT(( "fd_rdisp_demote_block failed for slot %lu, bank_idx %lu, lane %d", block->slot, bank_idx, lane_idx ));
    2661           0 :     }
    2662           0 :     FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: demote", block->slot, bank_idx, block->staging_lane ));
    2663           0 :     block->staged = 0;
    2664           0 :     demoted_cnt++;
    2665             : 
    2666             :     /* Find the child in the same staging lane. */
    2667           0 :     ulong child_idx = block->child_idx;
    2668           0 :     ulong next_bank_idx = ULONG_MAX;
    2669           0 :     while( child_idx!=ULONG_MAX ) {
    2670           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2671           0 :       if( child->staged && child->staging_lane==(ulong)lane_idx ) {
    2672           0 :         next_bank_idx = child_idx;
    2673           0 :         break;
    2674           0 :       }
    2675           0 :       child_idx = child->sibling_idx;
    2676           0 :     }
    2677           0 :     bank_idx = next_bank_idx;
    2678           0 :   }
    2679             : 
    2680             :   /* Clear the lane. */
    2681           0 :   sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, lane_idx );
    2682           0 :   sched->staged_head_bank_idx[ lane_idx ] = ULONG_MAX;
    2683             : 
    2684           0 :   sched->metrics->block_demoted_cnt += demoted_cnt;
    2685           0 :   FD_LOG_DEBUG(( "demoted %u blocks in lane %d", demoted_cnt, lane_idx ));
    2686           0 :   return demoted_cnt;
    2687           0 : }
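
lane_is_demotable and demote_lane both open-code the same inner loop that finds a block's child on the same staging lane.  A hedged sketch of factoring that walk into a shared helper is below; the name lane_next_block is hypothetical and the code assumes the fd_sched_block_t fields and block_pool_ele helper used throughout this file.

/* Hypothetical helper: return the bank idx of block's child that sits
   on the same staging lane, or ULONG_MAX if the block is the lane's
   tail. */
static ulong
lane_next_block( fd_sched_t * sched, fd_sched_block_t const * block, int lane_idx ) {
  ulong child_idx = block->child_idx;
  while( child_idx!=ULONG_MAX ) {
    fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    if( child->staged && child->staging_lane==(ulong)lane_idx ) return child_idx;
    child_idx = child->sibling_idx;
  }
  return ULONG_MAX;
}

With a helper like this, the inner child walks in lane_is_demotable and demote_lane would each reduce to a single bank_idx = lane_next_block( sched, block, lane_idx ); assignment.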

Generated by: LCOV version 1.14