LCOV - code coverage report
Current view: top level - discof/replay - fd_sched.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 1822 0.0 %
Date: 2026-05-15 07:18:56 Functions: 0 70 0.0 %

          Line data    Source code
       1             : #include <stdio.h> /* for vsnprintf */
       2             : #include <stdarg.h> /* for va_list */
       3             : 
       4             : #include "fd_sched.h"
       5             : #include "fd_execrp.h" /* for poh hash value */
       6             : #include "../../util/math/fd_stat.h" /* for sorted search */
       7             : #include "../../ballet/block/fd_microblock.h"
       8             : #include "../../disco/fd_disco_base.h" /* for FD_MAX_TXN_PER_SLOT */
       9             : #include "../../disco/metrics/fd_metrics.h" /* for fd_metrics_convert_seconds_to_ticks and etc. */
      10             : #include "../../disco/pack/fd_chkdup.h"
      11             : #include "../../disco/shred/fd_shredder.h" /* FD_SHREDDER_CHAINED_FEC_SET_PAYLOAD_SZ */
      12             : #include "../../discof/poh/fd_poh.h" /* for MAX_SKIPPED_TICKS */
      13             : #include "../../flamenco/runtime/fd_runtime.h" /* for fd_runtime_load_txn_address_lookup_tables */
      14             : #include "../../flamenco/runtime/fd_system_ids.h"
      15             : #include "../../flamenco/runtime/sysvar/fd_sysvar_slot_hashes.h" /* for ALUTs */
      16             : #include "../../flamenco/accdb/fd_accdb_sync.h"
      17             : 
      18           0 : #define FD_SCHED_MAX_STAGING_LANES_LOG     (2)
      19           0 : #define FD_SCHED_MAX_STAGING_LANES         (1UL<<FD_SCHED_MAX_STAGING_LANES_LOG)
      20             : #define FD_SCHED_MAX_EXEC_TILE_CNT         (64UL)
      21           0 : #define FD_SCHED_MAX_PRINT_BUF_SZ          (2UL<<20)
      22             : 
      23             : #define FD_SCHED_MAX_MBLK_PER_SLOT             (MAX_SKIPPED_TICKS)
      24           0 : #define FD_SCHED_MAX_POH_HASHES_PER_TASK       (4096UL) /* This seems to be the sweet spot. */
      25             : 
      26             : /* 64 ticks per slot, and a single gigantic microblock containing min
      27             :    size transactions. */
      28             : FD_STATIC_ASSERT( FD_MAX_TXN_PER_SLOT_SHRED==((FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT-65UL*sizeof(fd_microblock_hdr_t))/FD_TXN_MIN_SERIALIZED_SZ), max_txn_per_slot_shred );
      29             : 
      30             : /* We size the buffer to be able to hold residual data from the previous
      31             :    FEC set that only becomes parseable after the next FEC set is
      32             :    ingested, as well as the incoming FEC set.  The largest minimally
      33             :    parseable unit of data is a transaction.  So that much data may
      34             :    straddle FEC set boundaries.  Other minimally parseable units of data
      35             :    include the microblock header and the microblock count within a
      36             :    batch. */
      37           0 : #define FD_SCHED_MAX_PAYLOAD_PER_FEC       (63985UL)
      38             : FD_STATIC_ASSERT( FD_SCHED_MAX_PAYLOAD_PER_FEC>=FD_SHREDDER_CHAINED_FEC_SET_PAYLOAD_SZ, bump sched fec size bound ); /* FD_SHREDDER_CHAINED_FEC_SET_PAYLOAD_SZ is the bound we want, but older ledgers have larger FEC sets. */
      39             : #define FD_SCHED_MAX_FEC_BUF_SZ            (FD_SCHED_MAX_PAYLOAD_PER_FEC+FD_TXN_MTU)
      40             : FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(fd_microblock_hdr_t), resize buffer for residual data );
      41             : FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(ulong),               resize buffer for residual data );
      42             : 
      43           0 : #define FD_SCHED_MAX_TXN_PER_FEC           ((FD_SCHED_MAX_PAYLOAD_PER_FEC-1UL)/FD_TXN_MIN_SERIALIZED_SZ+1UL) /* 478 */
      44           0 : #define FD_SCHED_MAX_MBLK_PER_FEC          ((FD_SCHED_MAX_PAYLOAD_PER_FEC-1UL)/sizeof(fd_microblock_hdr_t)+1UL) /* 1334 */
      45             : 
      46             : FD_STATIC_ASSERT( FD_SCHED_MIN_DEPTH>=FD_SCHED_MAX_TXN_PER_FEC, limits );
      47             : FD_STATIC_ASSERT( FD_SCHED_MAX_DEPTH<=FD_RDISP_MAX_DEPTH,       limits );
      48             : 
      49           0 : #define FD_SCHED_MAGIC (0xace8a79c181f89b6UL) /* echo -n "fd_sched_v0" | sha512sum | head -c 16 */
      50             : 
/* Internal status codes returned by the parse/ingest helpers below. */
#define FD_SCHED_OK          (0)
#define FD_SCHED_AGAIN_LATER (1) /* NOTE(review): presumably "retry later" — confirm at call sites */
#define FD_SCHED_BAD_BLOCK   (2) /* block failed validation */
      54             : 
      55             : 
      56             : /* Structs. */
      57             : 
/* Per-microblock PoH bookkeeping.  Microblocks live in a flat pool
   (sched->mblk_pool) and are chained through the next field via the
   mblk_slist template included below; next doubles as the free-list
   link (see free_mblk). */
struct fd_sched_mblk {
  ulong start_txn_idx; /* inclusive parse idx */
  ulong end_txn_idx;   /* non-inclusive parse idx */
  ulong curr_txn_idx;  /* next txn to mixin, parse idx */
  ulong hashcnt;       /* number of pure hashes, excluding final mixin */
  ulong curr_hashcnt;  /* pure hashes performed so far */
  fd_hash_t end_hash[ 1 ];  /* PoH hash expected at the end of this microblock */
  fd_hash_t curr_hash[ 1 ]; /* running PoH hash state */
  uint curr_sig_cnt;   /* NOTE(review): presumably signature count mixed in so far — confirm at use sites */
  uint next;           /* slist / free-list link index into mblk_pool */
  int is_tick;         /* nonzero if this microblock is a tick (carries no txns) */
};
typedef struct fd_sched_mblk fd_sched_mblk_t;
      71             : 
      72             : #define SLIST_NAME  mblk_slist
      73             : #define SLIST_ELE_T fd_sched_mblk_t
      74           0 : #define SLIST_IDX_T uint
      75           0 : #define SLIST_NEXT  next
      76             : #include "../../util/tmpl/fd_slist.c"
      77             : 
      78             : #define SET_NAME txn_bitset
      79             : #define SET_MAX  FD_SCHED_MAX_DEPTH
      80             : #include "../../util/tmpl/fd_set.c"
      81             : 
/* Per-block replay state.  Blocks live in a flat pool
   (sched->block_pool) and form a fork tree linked by pool indices
   (parent/left-child/right-sibling, ULONG_MAX == null). */
struct fd_sched_block {
  ulong               slot;        /* slot number of this block */
  ulong               parent_slot; /* slot number of the parent block */
  ulong               parent_idx;  /* Index of the parent in the pool. */
  ulong               child_idx;   /* Index of the left-child in the pool. */
  ulong               sibling_idx; /* Index of the right-sibling in the pool. */

  /* Counters. */
  uint                txn_parsed_cnt;
  /*                  txn_queued_cnt = txn_parsed_cnt-txn_in_flight_cnt-txn_done_cnt */
  uint                txn_exec_in_flight_cnt;
  uint                txn_exec_done_cnt;
  uint                txn_sigverify_in_flight_cnt;
  uint                txn_sigverify_done_cnt;
  uint                poh_hashing_in_flight_cnt;
  uint                poh_hashing_done_cnt;
  uint                poh_hash_cmp_done_cnt; /* poh_hashing_done_cnt==poh_hash_cmp_done_cnt+len(mixin_in_progress) */
  uint                txn_done_cnt; /* A transaction is considered done when all types of tasks associated with it are done. */
  uint                shred_cnt;    /* NOTE(review): presumably the number of ingested shreds (see shred_blk_offs) — confirm */
  uint                mblk_cnt;          /* Total number of microblocks, including ticks and non ticks.
                                            mblk_cnt==len(unhashed)+len(hashing_in_progress)+hashing_in_flight_cnt+len(mixin_in_progress)+hash_cmp_done_cnt */
  uint                mblk_tick_cnt;     /* Total number of tick microblocks. */
  uint                mblk_freed_cnt;    /* This is ==hash_cmp_done_cnt in most cases, except for aborted
                                            blocks, where the freed cnt will catch up to mblk_cnt and surpass
                                            hash_cmp_done_cnt when the block is reaped. */
  uint                mblk_unhashed_cnt; /* ==len(unhashed) */
  ulong               hashcnt; /* How many hashes this block wants replay to do.  A mixin/record counts as one hash. */
  ulong               txn_pool_max_popcnt;   /* Peak transaction pool occupancy during the time this block was replaying. */
  ulong               mblk_pool_max_popcnt;  /* Peak mblk pool occupancy. */
  ulong               block_pool_max_popcnt; /* Peak block pool occupancy. */
  ulong               txn_idx[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */

  /* PoH verify. */
  fd_hash_t    poh_hash[ 1 ]; /* running end_hash of last parsed mblk */
  int          last_mblk_is_tick; /* set if the most recently parsed microblock was a tick */
  mblk_slist_t mblks_unhashed[ 1 ]; /* A microblock, once parsed out, is in one of these queues.  It
                                       generally progresses from unhashed to hashing to mixin.  When a
                                       microblock is being hashed/in-flight, it'll be transiently out of
                                       any of the queues.  Once a microblock progresses through all stages
                                       of work, it'll be immediately freed. */
  mblk_slist_t mblks_hashing_in_progress[ 1 ];
  mblk_slist_t mblks_mixin_in_progress[ 1 ];
  uchar bmtree_mem[ FD_BMTREE_COMMIT_FOOTPRINT(0) ] __attribute__((aligned(FD_BMTREE_COMMIT_ALIGN)));
  fd_bmtree_commit_t * bmtree; /* NOTE(review): presumably points into bmtree_mem once joined — confirm at init site */
  ulong max_tick_hashcnt;
  ulong curr_tick_hashcnt; /* Starts at 0, accumulates hashcnt, resets to 0 on the next tick. */
  ulong tick_height;       /* Block is built off of a parent block with this many ticks. */
  ulong max_tick_height;   /* Block should end with precisely this many ticks. */
  ulong hashes_per_tick;   /* Fixed per block, feature gated, known after bank clone. */
  int inconsistent_hashes_per_tick; /* set if observed ticks disagree with hashes_per_tick */

  /* Parser state. */
  uchar               txn[ FD_TXN_MAX_SZ ] __attribute__((aligned(alignof(fd_txn_t)))); /* scratch for the txn being parsed */
  ulong               mblks_rem;    /* Number of microblocks remaining in the current batch. */
  ulong               txns_rem;     /* Number of transactions remaining in the current microblock. */
  uint                fec_buf_sz;   /* Size of the fec_buf in bytes. */
  uint                fec_buf_soff; /* Starting offset into fec_buf for unparsed transactions. */
  uint                fec_buf_boff; /* Byte offset into raw block data of the first byte currently in fec_buf */
  uint                fec_eob:1;    /* FEC end-of-batch: set if the last FEC set in the batch is being
                                       ingested. */
  uint                fec_sob:1;    /* FEC start-of-batch: set if the parser expects to be receiving a new
                                       batch. */

  /* Block state. */
  uint                fec_eos:1;                          /* FEC end-of-stream: set if the last FEC set in the block has been
                                                             ingested. */
  uint                rooted:1;                           /* Set if the block is rooted. */
  uint                dying:1;                            /* Set if the block has been abandoned and no transactions should be
                                                             scheduled from it. */
  uint                refcnt:1;                           /* Starts at 1 when the block is added, set to 0 if caller has been
                                                             informed to decrement refcnt for sched. */
  uint                in_sched:1;                         /* Set if the block is being tracked by the scheduler. */
  uint                in_rdisp:1;                         /* Set if the block is being tracked by the dispatcher, either as staged
                                                             or unstaged. */
  uint                block_start_signaled:1;             /* Set if the start-of-block sentinel has been dispatched. */
  uint                block_end_signaled:1;               /* Set if the end-of-block sentinel has been dispatched. */
  uint                block_start_done:1;                 /* Set if the start-of-block processing has been completed. */
  uint                block_end_done:1;                   /* Set if the end-of-block processing has been completed. */
  uint                staged:1;                           /* Set if the block is in a dispatcher staging lane; a staged block is
                                                             tracked by the dispatcher. */
  ulong               staging_lane;                       /* Ignored if staged==0. */
  ulong               luf_depth;                          /* Depth of longest unstaged fork starting from this node; only
                                                             stageable unstaged descendants are counted. */
  uchar               fec_buf[ FD_SCHED_MAX_FEC_BUF_SZ ]; /* The previous FEC set could have some residual data that only becomes
                                                             parseable after the next FEC set is ingested. */
  uint                shred_blk_offs[ FD_SHRED_BLK_MAX ]; /* The byte offsets into block data of ingested shreds */
};
typedef struct fd_sched_block fd_sched_block_t;
     170             : 
     171             : FD_STATIC_ASSERT( sizeof(fd_hash_t)==sizeof(((fd_microblock_hdr_t *)0)->hash), unexpected poh hash size );
     172             : 
     173             : 
/* Scheduler-wide observability counters.  Fields named *_cnt are event
   counts; *_tickcount fields accumulate elapsed ticks (NOTE(review):
   presumably via fd_tickcount() deltas — confirm at update sites);
   txn_max_in_flight_cnt is a high-water mark. */
struct fd_sched_metrics {
  /* Block lifecycle. */
  uint  block_added_cnt;
  uint  block_added_staged_cnt;
  uint  block_added_unstaged_cnt;
  uint  block_added_dead_ood_cnt;
  uint  block_removed_cnt;
  uint  block_abandoned_cnt;
  uint  block_bad_cnt;
  uint  block_promoted_cnt;
  uint  block_demoted_cnt;
  /* Activation / staging-lane management. */
  uint  deactivate_no_child_cnt;
  uint  deactivate_no_txn_cnt;
  uint  deactivate_pruned_cnt;
  uint  deactivate_abandoned_cnt;
  uint  lane_switch_cnt;
  uint  lane_promoted_cnt;
  uint  lane_demoted_cnt;
  uint  fork_observed_cnt;
  /* ALUT resolution. */
  uint  alut_success_cnt;
  uint  alut_serializing_cnt;
  /* Transaction progress. */
  uint  txn_abandoned_parsed_cnt;
  uint  txn_abandoned_exec_done_cnt;
  uint  txn_abandoned_done_cnt;
  uint  txn_max_in_flight_cnt;
  ulong txn_weighted_in_flight_cnt;
  ulong txn_weighted_in_flight_tickcount;
  ulong txn_none_in_flight_tickcount;
  ulong txn_parsed_cnt;
  ulong txn_exec_done_cnt;
  ulong txn_sigverify_done_cnt;
  ulong txn_mixin_done_cnt;
  ulong txn_done_cnt;
  /* Microblock / PoH progress. */
  ulong mblk_parsed_cnt;
  ulong mblk_poh_hashed_cnt;
  ulong mblk_poh_done_cnt;
  /* Ingest volume. */
  ulong bytes_ingested_cnt;
  ulong bytes_ingested_unparsed_cnt;
  ulong bytes_dropped_cnt;
  ulong fec_cnt;
};
typedef struct fd_sched_metrics fd_sched_metrics_t;
     215             : 
     216             : #define DEQUE_NAME ref_q
     217           0 : #define DEQUE_T    ulong
     218             : #include "../../util/tmpl/fd_deque_dynamic.c"
     219             : 
/* Top-level scheduler state.  Owns the flat pools for blocks,
   microblocks, and transactions, plus the dispatcher handle and
   per-exec-tile bookkeeping. */
struct fd_sched {
  fd_acct_addr_t        aluts[ 256 ]; /* Resolve ALUT accounts into this buffer for more parallelism. */
  char                  print_buf[ FD_SCHED_MAX_PRINT_BUF_SZ ]; /* scratch for fd_sched_printf */
  ulong                 print_buf_sz; /* bytes currently written into print_buf */
  fd_chkdup_t           chkdup[ 1 ];  /* NOTE(review): presumably duplicate-account-check scratch — confirm at use site */
  fd_sched_metrics_t    metrics[ 1 ];
  ulong                 canary; /* == FD_SCHED_MAGIC */
  ulong                 depth;         /* Immutable. */
  ulong                 block_cnt_max; /* Immutable. */
  ulong                 exec_cnt;      /* Immutable. */
  long                  txn_in_flight_last_tick; /* NOTE(review): presumably tickcount timestamps feeding the
                                                    *_tickcount metrics — confirm at update sites */
  long                  next_ready_last_tick;
  ulong                 next_ready_last_bank_idx;
  ulong                 root_idx; /* index of the root block in the pool */
  fd_rdisp_t *          rdisp;    /* transaction dispatcher */
  ulong                 txn_exec_ready_bitset[ 1 ];  /* NOTE(review): presumably bit i corresponds to exec tile i
                                                        (FD_SCHED_MAX_EXEC_TILE_CNT==64 fits one ulong) — confirm */
  ulong                 sigverify_ready_bitset[ 1 ];
  ulong                 poh_ready_bitset[ 1 ];
  ulong                 active_bank_idx; /* Index of the actively replayed block, or ULONG_MAX if no block is
                                            actively replayed; has to have a transaction to dispatch; staged
                                            blocks that have no transactions to dispatch are not eligible for
                                            being active. */
  ulong                 last_active_bank_idx; /* most recently active block, used across activation switches */
  ulong                 staged_bitset;    /* Bit i set if staging lane i is occupied. */
  ulong                 staged_head_bank_idx[ FD_SCHED_MAX_STAGING_LANES ]; /* Head of the linear chain in each staging lane, ignored if bit i is
                                                                               not set in the bitset. */
  ulong                 staged_popcnt_wmk; /* NOTE(review): presumably a staged-block occupancy watermark — confirm */
  ulong                 txn_pool_free_cnt;
  fd_txn_p_t *          txn_pool;      /* Just a flat array. */
  fd_sched_txn_info_t * txn_info_pool; /* Just a flat array. */
  fd_sched_mblk_t *     mblk_pool;     /* Just a flat array. */
  ulong                 mblk_pool_free_cnt;  /* number of free mblk_pool entries */
  uint                  mblk_pool_free_head; /* head of the mblk free list, chained via mblk->next (see free_mblk) */
  ulong                 tile_to_bank_idx[ FD_SCHED_MAX_EXEC_TILE_CNT ]; /* Index of the bank that the exec tile is executing against. */
  txn_bitset_t          exec_done_set[ txn_bitset_word_cnt ];      /* Indexed by txn_idx. */
  txn_bitset_t          sigverify_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
  txn_bitset_t          poh_mixin_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
  fd_sched_block_t *    block_pool; /* Just a flat array. */
  ulong                 block_pool_popcnt; /* current block pool occupancy */
  ulong *               ref_q; /* deque of ulongs (ref_q template above) */
};
typedef struct fd_sched fd_sched_t;
     262             : 
     263             : 
     264             : /* Internal helpers. */
     265             : 
     266             : static int
     267             : verify_ticks_eager( fd_sched_block_t * block );
     268             : 
     269             : static int
     270             : verify_ticks_final( fd_sched_block_t * block );
     271             : 
     272             : static void
     273             : add_block( fd_sched_t * sched,
     274             :            ulong        bank_idx,
     275             :            ulong        parent_bank_idx );
     276             : 
     277             : FD_WARN_UNUSED static int
     278             : fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );
     279             : 
     280             : FD_WARN_UNUSED static int
     281             : fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );
     282             : 
     283             : static void
     284             : dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );
     285             : 
     286             : static void
     287             : dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );
     288             : 
     289             : FD_WARN_UNUSED static int
     290             : maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block );
     291             : 
     292             : static void
     293           0 : free_mblk( fd_sched_t * sched, fd_sched_block_t * block, uint mblk_idx ) {
     294           0 :   sched->mblk_pool[ mblk_idx ].next = sched->mblk_pool_free_head;
     295           0 :   sched->mblk_pool_free_head = mblk_idx;
     296           0 :   sched->mblk_pool_free_cnt++;
     297           0 :   block->mblk_freed_cnt++;
     298           0 : }
     299             : 
     300             : static void
     301           0 : free_mblk_slist( fd_sched_t * sched, fd_sched_block_t * block, mblk_slist_t * list ) {
     302           0 :   while( !mblk_slist_is_empty( list, sched->mblk_pool ) ) {
     303           0 :     uint idx = (uint)mblk_slist_idx_pop_head( list, sched->mblk_pool );
     304           0 :     free_mblk( sched, block, idx );
     305           0 :   }
     306           0 : }
     307             : 
     308             : static void
     309             : try_activate_block( fd_sched_t * sched );
     310             : 
     311             : static void
     312             : check_or_set_active_block( fd_sched_t * sched );
     313             : 
     314             : static void
     315             : subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block );
     316             : 
     317             : static void
     318             : subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx );
     319             : 
     320             : static void
     321             : maybe_switch_block( fd_sched_t * sched, ulong bank_idx );
     322             : 
     323             : FD_FN_UNUSED static ulong
     324             : find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx );
     325             : 
     326             : static ulong
     327             : compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx );
     328             : 
     329             : static ulong
     330             : stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx );
     331             : 
     332             : static int
     333             : lane_is_demotable( fd_sched_t * sched, int lane_idx );
     334             : 
     335             : static ulong
     336             : demote_lane( fd_sched_t * sched, int lane_idx );
     337             : 
     338             : static inline fd_sched_block_t *
     339           0 : block_pool_ele( fd_sched_t * sched, ulong idx ) {
     340           0 :   FD_TEST( idx<sched->block_cnt_max || idx==ULONG_MAX );
     341           0 :   return idx==ULONG_MAX ? NULL : sched->block_pool+idx;
     342           0 : }
     343             : 
     344             : FD_FN_UNUSED static inline int
     345           0 : block_is_void( fd_sched_block_t * block ) {
     346           0 :   /* We've seen everything in the block and no transaction got parsed
     347           0 :      out. */
     348           0 :   return block->fec_eos && block->txn_parsed_cnt==0;
     349           0 : }
     350             : 
/* Should the end-of-block sentinel be dispatched now?  True once the
   block is fully ingested, every parsed txn is fully done, every
   microblock has been PoH-hashed, start-of-block processing completed,
   and the end sentinel has not already been sent. */
static inline int
block_should_signal_end( fd_sched_block_t * block ) {
  /* Under the current policy of eager synchronous PoH mixin, hashing
     done plus fec_eos imply that all mixins have been done. */
  /* Invariant: once fec_eos is set, if all microblocks have been hashed
     (mblk_cnt==poh_hashing_done_cnt) then all hash comparisons must be
     done as well (mblk_cnt==poh_hash_cmp_done_cnt). */
  if( FD_UNLIKELY( !( !block->fec_eos || ((block->mblk_cnt==block->poh_hashing_done_cnt&&block->mblk_cnt==block->poh_hash_cmp_done_cnt)||block->mblk_cnt!=block->poh_hashing_done_cnt) ) ) ) FD_LOG_CRIT(( "invariant violation: slot %lu fec_eos %d mblk_cnt %u poh_hashing_done_cnt %u poh_hash_cmp_done_cnt %u", block->slot, block->fec_eos, block->mblk_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt ));
  return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hashing_done_cnt && block->block_start_done && !block->block_end_signaled;
}
     358             : 
     359             : static inline int
     360           0 : block_will_signal_end( fd_sched_block_t * block ) {
     361           0 :   return block->fec_eos && !block->block_end_signaled;
     362           0 : }
     363             : 
     364             : /* Is there something known to be dispatchable in the block?  This is an
     365             :    important liveness property.  A block that doesn't contain any known
     366             :    dispatchable tasks will be deactivated or demoted. */
     367             : static inline int
     368           0 : block_is_dispatchable( fd_sched_block_t * block ) {
     369           0 :   ulong exec_queued_cnt      = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
     370           0 :   ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
     371           0 :   ulong poh_queued_cnt       = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
     372           0 :   return exec_queued_cnt>0UL ||
     373           0 :          sigverify_queued_cnt>0UL ||
     374           0 :          poh_queued_cnt>0UL ||
     375           0 :          !block->block_start_signaled ||
     376           0 :          block_will_signal_end( block );
     377           0 : }
     378             : 
     379             : static inline int
     380           0 : block_is_in_flight( fd_sched_block_t * block ) {
     381           0 :   return block->txn_exec_in_flight_cnt || block->txn_sigverify_in_flight_cnt || block->poh_hashing_in_flight_cnt || (block->block_end_signaled && !block->block_end_done);
     382           0 : }
     383             : 
     384             : static inline int
     385           0 : block_is_done( fd_sched_block_t * block ) {
     386           0 :   return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hash_cmp_done_cnt && block->block_start_done && block->block_end_done;
     387           0 : }
     388             : 
     389             : static inline int
     390           0 : block_is_stageable( fd_sched_block_t * block ) {
     391           0 :   int rv = !block_is_done( block ) && !block->dying;
     392           0 :   if( FD_UNLIKELY( rv && !block->in_rdisp ) ) {
     393             :     /* Invariant: stageable blocks may be currently staged or unstaged,
     394             :        but must be in the dispatcher either way.  When a block
     395             :        transitions to DONE, it will be immediately removed from the
     396             :        dispatcher.  When a block transitions to DYING, it will be
     397             :        eventually abandoned from the dispatcher. */
     398           0 :     FD_LOG_CRIT(( "invariant violation: stageable block->in_rdisp==0, txn_parsed_cnt %u, txn_done_cnt %u, fec_eos %u,, slot %lu, parent slot %lu",
     399           0 :                   block->txn_parsed_cnt, block->txn_done_cnt, (uint)block->fec_eos, block->slot, block->parent_slot ));
     400           0 :   }
     401           0 :   return rv;
     402           0 : }
     403             : 
     404             : static inline int
     405           0 : block_is_promotable( fd_sched_block_t * block ) {
     406           0 :   return block_is_stageable( block ) && block_is_dispatchable( block ) && !block->staged;
     407           0 : }
     408             : 
     409             : static inline int
     410           0 : block_is_demotable( fd_sched_block_t * block ) {
     411             :   /* A block can only be demoted from rdisp if it is empty, meaning no
     412             :      PENDING, READY, or DISPATCHED transactions.  This is equivalent to
     413             :      having no in-flight transactions (DISPATCHED) and no queued
     414             :      transactions (PENDING or READY).  This function actually implements
     415             :      a stronger requirement.  We consider a block demotable only if
     416             :      there are no in-flight or queued tasks of any kind. */
     417           0 :   return !block_is_in_flight( block ) && !block_is_dispatchable( block ) && block->staged;
     418           0 : }
     419             : 
     420             : static inline int
     421           0 : block_is_activatable( fd_sched_block_t * block ) {
     422           0 :   return block_is_stageable( block ) && block_is_dispatchable( block ) && block->staged;
     423           0 : }
     424             : 
     425             : static inline int
     426           0 : block_should_deactivate( fd_sched_block_t * block ) {
     427             :   /* We allow a grace period, during which a block has nothing to
     428             :      dispatch, but has something in-flight.  The block is allowed to
     429             :      stay activated and ingest FEC sets during this time.  The block
     430             :      will be deactivated if there's still nothing to dispatch by the
     431             :      time all in-flight tasks are completed. */
     432           0 :   return !block_is_activatable( block ) && !block_is_in_flight( block );
     433           0 : }
     434             : 
     435             : static inline int
     436           0 : block_is_prunable( fd_sched_block_t * block ) {
     437           0 :   return !block->in_rdisp && !block_is_in_flight( block );
     438           0 : }
     439             : 
     440             : static int
     441           0 : subtree_is_prunable( fd_sched_t * sched, fd_sched_block_t * block ) {
     442           0 :   if( FD_UNLIKELY( !block_is_prunable( block ) ) ) return 0;
     443             : 
     444           0 :   ulong child_idx = block->child_idx;
     445           0 :   while( child_idx!=ULONG_MAX ) {
     446           0 :     fd_sched_block_t * child_block = block_pool_ele( sched, child_idx );
     447           0 :     if( FD_UNLIKELY( !subtree_is_prunable( sched, child_block ) ) ) {
     448           0 :       return 0;
     449           0 :     }
     450           0 :     child_idx = child_block->sibling_idx;
     451           0 :   }
     452             : 
     453           0 :   return 1;
     454           0 : }
     455             : 
     456             : static inline ulong
     457           0 : block_to_idx( fd_sched_t * sched, fd_sched_block_t * block ) { return (ulong)(block-sched->block_pool); }
     458             : 
/* fd_sched_printf appends formatted text to sched's debug print
   buffer, silently truncating once the buffer is full.  Maintains the
   invariant print_buf_sz <= FD_SCHED_MAX_PRINT_BUF_SZ-1UL so the
   buffer is always NUL-terminated and the size arithmetic below never
   underflows. */
__attribute__((format(printf,2,3)))
static void
fd_sched_printf( fd_sched_t * sched,
                 char const * fmt,
                 ... ) {
  va_list ap;
  ulong len;
  va_start( ap, fmt );
  /* vsnprintf returns the would-be length (excluding the NUL), which
     may exceed the space we passed in, or a negative value on error. */
  int ret = vsnprintf( sched->print_buf+sched->print_buf_sz,
                       FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz,
                       fmt, ap );
  va_end( ap );
  /* Clamp to the writable space, reserving one byte for the NUL.  On
     error (ret<0) treat the append as zero-length.  The -1UL cannot
     underflow because of the invariant above. */
  len = fd_ulong_if( ret<0, 0UL, fd_ulong_min( (ulong)ret, FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz-1UL ) );
  sched->print_buf[ sched->print_buf_sz+len ] = '\0';
  sched->print_buf_sz += len;
}
     475             : 
     476             : FD_FN_UNUSED static void
     477           0 : print_histogram( fd_sched_t * sched, fd_histf_t * hist, ulong converter, char * title ) {
     478           0 :   fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
     479           0 :   fd_sched_printf( sched, " | %-19s |                      | Count        |\n", title );
     480           0 :   fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
     481           0 : 
     482           0 :   ulong total_count = 0;
     483           0 :   for( ulong i=0UL; i<fd_histf_bucket_cnt( hist ); i++ ) {
     484           0 :     total_count += fd_histf_cnt( hist, i );
     485           0 :   }
     486           0 : 
     487           0 :   for( ulong i=0UL; i< fd_histf_bucket_cnt( hist ); i++ ) {
     488           0 :     ulong bucket_count = fd_histf_cnt( hist, i );
     489           0 : 
     490           0 :     char * lt_str;
     491           0 :     char lt_buf[ 64 ];
     492           0 :     if( FD_UNLIKELY( i==fd_histf_bucket_cnt( hist )-1UL ) ) {
     493           0 :       lt_str = "+Inf";
     494           0 :     } else {
     495           0 :       ulong edge = fd_histf_right( hist, i );
     496           0 :       if( converter==FD_METRICS_CONVERTER_NANOSECONDS ) {
     497           0 :         edge = fd_metrics_convert_ticks_to_nanoseconds( edge-1UL );
     498           0 :         FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu nanos", edge ) );
     499           0 :       } else if( converter==FD_METRICS_CONVERTER_NONE ) {
     500           0 :         FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu", edge-1UL ) );
     501           0 :       }
     502           0 :       lt_str = lt_buf;
     503           0 :     }
     504           0 : 
     505           0 :     /* Create visual bar - scale to max 20 characters. */
     506           0 :     char bar_buf[ 22 ];
     507           0 :     if( bucket_count>0UL && total_count>0UL ) {
     508           0 :       ulong bar_length = (bucket_count*20UL)/total_count;
     509           0 :       if( !bar_length ) bar_length = 1;
     510           0 :       for( ulong j=0UL; j<bar_length; j++ ) { bar_buf[ j ] = '*'; }
     511           0 :       bar_buf[ bar_length ] = '\0';
     512           0 :     } else {
     513           0 :       bar_buf[ 0 ] = '\0';
     514           0 :     }
     515           0 : 
     516           0 :     fd_sched_printf( sched, " | %19s | %-20s | %12lu |\n", lt_str, bar_buf, bucket_count );
     517           0 :   }
     518           0 : }
     519             : 
/* print_block_metrics appends a one-line summary of a block's progress
   counters (txn/mblk/PoH counts, pool high-water marks against their
   capacities, and FEC reassembly buffer state) to sched's print
   buffer.  Debug only. */
FD_FN_UNUSED static void
print_block_metrics( fd_sched_t * sched, fd_sched_block_t * block ) {
  fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, fec_eos %d, rooted %d, txn_parsed_cnt %u, txn_exec_done_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, mblk_cnt %u, mblk_freed_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, mblk_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
                   block_to_idx( sched, block ), block->slot, block->parent_slot, block->fec_eos, block->rooted, block->txn_parsed_cnt, block->txn_exec_done_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->mblk_cnt, block->mblk_freed_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->mblk_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
}
     525             : 
/* print_block_debug appends a one-line dump of a block's full state to
   sched's print buffer: everything print_block_metrics shows plus
   scheduling state (staged/lane/dying/in_rdisp), block start/end
   signaling flags, in-flight counters, and tick hashcnt bounds.  Debug
   only. */
FD_FN_UNUSED static void
print_block_debug( fd_sched_t * sched, fd_sched_block_t * block ) {
  fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, staged %d (lane %lu), dying %d, in_rdisp %d, fec_eos %d, rooted %d, block_start_signaled %d, block_end_signaled %d, block_start_done %d, block_end_done %d, txn_parsed_cnt %u, txn_exec_in_flight_cnt %u, txn_exec_done_cnt %u, txn_sigverify_in_flight_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_in_flight_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, mblk_cnt %u, mblk_freed_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, mblk_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, max_tick_hashcnt %lu, curr_tick_hashcnt %lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
                   block_to_idx( sched, block ), block->slot, block->parent_slot, block->staged, block->staging_lane, block->dying, block->in_rdisp, block->fec_eos, block->rooted, block->block_start_signaled, block->block_end_signaled, block->block_start_done, block->block_end_done, block->txn_parsed_cnt, block->txn_exec_in_flight_cnt, block->txn_exec_done_cnt, block->txn_sigverify_in_flight_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_in_flight_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->mblk_cnt, block->mblk_freed_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->mblk_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->max_tick_hashcnt, block->curr_tick_hashcnt, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
}
     531             : 
     532             : FD_FN_UNUSED static void
     533           0 : print_block_and_parent( fd_sched_t * sched, fd_sched_block_t * block ) {
     534           0 :   print_block_debug( sched, block );
     535           0 :   fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
     536           0 :   if( FD_LIKELY( parent ) ) print_block_debug( sched, parent );
     537           0 : }
     538             : 
/* print_metrics appends a one-line dump of all scheduler-wide metrics
   counters to sched's print buffer.  Debug only. */
FD_FN_UNUSED static void
print_metrics( fd_sched_t * sched ) {
    fd_sched_printf( sched, "metrics: block_added_cnt %u, block_added_staged_cnt %u, block_added_unstaged_cnt %u, block_added_dead_ood_cnt %u, block_removed_cnt %u, block_abandoned_cnt %u, block_bad_cnt %u, block_promoted_cnt %u, block_demoted_cnt %u, deactivate_no_child_cnt %u, deactivate_no_txn_cnt %u, deactivate_pruned_cnt %u, deactivate_abandoned_cnt %u, lane_switch_cnt %u, lane_promoted_cnt %u, lane_demoted_cnt %u, fork_observed_cnt %u, alut_success_cnt %u, alut_serializing_cnt %u, txn_abandoned_parsed_cnt %u, txn_abandoned_exec_done_cnt %u, txn_abandoned_done_cnt %u, txn_max_in_flight_cnt %u, txn_weighted_in_flight_cnt %lu, txn_weighted_in_flight_tickcount %lu, txn_none_in_flight_tickcount %lu, txn_parsed_cnt %lu, txn_exec_done_cnt %lu, txn_sigverify_done_cnt %lu, txn_mixin_done_cnt %lu, txn_done_cnt %lu, mblk_parsed_cnt %lu, mblk_poh_hashed_cnt %lu, mblk_poh_done_cnt %lu, bytes_ingested_cnt %lu, bytes_ingested_unparsed_cnt %lu, bytes_dropped_cnt %lu, fec_cnt %lu\n",
                     sched->metrics->block_added_cnt, sched->metrics->block_added_staged_cnt, sched->metrics->block_added_unstaged_cnt, sched->metrics->block_added_dead_ood_cnt, sched->metrics->block_removed_cnt, sched->metrics->block_abandoned_cnt, sched->metrics->block_bad_cnt, sched->metrics->block_promoted_cnt, sched->metrics->block_demoted_cnt, sched->metrics->deactivate_no_child_cnt, sched->metrics->deactivate_no_txn_cnt, sched->metrics->deactivate_pruned_cnt, sched->metrics->deactivate_abandoned_cnt, sched->metrics->lane_switch_cnt, sched->metrics->lane_promoted_cnt, sched->metrics->lane_demoted_cnt, sched->metrics->fork_observed_cnt, sched->metrics->alut_success_cnt, sched->metrics->alut_serializing_cnt, sched->metrics->txn_abandoned_parsed_cnt, sched->metrics->txn_abandoned_exec_done_cnt, sched->metrics->txn_abandoned_done_cnt, sched->metrics->txn_max_in_flight_cnt, sched->metrics->txn_weighted_in_flight_cnt, sched->metrics->txn_weighted_in_flight_tickcount, sched->metrics->txn_none_in_flight_tickcount, sched->metrics->txn_parsed_cnt, sched->metrics->txn_exec_done_cnt, sched->metrics->txn_sigverify_done_cnt, sched->metrics->txn_mixin_done_cnt, sched->metrics->txn_done_cnt, sched->metrics->mblk_parsed_cnt, sched->metrics->mblk_poh_hashed_cnt, sched->metrics->mblk_poh_done_cnt, sched->metrics->bytes_ingested_cnt, sched->metrics->bytes_ingested_unparsed_cnt, sched->metrics->bytes_dropped_cnt, sched->metrics->fec_cnt );

}
     545             : 
     546             : FD_FN_UNUSED static void
     547           0 : print_sched( fd_sched_t * sched ) {
     548           0 :   fd_sched_printf( sched, "sched canary 0x%lx, exec_cnt %lu, root_idx %lu, txn_exec_ready_bitset[ 0 ] 0x%lx, sigverify_ready_bitset[ 0 ] 0x%lx, poh_ready_bitset[ 0 ] 0x%lx, active_idx %lu, staged_bitset %lu, staged_head_idx[0] %lu, staged_head_idx[1] %lu, staged_head_idx[2] %lu, staged_head_idx[3] %lu, staged_popcnt_wmk %lu, txn_pool_free_cnt %lu/%lu, block_pool_popcnt %lu/%lu\n",
     549           0 :                    sched->canary, sched->exec_cnt, sched->root_idx, sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ], sched->active_bank_idx, sched->staged_bitset, sched->staged_head_bank_idx[ 0 ], sched->staged_head_bank_idx[ 1 ], sched->staged_head_bank_idx[ 2 ], sched->staged_head_bank_idx[ 3 ], sched->staged_popcnt_wmk, sched->txn_pool_free_cnt, sched->depth, sched->block_pool_popcnt, sched->block_cnt_max );
     550           0 :   fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
     551           0 :   if( active_block ) print_block_debug( sched, active_block );
     552           0 :   for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
     553           0 :     if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
     554           0 :       fd_sched_block_t * block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
     555           0 :       print_block_debug( sched, block );
     556           0 :     }
     557           0 :   }
     558           0 : }
     559             : 
/* print_all appends the full debug picture to the print buffer:
   scheduler-wide metrics, scheduler state (including active/staged
   blocks), and finally block plus its parent.  The output order is
   deliberate and relied upon by log readers. */
FD_FN_UNUSED static void
print_all( fd_sched_t * sched, fd_sched_block_t * block ) {
  print_metrics( sched );
  print_sched( sched );
  print_block_and_parent( sched, block );
}
     566             : 
/* handle_bad_block is invoked when block has been determined to be
   invalid.  It logs a full debug dump, abandons the block and its
   entire subtree (descendants of a bad block are also dead), bumps the
   bad-block metric, and re-evaluates which block should be active now
   that this one is gone.  Ordering matters: the dump must happen
   before subtree_abandon mutates state. */
static void
handle_bad_block( fd_sched_t * sched, fd_sched_block_t * block ) {
  sched->print_buf_sz = 0UL; /* reset the print buffer for a fresh dump */
  print_all( sched, block );
  FD_LOG_DEBUG(( "%s", sched->print_buf ));
  subtree_abandon( sched, block );
  sched->metrics->block_bad_cnt++;
  check_or_set_active_block( sched );
}
     576             : 
     577             : 
     578             : /* Public functions. */
     579             : 
     580             : ulong
     581           0 : fd_sched_align( void ) {
     582           0 :   return fd_ulong_max( alignof(fd_sched_t),
     583           0 :          fd_ulong_max( fd_rdisp_align(),
     584           0 :          fd_ulong_max( alignof(fd_sched_block_t), 64UL ))); /* Minimally cache line aligned. */
     585           0 : }
     586             : 
     587             : ulong
     588             : fd_sched_footprint( ulong depth,
     589           0 :                     ulong block_cnt_max ) {
     590           0 :   if( FD_UNLIKELY( depth<FD_SCHED_MIN_DEPTH || depth>FD_SCHED_MAX_DEPTH ) ) return 0UL; /* bad depth */
     591           0 :   if( FD_UNLIKELY( !block_cnt_max ) ) return 0UL; /* bad block_cnt_max */
     592           0 :   if( FD_UNLIKELY( depth>UINT_MAX-1UL ) ) return 0UL; /* mblk_pool use uint as pointers */
     593             : 
     594           0 :   ulong l = FD_LAYOUT_INIT;
     595           0 :   l = FD_LAYOUT_APPEND( l, fd_sched_align(),             sizeof(fd_sched_t)                         );
     596           0 :   l = FD_LAYOUT_APPEND( l, fd_rdisp_align(),             fd_rdisp_footprint( depth, block_cnt_max ) ); /* dispatcher */
     597           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_sched_block_t),    block_cnt_max*sizeof(fd_sched_block_t)     ); /* block pool */
     598           0 :   l = FD_LAYOUT_APPEND( l, ref_q_align(),                ref_q_footprint( block_cnt_max )           );
     599           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_txn_p_t),          depth*sizeof(fd_txn_p_t)                   ); /* txn_pool */
     600           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_sched_txn_info_t), depth*sizeof(fd_sched_txn_info_t)          ); /* txn_info_pool */
     601           0 :   l = FD_LAYOUT_APPEND( l, alignof(fd_sched_mblk_t),     depth*sizeof(fd_sched_mblk_t)              ); /* mblk_pool */
     602           0 :   return FD_LAYOUT_FINI( l, fd_sched_align() );
     603           0 : }
     604             : 
     605             : void *
     606             : fd_sched_new( void *     mem,
     607             :               fd_rng_t * rng,
     608             :               ulong      depth,
     609             :               ulong      block_cnt_max,
     610           0 :               ulong      exec_cnt ) {
     611             : 
     612           0 :   if( FD_UNLIKELY( !mem ) ) {
     613           0 :     FD_LOG_WARNING(( "NULL mem" ));
     614           0 :     return NULL;
     615           0 :   }
     616             : 
     617           0 :   if( FD_UNLIKELY( !rng ) ) {
     618           0 :     FD_LOG_WARNING(( "NULL rng" ));
     619           0 :     return NULL;
     620           0 :   }
     621             : 
     622           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, fd_sched_align() ) ) ) {
     623           0 :     FD_LOG_WARNING(( "misaligned mem (%p)", mem ));
     624           0 :     return NULL;
     625           0 :   }
     626             : 
     627           0 :   if( FD_UNLIKELY( depth<FD_SCHED_MIN_DEPTH || depth>FD_SCHED_MAX_DEPTH ) ) {
     628           0 :     FD_LOG_WARNING(( "bad depth (%lu)", depth ));
     629           0 :     return NULL;
     630           0 :   }
     631             : 
     632           0 :   if( FD_UNLIKELY( !block_cnt_max ) ) {
     633           0 :     FD_LOG_WARNING(( "bad block_cnt_max (%lu)", block_cnt_max ));
     634           0 :     return NULL;
     635           0 :   }
     636             : 
     637           0 :   if( FD_UNLIKELY( depth>UINT_MAX-1UL ) ) {
     638           0 :     FD_LOG_WARNING(( "bad depth (%lu)", depth ));
     639           0 :     return NULL;
     640           0 :   }
     641             : 
     642           0 :   if( FD_UNLIKELY( !exec_cnt || exec_cnt>FD_SCHED_MAX_EXEC_TILE_CNT ) ) {
     643           0 :     FD_LOG_WARNING(( "bad exec_cnt (%lu)", exec_cnt ));
     644           0 :     return NULL;
     645           0 :   }
     646             : 
     647           0 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     648           0 :   fd_sched_t *          sched          = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(),             sizeof(fd_sched_t)                         );
     649           0 :   void *                _rdisp         = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             fd_rdisp_footprint( depth, block_cnt_max ) );
     650           0 :   void *                _bpool         = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t),    block_cnt_max*sizeof(fd_sched_block_t)     );
     651           0 :   void *                _ref_q         = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(),                ref_q_footprint( block_cnt_max )           );
     652           0 :   fd_txn_p_t *          _txn_pool      = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_txn_p_t),          depth*sizeof(fd_txn_p_t)                   );
     653           0 :   fd_sched_txn_info_t * _txn_info_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_txn_info_t), depth*sizeof(fd_sched_txn_info_t)          );
     654           0 :   fd_sched_mblk_t *     _mblk_pool     = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_mblk_t),     depth*sizeof(fd_sched_mblk_t)              );
     655           0 :   FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );
     656             : 
     657           0 :   sched->txn_pool      = _txn_pool;
     658           0 :   sched->txn_info_pool = _txn_info_pool;
     659           0 :   sched->mblk_pool     = _mblk_pool;
     660             : 
     661           0 :   fd_rdisp_new( _rdisp, depth, block_cnt_max, fd_rng_ulong( rng ) );
     662             : 
     663           0 :   fd_sched_block_t * bpool = (fd_sched_block_t *)_bpool;
     664           0 :   for( ulong i=0; i<block_cnt_max; i++ ) {
     665           0 :     bpool[ i ].in_sched = 0;
     666           0 :     mblk_slist_new( bpool[ i ].mblks_unhashed );
     667           0 :     mblk_slist_new( bpool[ i ].mblks_hashing_in_progress );
     668           0 :     mblk_slist_new( bpool[ i ].mblks_mixin_in_progress );
     669           0 :   }
     670             : 
     671           0 :   FD_TEST( fd_chkdup_new( sched->chkdup, rng ) );
     672             : 
     673           0 :   fd_memset( sched->metrics, 0, sizeof(fd_sched_metrics_t) );
     674           0 :   sched->txn_in_flight_last_tick  = LONG_MAX;
     675           0 :   sched->next_ready_last_tick     = LONG_MAX;
     676           0 :   sched->next_ready_last_bank_idx = ULONG_MAX;
     677             : 
     678           0 :   sched->canary               = FD_SCHED_MAGIC;
     679           0 :   sched->depth                = depth;
     680           0 :   sched->block_cnt_max        = block_cnt_max;
     681           0 :   sched->exec_cnt             = exec_cnt;
     682           0 :   sched->root_idx             = ULONG_MAX;
     683           0 :   sched->active_bank_idx      = ULONG_MAX;
     684           0 :   sched->last_active_bank_idx = ULONG_MAX;
     685           0 :   sched->staged_bitset        = 0UL;
     686           0 :   sched->staged_popcnt_wmk    = 0UL;
     687             : 
     688           0 :   sched->txn_exec_ready_bitset[ 0 ]  = fd_ulong_mask_lsb( (int)exec_cnt );
     689           0 :   sched->sigverify_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt );
     690           0 :   sched->poh_ready_bitset[ 0 ]       = fd_ulong_mask_lsb( (int)exec_cnt );
     691             : 
     692           0 :   sched->txn_pool_free_cnt = depth-1UL; /* -1 because index 0 is unusable as a sentinel reserved by the dispatcher */
     693             : 
     694           0 :   for( ulong i=0UL; i<depth-1UL; i++ ) sched->mblk_pool[ i ].next = (uint)(i+1UL);
     695           0 :   sched->mblk_pool[ depth-1UL ].next = UINT_MAX;
     696           0 :   sched->mblk_pool_free_head = 0U;
     697           0 :   sched->mblk_pool_free_cnt  = depth;
     698             : 
     699           0 :   txn_bitset_new( sched->exec_done_set );
     700           0 :   txn_bitset_new( sched->sigverify_done_set );
     701           0 :   txn_bitset_new( sched->poh_mixin_done_set );
     702             : 
     703           0 :   sched->block_pool_popcnt = 0UL;
     704             : 
     705           0 :   ref_q_new( _ref_q, block_cnt_max );
     706             : 
     707           0 :   return sched;
     708           0 : }
     709             : 
     710             : fd_sched_t *
     711           0 : fd_sched_join( void * mem ) {
     712             : 
     713           0 :   if( FD_UNLIKELY( !mem ) ) {
     714           0 :     FD_LOG_WARNING(( "NULL mem" ));
     715           0 :     return NULL;
     716           0 :   }
     717             : 
     718           0 :   fd_sched_t * sched         = (fd_sched_t *)mem;
     719           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     720           0 :   ulong        depth         = sched->depth;
     721           0 :   ulong        block_cnt_max = sched->block_cnt_max;
     722             : 
     723           0 :   FD_SCRATCH_ALLOC_INIT( l, mem );
     724           0 :   /*                     */ FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(),             sizeof(fd_sched_t)                         );
     725           0 :   void *           _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(),             fd_rdisp_footprint( depth, block_cnt_max ) );
     726           0 :   void *           _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t),    block_cnt_max*sizeof(fd_sched_block_t)     );
     727           0 :   void *           _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(),                ref_q_footprint( block_cnt_max )           );
     728           0 :   /*                     */ FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_txn_p_t),          depth*sizeof(fd_txn_p_t)                   );
     729           0 :   /*                     */ FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_txn_info_t), depth*sizeof(fd_sched_txn_info_t)          );
     730           0 :   /*                     */ FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_mblk_t),     depth*sizeof(fd_sched_mblk_t)              );
     731           0 :   FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );
     732             : 
     733           0 :   sched->rdisp      = fd_rdisp_join( _rdisp );
     734           0 :   sched->ref_q      = ref_q_join( _ref_q );
     735           0 :   sched->block_pool = _bpool;
     736             : 
     737           0 :   for( ulong i=0; i<block_cnt_max; i++ ) {
     738           0 :     mblk_slist_join( sched->block_pool[ i ].mblks_unhashed );
     739           0 :     mblk_slist_join( sched->block_pool[ i ].mblks_hashing_in_progress );
     740           0 :     mblk_slist_join( sched->block_pool[ i ].mblks_mixin_in_progress );
     741           0 :   }
     742             : 
     743           0 :   txn_bitset_join( sched->exec_done_set );
     744           0 :   txn_bitset_join( sched->sigverify_done_set );
     745           0 :   txn_bitset_join( sched->poh_mixin_done_set );
     746             : 
     747           0 :   return sched;
     748           0 : }
     749             : 
     750             : int
     751           0 : fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec ) {
     752           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     753           0 :   FD_TEST( fec->bank_idx<sched->block_cnt_max );
     754           0 :   FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );
     755             : 
     756           0 :   if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
     757           0 :     sched->print_buf_sz = 0UL;
     758           0 :     print_metrics( sched );
     759           0 :     print_sched( sched );
     760           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     761           0 :     FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
     762           0 :   }
     763             : 
     764           0 :   ulong fec_buf_sz = 0UL;
     765           0 :   fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );
     766           0 :   if( FD_LIKELY( !fec->is_first_in_block ) ) {
     767           0 :     fec_buf_sz += block->fec_buf_sz-block->fec_buf_soff;
     768           0 :   } else {
     769             :     /* No residual data as this is a fresh new block. */
     770           0 :   }
     771             :   /* Addition is safe and won't overflow because we checked the FEC set
     772             :      size above. */
     773           0 :   fec_buf_sz += fec->fec->data_sz;
     774             :   /* Assuming every transaction is min size, do we have enough free
     775             :      entries in the txn pool?  For a more precise txn count, we would
     776             :      have to do some parsing. */
     777           0 :   return sched->txn_pool_free_cnt>=fec_buf_sz/FD_TXN_MIN_SERIALIZED_SZ && sched->mblk_pool_free_cnt>=fec_buf_sz/sizeof(fd_microblock_hdr_t);
     778           0 : }
     779             : 
     780             : ulong
     781           0 : fd_sched_can_ingest_cnt( fd_sched_t * sched ) {
     782           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     783             :   /* Worst case, we need one byte from the incoming data to extract a
     784             :      transaction out of the residual data, and the rest of the incoming
     785             :      data contributes toward min sized transactions. */
     786           0 :   return fd_ulong_min( sched->txn_pool_free_cnt/FD_SCHED_MAX_TXN_PER_FEC, sched->mblk_pool_free_cnt/FD_SCHED_MAX_MBLK_PER_FEC );
     787           0 : }
     788             : 
     789             : int
     790           0 : fd_sched_is_drained( fd_sched_t * sched ) {
     791           0 :   int nothing_inflight = sched->exec_cnt==(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ]&sched->sigverify_ready_bitset[ 0 ]&sched->poh_ready_bitset[ 0 ] );
     792           0 :   int nothing_queued = sched->active_bank_idx==ULONG_MAX;
     793           0 :   return nothing_inflight && nothing_queued;
     794           0 : }
     795             : 
     796             : FD_WARN_UNUSED int
     797             : fd_sched_fec_ingest( fd_sched_t *     sched,
     798           0 :                      fd_sched_fec_t * fec ) {
     799           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
     800           0 :   FD_TEST( fec->bank_idx<sched->block_cnt_max );
     801           0 :   FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );
     802           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
     803             : 
     804           0 :   fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );
     805             : 
     806           0 :   if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
     807           0 :     sched->print_buf_sz = 0UL;
     808           0 :     print_all( sched, block );
     809           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     810           0 :     FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
     811           0 :   }
     812             : 
     813           0 :   sched->metrics->fec_cnt++;
     814             : 
     815           0 :   if( FD_UNLIKELY( fec->is_first_in_block ) ) {
     816             :     /* This is a new block. */
     817           0 :     add_block( sched, fec->bank_idx, fec->parent_bank_idx );
     818           0 :     block->slot        = fec->slot;
     819           0 :     block->parent_slot = fec->parent_slot;
     820             : 
     821           0 :     if( FD_UNLIKELY( block->dying ) ) {
     822             :       /* The child of a dead block is also dead.  We added it to our
     823             :          fork tree just so we could track an entire lineage of dead
     824             :          children and propagate the dead property to the entire lineage,
     825             :          in case there were frags for more than one dead children
     826             :          in-flight at the time the parent was abandoned.  That being
     827             :          said, we shouldn't need to add the dead child to the
     828             :          dispatcher. */
     829           0 :       sched->metrics->block_added_dead_ood_cnt++;
     830             : 
     831             :       /* Ignore the FEC set for a dead block. */
     832           0 :       sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     833           0 :       return 1;
     834           0 :     }
     835             : 
     836             :     /* Try to find a staging lane for this block. */
     837           0 :     int alloc_lane = 0;
     838           0 :     fd_sched_block_t * parent_block = block_pool_ele( sched, fec->parent_bank_idx );
     839           0 :     if( FD_LIKELY( parent_block->staged ) ) {
     840             :       /* Parent is staged.  So see if we can continue down the same
     841             :          staging lane. */
     842           0 :       ulong staging_lane = parent_block->staging_lane;
     843           0 :       ulong child_idx    = parent_block->child_idx;
     844           0 :       while( child_idx!=ULONG_MAX ) {
     845           0 :         fd_sched_block_t * child = block_pool_ele( sched, child_idx );
     846           0 :         if( child->staged && child->staging_lane==staging_lane ) {
     847             :           /* Found a child on the same lane.  So we're done. */
     848           0 :           staging_lane = FD_RDISP_UNSTAGED;
     849           0 :           break;
     850           0 :         }
     851           0 :         child_idx = child->sibling_idx;
     852           0 :       }
     853             :       /* No child is staged on the same lane as the parent.  So stage
     854             :          this block.  This is the common case. */
     855           0 :       if( FD_LIKELY( staging_lane!=FD_RDISP_UNSTAGED ) ) {
     856           0 :         block->in_rdisp     = 1;
     857           0 :         block->staged       = 1;
     858           0 :         block->staging_lane = staging_lane;
     859           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, staging_lane );
     860           0 :         sched->metrics->block_added_cnt++;
     861           0 :         sched->metrics->block_added_staged_cnt++;
     862           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, staging_lane ));
     863           0 :       } else {
     864           0 :         alloc_lane = 1;
     865           0 :       }
     866           0 :     } else {
     867           0 :       if( block_is_stageable( parent_block ) ) {
     868             :         /* Parent is unstaged but stageable.  So let's be unstaged too.
     869             :            This is not only a policy decision to be lazy and not promote
     870             :            the parent at the moment, but also an important invariant
     871             :            that we maintain for deadlock freeness in the face of staging
     872             :            lane shortage.  See the comments in lane eviction for how
     873             :            this invariant is relevant. */
     874           0 :         block->in_rdisp = 1;
     875           0 :         block->staged   = 0;
     876           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
     877           0 :         sched->metrics->block_added_cnt++;
     878           0 :         sched->metrics->block_added_unstaged_cnt++;
     879           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
     880           0 :       } else {
     881           0 :         alloc_lane = 1;
     882           0 :       }
     883           0 :     }
     884           0 :     if( FD_UNLIKELY( alloc_lane ) ) {
     885             :       /* We weren't able to inherit the parent's staging lane.  So try
     886             :          to find a new staging lane. */
     887           0 :       if( FD_LIKELY( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) { /* Optimize for lane available. */
     888           0 :         int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset );
     889           0 :         if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
     890           0 :           FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
     891           0 :                         lane_idx, sched->staged_bitset ));
     892           0 :         }
     893           0 :         sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
     894           0 :         sched->staged_head_bank_idx[ lane_idx ] = fec->bank_idx;
     895           0 :         sched->staged_popcnt_wmk = fd_ulong_max( sched->staged_popcnt_wmk, (ulong)fd_ulong_popcnt( sched->staged_bitset ) );
     896           0 :         block->in_rdisp     = 1;
     897           0 :         block->staged       = 1;
     898           0 :         block->staging_lane = (ulong)lane_idx;
     899           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, block->staging_lane );
     900           0 :         sched->metrics->block_added_cnt++;
     901           0 :         sched->metrics->block_added_staged_cnt++;
     902           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, block->staging_lane ));
     903           0 :       } else {
     904             :         /* No lanes available. */
     905           0 :         block->in_rdisp = 1;
     906           0 :         block->staged   = 0;
     907           0 :         fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
     908           0 :         sched->metrics->block_added_cnt++;
     909           0 :         sched->metrics->block_added_unstaged_cnt++;
     910           0 :         FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
     911           0 :       }
     912           0 :     }
     913           0 :   }
     914             : 
     915           0 :   block->txn_pool_max_popcnt   = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt - 1UL );
     916           0 :   block->mblk_pool_max_popcnt  = fd_ulong_max( block->mblk_pool_max_popcnt, sched->depth - sched->mblk_pool_free_cnt );
     917           0 :   block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );
     918             : 
     919           0 :   if( FD_UNLIKELY( block->dying ) ) {
     920             :     /* Ignore the FEC set for a dead block. */
     921           0 :     sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     922           0 :     return 1;
     923           0 :   }
     924             : 
     925           0 :   if( FD_UNLIKELY( !block->in_rdisp ) ) {
     926             :     /* Invariant: block must be in the dispatcher at this point. */
     927           0 :     sched->print_buf_sz = 0UL;
     928           0 :     print_all( sched, block );
     929           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     930           0 :     FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
     931           0 :                   block->slot, block->parent_slot ));
     932           0 :   }
     933             : 
     934           0 :   if( FD_UNLIKELY( block->fec_eos ) ) {
     935             :     /* This means something is wrong upstream.  We're getting more FEC
     936             :        sets for a block that has already ended, or so we were told. */
     937           0 :     sched->print_buf_sz = 0UL;
     938           0 :     print_all( sched, block );
     939           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     940           0 :     FD_LOG_CRIT(( "invariant violation: block->fec_eos set but getting more FEC sets, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
     941           0 :   }
     942           0 :   if( FD_UNLIKELY( block->fec_eob ) ) {
     943             :     /* If the previous FEC set ingestion and parse was successful,
     944             :        block->fec_eob should be cleared.  The fact that fec_eob is set
     945             :        means that the previous batch didn't parse properly.  So this is
     946             :        a bad block.  We should refuse to replay down the fork. */
     947           0 :     FD_LOG_INFO(( "bad block: failed to parse, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
     948           0 :     handle_bad_block( sched, block );
     949           0 :     sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     950           0 :     return 0;
     951           0 :   }
     952           0 :   if( FD_UNLIKELY( block->child_idx!=ULONG_MAX ) ) {
     953             :     /* This means something is wrong upstream.  FEC sets are not being
     954             :        delivered in replay order.  We got a child block FEC set before
     955             :        this block was completely delivered. */
     956           0 :     sched->print_buf_sz = 0UL;
     957           0 :     print_all( sched, block );
     958           0 :     fd_sched_block_t * child_block = block_pool_ele( sched, block->child_idx );
     959           0 :     print_block_debug( sched, child_block );
     960           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
     961           0 :     FD_LOG_CRIT(( "invariant violation: block->child_idx %lu, slot %lu, parent slot %lu", block->child_idx, fec->slot, fec->parent_slot ));
     962           0 :   }
     963             : 
     964           0 :   FD_TEST( block->fec_buf_sz>=block->fec_buf_soff );
     965           0 :   if( FD_LIKELY( block->fec_buf_sz>block->fec_buf_soff ) ) {
     966             :     /* If there is residual data from the previous FEC set within the
     967             :        same batch, we move it to the beginning of the buffer and append
     968             :        the new FEC set. */
     969           0 :     memmove( block->fec_buf, block->fec_buf+block->fec_buf_soff, block->fec_buf_sz-block->fec_buf_soff );
     970           0 :   }
     971           0 :   block->fec_buf_boff += block->fec_buf_soff;
     972           0 :   block->fec_buf_sz   -= block->fec_buf_soff;
     973           0 :   block->fec_buf_soff  = 0;
     974             :   /* Addition is safe and won't overflow because we checked the FEC
     975             :      set size above. */
     976           0 :   if( FD_UNLIKELY( block->fec_buf_sz+fec->fec->data_sz>FD_SCHED_MAX_FEC_BUF_SZ ) ) {
     977             :     /* In a conformant block, there shouldn't be more than a
     978             :        transaction's worth of residual data left over from the previous
     979             :        FEC set within the same batch.  So if this condition doesn't
     980             :        hold, it's a bad block.  Instead of crashing, we should refuse to
     981             :        replay down the fork. */
     982           0 :     FD_LOG_INFO(( "bad block: fec_buf_sz %u, fec->data_sz %lu, slot %lu, parent slot %lu", block->fec_buf_sz, fec->fec->data_sz, fec->slot, fec->parent_slot ));
     983           0 :     handle_bad_block( sched, block );
     984           0 :     sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
     985           0 :     return 0;
     986           0 :   }
     987             : 
     988             :   /* Append the new FEC set to the end of the buffer. */
     989           0 :   fd_memcpy( block->fec_buf+block->fec_buf_sz, fec->data, fec->fec->data_sz );
     990           0 :   block->fec_buf_sz += (uint)fec->fec->data_sz;
     991           0 :   sched->metrics->bytes_ingested_cnt += fec->fec->data_sz;
     992             : 
     993           0 :   block->fec_eob = fec->is_last_in_batch;
     994           0 :   block->fec_eos = fec->is_last_in_block;
     995             : 
     996           0 :   ulong block_sz = block->shred_cnt>0 ? block->shred_blk_offs[ block->shred_cnt-1 ] : 0UL;
     997           0 :   for( ulong i=0; i<fec->shred_cnt; i++ ) {
     998           0 :     if( FD_LIKELY( i<32UL ) ) {
     999           0 :       block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->shred_offs[ i ];
    1000           0 :     } else if( FD_UNLIKELY( i!=fec->shred_cnt-1UL ) ) {
    1001             :       /* We don't track shred boundaries after 32 shreds, assume they're
    1002             :          sized uniformly */
    1003           0 :       ulong num_overflow_shreds = fec->shred_cnt-32UL;
    1004           0 :       ulong overflow_idx        = i-32UL;
    1005           0 :       ulong overflow_data_sz    = fec->fec->data_sz-fec->fec->shred_offs[ 31 ];
    1006           0 :       block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->shred_offs[ 31 ] + (uint)(overflow_data_sz / num_overflow_shreds * (overflow_idx + 1UL));
    1007           0 :     } else {
    1008           0 :       block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + (uint)fec->fec->data_sz;
    1009           0 :     }
    1010           0 :   }
    1011             : 
    1012           0 :   int err = fd_sched_parse( sched, block, fec->alut_ctx );
    1013             : 
    1014           0 :   if( FD_UNLIKELY( err==FD_SCHED_BAD_BLOCK ) ) {
    1015           0 :     handle_bad_block( sched, block );
    1016           0 :     sched->metrics->bytes_dropped_cnt += block->fec_buf_sz-block->fec_buf_soff;
    1017           0 :     return 0;
    1018           0 :   }
    1019             : 
    1020           0 :   if( FD_UNLIKELY( (fec->is_last_in_batch||fec->is_last_in_block) && (block->txns_rem||block->mblks_rem||block->fec_eob) ) ) {
    1021             :     /* A malformed block that fails to parse out exactly as many
    1022             :        transactions and microblocks as it should.
    1023             : 
    1024             :        Upon getting a last-in-batch FEC, everything in the ongoing batch
    1025             :        should completely parse, and the eob flag should be reset.  There
    1026             :        should be no go around for a last-in-batch FEC. */
    1027           0 :     FD_LOG_INFO(( "bad block: bytes_rem %u, txns_rem %lu, mblks_rem %lu, fec_eob %d, slot %lu, parent slot %lu", block->fec_buf_sz-block->fec_buf_soff, block->txns_rem, block->mblks_rem, block->fec_eob, block->slot, block->parent_slot ));
    1028           0 :     handle_bad_block( sched, block );
    1029           0 :     return 0;
    1030           0 :   }
    1031             : 
    1032           0 :   if( FD_UNLIKELY( block->fec_eos && !block->last_mblk_is_tick ) ) {
    1033             :     /* The last microblock should be a tick.
    1034             : 
    1035             :        Note that this early parse-time detection could cause us to throw
    1036             :        a slightly different error from Agave, in the case that there are
    1037             :        too few ticks, since the tick count check precedes the trailing
    1038             :        entry check in Agave.  That being said, ultimately a
    1039             :        TRAILING_ENTRY renders a block invalid, regardless of anything
    1040             :        else. */
    1041           0 :     FD_LOG_INFO(( "bad block: TRAILING_ENTRY, slot %lu, parent slot %lu, mblk_cnt %u", block->slot, block->parent_slot, block->mblk_cnt ));
    1042           0 :     handle_bad_block( sched, block );
    1043           0 :     return 0;
    1044           0 :   }
    1045             : 
    1046             :   /* We just received a FEC set, which may have made all transactions in
    1047             :      a partially parsed microblock available.  If this were a malformed
    1048             :      block that ends in a non-tick microblock, there's not going to be a
    1049             :      hashing task from the missing ending tick to drain the mixin queue.
    1050             :      So we try to drain the mixin queue right here.  Another option is
    1051             :      to drain it at dispatch time, when we are about to dispatch the end
    1052             :      of block signal, right before the check for whether block should
    1053             :      end. */
    1054           0 :   int mixin_res;
    1055           0 :   while( (mixin_res=maybe_mixin( sched, block )) ) {
    1056           0 :     if( FD_UNLIKELY( mixin_res==-1 ) ) {
    1057           0 :       handle_bad_block( sched, block );
    1058           0 :       return 0;
    1059           0 :     }
    1060           0 :     FD_TEST( mixin_res==1||mixin_res==2 );
    1061           0 :   }
    1062             : 
    1063             :   /* Check if we need to set the active block. */
    1064           0 :   check_or_set_active_block( sched );
    1065             : 
    1066           0 :   return 1;
    1067           0 : }
    1068             : 
    1069             : ulong
    1070           0 : fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out ) {
    1071           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1072           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
    1073             : 
    1074           0 :   ulong exec_ready_bitset0 = sched->txn_exec_ready_bitset[ 0 ];
    1075           0 :   ulong exec_fully_ready_bitset = sched->sigverify_ready_bitset[ 0 ] & sched->poh_ready_bitset[ 0 ] & exec_ready_bitset0;
    1076           0 :   if( FD_UNLIKELY( !exec_fully_ready_bitset ) ) {
    1077             :     /* Early exit if no exec tiles available. */
    1078           0 :     return 0UL;
    1079           0 :   }
    1080             : 
    1081           0 :   if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
    1082             :     /* No need to try activating a block.  If we're in this state,
    1083             :        there's truly nothing to execute.  We will activate something
    1084             :        when we ingest a FEC set with transactions. */
    1085           0 :     return 0UL;
    1086           0 :   }
    1087             : 
    1088           0 :   out->task_type = FD_SCHED_TT_NULL;
    1089             : 
    1090             :   /* We could in theory reevaluate staging lane allocation here and do
    1091             :      promotion/demotion as needed.  It's a policy decision to minimize
    1092             :      fork churn for now and just execute down the same active fork. */
    1093             : 
    1094           0 :   ulong bank_idx = sched->active_bank_idx;
    1095           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1096           0 :   if( FD_UNLIKELY( block_should_deactivate( block ) ) ) {
    1097           0 :     sched->print_buf_sz = 0UL;
    1098           0 :     print_all( sched, block );
    1099           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1100           0 :     FD_LOG_CRIT(( "invariant violation: active_bank_idx %lu is not activatable nor has anything in-flight", sched->active_bank_idx ));
    1101           0 :   }
    1102             : 
    1103           0 :   block->txn_pool_max_popcnt   = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt - 1UL );
    1104           0 :   block->mblk_pool_max_popcnt  = fd_ulong_max( block->mblk_pool_max_popcnt, sched->depth - sched->mblk_pool_free_cnt );
    1105           0 :   block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );
    1106             : 
    1107           0 :   if( FD_UNLIKELY( !block->block_start_signaled ) ) {
    1108           0 :     out->task_type = FD_SCHED_TT_BLOCK_START;
    1109           0 :     out->block_start->bank_idx        = bank_idx;
    1110           0 :     out->block_start->parent_bank_idx = block->parent_idx;
    1111           0 :     out->block_start->slot            = block->slot;
    1112           0 :     block->block_start_signaled = 1;
    1113           0 :     sched->next_ready_last_tick     = fd_tickcount();
    1114           0 :     sched->next_ready_last_bank_idx = bank_idx;
    1115           0 :     return 1UL;
    1116           0 :   }
    1117             : 
    1118           0 :   ulong exec_tile_idx0 = fd_ulong_if( !!exec_fully_ready_bitset, (ulong)fd_ulong_find_lsb( exec_fully_ready_bitset ), ULONG_MAX );
    1119           0 :   ulong exec_queued_cnt = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
    1120           0 :   if( FD_LIKELY( exec_queued_cnt>0UL && fd_ulong_popcnt( exec_fully_ready_bitset ) ) ) { /* Optimize for no fork switching. */
    1121             :     /* Transaction execution has the highest priority.  Current mainnet
    1122             :        block times are very much dominated by critical path transaction
    1123             :        execution.  To achieve the fastest block replay speed, we can't
    1124             :        afford to make any mistake in critical path dispatching.  Any
    1125             :        deviation from perfect critical path dispatching is basically
    1126             :        irrecoverable.  As such, we try to keep all the exec tiles busy
    1127             :        with transaction execution, but we allow at most one transaction
    1128             :        to be in-flight per exec tile.  This is to ensure that whenever a
    1129             :        critical path transaction completes, we have at least one exec
    1130             :        tile, e.g. the one that just completed said transaction, readily
    1131             :        available to continue executing down the critical path. */
    1132           0 :     out->txn_exec->txn_idx = fd_rdisp_get_next_ready( sched->rdisp, bank_idx );
    1133           0 :     if( FD_UNLIKELY( out->txn_exec->txn_idx==0UL ) ) {
    1134             :       /* There are transactions queued but none ready for execution.
    1135             :          This implies that there must be in-flight transactions on whose
    1136             :          completion the queued transactions depend. So we return and
    1137             :          wait for those in-flight transactions to retire.  This is a
    1138             :          policy decision to execute as much as we can down the current
    1139             :          fork. */
    1140           0 :       if( FD_UNLIKELY( !block->txn_exec_in_flight_cnt ) ) {
    1141           0 :         sched->print_buf_sz = 0UL;
    1142           0 :         print_all( sched, block );
    1143           0 :         FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1144           0 :         FD_LOG_CRIT(( "invariant violation: no ready transaction found but block->txn_exec_in_flight_cnt==0" ));
    1145           0 :       }
    1146             : 
    1147             :       /* Next up are PoH tasks.  Same dispatching policy as sigverify
    1148             :          tasks. */
    1149           0 :       ulong poh_ready_bitset = exec_fully_ready_bitset;
    1150           0 :       ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
    1151           0 :       if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
    1152           0 :         dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
    1153           0 :         sched->next_ready_last_tick     = fd_tickcount();
    1154           0 :         sched->next_ready_last_bank_idx = bank_idx;
    1155           0 :         return 1UL;
    1156           0 :       }
    1157             : 
    1158             :       /* Dispatch more sigverify tasks only if at least one exec tile is
    1159             :          executing transactions or completely idle.  Allow at most one
    1160             :          sigverify task in-flight per tile, and only dispatch to
    1161             :          completely idle tiles. */
    1162           0 :       ulong sigverify_ready_bitset = exec_fully_ready_bitset;
    1163           0 :       ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
    1164           0 :       if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
    1165           0 :         dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
    1166           0 :         sched->next_ready_last_tick = sched->txn_info_pool[ out->txn_sigverify->txn_idx ].tick_sigverify_disp = fd_tickcount();
    1167           0 :         sched->next_ready_last_bank_idx = bank_idx;
    1168           0 :         return 1UL;
    1169           0 :       }
    1170           0 :       return 0UL;
    1171           0 :     }
    1172           0 :     out->task_type = FD_SCHED_TT_TXN_EXEC;
    1173           0 :     out->txn_exec->bank_idx = bank_idx;
    1174           0 :     out->txn_exec->slot     = block->slot;
    1175           0 :     out->txn_exec->exec_idx = exec_tile_idx0;
    1176           0 :     FD_TEST( out->txn_exec->exec_idx!=ULONG_MAX );
    1177             : 
    1178           0 :     long now = fd_tickcount();
    1179           0 :     ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
    1180           0 :     ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( exec_ready_bitset0 );
    1181           0 :     sched->metrics->txn_none_in_flight_tickcount     += fd_ulong_if( txn_exec_busy_cnt==0UL && sched->txn_in_flight_last_tick!=LONG_MAX, delta, 0UL );
    1182           0 :     sched->metrics->txn_weighted_in_flight_tickcount += fd_ulong_if( txn_exec_busy_cnt!=0UL, delta, 0UL );
    1183           0 :     sched->metrics->txn_weighted_in_flight_cnt       += delta*txn_exec_busy_cnt;
    1184           0 :     sched->txn_in_flight_last_tick = now;
    1185             : 
    1186           0 :     sched->txn_info_pool[ out->txn_exec->txn_idx ].tick_exec_disp = now;
    1187             : 
    1188           0 :     sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_clear_bit( exec_ready_bitset0, (int)exec_tile_idx0);
    1189           0 :     sched->tile_to_bank_idx[ exec_tile_idx0 ] = bank_idx;
    1190             : 
    1191           0 :     block->txn_exec_in_flight_cnt++;
    1192           0 :     sched->metrics->txn_max_in_flight_cnt = fd_uint_max( sched->metrics->txn_max_in_flight_cnt, block->txn_exec_in_flight_cnt );
    1193             : 
    1194           0 :     ulong total_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ]&sched->sigverify_ready_bitset[ 0 ]&sched->poh_ready_bitset[ 0 ] );
    1195           0 :     if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
    1196           0 :     if( FD_UNLIKELY( block->txn_exec_in_flight_cnt+block->txn_sigverify_in_flight_cnt+block->poh_hashing_in_flight_cnt!=total_exec_busy_cnt ) ) {
    1197             :       /* Ideally we'd simply assert that the two sides of the equation
    1198             :          are equal.  But abandoned blocks throw a wrench into this.  We
    1199             :          allow abandoned blocks to have in-flight transactions that are
    1200             :          naturally drained while we try to dispatch from another block.
    1201             :          In such cases, the total number of in-flight transactions
    1202             :          should include the abandoned blocks too.  The contract is that
    1203             :          blocks with in-flight transactions cannot be abandoned or
    1204             :          demoted from rdisp.  So a dying block has to be the head of one
    1205             :          of the staging lanes. */
    1206             :       // FIXME This contract no longer true if we implement immediate
    1207             :       // demotion of abandoned blocks.
    1208           0 :       ulong total_in_flight = 0UL;
    1209           0 :       for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
    1210           0 :         if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
    1211           0 :           fd_sched_block_t * staged_block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
    1212           0 :           if( FD_UNLIKELY( block_is_in_flight( staged_block )&&!(staged_block==block||staged_block->dying) ) ) {
    1213           0 :             sched->print_buf_sz = 0UL;
    1214           0 :             print_all( sched, staged_block );
    1215           0 :             FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1216           0 :             FD_LOG_CRIT(( "invariant violation: in-flight block is neither active nor dying" ));
    1217           0 :           }
    1218           0 :           total_in_flight += staged_block->txn_exec_in_flight_cnt;
    1219           0 :           total_in_flight += staged_block->txn_sigverify_in_flight_cnt;
    1220           0 :           total_in_flight += staged_block->poh_hashing_in_flight_cnt;
    1221           0 :         }
    1222           0 :       }
    1223           0 :       if( FD_UNLIKELY( total_in_flight!=total_exec_busy_cnt ) ) {
    1224           0 :         sched->print_buf_sz = 0UL;
    1225           0 :         print_all( sched, block );
    1226           0 :         FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1227           0 :         FD_LOG_CRIT(( "invariant violation: total_in_flight %lu != total_exec_busy_cnt %lu", total_in_flight, total_exec_busy_cnt ));
    1228           0 :       }
    1229           0 :       FD_LOG_DEBUG(( "exec_busy_cnt %lu checks out", total_exec_busy_cnt ));
    1230           0 :     }
    1231           0 :     sched->next_ready_last_tick     = now;
    1232           0 :     sched->next_ready_last_bank_idx = bank_idx;
    1233           0 :     return 1UL;
    1234           0 :   }
    1235             : 
    1236             :   /* At this point txn_queued_cnt==0 */
    1237             : 
    1238             :   /* Next up are PoH tasks.  Same dispatching policy as sigverify. */
    1239           0 :   ulong poh_ready_bitset = exec_fully_ready_bitset;
    1240           0 :   ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
    1241           0 :   if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
    1242           0 :     dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
    1243           0 :     sched->next_ready_last_tick     = fd_tickcount();
    1244           0 :     sched->next_ready_last_bank_idx = bank_idx;
    1245           0 :     return 1UL;
    1246           0 :   }
    1247             : 
    1248             :   /* Try to dispatch a sigverify task, but leave one exec tile idle for
    1249             :      critical path execution, unless there's not going to be any more
    1250             :      real transactions for the critical path.  In the degenerate case of
    1251             :      only one exec tile, keep it busy. */
    1252           0 :   ulong sigverify_ready_bitset = exec_fully_ready_bitset;
    1253           0 :   ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
    1254           0 :   if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
    1255           0 :     dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
    1256           0 :     sched->next_ready_last_tick     = sched->txn_info_pool[ out->txn_sigverify->txn_idx ].tick_sigverify_disp = fd_tickcount();
    1257           0 :     sched->next_ready_last_bank_idx = bank_idx;
    1258           0 :     return 1UL;
    1259           0 :   }
    1260             : 
    1261           0 :   if( FD_UNLIKELY( block_should_signal_end( block ) ) ) {
    1262           0 :     FD_TEST( block->block_start_signaled );
    1263           0 :     if( FD_UNLIKELY( verify_ticks_final( block ) ) ) {
    1264             :       /* Tick verification can't be done at parse time (except for
    1265             :          TRAILING_ENTRY), because we may not know the expected number of
    1266             :          hashes yet.  It can't be driven by transaction dispatch or
    1267             :          completion, because the block may be empty.  Similarly, it can't
    1268             :          be driven by PoH hashing, because a bad block may simply not
    1269             :          have any microblocks. */
    1270           0 :       handle_bad_block( sched, block );
    1271           0 :       out->task_type = FD_SCHED_TT_MARK_DEAD;
    1272           0 :       out->mark_dead->bank_idx = bank_idx;
    1273           0 :       sched->next_ready_last_tick     = fd_tickcount();
    1274           0 :       sched->next_ready_last_bank_idx = bank_idx;
    1275           0 :       return 1UL;
    1276           0 :     }
    1277           0 :     out->task_type = FD_SCHED_TT_BLOCK_END;
    1278           0 :     out->block_end->bank_idx = bank_idx;
    1279           0 :     block->block_end_signaled = 1;
    1280           0 :     FD_TEST( block->refcnt );
    1281           0 :     block->refcnt = 0;
    1282           0 :     FD_TEST( ref_q_avail( sched->ref_q ) );
    1283           0 :     ref_q_push_tail( sched->ref_q, bank_idx );
    1284           0 :     sched->next_ready_last_tick     = fd_tickcount();
    1285           0 :     sched->next_ready_last_bank_idx = bank_idx;
    1286           0 :     return 1UL;
    1287           0 :   }
    1288             : 
    1289             :   /* Nothing queued for the active block.  If we haven't received all
    1290             :      the FEC sets for it, then return and wait for more FEC sets, while
    1291             :      there are in-flight transactions.  This is a policy decision to
    1292             :      minimize fork churn and allow for executing down the current fork
    1293             :      as much as we can.  If we have received all the FEC sets for it,
    1294             :      then we'd still like to return and wait for the in-flight
    1295             :      transactions to retire, before switching to a different block.
    1296             : 
    1297             :      Either way, there should be in-flight transactions.  We deactivate
    1298             :      the active block the moment we exhausted transactions from it. */
    1299           0 :   if( FD_UNLIKELY( !block_is_in_flight( block ) ) ) {
    1300           0 :     sched->print_buf_sz = 0UL;
    1301           0 :     print_all( sched, block );
    1302           0 :     FD_LOG_NOTICE(( "%s", sched->print_buf ));
    1303           0 :     FD_LOG_CRIT(( "invariant violation: expected in-flight transactions but none" ));
    1304           0 :   }
    1305             : 
    1306           0 :   return 0UL;
    1307           0 : }
    1308             : 
/* fd_sched_task_done: notify the scheduler that a previously dispatched
   task has completed on exec tile exec_idx.  task_type selects the
   completion path.  txn_idx identifies the transaction for
   TXN_EXEC/TXN_SIGVERIFY tasks (ignored otherwise); data carries the
   fd_execrp_poh_hash_done_msg_t result for POH_HASH tasks (ignored
   otherwise).  Returns 0 on success, -1 if the completed work revealed
   a bad block (PoH hash mismatch, failed mixin, or failed eager tick
   verification). */
int
fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx, void * data ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );

  /* Map the completed task back to the block (bank) it was issued
     against. */
  ulong bank_idx = ULONG_MAX;
  switch( task_type ) {
    case FD_SCHED_TT_BLOCK_START:
    case FD_SCHED_TT_BLOCK_END: {
      (void)txn_idx;
      (void)data;
      /* Block start/end tasks are only issued for the active block. */
      bank_idx = sched->active_bank_idx;
      break;
    }
    case FD_SCHED_TT_TXN_EXEC:
    case FD_SCHED_TT_TXN_SIGVERIFY: {
      (void)data;
      FD_TEST( txn_idx < sched->depth );
      /* Per-tile mapping records which bank the tile was working on. */
      bank_idx = sched->tile_to_bank_idx[ exec_idx ];
      break;
    }
    case FD_SCHED_TT_POH_HASH: {
      (void)txn_idx;
      bank_idx = sched->tile_to_bank_idx[ exec_idx ];
      break;
    }
    default: FD_LOG_CRIT(( "unsupported task_type %lu", task_type ));
  }
  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );

  /* A completing task implies the block is still tracked, staged, and
     known to the dispatcher; anything else is a scheduler bug. */
  if( FD_UNLIKELY( !block->in_sched ) ) {
    FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
                  block->slot, block->parent_slot, bank_idx ));
  }
  if( FD_UNLIKELY( !block->staged ) ) {
    /* Invariant: only staged blocks can have in-flight transactions. */
    FD_LOG_CRIT(( "invariant violation: block->staged==0, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }
  if( FD_UNLIKELY( !block->in_rdisp ) ) {
    /* Invariant: staged blocks must be in the dispatcher. */
    FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }

  /* Track high watermarks of pool occupancy for metrics/diagnostics. */
  block->txn_pool_max_popcnt   = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt - 1UL );
  block->mblk_pool_max_popcnt  = fd_ulong_max( block->mblk_pool_max_popcnt, sched->depth - sched->mblk_pool_free_cnt );
  block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );

  int exec_tile_idx = (int)exec_idx;

  switch( task_type ) {
    case FD_SCHED_TT_BLOCK_START: {
      FD_TEST( !block->block_start_done );
      block->block_start_done = 1;
      break;
    }
    case FD_SCHED_TT_BLOCK_END: {
      /* It may seem redundant to be invoking task_done() on these
         somewhat fake tasks.  But these are necessary to drive state
         transition for empty blocks or slow blocks. */
      FD_TEST( !block->block_end_done );
      block->block_end_done = 1;
      sched->print_buf_sz = 0UL;
      print_block_metrics( sched, block );
      FD_LOG_DEBUG(( "block %lu:%lu replayed fully: %s", block->slot, bank_idx, sched->print_buf ));
      break;
    }
    case FD_SCHED_TT_TXN_EXEC: {
      /* Accumulate in-flight-weighted execution time since the last
         sample point, weighted by how many exec tiles were busy. */
      long now = fd_tickcount();
      ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
      ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ] );
      sched->metrics->txn_weighted_in_flight_tickcount += delta;
      sched->metrics->txn_weighted_in_flight_cnt       += delta*txn_exec_busy_cnt;
      sched->txn_in_flight_last_tick = now;

      sched->txn_info_pool[ txn_idx ].tick_exec_done = now;

      block->txn_exec_done_cnt++;
      block->txn_exec_in_flight_cnt--;
      /* The tile must have been marked busy; mark it ready again. */
      FD_TEST( !fd_ulong_extract_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx ) );
      sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx );
      sched->metrics->txn_exec_done_cnt++;
      txn_bitset_insert( sched->exec_done_set, txn_idx );
      sched->txn_info_pool[ txn_idx ].flags |= FD_SCHED_TXN_EXEC_DONE;
      if( txn_bitset_test( sched->sigverify_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
        /* Release the txn_idx if all tasks on it are done.  This is
           guaranteed to only happen once per transaction because
           whichever one completed first would not release. */
        fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
        sched->txn_pool_free_cnt++;
        block->txn_done_cnt++;
        sched->metrics->txn_done_cnt++;
      } else {
        fd_rdisp_complete_txn( sched->rdisp, txn_idx, 0 );
      }
      break;
    }
    case FD_SCHED_TT_TXN_SIGVERIFY: {
      sched->txn_info_pool[ txn_idx ].tick_sigverify_done = fd_tickcount();
      block->txn_sigverify_done_cnt++;
      block->txn_sigverify_in_flight_cnt--;
      /* The tile must have been marked busy; mark it ready again. */
      FD_TEST( !fd_ulong_extract_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx ) );
      sched->sigverify_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx );
      sched->metrics->txn_sigverify_done_cnt++;
      txn_bitset_insert( sched->sigverify_done_set, txn_idx );
      sched->txn_info_pool[ txn_idx ].flags |= FD_SCHED_TXN_SIGVERIFY_DONE;
      if( txn_bitset_test( sched->exec_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
        /* Release the txn_idx if all tasks on it are done.  This is
           guaranteed to only happen once per transaction because
           whichever one completed first would not release. */
        fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
        sched->txn_pool_free_cnt++;
        block->txn_done_cnt++;
        sched->metrics->txn_done_cnt++;
      }
      break;
    }
    case FD_SCHED_TT_POH_HASH: {
      block->poh_hashing_in_flight_cnt--;
      FD_TEST( !fd_ulong_extract_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx ) );
      sched->poh_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
      /* Fold the partial hashing result back into the microblock. */
      fd_execrp_poh_hash_done_msg_t * msg = fd_type_pun( data );
      fd_sched_mblk_t * mblk = sched->mblk_pool+msg->mblk_idx;
      mblk->curr_hashcnt += msg->hashcnt;
      memcpy( mblk->curr_hash, msg->hash, sizeof(fd_hash_t) );
      ulong hashcnt_todo = mblk->hashcnt-mblk->curr_hashcnt;
      if( !hashcnt_todo ) {
        block->poh_hashing_done_cnt++;
        sched->metrics->mblk_poh_hashed_cnt++;
        if( FD_LIKELY( !mblk->is_tick ) ) {
          /* This is not a tick.  Enqueue for mixin. */
          mblk_slist_idx_push_tail( block->mblks_mixin_in_progress, msg->mblk_idx, sched->mblk_pool );
        } else {
          /* This is a tick.  No need to mixin.  Check the hash value
             right away. */
          block->poh_hash_cmp_done_cnt++;
          sched->metrics->mblk_poh_done_cnt++;
          free_mblk( sched, block, (uint)msg->mblk_idx );
          if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk->end_hash, sizeof(fd_hash_t) ) ) ) {
            FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
            FD_BASE58_ENCODE_32_BYTES( mblk->end_hash->hash, ref_str );
            FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %lu, ours %s, claimed %s, hashcnt %lu, is_tick, slot %lu, parent slot %lu", msg->mblk_idx, our_str, ref_str, mblk->hashcnt, block->slot, block->parent_slot ));
            handle_bad_block( sched, block );
            return -1;
          }
        }
        /* Try to drain the mixin queue. */
        int mixin_res;
        while( (mixin_res=maybe_mixin( sched, block )) ) {
          if( FD_UNLIKELY( mixin_res==-1 ) ) {
            /* Mixin detected a mismatch: the block is bad. */
            handle_bad_block( sched, block );
            return -1;
          }
          FD_TEST( mixin_res==1||mixin_res==2 );
        }
      } else {
        /* More hashing remains; requeue for another hashing pass. */
        mblk_slist_idx_push_tail( block->mblks_hashing_in_progress, msg->mblk_idx, sched->mblk_pool );
      }
      if( FD_UNLIKELY( verify_ticks_eager( block ) ) ) {
        handle_bad_block( sched, block );
        return -1;
      }
      break;
    }
  }

  /* If this completion drained the last in-flight task of a dying
     block, abandon its subtree and pick a new block to replay. */
  if( FD_UNLIKELY( block->dying && !block_is_in_flight( block ) ) ) {
    if( FD_UNLIKELY( sched->active_bank_idx==bank_idx ) ) {
      FD_LOG_CRIT(( "invariant violation: active block shouldn't be dying, bank_idx %lu, slot %lu, parent slot %lu",
                    bank_idx, block->slot, block->parent_slot ));
    }
    FD_LOG_DEBUG(( "dying block %lu drained", block->slot ));
    subtree_abandon( sched, block );
    try_activate_block( sched );
    return 0;
  }

  if( FD_UNLIKELY( !block->dying && sched->active_bank_idx!=bank_idx ) ) {
    /* Block is not dead.  So we should be actively replaying it. */
    fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
    FD_LOG_CRIT(( "invariant violation: sched->active_bank_idx %lu, slot %lu, parent slot %lu, bank_idx %lu, slot %lu, parent slot %lu",
                  sched->active_bank_idx, active_block->slot, active_block->parent_slot,
                  bank_idx, block->slot, block->parent_slot ));
  }

  /* Possibly deactivate/switch the active block now that state moved. */
  maybe_switch_block( sched, bank_idx );

  return 0;
}
    1498             : 
    1499             : void
    1500           0 : fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx ) {
    1501           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1502           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1503             : 
    1504           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1505           0 :   if( FD_UNLIKELY( !block->in_sched ) ) {
    1506           0 :     FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
    1507           0 :                   block->slot, block->parent_slot, bank_idx ));
    1508           0 :   }
    1509             : 
    1510           0 :   FD_LOG_INFO(( "abandoning block %lu slot %lu", bank_idx, block->slot ));
    1511           0 :   sched->print_buf_sz = 0UL;
    1512           0 :   print_all( sched, block );
    1513           0 :   FD_LOG_DEBUG(( "%s", sched->print_buf ));
    1514             : 
    1515           0 :   subtree_abandon( sched, block );
    1516           0 :   try_activate_block( sched );
    1517           0 : }
    1518             : 
    1519             : void
    1520           0 : fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx, ulong slot ) {
    1521           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1522           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1523             : 
    1524           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1525           0 :   add_block( sched, bank_idx, parent_bank_idx );
    1526           0 :   block->slot                   = slot;
    1527           0 :   block->fec_eos                = 1;
    1528           0 :   block->block_start_signaled   = 1;
    1529           0 :   block->block_end_signaled     = 1;
    1530           0 :   block->block_start_done       = 1;
    1531           0 :   block->block_end_done         = 1;
    1532           0 :   block->refcnt                 = 0;
    1533           0 :   if( FD_LIKELY( parent_bank_idx!=ULONG_MAX ) ) {
    1534           0 :     fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
    1535           0 :     block->parent_slot = parent_block->slot;
    1536           0 :   }
    1537           0 :   if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) {
    1538             :     /* Assumes that a NULL parent implies the snapshot slot. */
    1539           0 :     block->parent_slot = ULONG_MAX;
    1540           0 :     block->rooted      = 1;
    1541           0 :     sched->root_idx    = bank_idx;
    1542           0 :   }
    1543           0 : }
    1544             : 
    1545             : void
    1546           0 : fd_sched_advance_root( fd_sched_t * sched, ulong root_idx ) {
    1547           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1548           0 :   FD_TEST( root_idx<sched->block_cnt_max );
    1549           0 :   FD_TEST( sched->root_idx<sched->block_cnt_max );
    1550           0 :   FD_TEST( ref_q_empty( sched->ref_q ) );
    1551             : 
    1552           0 :   fd_sched_block_t * new_root = block_pool_ele( sched, root_idx );
    1553           0 :   fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
    1554           0 :   if( FD_UNLIKELY( !old_root->rooted ) ) {
    1555           0 :     FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
    1556           0 :                   old_root->slot, old_root->parent_slot ));
    1557           0 :   }
    1558             : 
    1559             :   /* Early exit if the new root is the same as the old root. */
    1560           0 :   if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
    1561           0 :     FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
    1562           0 :                   new_root->slot, new_root->parent_slot ));
    1563           0 :     return;
    1564           0 :   }
    1565             : 
    1566           0 :   subtree_prune( sched, sched->root_idx, root_idx );
    1567             : 
    1568           0 :   new_root->parent_idx = ULONG_MAX;
    1569           0 :   sched->root_idx = root_idx;
    1570           0 : }
    1571             : 
/* fd_sched_root_notify: notify the scheduler that root_idx is the new
   root.  Marks the path from the new root up to the old root as
   rooted, verifies each block on that path is done and neither dying,
   staged, nor in the dispatcher, abandons all minority forks hanging
   off the rooted path, and reactivates a block if the active one got
   abandoned in the process.  No-op when the root is unchanged. */
void
fd_sched_root_notify( fd_sched_t * sched, ulong root_idx ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );
  FD_TEST( root_idx<sched->block_cnt_max );
  FD_TEST( sched->root_idx<sched->block_cnt_max );
  FD_TEST( ref_q_empty( sched->ref_q ) );

  fd_sched_block_t * block    = block_pool_ele( sched, root_idx );
  fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
  if( FD_UNLIKELY( !old_root->rooted ) ) {
    FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
                  old_root->slot, old_root->parent_slot ));
  }

  /* Early exit if the new root is the same as the old root. */
  if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
    FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
    return;
  }

  /* Mark every node from the new root up through its parents to the
     old root as being rooted. */
  fd_sched_block_t * curr = block;
  fd_sched_block_t * prev = NULL; /* last node visited; should end up at old_root */
  while( curr ) {
    if( FD_UNLIKELY( !block_is_done( curr ) ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is not done, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    if( FD_UNLIKELY( curr->dying ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is dying, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    if( FD_UNLIKELY( curr->staged ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is staged, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    if( FD_UNLIKELY( curr->in_rdisp ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is in the dispatcher, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    curr->rooted = 1;
    prev = curr;
    curr = block_pool_ele( sched, curr->parent_idx );
  }

  /* If we didn't reach the old root, the new root is not a descendant. */
  if( FD_UNLIKELY( prev!=old_root ) ) {
    FD_LOG_CRIT(( "invariant violation: new root is not a descendant of old root, new root slot %lu, parent slot %lu, old root slot %lu, parent slot %lu",
                  block->slot, block->parent_slot, old_root->slot, old_root->parent_slot ));
  }

  /* Remember the active block so we can tell below whether the
     abandonment pass deactivated it. */
  ulong old_active_bank_idx = sched->active_bank_idx;

  /* Now traverse from old root towards new root, and abandon all
     minority forks. */
  curr = old_root;
  while( curr && curr->rooted && curr!=block ) { /* curr!=block to avoid abandoning good forks. */
    fd_sched_block_t * rooted_child_block = NULL;
    ulong              child_idx          = curr->child_idx;
    /* Walk the sibling list: keep the rooted child (the path toward
       the new root), abandon every other child's subtree. */
    while( child_idx!=ULONG_MAX ) {
      fd_sched_block_t * child = block_pool_ele( sched, child_idx );
      child_idx = child->sibling_idx;
      if( child->rooted ) {
        rooted_child_block = child;
      } else {
        /* This is a minority fork. */
        ulong abandoned_cnt = sched->metrics->block_abandoned_cnt;
        subtree_abandon( sched, child );
        abandoned_cnt = sched->metrics->block_abandoned_cnt-abandoned_cnt;
        if( FD_UNLIKELY( abandoned_cnt ) ) FD_LOG_DEBUG(( "abandoned %lu blocks on minority fork starting at block %lu:%lu", abandoned_cnt, child->slot, block_to_idx( sched, child ) ));
      }
    }
    curr = rooted_child_block;
  }

  /* If the active block got abandoned, we need to reset it. */
  if( sched->active_bank_idx==ULONG_MAX ) {
    sched->metrics->deactivate_pruned_cnt += fd_uint_if( old_active_bank_idx!=ULONG_MAX, 1U, 0U );
    try_activate_block( sched );
  }
}
    1655             : 
    1656             : ulong
    1657           0 : fd_sched_pruned_block_next( fd_sched_t * sched ) {
    1658           0 :   if( !ref_q_empty( sched->ref_q ) ) {
    1659           0 :     ulong bank_idx = ref_q_pop_head( sched->ref_q );
    1660           0 :     return bank_idx;
    1661           0 :   }
    1662           0 :   return ULONG_MAX;
    1663           0 : }
    1664             : 
    1665             : void
    1666           0 : fd_sched_set_poh_params( fd_sched_t * sched, ulong bank_idx, ulong tick_height, ulong max_tick_height, ulong hashes_per_tick, fd_hash_t const * start_poh ) {
    1667           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1668           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1669           0 :   FD_TEST( max_tick_height>tick_height );
    1670           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1671           0 :   block->tick_height = tick_height;
    1672           0 :   block->max_tick_height = max_tick_height;
    1673           0 :   block->hashes_per_tick = hashes_per_tick;
    1674             :   #if FD_SCHED_SKIP_POH
    1675             :   /* No-op. */
    1676             :   #else
    1677           0 :   if( FD_LIKELY( block->mblk_cnt ) ) {
    1678             :     /* Fix up the first mblk's curr_hash. */
    1679           0 :     FD_TEST( block->mblk_unhashed_cnt );
    1680           0 :     FD_TEST( !mblk_slist_is_empty( block->mblks_unhashed, sched->mblk_pool ) );
    1681           0 :     FD_TEST( !block->mblk_freed_cnt );
    1682           0 :     fd_sched_mblk_t * first_mblk = sched->mblk_pool + mblk_slist_idx_peek_head( block->mblks_unhashed, sched->mblk_pool );
    1683           0 :     memcpy( first_mblk->curr_hash, start_poh, sizeof(fd_hash_t) );
    1684           0 :   } else {
    1685           0 :     memcpy( block->poh_hash, start_poh, sizeof(fd_hash_t) );
    1686           0 :   }
    1687           0 :   #endif
    1688           0 : }
    1689             : 
    1690             : fd_txn_p_t *
    1691           0 : fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx ) {
    1692           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1693           0 :   if( FD_UNLIKELY( txn_idx>=sched->depth ) ) {
    1694           0 :     return NULL;
    1695           0 :   }
    1696           0 :   return sched->txn_pool+txn_idx;
    1697           0 : }
    1698             : 
    1699             : fd_sched_txn_info_t *
    1700           0 : fd_sched_get_txn_info( fd_sched_t * sched, ulong txn_idx ) {
    1701           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1702           0 :   if( FD_UNLIKELY( txn_idx>=sched->depth ) ) {
    1703           0 :     return NULL;
    1704           0 :   }
    1705           0 :   return sched->txn_info_pool+txn_idx;
    1706           0 : }
    1707             : 
    1708             : fd_hash_t *
    1709           0 : fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx ) {
    1710           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1711           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1712           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1713           0 :   FD_TEST( block->fec_eos );
    1714           0 :   FD_TEST( block->mblk_cnt );
    1715           0 :   return block->poh_hash;
    1716           0 : }
    1717             : 
    1718             : uint
    1719           0 : fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx ) {
    1720           0 :   FD_TEST( sched->canary==FD_SCHED_MAGIC );
    1721           0 :   FD_TEST( bank_idx<sched->block_cnt_max );
    1722           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    1723           0 :   return block->shred_cnt;
    1724           0 : }
    1725             : 
    1726             : void
    1727           0 : fd_sched_metrics_write( fd_sched_t * sched ) {
    1728           0 :   FD_MGAUGE_SET( REPLAY, SCHED_ACTIVE_BANK_IDX, sched->active_bank_idx );
    1729           0 :   FD_MGAUGE_SET( REPLAY, SCHED_LAST_DISPATCH_BANK_IDX, sched->next_ready_last_bank_idx );
    1730           0 :   FD_MGAUGE_SET( REPLAY, SCHED_LAST_DISPATCH_TIME_NANOS, fd_ulong_if( sched->next_ready_last_tick!=LONG_MAX, (ulong)sched->next_ready_last_tick, ULONG_MAX ) );
    1731           0 :   FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_POPCNT, (ulong)fd_ulong_popcnt( sched->staged_bitset ) );
    1732           0 :   FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_POPCNT_WMK, sched->staged_popcnt_wmk );
    1733           0 :   FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_HEAD_BANK_IDX0, fd_ulong_if( fd_ulong_extract_bit( sched->staged_bitset, 0 ), sched->staged_head_bank_idx[ 0 ], ULONG_MAX ) );
    1734           0 :   FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_HEAD_BANK_IDX1, fd_ulong_if( fd_ulong_extract_bit( sched->staged_bitset, 1 ), sched->staged_head_bank_idx[ 1 ], ULONG_MAX ) );
    1735           0 :   FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_HEAD_BANK_IDX2, fd_ulong_if( fd_ulong_extract_bit( sched->staged_bitset, 2 ), sched->staged_head_bank_idx[ 2 ], ULONG_MAX ) );
    1736           0 :   FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_HEAD_BANK_IDX3, fd_ulong_if( fd_ulong_extract_bit( sched->staged_bitset, 3 ), sched->staged_head_bank_idx[ 3 ], ULONG_MAX ) );
    1737           0 :   FD_MGAUGE_SET( REPLAY, SCHED_TXN_POOL_POPCNT, sched->depth-sched->txn_pool_free_cnt-1UL );
    1738           0 :   FD_MGAUGE_SET( REPLAY, SCHED_TXN_POOL_SIZE, sched->depth-1UL );
    1739           0 :   FD_MGAUGE_SET( REPLAY, SCHED_MBLK_POOL_POPCNT, sched->depth-sched->mblk_pool_free_cnt );
    1740           0 :   FD_MGAUGE_SET( REPLAY, SCHED_MBLK_POOL_SIZE, sched->depth );
    1741           0 :   FD_MGAUGE_SET( REPLAY, SCHED_BLOCK_POOL_POPCNT, sched->block_pool_popcnt );
    1742           0 :   FD_MGAUGE_SET( REPLAY, SCHED_BLOCK_POOL_SIZE, sched->block_cnt_max );
    1743             : 
    1744           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_ADDED_STAGED, sched->metrics->block_added_staged_cnt );
    1745           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_ADDED_UNSTAGED, sched->metrics->block_added_unstaged_cnt );
    1746           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_REPLAYED, sched->metrics->block_removed_cnt );
    1747           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_ABANDONED, sched->metrics->block_abandoned_cnt );
    1748           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_BAD, sched->metrics->block_bad_cnt );
    1749           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_PROMOTED, sched->metrics->block_promoted_cnt );
    1750           0 :   FD_MCNT_SET( REPLAY, SCHED_BLOCK_DEMOTED, sched->metrics->block_demoted_cnt );
    1751           0 :   FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_NO_CHILD, sched->metrics->deactivate_no_child_cnt );
    1752           0 :   FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_NO_WORK, sched->metrics->deactivate_no_txn_cnt );
    1753           0 :   FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_ABANDONED, sched->metrics->deactivate_abandoned_cnt );
    1754           0 :   FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_MINORITY, sched->metrics->deactivate_pruned_cnt );
    1755           0 :   FD_MCNT_SET( REPLAY, SCHED_LANE_SWITCH, sched->metrics->lane_switch_cnt );
    1756           0 :   FD_MCNT_SET( REPLAY, SCHED_LANE_PROMOTE, sched->metrics->lane_promoted_cnt );
    1757           0 :   FD_MCNT_SET( REPLAY, SCHED_LANE_DEMOTE, sched->metrics->lane_demoted_cnt );
    1758           0 :   FD_MCNT_SET( REPLAY, SCHED_FORK_OBSERVED, sched->metrics->fork_observed_cnt );
    1759           0 :   FD_MCNT_SET( REPLAY, SCHED_ALUT_SUCCESS, sched->metrics->alut_success_cnt );
    1760           0 :   FD_MCNT_SET( REPLAY, SCHED_ALUT_FAILURE, sched->metrics->alut_serializing_cnt );
    1761           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_ABANDONED_PARSED, sched->metrics->txn_abandoned_parsed_cnt );
    1762           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_ABANDONED_EXEC, sched->metrics->txn_abandoned_exec_done_cnt );
    1763           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_ABANDONED_DONE, sched->metrics->txn_abandoned_done_cnt );
    1764           0 :   FD_MCNT_SET( REPLAY, SCHED_WEIGHTED_IN_FLIGHT, sched->metrics->txn_weighted_in_flight_cnt );
    1765           0 :   FD_MCNT_SET( REPLAY, SCHED_WEIGHTED_IN_FLIGHT_DURATION, sched->metrics->txn_weighted_in_flight_tickcount );
    1766           0 :   FD_MCNT_SET( REPLAY, SCHED_NONE_IN_FLIGHT_DURATION, sched->metrics->txn_none_in_flight_tickcount );
    1767           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_PARSED, sched->metrics->txn_parsed_cnt );
    1768           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_EXEC, sched->metrics->txn_exec_done_cnt );
    1769           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_SIGVERIFY, sched->metrics->txn_sigverify_done_cnt );
    1770           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_MIXIN, sched->metrics->txn_mixin_done_cnt );
    1771           0 :   FD_MCNT_SET( REPLAY, SCHED_TXN_DONE, sched->metrics->txn_done_cnt );
    1772           0 :   FD_MCNT_SET( REPLAY, SCHED_MBLK_PARSED, sched->metrics->mblk_parsed_cnt );
    1773           0 :   FD_MCNT_SET( REPLAY, SCHED_MBLK_HASHED, sched->metrics->mblk_poh_hashed_cnt );
    1774           0 :   FD_MCNT_SET( REPLAY, SCHED_MBLK_DONE, sched->metrics->mblk_poh_done_cnt );
    1775           0 :   FD_MCNT_SET( REPLAY, SCHED_BYTES_INGESTED, sched->metrics->bytes_ingested_cnt );
    1776           0 :   FD_MCNT_SET( REPLAY, SCHED_BYTES_INGESTED_PADDING, sched->metrics->bytes_ingested_unparsed_cnt );
    1777           0 :   FD_MCNT_SET( REPLAY, SCHED_BYTES_DROPPED, sched->metrics->bytes_dropped_cnt );
    1778           0 :   FD_MCNT_SET( REPLAY, SCHED_FEC, sched->metrics->fec_cnt );
    1779           0 : }
    1780             : 
    1781             : char *
    1782           0 : fd_sched_get_state_cstr( fd_sched_t * sched ) {
    1783           0 :   sched->print_buf_sz = 0UL;
    1784           0 :   print_metrics( sched );
    1785           0 :   print_sched( sched );
    1786           0 :   return sched->print_buf;
    1787           0 : }
    1788             : 
    1789           0 : void * fd_sched_leave ( fd_sched_t * sched ) { return sched; }
    1790           0 : void * fd_sched_delete( void * mem         ) { return   mem; }
    1791             : 
    1792             : 
    1793             : /* Internal helpers. */
    1794             : 
/* add_block claims the block pool element at bank_idx, resets it to a
   pristine empty block, and links it into the fork tree as a child of
   parent_bank_idx.  parent_bank_idx==ULONG_MAX denotes a root block
   with no parent.  Assumes the element is not already tracked by the
   scheduler (checked). */
static void
add_block( fd_sched_t * sched,
           ulong        bank_idx,
           ulong        parent_bank_idx ) {
  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
  FD_TEST( !block->in_sched ); /* must not be double-added */
  sched->block_pool_popcnt++;

  /* Reset per-block execution/verification progress counters. */
  block->txn_parsed_cnt              = 0U;
  block->txn_exec_in_flight_cnt      = 0U;
  block->txn_exec_done_cnt           = 0U;
  block->txn_sigverify_in_flight_cnt = 0U;
  block->txn_sigverify_done_cnt      = 0U;
  block->poh_hashing_in_flight_cnt   = 0U;
  block->poh_hashing_done_cnt        = 0U;
  block->poh_hash_cmp_done_cnt       = 0U;
  block->txn_done_cnt                = 0U;
  block->shred_cnt                   = 0U;
  block->mblk_cnt                    = 0U;
  block->mblk_freed_cnt              = 0U;
  block->mblk_tick_cnt               = 0U;
  block->mblk_unhashed_cnt           = 0U;
  block->hashcnt                     = 0UL;
  /* Snapshot current pool occupancies as this block's starting
     high-water marks.  The -1UL on the txn pool presumably accounts for
     a reserved sentinel element (it matches the -1UL used when
     reporting SCHED_TXN_POOL_SIZE) -- TODO confirm. */
  block->txn_pool_max_popcnt         = sched->depth - sched->txn_pool_free_cnt - 1UL;
  block->mblk_pool_max_popcnt        = sched->depth - sched->mblk_pool_free_cnt;
  block->block_pool_max_popcnt       = sched->block_pool_popcnt;

  /* Drop any stale microblock list membership left over from a prior
     use of this pool element. */
  mblk_slist_remove_all( block->mblks_unhashed, sched->mblk_pool );
  mblk_slist_remove_all( block->mblks_hashing_in_progress, sched->mblk_pool );
  mblk_slist_remove_all( block->mblks_mixin_in_progress, sched->mblk_pool );
  /* Reset PoH tick bookkeeping.  ULONG_MAX marks the PoH params
     (tick heights, hashes_per_tick) as not yet initialized. */
  block->last_mblk_is_tick = 0;
  block->max_tick_hashcnt  = 0UL;
  block->curr_tick_hashcnt = 0UL;
  block->tick_height       = ULONG_MAX;
  block->max_tick_height   = ULONG_MAX;
  block->hashes_per_tick   = ULONG_MAX;
  block->inconsistent_hashes_per_tick = 0;

  /* Reset FEC ingest/parse cursor state.  fec_sob==1 means the next
     ingested bytes start a new entry batch. */
  block->mblks_rem    = 0UL;
  block->txns_rem     = 0UL;
  block->fec_buf_sz   = 0U;
  block->fec_buf_boff = 0U;
  block->fec_buf_soff = 0U;
  block->fec_eob      = 0;
  block->fec_sob      = 1;

  /* Reset lifecycle flags; the scheduler holds the initial reference. */
  block->fec_eos              = 0;
  block->rooted               = 0;
  block->dying                = 0;
  block->refcnt               = 1;
  block->in_sched             = 1;
  block->in_rdisp             = 0;
  block->block_start_signaled = 0;
  block->block_end_signaled   = 0;
  block->block_start_done     = 0;
  block->block_end_done       = 0;
  block->staged               = 0;

  block->luf_depth = 0UL;

  /* New leaf node, no child, no sibling. */
  block->child_idx   = ULONG_MAX;
  block->sibling_idx = ULONG_MAX;
  block->parent_idx  = ULONG_MAX;

  if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) {
    return; /* Root block: no tree linkage to perform. */
  }

  /* node->parent link */
  fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
  block->parent_idx = parent_bank_idx;

  /* parent->node and sibling->node links */
  ulong child_idx = bank_idx;
  if( FD_LIKELY( parent_block->child_idx==ULONG_MAX ) ) { /* Optimize for no forking. */
    parent_block->child_idx = child_idx;
  } else {
    /* Parent already has a child: append to the end of the sibling
       list.  A second child of the same parent is a fork. */
    fd_sched_block_t * curr_block = block_pool_ele( sched, parent_block->child_idx );
    while( curr_block->sibling_idx!=ULONG_MAX ) {
      curr_block = block_pool_ele( sched, curr_block->sibling_idx );
    }
    curr_block->sibling_idx = child_idx;
    sched->metrics->fork_observed_cnt++;
  }

  /* A child of a dying parent starts out dying as well. */
  if( FD_UNLIKELY( parent_block->dying ) ) {
    block->dying = 1;
  }
}
    1885             : 
    1886             : /* Agave invokes verify_ticks() anywhere between once per slot and once
   per entry batch, before transactions are parsed or dispatched for
    1888             :    execution.  We can't do quite the same thing due to out-of-order
    1889             :    scheduling and the fact that we allow parsing to run well ahead of
    1890             :    block boundaries.  Out-of-order scheduling is good, so is overlapping
    1891             :    parsing with execution.  The easiest thing for us would be to just
    1892             :    delay verify_ticks() wholesale till the end of a slot, except that
    1893             :    this opens us up to bogus tick and hash counts, potentially causing
    1894             :    runaway consumption of compute cycles.  Of all the checks that are
    1895             :    performed in verify_ticks(), two types are relevant to mitigating
    1896             :    this risk.  One is constraining the number of ticks, and the other is
    1897             :    constraining the number of hashes per tick.  So we implement these
    1898             :    checks here, and perform them on the fly as eagerly as possible.
    1899             : 
    1900             :    Returns 0 on success. */
static int
verify_ticks_eager( fd_sched_block_t * block ) {
  FD_TEST( block->hashes_per_tick!=ULONG_MAX ); /* PoH params initialized. */

  /* Bound the tick count: ticks parsed so far may never push the block
     past its max tick height. */
  if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height>block->max_tick_height ) ) {
    FD_LOG_INFO(( "bad block: TOO_MANY_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
    return -1;
  }
  /* Bound the hashes per tick: once at least one tick has been seen,
     every completed tick must have accumulated exactly hashes_per_tick
     hashes (inconsistent_hashes_per_tick records any mid-block
     mismatch).  hashes_per_tick<=1 (low power hashing / hashing
     disabled) is exempt. */
  if( FD_UNLIKELY( block->hashes_per_tick>1UL && block->mblk_tick_cnt && (block->hashes_per_tick!=block->max_tick_hashcnt||block->inconsistent_hashes_per_tick) ) ) {
    FD_LOG_INFO(( "bad block: INVALID_TICK_HASH_COUNT, slot %lu, parent slot %lu, expected %lu, got %lu", block->slot, block->parent_slot, block->hashes_per_tick, block->max_tick_hashcnt ));
    return -1;
  }
  if( FD_UNLIKELY( block->hashes_per_tick>1UL && block->curr_tick_hashcnt>block->hashes_per_tick ) ) { /* >1 to ignore low power hashing or no hashing cases */
    /* We couldn't really check this at parse time because we may not
       have the expected hashes per tick value yet.  We couldn't delay
       this till after all PoH hashing is done, because this would be a
       DoS vector.  This can't be merged with the above check, because a
       malformed block might not end with a tick.  As in, a block might
       end with a non-tick microblock with a high hashcnt.  Note that
       checking the hashcnt between ticks transitively places an upper
       bound on the hashcnt of individual microblocks, thus mitigating
       the DoS vector. */
    FD_LOG_INFO(( "bad block: INVALID_TICK_HASH_COUNT, observed cumulative tick_hashcnt %lu, expected %lu, slot %lu, parent slot %lu", block->curr_tick_hashcnt, block->hashes_per_tick, block->slot, block->parent_slot ));
    return -1;
  }

  return 0;
}
    1929             : 
    1930             : /* https://github.com/anza-xyz/agave/blob/v3.0.6/ledger/src/blockstore_processor.rs#L1057
    1931             : 
    1932             :    The only check we don't do here is TRAILING_ENTRY, which can be done
    1933             :    independently when we parse the final FEC set of a block.
    1934             : 
    1935             :    Returns 0 on success. */
    1936             : static int
    1937           0 : verify_ticks_final( fd_sched_block_t * block ) {
    1938           0 :   FD_TEST( block->fec_eos );
    1939             : 
    1940           0 :   if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height<block->max_tick_height ) ) {
    1941           0 :     FD_LOG_INFO(( "bad block: TOO_FEW_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
    1942           0 :     return -1;
    1943           0 :   }
    1944             : 
    1945           0 :   return verify_ticks_eager( block );
    1946           0 : }
    1947             : 
/* CHECK evaluates cond and, on failure, returns FD_SCHED_AGAIN_LATER
   from the enclosing parse function: the buffered data is insufficient
   to make progress, so parsing should be retried once more bytes have
   been ingested. */
#define CHECK( cond )  do {             \
  if( FD_UNLIKELY( !(cond) ) ) {        \
    return FD_SCHED_AGAIN_LATER;        \
  }                                     \
} while( 0 )

/* CHECK that it is safe to read at least n more bytes. */
#define CHECK_LEFT( n ) CHECK( (n)<=(block->fec_buf_sz-block->fec_buf_soff) )
    1956             : 
    1957             : /* Consume as much as possible from the buffer.  By the end of this
    1958             :    function, we will either have residual data that is unparseable only
    1959             :    because it is a batch that straddles FEC set boundaries, or we will
    1960             :    have reached the end of a batch.  In the former case, any remaining
    1961             :    bytes should be concatenated with the next FEC set for further
    1962             :    parsing.  In the latter case, any remaining bytes should be thrown
    1963             :    away. */
/* fd_sched_parse incrementally consumes the block's FEC buffer: any
   pending transactions of the current microblock, then the next
   microblock header, then (at start of an entry batch) the batch's
   microblock count.  Returns FD_SCHED_OK when no further progress is
   possible right now, FD_SCHED_AGAIN_LATER when mid-transaction data is
   still needed, or FD_SCHED_BAD_BLOCK on a provably invalid block. */
FD_WARN_UNUSED static int
fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
  while( 1 ) {
    /* Drain the transactions remaining in the current microblock. */
    while( block->txns_rem>0UL ) {
      int err;
      if( FD_UNLIKELY( (err=fd_sched_parse_txn( sched, block, alut_ctx ))!=FD_SCHED_OK ) ) {
        return err;
      }
    }
    /* Current microblock fully parsed; more microblocks expected in
       this batch: parse the next microblock header. */
    if( block->txns_rem==0UL && block->mblks_rem>0UL ) {
      if( FD_UNLIKELY( block->mblk_cnt>=FD_SCHED_MAX_MBLK_PER_SLOT ) ) {
        /* A valid block shouldn't contain more than this amount of
           microblocks. */
        FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, mblk_cnt %u (%u ticks) >= %lu", block->slot, block->parent_slot, block->mblk_cnt, block->mblk_tick_cnt, FD_SCHED_MAX_MBLK_PER_SLOT ));
        return FD_SCHED_BAD_BLOCK;
      }

      CHECK_LEFT( sizeof(fd_microblock_hdr_t) );
      fd_microblock_hdr_t * hdr = (fd_microblock_hdr_t *)fd_type_pun( block->fec_buf+block->fec_buf_soff );
      block->fec_buf_soff      += (uint)sizeof(fd_microblock_hdr_t);

      /* Sanity-bound the header-declared txn and hash counts before
         trusting them, to cap resource consumption on bogus input. */
      if( FD_UNLIKELY( hdr->txn_cnt>fd_ulong_sat_sub( FD_MAX_TXN_PER_SLOT, block->txn_parsed_cnt ) ) ) {
        FD_LOG_INFO(( "bad block: illegally many transactions specified in microblock header in slot %lu, parent slot %lu, txn_parsed_cnt %u, hdr->txn_cnt %lu", block->slot, block->parent_slot, block->txn_parsed_cnt, hdr->txn_cnt ));
        return FD_SCHED_BAD_BLOCK;
      }
      if( FD_UNLIKELY( hdr->hash_cnt>fd_ulong_sat_sub( FD_RUNTIME_MAX_HASHES_PER_TICK, block->curr_tick_hashcnt ) ) ) {
        FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, curr_tick_hashcnt %lu, hdr->hash_cnt %lu", block->slot, block->parent_slot, block->curr_tick_hashcnt, hdr->hash_cnt ));
        return FD_SCHED_BAD_BLOCK;
      }

      block->mblks_rem--;
      block->txns_rem = hdr->txn_cnt;

      /* Pop a microblock tracking element off the free list. */
      FD_TEST( sched->mblk_pool_free_cnt ); /* can_ingest should have guaranteed sufficient free capacity. */
      uint mblk_idx = sched->mblk_pool_free_head;
      sched->mblk_pool_free_head = sched->mblk_pool[ mblk_idx ].next;
      sched->mblk_pool_free_cnt--;

      fd_sched_mblk_t * mblk = sched->mblk_pool+mblk_idx;
      mblk->start_txn_idx = block->txn_parsed_cnt;
      mblk->end_txn_idx   = mblk->start_txn_idx+hdr->txn_cnt;
      /* One might think that every microblock needs to have at least
         one hash, otherwise the block should be considered invalid.  A
         vanilla validator certainly produces microblocks that conform
         to this.  But a modded validator could in theory produce zero
         hash microblocks.  Agave's replay stage will happily take those
         microblocks.  The Agave implementation-defined way of doing PoH
         verify is as follows:

         For a tick microblock, do the same number of hashes as
         specified by the microblock.  Zero hashes are allowed, in which
         case this tick would have the same ending hash value as the
         previous microblock.

         For a transaction microblock, if the number of hashes specified
         by the microblock is <= 1, then do zero pure hashes, and simply
         do a mixin/record.  Otherwise, do (number of hashes-1) amount
         of pure hashing, and then do a mixin.  However, note that for
         the purposes of tick_verify, the number of hashes specified by
         the microblock is taken verbatim.

         https://github.com/anza-xyz/agave/blob/v3.0.6/entry/src/entry.rs#L232

         We implement the above for consensus. */
      mblk->hashcnt = fd_ulong_sat_sub( hdr->hash_cnt, fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL ) ); /* For pure hashing, implement the above. */
      memcpy( mblk->end_hash, hdr->hash, sizeof(fd_hash_t) );
      memcpy( mblk->curr_hash, block->poh_hash, sizeof(fd_hash_t) );
      mblk->curr_txn_idx = mblk->start_txn_idx;
      mblk->curr_hashcnt = 0UL;
      mblk->curr_sig_cnt = 0U;
      mblk->is_tick      = !hdr->txn_cnt; /* a zero-txn microblock is a tick */

      /* Update block tracking. */
      block->curr_tick_hashcnt = fd_ulong_sat_add( hdr->hash_cnt, block->curr_tick_hashcnt ); /* For tick_verify, take the number of hashes verbatim. */
      block->hashcnt += mblk->hashcnt+fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL );
      memcpy( block->poh_hash, hdr->hash, sizeof(fd_hash_t) );
      block->last_mblk_is_tick = mblk->is_tick;
      block->mblk_cnt++;
      sched->metrics->mblk_parsed_cnt++;
      if( FD_UNLIKELY( !hdr->txn_cnt ) ) {
        /* This is a tick microblock. */
        if( FD_UNLIKELY( block->mblk_tick_cnt && block->max_tick_hashcnt!=block->curr_tick_hashcnt ) ) {
          /* Ticks disagree on cumulative hash count; remember the
             inconsistency so verify_ticks_eager can reject later even
             if hashes_per_tick is not yet known here. */
          block->inconsistent_hashes_per_tick = 1;
          if( FD_LIKELY( block->hashes_per_tick!=ULONG_MAX && block->hashes_per_tick>1UL ) ) {
            /* >1 to ignore low power hashing or hashing disabled */
            FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, tick idx %u, max hashcnt %lu, curr hashcnt %lu, hashes_per_tick %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->max_tick_hashcnt, block->curr_tick_hashcnt, block->hashes_per_tick ));
            return FD_SCHED_BAD_BLOCK;
          }
        }
        block->max_tick_hashcnt  = fd_ulong_max( block->curr_tick_hashcnt, block->max_tick_hashcnt );
        block->curr_tick_hashcnt = 0UL; /* hash count restarts at each tick */
        block->mblk_tick_cnt++;
      }
      #if FD_SCHED_SKIP_POH
      block->poh_hashing_done_cnt++;
      block->poh_hash_cmp_done_cnt++;
      free_mblk( sched, block, mblk_idx );
      #else
      mblk_slist_idx_push_tail( block->mblks_unhashed, mblk_idx, sched->mblk_pool );
      block->mblk_unhashed_cnt++;
      #endif
      continue;
    }
    /* Start of a new entry batch: the batch begins with a ulong
       microblock count. */
    if( block->txns_rem==0UL && block->mblks_rem==0UL && block->fec_sob ) {
      CHECK_LEFT( sizeof(ulong) );
      FD_TEST( block->fec_buf_soff==0U );
      block->mblks_rem     = FD_LOAD( ulong, block->fec_buf );
      block->fec_buf_soff += (uint)sizeof(ulong);

      if( FD_UNLIKELY( block->mblks_rem>fd_ulong_sat_sub( FD_SCHED_MAX_MBLK_PER_SLOT, block->mblk_cnt ) ) ) {
        FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, mblk_cnt %u (%u ticks), hdr->mblk_cnt %lu >= %lu", block->slot, block->parent_slot, block->mblk_cnt, block->mblk_tick_cnt, block->mblks_rem, FD_SCHED_MAX_MBLK_PER_SLOT ));
        return FD_SCHED_BAD_BLOCK;
      }

      block->fec_sob = 0;
      continue;
    }
    /* Batch exhausted and not at a batch start: no more progress. */
    if( block->txns_rem==0UL && block->mblks_rem==0UL ) {
      break;
    }
  }
  if( block->fec_eob ) {
    /* Ignore trailing bytes at the end of a batch. */
    sched->metrics->bytes_ingested_unparsed_cnt += block->fec_buf_sz-block->fec_buf_soff;
    block->fec_buf_boff += block->fec_buf_sz;
    block->fec_buf_soff = 0U;
    block->fec_buf_sz   = 0U;
    block->fec_sob      = 1;
    block->fec_eob      = 0;
  }
  return FD_SCHED_OK;
}
    2096             : 
/* fd_sched_parse_txn parses a single serialized transaction from the
   block's FEC buffer, expands any address lookup tables (ALUTs) it
   references, rejects duplicate account addresses, and registers the
   transaction with the dispatcher and the scheduler's per-transaction
   tracking state.  Returns FD_SCHED_OK on success, FD_SCHED_AGAIN_LATER
   if the buffer does not yet hold a complete transaction, or
   FD_SCHED_BAD_BLOCK if the transaction proves the block invalid. */
FD_WARN_UNUSED static int
fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
  fd_txn_t * txn = fd_type_pun( block->txn );

  uchar * payload = block->fec_buf+block->fec_buf_soff;
  ulong pay_sz = 0UL;
  ulong txn_sz = fd_txn_parse_core( payload,
                                    fd_ulong_min( FD_TXN_MTU, block->fec_buf_sz-block->fec_buf_soff ),
                                    txn,
                                    NULL,
                                    &pay_sz );

  if( FD_UNLIKELY( !pay_sz || !txn_sz ) ) {
    /* Can't parse out a full transaction. */
    return FD_SCHED_AGAIN_LATER;
  }

  if( FD_UNLIKELY( block->txn_parsed_cnt>=FD_MAX_TXN_PER_SLOT ) ) {
    /* The block contains more transactions than a valid block would.
       Mark the block dead instead of keep processing it. */
    FD_LOG_INFO(( "bad block: illegally many transactions in slot %lu, parent slot %lu, txn_parsed_cnt %u", block->slot, block->parent_slot, block->txn_parsed_cnt ));
    return FD_SCHED_BAD_BLOCK;
  }

  ulong imm_cnt = fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_IMM );
  ulong alt_cnt = fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_ALT );

  /* Try to expand ALUTs.  Any failure along the way (accdb open, slot
     hashes view, table load) sets serializing instead of failing the
     block; the txn is then handed to the dispatcher with the
     serializing flag set -- presumably forcing serialized handling
     downstream, TODO confirm rdisp semantics. */
  int serializing = 0;
  if( alt_cnt>0UL ) {
    fd_accdb_ro_t ro[1];
    if( FD_LIKELY( fd_accdb_open_ro( alut_ctx->accdb, ro, alut_ctx->xid, &fd_sysvar_slot_hashes_id ) ) ) {
      fd_slot_hashes_t slot_hashes_view[1];
      if( FD_LIKELY( fd_sysvar_slot_hashes_view( slot_hashes_view,
                                                 fd_accdb_ref_data_const( ro ),
                                                 fd_accdb_ref_data_sz( ro ) ) ) ) {
        serializing = !!fd_runtime_load_txn_address_lookup_tables( NULL, txn, payload, alut_ctx->accdb, alut_ctx->xid, alut_ctx->els, slot_hashes_view, sched->aluts );
        sched->metrics->alut_success_cnt += (uint)!serializing;
      } else {
        serializing = 1;
      }
      fd_accdb_close_ro( alut_ctx->accdb, ro );
    } else {
      serializing = 1;
    }
  }

  /* Transactions should not have duplicate accounts.
     https://github.com/anza-xyz/agave/blob/v3.1.11/ledger/src/blockstore_processor.rs#L778-L790 */
  fd_acct_addr_t const * imms = fd_txn_get_acct_addrs( txn, payload );
  fd_acct_addr_t * alts = (!alt_cnt||serializing) ? NULL : sched->aluts; /* only check ALUT addrs if expansion succeeded */
  alt_cnt = alts ? alt_cnt : 0UL;
  if( FD_UNLIKELY( fd_chkdup_check( sched->chkdup, imms, imm_cnt, alts, alt_cnt ) ) ) {
    FD_LOG_INFO(( "bad block: duplicate accounts in slot %lu, parent slot %lu, txn_parsed_cnt %u", block->slot, block->parent_slot, block->txn_parsed_cnt ));
    return FD_SCHED_BAD_BLOCK;
  }

  /* Register the txn with the dispatcher; txn_idx 0 is reserved. */
  ulong bank_idx = (ulong)(block-sched->block_pool);
  ulong txn_idx  = fd_rdisp_add_txn( sched->rdisp, bank_idx, txn, payload, alts, serializing );
  FD_TEST( txn_idx!=0UL );
  sched->metrics->txn_parsed_cnt++;
  sched->metrics->alut_serializing_cnt += (uint)serializing;
  sched->txn_pool_free_cnt--;
  fd_txn_p_t * txn_p = sched->txn_pool + txn_idx;
  txn_p->payload_sz  = pay_sz;

  /* Map the txn's payload byte range to the shred index range it spans,
     via sorted search over the per-shred block byte offsets --
     presumably [start_shred_idx, end_shred_idx), TODO confirm. */
  txn_p->start_shred_idx = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff );
  txn_p->start_shred_idx = fd_ushort_if( txn_p->start_shred_idx>0U, (ushort)(txn_p->start_shred_idx-1U), txn_p->start_shred_idx );
  txn_p->end_shred_idx = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff+(uint)pay_sz );

  /* Stash payload and parsed form, then reset per-txn progress bits
     and timestamps for this (recycled) txn_idx. */
  fd_memcpy( txn_p->payload, payload, pay_sz );
  fd_memcpy( TXN(txn_p),     txn,     txn_sz );
  txn_bitset_remove( sched->exec_done_set, txn_idx );
  txn_bitset_remove( sched->sigverify_done_set, txn_idx );
  txn_bitset_remove( sched->poh_mixin_done_set, txn_idx );
  sched->txn_info_pool[ txn_idx ].flags = 0UL;
  sched->txn_info_pool[ txn_idx ].txn_err = 0;
  sched->txn_info_pool[ txn_idx ].tick_parsed = fd_tickcount();
  sched->txn_info_pool[ txn_idx ].tick_sigverify_disp = LONG_MAX;
  sched->txn_info_pool[ txn_idx ].tick_sigverify_done = LONG_MAX;
  sched->txn_info_pool[ txn_idx ].tick_exec_disp = LONG_MAX;
  sched->txn_info_pool[ txn_idx ].tick_exec_done = LONG_MAX;
  block->txn_idx[ block->txn_parsed_cnt ] = txn_idx;
  block->fec_buf_soff += (uint)pay_sz;
  block->txn_parsed_cnt++;
#if FD_SCHED_SKIP_SIGVERIFY
  txn_bitset_insert( sched->sigverify_done_set, txn_idx );
  block->txn_sigverify_done_cnt++;
#endif
#if FD_SCHED_SKIP_POH
  txn_bitset_insert( sched->poh_mixin_done_set, txn_idx );
#endif
  block->txns_rem--;
  return FD_SCHED_OK;
}
    2192             : 
    2193             : #undef CHECK
    2194             : #undef CHECK_LEFT
    2195             : 
/* dispatch_sigverify fills out with a sigverify task for the next
   not-yet-dispatched transaction of block, assigns it to exec tile
   exec_tile_idx, and marks that tile busy for sigverify. */
static void
dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
  /* Dispatch transactions for sigverify in parse order. */
  out->task_type = FD_SCHED_TT_TXN_SIGVERIFY;
  out->txn_sigverify->bank_idx = bank_idx;
  /* done+in-flight counts index just past all txns already handed out,
     i.e. the next txn in parse order. */
  out->txn_sigverify->txn_idx  = block->txn_idx[ block->txn_sigverify_done_cnt+block->txn_sigverify_in_flight_cnt ];
  out->txn_sigverify->exec_idx = (ulong)exec_tile_idx;
  sched->sigverify_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx ); /* tile now busy */
  sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
  block->txn_sigverify_in_flight_cnt++;
  /* Invariant: no exec tile may be simultaneously marked busy (bit
     clear) in all three ready bitsets -- each tile must remain ready in
     at least one of exec/sigverify/poh. */
  if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
}
    2208             : 
    2209             : /* Assumes there is a PoH task available for dispatching. */
    2210             : static void
    2211           0 : dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
    2212           0 :   fd_sched_mblk_t * mblk = NULL;
    2213           0 :   uint mblk_idx;
    2214           0 :   if( FD_LIKELY( !mblk_slist_is_empty( block->mblks_hashing_in_progress, sched->mblk_pool ) ) ) {
    2215             :     /* There's a PoH task in progress, just continue working on that. */
    2216           0 :     mblk_idx = (uint)mblk_slist_idx_pop_head( block->mblks_hashing_in_progress, sched->mblk_pool );
    2217           0 :     mblk = sched->mblk_pool+mblk_idx;
    2218           0 :   } else {
    2219             :     /* No in progress PoH task, so start a new one. */
    2220           0 :     FD_TEST( block->mblk_unhashed_cnt );
    2221           0 :     mblk_idx = (uint)mblk_slist_idx_pop_head( block->mblks_unhashed, sched->mblk_pool );
    2222           0 :     mblk = sched->mblk_pool+mblk_idx;
    2223           0 :     block->mblk_unhashed_cnt--;
    2224           0 :   }
    2225           0 :   out->task_type = FD_SCHED_TT_POH_HASH;
    2226           0 :   out->poh_hash->bank_idx = bank_idx;
    2227           0 :   out->poh_hash->mblk_idx = mblk_idx;
    2228           0 :   out->poh_hash->exec_idx = (ulong)exec_tile_idx;
    2229           0 :   ulong hashcnt_todo = mblk->hashcnt-mblk->curr_hashcnt;
    2230           0 :   out->poh_hash->hashcnt  = fd_ulong_min( hashcnt_todo, FD_SCHED_MAX_POH_HASHES_PER_TASK );
    2231           0 :   memcpy( out->poh_hash->hash, mblk->curr_hash, sizeof(fd_hash_t) );
    2232           0 :   sched->poh_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
    2233           0 :   sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
    2234           0 :   block->poh_hashing_in_flight_cnt++;
    2235           0 :   if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
    2236           0 : }
    2237             : 
    2238             : /* Does up to one transaction mixin.  Returns 1 if one mixin was done, 2
    2239             :    if that mixin also completed a microblock, 0 if no transaction mixin
    2240             :    was available, -1 if there is a PoH verify error. */
    2241             : FD_WARN_UNUSED static int
    2242           0 : maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block ) {
    2243           0 :   if( FD_UNLIKELY( mblk_slist_is_empty( block->mblks_mixin_in_progress, sched->mblk_pool ) ) ) return 0;
    2244           0 :   FD_TEST( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt>0 );
    2245             : 
    2246             :   /* The microblock we would like to do mixin on is at the head of the
    2247             :      queue.  It may have had some mixin, it may have never had any
    2248             :      mixin.  In the case of the former, we should continue to mixin the
    2249             :      same head microblock until it's done, lest the per-block bmtree
    2250             :      gets clobbered when we start a new one. */
    2251           0 :   ulong mblk_idx = mblk_slist_idx_pop_head( block->mblks_mixin_in_progress, sched->mblk_pool );
    2252           0 :   fd_sched_mblk_t * mblk = sched->mblk_pool+mblk_idx;
    2253             : 
    2254           0 :   if( FD_UNLIKELY( mblk->end_txn_idx>block->txn_parsed_cnt ) ) {
    2255             :     /* A partially parsed microblock is by definition at the end of the
    2256             :        FEC stream.  If such a microblock is in progress, there should be
    2257             :        no other microblock in this block so far that hasn't been
    2258             :        dispatched, because microblocks are dispatched in parse order. */
    2259           0 :     if( FD_UNLIKELY( block->mblk_unhashed_cnt ) ) {
    2260           0 :       sched->print_buf_sz = 0UL;
    2261           0 :       print_all( sched, block );
    2262           0 :       FD_LOG_CRIT(( "invariant violation end_txn_idx %lu: %s", mblk->end_txn_idx, sched->print_buf ));
    2263           0 :     }
    2264             : 
    2265             :     /* If we've decided to start mixin on a partially parsed microblock,
    2266             :        there better be nothing else in-progress.  Otherwise, they might
    2267             :        clobber the per-block bmtree for mixin. */
    2268           0 :     if( FD_UNLIKELY( mblk->curr_txn_idx!=mblk->start_txn_idx && (block->poh_hashing_in_flight_cnt||!mblk_slist_is_empty( block->mblks_hashing_in_progress, sched->mblk_pool )||!mblk_slist_is_empty( block->mblks_mixin_in_progress, sched->mblk_pool )) ) ) {
    2269           0 :       sched->print_buf_sz = 0UL;
    2270           0 :       print_all( sched, block );
    2271           0 :       FD_LOG_CRIT(( "invariant violation end_txn_idx %lu start_txn_idx %lu curr_txn_idx %lu: %s", mblk->end_txn_idx, mblk->start_txn_idx, mblk->curr_txn_idx, sched->print_buf ));
    2272           0 :     }
    2273           0 :   }
    2274             : 
    2275             :   /* Very rarely, we've finished hashing, but not all transactions in
    2276             :      the microblock have been parsed out.  This can happen if we haven't
    2277             :      received all the FEC sets for this microblock.  We can't yet fully
    2278             :      mixin the microblock.  So we'll stick it back into the end of the
    2279             :      queue, and try to see if there's a fully parsed microblock.
    2280             :      Unless, there's truly nothing else to mixin.  Then we would start
    2281             :      mixin with the partially parsed microblock.  We do this because the
    2282             :      txn pool is meant to be an OOO scheduling window not tied to
    2283             :      max_live_slots sizing requirements, so there shouldn't be a way for
    2284             :      external input to tie up txn pool entries for longer than
    2285             :      necessary. */
    2286           0 :   if( FD_UNLIKELY( mblk->curr_txn_idx>=block->txn_parsed_cnt || /* Nothing more to mixin for this microblock. */
    2287           0 :                    (mblk->end_txn_idx>block->txn_parsed_cnt &&  /* There is something to mixin, but the microblock isn't fully parsed yet ... */
    2288           0 :                     mblk->curr_txn_idx==mblk->start_txn_idx &&  /* ... and we haven't started mixin on it yet ... */
    2289           0 :                     (block->poh_hashing_in_flight_cnt ||        /* ... and another microblock is in-progress and might preempt this microblock and clobber the bmtree, so we shouldn't start the partial microblock just yet. */
    2290           0 :                      !mblk_slist_is_empty( block->mblks_hashing_in_progress, sched->mblk_pool ) ||
    2291           0 :                      !mblk_slist_is_empty( block->mblks_mixin_in_progress, sched->mblk_pool ))) ) ) {
    2292           0 :     mblk_slist_idx_push_tail( block->mblks_mixin_in_progress, mblk_idx, sched->mblk_pool );
    2293             : 
    2294             :     /* No other microblock in the mixin queue. */
    2295           0 :     if( FD_UNLIKELY( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt==1 ) ) return 0;
    2296             : 
    2297             :     /* At this point, there's at least one more microblock in the mixin
    2298             :        queue we could try.  It's a predecessor (in parse order) that
    2299             :        finished hashing later than the partially parsed microblock at
    2300             :        the head of the mixin queue. */
    2301             : 
    2302             :     /* It should never clobber the bmtree for a microblock that has had some mixin done on it. */
    2303           0 :     if( FD_UNLIKELY( mblk->curr_txn_idx!=mblk->start_txn_idx ) ) {
    2304           0 :       sched->print_buf_sz = 0UL;
    2305           0 :       print_all( sched, block );
    2306           0 :       FD_LOG_CRIT(( "invariant violation curr_txn_idx %lu start_txn_idx %lu: %s", mblk->curr_txn_idx, mblk->start_txn_idx, sched->print_buf ));
    2307           0 :     }
    2308             : 
    2309           0 :     mblk_idx = mblk_slist_idx_pop_head( block->mblks_mixin_in_progress, sched->mblk_pool );
    2310           0 :     mblk = sched->mblk_pool+mblk_idx;
    2311             : 
    2312             :     /* It should be a fresh microblock for mixin. */
    2313           0 :     FD_TEST( mblk->curr_txn_idx==mblk->start_txn_idx );
    2314             :     /* Invariant: at any given point in time, there can be at most one
    2315             :        microblock that hasn't been fully parsed yet, due to the nature
    2316             :        of sequential parsing.  So this microblock has to be fully
    2317             :        parsed. */
    2318           0 :     FD_TEST( mblk->end_txn_idx<=block->txn_parsed_cnt );
    2319           0 :   }
    2320             : 
    2321           0 :   FD_TEST( mblk->curr_txn_idx<mblk->end_txn_idx );
    2322             : 
    2323             :   /* Now mixin. */
    2324           0 :   if( FD_LIKELY( mblk->curr_txn_idx==mblk->start_txn_idx ) ) block->bmtree = fd_bmtree_commit_init( block->bmtree_mem, 32UL, 1UL, 0UL ); /* Optimize for single-transaction microblocks, which are the majority. */
    2325             : 
    2326           0 :   ulong txn_gidx = block->txn_idx[ mblk->curr_txn_idx ];
    2327           0 :   fd_txn_p_t * _txn = sched->txn_pool+txn_gidx;
    2328           0 :   fd_txn_t * txn = TXN(_txn);
    2329           0 :   for( ulong j=0; j<txn->signature_cnt; j++ ) {
    2330           0 :     fd_bmtree_node_t node[ 1 ];
    2331           0 :     fd_bmtree_hash_leaf( node, _txn->payload+txn->signature_off+FD_TXN_SIGNATURE_SZ*j, 64UL, 1UL );
    2332           0 :     fd_bmtree_commit_append( block->bmtree, node, 1UL );
    2333           0 :     mblk->curr_sig_cnt++;
    2334           0 :   }
    2335             : 
    2336             :   /* Release the txn_idx. */
    2337           0 :   txn_bitset_insert( sched->poh_mixin_done_set, txn_gidx );
    2338           0 :   sched->metrics->txn_mixin_done_cnt++;
    2339           0 :   if( txn_bitset_test( sched->exec_done_set, txn_gidx ) && txn_bitset_test( sched->sigverify_done_set, txn_gidx ) ) {
    2340           0 :     fd_rdisp_complete_txn( sched->rdisp, txn_gidx, 1 );
    2341           0 :     sched->txn_pool_free_cnt++;
    2342           0 :     block->txn_done_cnt++;
    2343           0 :     sched->metrics->txn_done_cnt++;
    2344           0 :   }
    2345             : 
    2346           0 :   mblk->curr_txn_idx++;
    2347           0 :   int rv = 2;
    2348           0 :   if( FD_LIKELY( mblk->curr_txn_idx==mblk->end_txn_idx ) ) {
    2349             :     /* Ready to compute the final hash for this microblock. */
    2350           0 :     block->poh_hash_cmp_done_cnt++;
    2351           0 :     sched->metrics->mblk_poh_done_cnt++;
    2352           0 :     uchar * root = fd_bmtree_commit_fini( block->bmtree );
    2353           0 :     uchar mixin_buf[ 64 ];
    2354           0 :     fd_memcpy( mixin_buf, mblk->curr_hash, 32UL );
    2355           0 :     fd_memcpy( mixin_buf+32UL, root, 32UL );
    2356           0 :     fd_sha256_hash( mixin_buf, 64UL, mblk->curr_hash );
    2357           0 :     free_mblk( sched, block, (uint)mblk_idx );
    2358           0 :     if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk->end_hash, sizeof(fd_hash_t) ) ) ) {
    2359           0 :       FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
    2360           0 :       FD_BASE58_ENCODE_32_BYTES( mblk->end_hash->hash, ref_str );
    2361           0 :       FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %lu, ours %s, claimed %s, hashcnt %lu, txns [%lu,%lu), %u sigs, slot %lu, parent slot %lu", mblk_idx, our_str, ref_str, mblk->hashcnt, mblk->start_txn_idx, mblk->end_txn_idx, mblk->curr_sig_cnt, block->slot, block->parent_slot ));
    2362           0 :       return -1;
    2363           0 :     }
    2364           0 :   } else {
    2365             :     /* There are more transactions to mixin in this microblock. */
    2366           0 :     mblk_slist_idx_push_head( block->mblks_mixin_in_progress, mblk_idx, sched->mblk_pool );
    2367           0 :     rv = 1;
    2368           0 :   }
    2369             : 
    2370           0 :   return rv;
    2371           0 : }
    2372             : 
/* If no block is currently active for scheduling, try to pick one.
   First preference: an already-staged lane whose head block is
   activatable and whose parent block is done.  Failing that: promote
   the longest stageable unstaged fork into a staging lane, demoting
   idle lanes first if every lane is occupied.  No-op when a block is
   already active or when nothing is eligible.  Sets
   sched->active_bank_idx on success. */
static void
try_activate_block( fd_sched_t * sched ) {
  /* Early return if there's already an active block. */
  if( FD_LIKELY( sched->active_bank_idx!=ULONG_MAX ) ) return;

  /* See if there are any allocated staging lanes that we can activate
     for scheduling ... */
  ulong staged_bitset = sched->staged_bitset;
  while( staged_bitset ) {
    /* Iterate over set bits, i.e. occupied lanes, lowest lane first. */
    int lane_idx  = fd_ulong_find_lsb( staged_bitset );
    staged_bitset = fd_ulong_pop_lsb( staged_bitset );

    ulong              head_idx     = sched->staged_head_bank_idx[ lane_idx ];
    fd_sched_block_t * head_block   = block_pool_ele( sched, head_idx );
    fd_sched_block_t * parent_block = block_pool_ele( sched, head_block->parent_idx );
    if( FD_UNLIKELY( parent_block->dying ) ) {
      /* Invariant: no child of a dying block should be staged. */
      FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, parent slot %lu on lane %d has parent_block->dying set, slot %lu, parent slot %lu",
                    head_idx, head_block->slot, head_block->parent_slot, lane_idx, parent_block->slot, parent_block->parent_slot ));
    }
    //FIXME: restore this invariant check when we have immediate demotion of dying blocks
    // if( FD_UNLIKELY( head_block->dying ) ) {
    //   /* Invariant: no dying block should be staged. */
    //   FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, prime %lu on lane %u has head_block->dying set",
    //                 head_idx, (ulong)head_block->block_id.slot, (ulong)head_block->block_id.prime, lane_idx ));
    // }
    if( block_is_done( parent_block ) && block_is_activatable( head_block ) ) {
      /* ... Yes, on this staging lane the parent block is done.  So we
         can activate the staged child. */
      if( FD_UNLIKELY( head_idx!=sched->last_active_bank_idx ) ) { /* Unlikely because only possible under forking or on slot boundary. */
        if( FD_UNLIKELY( sched->last_active_bank_idx!=head_block->parent_idx ) ) { /* Forking is rare. */
          FD_LOG_DEBUG(( "activating block %lu:%lu: lane switch to %d", head_block->slot, head_idx, lane_idx ));
          sched->metrics->lane_switch_cnt++;
        } else {
          FD_LOG_DEBUG(( "activating block %lu:%lu: lane %d waking up on slot boundary", head_block->slot, head_idx, lane_idx ));
        }
      }
      sched->active_bank_idx = head_idx;
      return;
    }
  }

  /* ... No, promote unstaged blocks. */
  ulong root_idx = sched->root_idx;
  if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
    FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
  }
  /* Find and stage the longest stageable unstaged fork.  This is a
     policy decision. */
  ulong depth = compute_longest_unstaged_fork( sched, root_idx );
  if( FD_LIKELY( depth>0UL ) ) {
    /* depth>0 means there is an unstaged fork worth promoting. */
    if( FD_UNLIKELY( sched->staged_bitset==fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) {
      /* No more staging lanes available.  All of them are occupied by
         slow squatters.  Only empty blocks can be demoted, and so
         blocks with in-flight transactions, including dying in-flight
         blocks, shouldn't be demoted.  We demote all demotable lanes.
         Demotion isn't all that expensive, since demotable blocks have
         no transactions in them.  If a demoted block proves to be
         active still, it'll naturally promote back into a staging lane.

         In fact, all lanes should be demotable at this point.  None of
         the lanes have anything dispatchable, otherwise we would have
         simply activated one of the dispatchable lanes.  None of the
         lanes have anything in-flight either, as we allow for a grace
         period while something is in-flight, before we deactivate any
         block.  In principle, we could get rid of the grace period and
         deactivate right away.  In that case, it's okay if nothing is
         demotable at the moment, as that simply implies that all lanes
         have in-flight tasks.  We would get another chance to try to
         demote when the last in-flight task on any lane completes.

         Another interesting side effect of the current dispatching and
         lane switching policy is that each lane should have exactly one
         block in it at this point.  A parent block by definition can't
         be partially ingested.  Any parent block that is fully ingested
         and dispatchable would have made the lane dispatchable, and we
         wouldn't be here.  Any parent that is fully ingested and fully
         dispatched would be fully done after the grace period.  So
         there could only be one block per lane, and it is
         simultaneously the head and the tail of the lane.

         A note on why this whole thing does not deadlock:

         One might reasonably wonder what happens if all the lanes are
         non-empty, non-dead, but for some reason couldn't be activated
         for dispatching.  We would deadlock in this case, as no lane
         dispatches to the point of being demotable, and no unstaged
         block can be promoted.  Such is not in fact possible.  The only
         way a dispatchable lane can be ineligible for activation is if
         it has a parent block that isn't done yet.  So a deadlock
         happens when this parent block, or any of its dispatchable
         ancestors, is unstaged.  An important invariant we maintain is
         that a staged block can't have an unstaged stageable parent.
         This invariant, by induction, gives us the guarantee that at
         least one of the lanes can be activated. */
      for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
        /* We would be able to assert that all lanes are demotable,
           except that abandoned blocks are given no grace period for
           deactivation.  So there could be lanes transiently occupied
           by dying blocks that are neither demotable (due to in-flight
           tasks) nor activatable (due to being dying).  If
           rdisp_demote() supported non-empty blocks, then we could
           probably restore the assertion. */
        if( FD_UNLIKELY( !lane_is_demotable( sched, l ) ) ) continue;
        ulong demoted_cnt = demote_lane( sched, l );
        if( FD_UNLIKELY( demoted_cnt!=1UL ) ) {
          FD_LOG_CRIT(( "invariant violation: %lu blocks demoted from lane %d, expected 1 demotion", demoted_cnt, l ));
        }
        sched->metrics->lane_demoted_cnt++;
      }
      /* We weren't able to successfully demote anything.  This is
         likely because all lanes are occupied by dying blocks with
         in-flight tasks.  We would get another chance to try to demote
         when the last in-flight task on any lane completes. */
      if( FD_UNLIKELY( sched->staged_bitset==fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) return;
    }
    FD_TEST( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) );
    /* At least one lane is free now; claim the lowest free one. */
    int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset );
    if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
      FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
                    lane_idx, sched->staged_bitset ));
    }
    ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
    if( FD_UNLIKELY( head_bank_idx==ULONG_MAX ) ) {
      /* We found a promotable fork depth>0.  This should not happen. */
      FD_LOG_CRIT(( "invariant violation: head_bank_idx==ULONG_MAX" ));
    }
    /* We don't bother with promotion unless the block is immediately
       dispatchable.  So it's okay to set the active block here.  This
       doesn't cause out-of-order block replay because any parent block
       must be fully done.  If the parent block were dead, this fork
       would be marked dead too and ineligible for promotion.  If the
       parent block were not dead and not done and staged, we wouldn't
       be trying to promote an unstaged fork.  If the parent block were
       not dead and not done and unstaged, it would've been part of this
       unstaged fork. */
    fd_sched_block_t * head_block = block_pool_ele( sched, head_bank_idx );
    FD_LOG_DEBUG(( "activating block %lu:%lu: unstaged promotion to lane %d", head_block->slot, head_bank_idx, lane_idx ));
    sched->active_bank_idx = head_bank_idx;
    return;
  }
  /* No unstaged blocks to promote.  So we're done.  Yay. */
}
    2516             : 
    2517             : static void
    2518           0 : check_or_set_active_block( fd_sched_t * sched ) {
    2519           0 :   if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
    2520           0 :     try_activate_block( sched );
    2521           0 :   } else {
    2522           0 :     fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
    2523           0 :     if( FD_UNLIKELY( block_should_deactivate( active_block ) ) ) {
    2524           0 :       sched->print_buf_sz = 0UL;
    2525           0 :       print_all( sched, active_block );
    2526           0 :       FD_LOG_NOTICE(( "%s", sched->print_buf ));
    2527           0 :       FD_LOG_CRIT(( "invariant violation: should have been deactivated" ));
    2528           0 :     }
    2529           0 :   }
    2530           0 : }
    2531             : 
    2532             : /* This function has two main jobs:
    2533             :    - Mark everything on the fork tree dying.
    2534             :    - Take blocks out of rdisp if possible. */
    2535             : static void
    2536           0 : subtree_mark_and_maybe_prune_rdisp( fd_sched_t * sched, fd_sched_block_t * block ) {
    2537           0 :   if( FD_UNLIKELY( block->rooted ) ) {
    2538           0 :     FD_LOG_CRIT(( "invariant violation: rooted block should not be abandoned, slot %lu, parent slot %lu",
    2539           0 :                   block->slot, block->parent_slot ));
    2540           0 :   }
    2541             :   /* All minority fork nodes pass through this function eventually.  So
    2542             :      this is a good point to check per-node invariants for minority
    2543             :      forks. */
    2544           0 :   if( FD_UNLIKELY( block->staged && !block->in_rdisp ) ) {
    2545           0 :     FD_LOG_CRIT(( "invariant violation: staged block is not in the dispatcher, slot %lu, parent slot %lu",
    2546           0 :                   block->slot, block->parent_slot ));
    2547           0 :   }
    2548             : 
    2549             :   /* Setting the flag is non-optional and can happen more than once. */
    2550           0 :   block->dying = 1;
    2551             : 
    2552             :   /* Removal from dispatcher should only happen once. */
    2553           0 :   if( block->in_rdisp ) {
    2554           0 :     fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
    2555           0 :     if( FD_UNLIKELY( !parent ) ) {
    2556             :       /* Only the root has no parent.  Abandon should never be called on
    2557             :          the root.  So any block we are trying to abandon should have a
    2558             :          parent. */
    2559           0 :       FD_LOG_CRIT(( "invariant violation: parent not found slot %lu, parent slot %lu",
    2560           0 :                     block->slot, block->parent_slot ));
    2561           0 :     }
    2562             : 
    2563             :     /* The dispatcher expects blocks to be abandoned in the same order
    2564             :        that they were added on each lane.  There are no requirements on
    2565             :        the order of abandoning if two blocks are not on the same lane,
    2566             :        or if a block is unstaged.  This means that in general we
    2567             :        shouldn't abandon a child block if the parent hasn't been
    2568             :        abandoned yet, if and only if they are on the same lane.  So wait
    2569             :        until we can abandon the parent, and then descend down the fork
    2570             :        tree to ensure orderly abandoning. */
    2571           0 :     int in_order = !parent->in_rdisp || /* parent is not in the dispatcher */
    2572           0 :                    !parent->staged   || /* parent is in the dispatcher but not staged */
    2573           0 :                    !block->staged    || /* parent is in the dispatcher and staged but this block is unstaged */
    2574           0 :                    block->staging_lane!=parent->staging_lane; /* this block is on a different staging lane than its parent */
    2575             : 
    2576           0 :     if( FD_UNLIKELY( in_order && block->staged && sched->active_bank_idx==sched->staged_head_bank_idx[ block->staging_lane ] && sched->active_bank_idx!=ULONG_MAX ) ) {
    2577           0 :       FD_TEST( block_pool_ele( sched, sched->active_bank_idx )==block );
    2578           0 :       FD_LOG_DEBUG(( "reset active_bank_idx %lu: abandon", sched->active_bank_idx ));
    2579           0 :       sched->last_active_bank_idx = sched->active_bank_idx;
    2580           0 :       sched->active_bank_idx = ULONG_MAX;
    2581           0 :       sched->metrics->deactivate_abandoned_cnt++;
    2582           0 :     }
    2583             : 
    2584             :     /* We inform the dispatcher of an abandon only when there are no
    2585             :        more in-flight transactions.  Otherwise, if the dispatcher
    2586             :        recycles the same txn_id that was just abandoned, and we receive
    2587             :        completion of an in-flight transaction whose txn_id was just
    2588             :        recycled. */
    2589             :     // FIXME The recycling might be fine now that we no longer use
    2590             :     // txn_id to index into anything.  We might be able to just drop
    2591             :     // txn_id on abandoned blocks.  Though would this leak transaction
    2592             :     // content if the txn_id is recycled?
    2593             :     // Note that subtree pruning from sched isn't dependent on the
    2594             :     // in-flight check being present here, as is_prunable already checks
    2595             :     // for in-flight==0.
    2596           0 :     int abandon = in_order && !block_is_in_flight( block );
    2597             : 
    2598           0 :     if( abandon ) {
    2599           0 :       block->in_rdisp = 0;
    2600           0 :       fd_rdisp_abandon_block( sched->rdisp, (ulong)(block-sched->block_pool) );
    2601           0 :       sched->txn_pool_free_cnt += block->txn_parsed_cnt-block->txn_done_cnt; /* in_flight_cnt==0 */
    2602             : 
    2603           0 :       sched->metrics->block_abandoned_cnt++;
    2604           0 :       sched->metrics->txn_abandoned_parsed_cnt    += block->txn_parsed_cnt;
    2605           0 :       sched->metrics->txn_abandoned_exec_done_cnt += block->txn_exec_done_cnt;
    2606           0 :       sched->metrics->txn_abandoned_done_cnt      += block->txn_done_cnt;
    2607             : 
    2608             :       //FIXME when demote supports non-empty blocks, we should demote
    2609             :       //the block from the lane unconditionally and immediately,
    2610             :       //regardles of whether it's safe to abandon or not.  So a block
    2611             :       //would go immediately from staged to unstaged and eventually to
    2612             :       //abandoned.
    2613           0 :       if( FD_LIKELY( block->staged ) ) {
    2614           0 :         FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: abandon", block->slot, block_to_idx( sched, block ), block->staging_lane ));
    2615           0 :         block->staged = 0;
    2616             :         /* Now release the staging lane.  This will release the lane as
    2617             :            soon as we abandon the head block on a lane.  Technically a
    2618             :            release should only happen when we remove the tail block on a
    2619             :            lane.  This is fine though.  The way we abandon guarantees by
    2620             :            induction that an entire lane will be abandoned.  Only the
    2621             :            head block on a lane can possibly have in-flight
    2622             :            transactions, and so once a head block becomes eligible for
    2623             :            abandoning, the entire lane all the way to the tail block,
    2624             :            will be eligible. */
    2625           0 :         sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
    2626           0 :         sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
    2627           0 :       }
    2628           0 :     }
    2629           0 :   }
    2630             : 
    2631             :   /* Abandon the entire fork chaining off of this block. */
    2632           0 :   ulong child_idx = block->child_idx;
    2633           0 :   while( child_idx!=ULONG_MAX ) {
    2634           0 :     fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2635           0 :     subtree_mark_and_maybe_prune_rdisp( sched, child );
    2636           0 :     child_idx = child->sibling_idx;
    2637           0 :   }
    2638           0 : }
    2639             : 
    2640             : /* It's safe to call this function more than once on the same block.
    2641             :    The final call is when there are no more in-flight tasks for this
    2642             :    block, at which point the block will be pruned from sched. */
    2643             : static void
    2644           0 : subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block ) {
    2645           0 :   subtree_mark_and_maybe_prune_rdisp( sched, block );
    2646             :   /* subtree_abandon can happen as a result of one of the following
    2647             :        - Bad block at head of lane
    2648             :        - Bad block at tail of lane
    2649             :        - Root advance
    2650             :        - Root notify
    2651             : 
    2652             :      In the case of bad blocks, it suffices to check the in-flight task
    2653             :      count of the block that is bad.  In the case of root advance, the
    2654             :      refcnt on the bank is checked and that includes the in-flight task
    2655             :      count.  It is only in the case of root notify that there could
    2656             :      potentially be a non-zero in-flight count in the middle of a
    2657             :      subtree.  To be safe, we check the entire subtree here before
    2658             :      pruning. */
    2659           0 :   if( subtree_is_prunable( sched, block ) ) {
    2660           0 :     fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
    2661           0 :     if( FD_LIKELY( parent ) ) {
    2662             :       /* Splice the block out of its parent's children list. */
    2663           0 :       ulong block_idx = block_to_idx( sched, block );
    2664           0 :       ulong * idx_p = &parent->child_idx;
    2665           0 :       while( *idx_p!=block_idx ) {
    2666           0 :         idx_p = &(block_pool_ele( sched, *idx_p )->sibling_idx);
    2667           0 :       }
    2668           0 :       *idx_p = block->sibling_idx;
    2669           0 :     }
    2670           0 :     subtree_prune( sched, block_to_idx( sched, block ), ULONG_MAX );
    2671           0 :   }
    2672           0 : }
    2673             : 
/* subtree_prune removes the subtree rooted at bank_idx from sched,
   skipping the child subtree rooted at except_idx (pass ULONG_MAX to
   prune everything).  The traversal is iterative: the parent_idx field
   of visited blocks is repurposed as the intrusive "next block to
   visit" link, so parent_idx is clobbered during the walk and reset to
   ULONG_MAX before each block is released.  Blocks that still carry a
   refcnt are pushed onto ref_q — presumably so the outstanding
   reference can be released by the ref_q consumer; TODO confirm against
   the ref_q drain path. */
static void
subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx ) {
  fd_sched_block_t * head = block_pool_ele( sched, bank_idx );
  head->parent_idx        = ULONG_MAX;
  fd_sched_block_t * tail = head; /* tail of the intrusive visit list */

  while( head ) {
    FD_TEST( head->in_sched );
    head->in_sched = 0;
    if( head->refcnt ) {
      /* A block with live references must not have completed its
         block_end processing. */
      FD_TEST( !head->block_end_done );
      head->refcnt = 0;
      if( FD_UNLIKELY( !ref_q_avail( sched->ref_q ) ) ) FD_LOG_CRIT(( "ref_q full" ));
      ref_q_push_tail( sched->ref_q, block_to_idx( sched, head ) );
    }

    ulong child_idx = head->child_idx;
    while( child_idx!=ULONG_MAX ) {
      fd_sched_block_t * child = block_pool_ele( sched, child_idx );
      /* Add children to be visited.  We abuse the parent_idx field to
         link up the next block to visit. */
      if( child_idx!=except_idx ) {
        tail->parent_idx = child_idx;
        tail             = child;
        tail->parent_idx = ULONG_MAX;
      }
      child_idx = child->sibling_idx;
    }

    /* Prune the current block.  We will never publish halfway into a
       staging lane, because anything on the rooted fork should have
       finished replaying gracefully and be out of the dispatcher.  In
       fact, anything that we are publishing away should be out of the
       dispatcher at this point.  And there should be no more in-flight
       transactions. */
    if( FD_UNLIKELY( block_is_in_flight( head ) ) ) {
      FD_LOG_CRIT(( "invariant violation: block has transactions in flight (%u exec %u sigverify %u poh), slot %lu, parent slot %lu",
                    head->txn_exec_in_flight_cnt, head->txn_sigverify_in_flight_cnt, head->poh_hashing_in_flight_cnt, head->slot, head->parent_slot ));
    }
    if( FD_UNLIKELY( head->in_rdisp ) ) {
      /* We should have removed it from the dispatcher when we were
         notified of the new root, or when in-flight transactions were
         drained. */
      FD_LOG_CRIT(( "invariant violation: block is in the dispatcher, slot %lu, parent slot %lu", head->slot, head->parent_slot ));
    }

    /* Return remaining mblk descriptors to the shared pool. */
    free_mblk_slist( sched, head, head->mblks_unhashed );
    free_mblk_slist( sched, head, head->mblks_hashing_in_progress );
    free_mblk_slist( sched, head, head->mblks_mixin_in_progress );

    /* Emit diagnostics for blocks pruned before block_end completed. */
    if( FD_UNLIKELY( !head->block_end_done ) ) {
      sched->print_buf_sz = 0UL;
      print_block_metrics( sched, head );
      if( FD_LIKELY( head->block_start_done ) ) FD_LOG_DEBUG(( "block %lu:%lu replayed partially, pruning without full replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
      else FD_LOG_DEBUG(( "block %lu:%lu replayed nothing, pruning without any replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
    }

    sched->block_pool_popcnt--;

    /* parent_idx currently holds the next entry in the visit list. */
    fd_sched_block_t * next = block_pool_ele( sched, head->parent_idx );

    /* We don't have to clear the indices here since no one should be
       accessing them.  Defensive programming. */
    head->parent_idx  = ULONG_MAX;
    head->child_idx   = ULONG_MAX;
    head->sibling_idx = ULONG_MAX;

    head = next;
  }
}
    2745             : 
/* maybe_switch_block is invoked when the block at bank_idx may have
   changed state.  If it isn't the active block, this is a no-op.  If
   the active block is fully done, it is removed from the dispatcher and
   we try to hand the lane (and active status) to a non-dying child on
   the same staging lane; failing that, the lane is released and some
   other block is activated via try_activate_block.  If the active block
   merely ran out of dispatchable transactions, it is deactivated and
   another block is tried. */
static void
maybe_switch_block( fd_sched_t * sched, ulong bank_idx ) {
  /* This only happens rarely when there are dying in-flight blocks.
     Early exit and don't let dying blocks affect replay. */
  if( FD_UNLIKELY( bank_idx!=sched->active_bank_idx ) ) return;

  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
  if( FD_UNLIKELY( block_is_done( block ) ) ) {
    fd_rdisp_remove_block( sched->rdisp, bank_idx );
    FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: remove", block->slot, bank_idx, block->staging_lane ));
    block->in_rdisp = 0;
    block->staged   = 0;
    sched->metrics->block_removed_cnt++;
    FD_LOG_DEBUG(( "reset active_bank_idx %lu: remove", sched->active_bank_idx ));
    sched->last_active_bank_idx = sched->active_bank_idx;
    sched->active_bank_idx = ULONG_MAX;

    /* See if there is a child block down the same staging lane.  This
       is a policy decision to minimize fork churn.  We could in theory
       reevaluate staging lane allocation here and do promotion/demotion
       as needed. */
    ulong child_idx = block->child_idx;
    while( child_idx!=ULONG_MAX ) {
      fd_sched_block_t * child = block_pool_ele( sched, child_idx );
      if( FD_LIKELY( child->staged && child->staging_lane==block->staging_lane ) ) {
        /* There is a child block down the same staging lane ... */
        if( FD_LIKELY( !child->dying ) ) {
          /* ... and the child isn't dead */
          if( FD_UNLIKELY( !block_is_activatable( child ) ) ) {
            /* ... but the child is not activatable, likely because
               there are no transactions available yet. */
            sched->metrics->deactivate_no_txn_cnt++;
            try_activate_block( sched );
            return;
          }
          /* ... and it's immediately dispatchable, so switch the active
             block to it, and have the child inherit the head status of
             the lane.  This is the common case. */
          FD_LOG_DEBUG(( "activating block %lu:%lu: child inheritance on lane %lu", child->slot, child_idx, child->staging_lane ));
          sched->active_bank_idx = child_idx;
          sched->staged_head_bank_idx[ block->staging_lane ] = child_idx;
          if( FD_UNLIKELY( !fd_ulong_extract_bit( sched->staged_bitset, (int)block->staging_lane ) ) ) {
            FD_LOG_CRIT(( "invariant violation: staged_bitset 0x%lx bit %lu is not set, slot %lu, parent slot %lu, child slot %lu, parent slot %lu",
                          sched->staged_bitset, block->staging_lane, block->slot, block->parent_slot, child->slot, child->parent_slot ));
          }
          return;
        } else {
          /* ... but the child block is considered dead, likely because
             the parser considers it invalid. */
          FD_LOG_INFO(( "child block %lu is already dead", child->slot ));
          subtree_abandon( sched, child );
          break;
        }
      }
      child_idx = child->sibling_idx;
    }
    /* There isn't a child block down the same staging lane.  This is
       the last block in the staging lane.  Release the staging lane. */
    sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
    sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
    sched->metrics->deactivate_no_child_cnt++;
    try_activate_block( sched );
  } else if( block_should_deactivate( block ) ) {
    /* We exhausted the active block, but it's not fully done yet.  We
       are just not getting FEC sets for it fast enough.  This could
       happen when the network path is congested, or when the leader
       simply went down.  Reset the active block. */
    sched->last_active_bank_idx = sched->active_bank_idx;
    sched->active_bank_idx = ULONG_MAX;
    sched->metrics->deactivate_no_txn_cnt++;
    try_activate_block( sched );
  }
}
    2819             : 
    2820             : FD_FN_UNUSED static ulong
    2821           0 : find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx ) {
    2822           0 :   ulong root_idx = sched->root_idx;
    2823           0 : 
    2824           0 :   if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
    2825           0 :     FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
    2826           0 :   }
    2827           0 : 
    2828           0 :   /* First pass: compute the longest unstaged fork depth for each node
    2829           0 :      in the fork tree. */
    2830           0 :   ulong depth = compute_longest_unstaged_fork( sched, root_idx );
    2831           0 : 
    2832           0 :   /* Second pass: stage blocks on the longest unstaged fork. */
    2833           0 :   ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
    2834           0 : 
    2835           0 :   if( FD_UNLIKELY( (depth>0UL && head_bank_idx==ULONG_MAX) || (depth==0UL && head_bank_idx!=ULONG_MAX) ) ) {
    2836           0 :     FD_LOG_CRIT(( "invariant violation: depth %lu, head_bank_idx %lu",
    2837           0 :                   depth, head_bank_idx ));
    2838           0 :   }
    2839           0 : 
    2840           0 :   return head_bank_idx;
    2841           0 : }
    2842             : 
    2843             : /* Returns length of the longest stageable unstaged fork, if there is
    2844             :    one, and 0 otherwise. */
    2845             : static ulong
    2846           0 : compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx ) {
    2847           0 :   if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
    2848           0 :     FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
    2849           0 :   }
    2850             : 
    2851           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2852             : 
    2853           0 :   ulong max_child_depth = 0UL;
    2854           0 :   ulong child_idx       = block->child_idx;
    2855           0 :   while( child_idx!=ULONG_MAX ) {
    2856           0 :     ulong child_depth = compute_longest_unstaged_fork( sched, child_idx );
    2857           0 :     if( child_depth > max_child_depth ) {
    2858           0 :       max_child_depth = child_depth;
    2859           0 :     }
    2860           0 :     fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2861           0 :     child_idx = child->sibling_idx;
    2862           0 :   }
    2863             : 
    2864           0 :   block->luf_depth = max_child_depth + fd_ulong_if( block_is_promotable( block ), 1UL, 0UL );
    2865           0 :   return block->luf_depth;
    2866           0 : }
    2867             : 
    2868             : static ulong
    2869           0 : stage_longest_unstaged_fork_helper( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
    2870           0 :   if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
    2871           0 :     FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
    2872           0 :   }
    2873             : 
    2874           0 :   fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2875             : 
    2876           0 :   int   stage_it = fd_int_if( block_is_promotable( block ), 1, 0 );
    2877           0 :   ulong rv       = fd_ulong_if( stage_it, bank_idx, ULONG_MAX );
    2878           0 :   if( FD_LIKELY( stage_it ) ) {
    2879           0 :     block->staged = 1;
    2880           0 :     block->staging_lane = (ulong)lane_idx;
    2881           0 :     fd_rdisp_promote_block( sched->rdisp, bank_idx, block->staging_lane );
    2882           0 :     sched->metrics->block_promoted_cnt++;
    2883           0 :     FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: promote", block->slot, bank_idx, block->staging_lane ));
    2884           0 :   }
    2885             : 
    2886             :   /* Base case: leaf node. */
    2887           0 :   if( block->child_idx==ULONG_MAX ) return rv;
    2888             : 
    2889           0 :   ulong max_depth      = 0UL;
    2890           0 :   ulong best_child_idx = ULONG_MAX;
    2891           0 :   ulong child_idx      = block->child_idx;
    2892           0 :   while( child_idx!=ULONG_MAX ) {
    2893           0 :     fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2894           0 :     if( child->luf_depth>max_depth ) {
    2895           0 :       max_depth      = child->luf_depth;
    2896           0 :       best_child_idx = child_idx;
    2897           0 :     }
    2898           0 :     child_idx = child->sibling_idx;
    2899           0 :   }
    2900             : 
    2901             :   /* Recursively stage descendants. */
    2902           0 :   if( best_child_idx!=ULONG_MAX ) {
    2903           0 :     ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, best_child_idx, lane_idx );
    2904           0 :     rv = fd_ulong_if( rv!=ULONG_MAX, rv, head_bank_idx );
    2905           0 :   }
    2906             : 
    2907           0 :   return rv;
    2908           0 : }
    2909             : 
    2910             : /* Returns idx of head block of staged lane on success, idx_null
    2911             :    otherwise. */
    2912             : static ulong
    2913           0 : stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
    2914           0 :   ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, bank_idx, lane_idx );
    2915           0 :   if( FD_LIKELY( head_bank_idx!=ULONG_MAX ) ) {
    2916           0 :     sched->metrics->lane_promoted_cnt++;
    2917           0 :     sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
    2918             :     /* No need to update staged_popcnt_wmk because the fact that there
    2919             :        are unstaged blocks implies we already maxed out lanes at one
    2920             :        point. */
    2921           0 :     sched->staged_head_bank_idx[ lane_idx ] = head_bank_idx;
    2922           0 :   }
    2923           0 :   return head_bank_idx;
    2924           0 : }
    2925             : 
    2926             : /* Check if an entire staging lane can be demoted.  Returns 1 if all
    2927             :    blocks in the lane are demotable, 0 otherwise. */
    2928             : static int
    2929           0 : lane_is_demotable( fd_sched_t * sched, int lane_idx ) {
    2930           0 :   ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
    2931             : 
    2932           0 :   while( bank_idx!=ULONG_MAX ) {
    2933           0 :     fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2934           0 :     FD_TEST( block->staged );
    2935           0 :     FD_TEST( block->staging_lane==(ulong)lane_idx );
    2936             : 
    2937           0 :     if( FD_UNLIKELY( !block_is_demotable( block ) ) ) {
    2938             :       /* Found a non-demotable block.  Early exit. */
    2939           0 :       return 0;
    2940           0 :     }
    2941             : 
    2942             :     /* Find the child in the same staging lane. */
    2943           0 :     ulong child_idx = block->child_idx;
    2944           0 :     ulong next_bank_idx = ULONG_MAX;
    2945           0 :     while( child_idx!=ULONG_MAX ) {
    2946           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2947           0 :       if( child->staged && child->staging_lane==(ulong)lane_idx ) {
    2948           0 :         next_bank_idx = child_idx;
    2949           0 :         break;
    2950           0 :       }
    2951           0 :       child_idx = child->sibling_idx;
    2952           0 :     }
    2953           0 :     bank_idx = next_bank_idx;
    2954           0 :   }
    2955             : 
    2956           0 :   return 1;
    2957           0 : }
    2958             : 
    2959             : /* Demote all blocks in a staging lane.  Assumes that all blocks in the
    2960             :    lane are demotable.  Returns the number of blocks demoted. */
    2961             : static ulong
    2962           0 : demote_lane( fd_sched_t * sched, int lane_idx ) {
    2963           0 :   ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
    2964           0 :   uint  demoted_cnt = 0U;
    2965             : 
    2966           0 :   while( bank_idx!=ULONG_MAX ) {
    2967           0 :     fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
    2968           0 :     FD_TEST( block->staged );
    2969           0 :     FD_TEST( block->staging_lane==(ulong)lane_idx );
    2970             : 
    2971           0 :     int ret = fd_rdisp_demote_block( sched->rdisp, bank_idx );
    2972           0 :     if( FD_UNLIKELY( ret!=0 ) ) {
    2973           0 :       FD_LOG_CRIT(( "fd_rdisp_demote_block failed for slot %lu, bank_idx %lu, lane %d", block->slot, bank_idx, lane_idx ));
    2974           0 :     }
    2975           0 :     FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: demote", block->slot, bank_idx, block->staging_lane ));
    2976           0 :     block->staged = 0;
    2977           0 :     demoted_cnt++;
    2978             : 
    2979             :     /* Find the child in the same staging lane. */
    2980           0 :     ulong child_idx = block->child_idx;
    2981           0 :     ulong next_bank_idx = ULONG_MAX;
    2982           0 :     while( child_idx!=ULONG_MAX ) {
    2983           0 :       fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    2984           0 :       if( child->staged && child->staging_lane==(ulong)lane_idx ) {
    2985           0 :         next_bank_idx = child_idx;
    2986           0 :         break;
    2987           0 :       }
    2988           0 :       child_idx = child->sibling_idx;
    2989           0 :     }
    2990           0 :     bank_idx = next_bank_idx;
    2991           0 :   }
    2992             : 
    2993             :   /* Clear the lane. */
    2994           0 :   sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, lane_idx );
    2995           0 :   sched->staged_head_bank_idx[ lane_idx ] = ULONG_MAX;
    2996             : 
    2997           0 :   sched->metrics->block_demoted_cnt += demoted_cnt;
    2998           0 :   FD_LOG_DEBUG(( "demoted %u blocks in lane %d", demoted_cnt, lane_idx ));
    2999           0 :   return demoted_cnt;
    3000           0 : }

Generated by: LCOV version 1.14