Line data Source code
1 : #include <stdio.h> /* for vsnprintf */
2 : #include <stdarg.h> /* for va_list */
3 :
4 : #include "fd_sched.h"
5 : #include "fd_execrp.h" /* for poh hash value */
6 : #include "../../util/math/fd_stat.h" /* for sorted search */
7 : #include "../../ballet/block/fd_microblock.h"
8 : #include "../../disco/fd_disco_base.h" /* for FD_MAX_TXN_PER_SLOT */
9 : #include "../../disco/metrics/fd_metrics.h" /* for fd_metrics_convert_seconds_to_ticks and etc. */
10 : #include "../../disco/pack/fd_chkdup.h"
11 : #include "../../disco/shred/fd_shredder.h" /* FD_SHREDDER_CHAINED_FEC_SET_PAYLOAD_SZ */
12 : #include "../../discof/poh/fd_poh.h" /* for MAX_SKIPPED_TICKS */
13 : #include "../../flamenco/runtime/fd_runtime.h" /* for fd_runtime_load_txn_address_lookup_tables */
14 : #include "../../flamenco/runtime/fd_system_ids.h"
15 : #include "../../flamenco/runtime/sysvar/fd_sysvar_slot_hashes.h" /* for ALUTs */
16 : #include "../../flamenco/accdb/fd_accdb_sync.h"
17 :
18 0 : #define FD_SCHED_MAX_STAGING_LANES_LOG (2)
19 0 : #define FD_SCHED_MAX_STAGING_LANES (1UL<<FD_SCHED_MAX_STAGING_LANES_LOG)
20 : #define FD_SCHED_MAX_EXEC_TILE_CNT (64UL)
21 0 : #define FD_SCHED_MAX_PRINT_BUF_SZ (2UL<<20)
22 :
23 : #define FD_SCHED_MAX_MBLK_PER_SLOT (MAX_SKIPPED_TICKS)
24 0 : #define FD_SCHED_MAX_POH_HASHES_PER_TASK (4096UL) /* This seems to be the sweet spot. */
25 :
26 : /* 64 ticks per slot, and a single gigantic microblock containing min
27 : size transactions. */
28 : FD_STATIC_ASSERT( FD_MAX_TXN_PER_SLOT_SHRED==((FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT-65UL*sizeof(fd_microblock_hdr_t))/FD_TXN_MIN_SERIALIZED_SZ), max_txn_per_slot_shred );
29 :
30 : /* We size the buffer to be able to hold residual data from the previous
31 : FEC set that only becomes parseable after the next FEC set is
32 : ingested, as well as the incoming FEC set. The largest minimally
33 : parseable unit of data is a transaction. So that much data may
34 : straddle FEC set boundaries. Other minimally parseable units of data
35 : include the microblock header and the microblock count within a
36 : batch. */
37 0 : #define FD_SCHED_MAX_PAYLOAD_PER_FEC (63985UL)
38 : FD_STATIC_ASSERT( FD_SCHED_MAX_PAYLOAD_PER_FEC>=FD_SHREDDER_CHAINED_FEC_SET_PAYLOAD_SZ, bump sched fec size bound ); /* FD_SHREDDER_CHAINED_FEC_SET_PAYLOAD_SZ is the bound we want, but older ledgers have larger FEC sets. */
39 : #define FD_SCHED_MAX_FEC_BUF_SZ (FD_SCHED_MAX_PAYLOAD_PER_FEC+FD_TXN_MTU)
40 : FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(fd_microblock_hdr_t), resize buffer for residual data );
41 : FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(ulong), resize buffer for residual data );
42 :
43 0 : #define FD_SCHED_MAX_TXN_PER_FEC ((FD_SCHED_MAX_PAYLOAD_PER_FEC-1UL)/FD_TXN_MIN_SERIALIZED_SZ+1UL) /* 478 */
44 0 : #define FD_SCHED_MAX_MBLK_PER_FEC ((FD_SCHED_MAX_PAYLOAD_PER_FEC-1UL)/sizeof(fd_microblock_hdr_t)+1UL) /* 1334 */
45 :
46 : FD_STATIC_ASSERT( FD_SCHED_MIN_DEPTH>=FD_SCHED_MAX_TXN_PER_FEC, limits );
47 : FD_STATIC_ASSERT( FD_SCHED_MAX_DEPTH<=FD_RDISP_MAX_DEPTH, limits );
48 :
49 0 : #define FD_SCHED_MAGIC (0xace8a79c181f89b6UL) /* echo -n "fd_sched_v0" | sha512sum | head -c 16 */
50 :
51 0 : #define FD_SCHED_OK (0)
52 0 : #define FD_SCHED_AGAIN_LATER (1)
53 0 : #define FD_SCHED_BAD_BLOCK (2)
54 :
55 :
56 : /* Structs. */
57 :
/* fd_sched_mblk_t is the per-microblock PoH bookkeeping record.  It
   tracks the [start,end) range of parsed transactions that mix into
   this microblock, the mixin cursor, and the running vs expected PoH
   hash state.  Records live in a flat pool and are chained through
   `next` (used both for the mblk_slist queues and the free list). */
struct fd_sched_mblk {
  ulong start_txn_idx; /* inclusive parse idx */
  ulong end_txn_idx;   /* non-inclusive parse idx */
  ulong curr_txn_idx;  /* next txn to mixin, parse idx */
  ulong hashcnt;       /* number of pure hashes, excluding final mixin */
  ulong curr_hashcnt;  /* pure hashes performed so far -- presumably advances toward hashcnt; confirm in hashing path */
  fd_hash_t end_hash[ 1 ];  /* expected hash at end of this microblock (from the microblock header) */
  fd_hash_t curr_hash[ 1 ]; /* running hash state */
  uint curr_sig_cnt;
  uint next;           /* pool index link (slist queues / free list) */
  int is_tick;         /* nonzero for tick microblocks (no txns mixed in) */
};
typedef struct fd_sched_mblk fd_sched_mblk_t;
71 :
72 : #define SLIST_NAME mblk_slist
73 : #define SLIST_ELE_T fd_sched_mblk_t
74 0 : #define SLIST_IDX_T uint
75 0 : #define SLIST_NEXT next
76 : #include "../../util/tmpl/fd_slist.c"
77 :
78 : #define SET_NAME txn_bitset
79 : #define SET_MAX FD_SCHED_MAX_DEPTH
80 : #include "../../util/tmpl/fd_set.c"
81 :
/* fd_sched_block_t is the per-block replay state: fork-tree linkage,
   task progress counters (exec / sigverify / PoH), PoH verification
   state, the incremental FEC-set parser state, and a set of 1-bit
   lifecycle flags.  Blocks live in a flat pool; tree edges are pool
   indices with ULONG_MAX as the null index. */
struct fd_sched_block {
  ulong slot;
  ulong parent_slot;
  ulong parent_idx;  /* Index of the parent in the pool. */
  ulong child_idx;   /* Index of the left-child in the pool. */
  ulong sibling_idx; /* Index of the right-sibling in the pool. */

  /* Counters. */
  uint txn_parsed_cnt;
  /* txn_queued_cnt = txn_parsed_cnt-txn_in_flight_cnt-txn_done_cnt */
  uint txn_exec_in_flight_cnt;
  uint txn_exec_done_cnt;
  uint txn_sigverify_in_flight_cnt;
  uint txn_sigverify_done_cnt;
  uint poh_hashing_in_flight_cnt;
  uint poh_hashing_done_cnt;
  uint poh_hash_cmp_done_cnt; /* poh_hashing_done_cnt==poh_hash_cmp_done_cnt+len(mixin_in_progress) */
  uint txn_done_cnt; /* A transaction is considered done when all types of tasks associated with it are done. */
  uint shred_cnt;
  uint mblk_cnt;      /* Total number of microblocks, including ticks and non ticks.
                         mblk_cnt==len(unhashed)+len(hashing_in_progress)+hashing_in_flight_cnt+len(mixin_in_progress)+hash_cmp_done_cnt */
  uint mblk_tick_cnt; /* Total number of tick microblocks. */
  uint mblk_freed_cnt; /* This is ==hash_cmp_done_cnt in most cases, except for aborted
                          blocks, where the freed cnt will catch up to mblk_cnt and surpass
                          hash_cmp_done_cnt when the block is reaped. */
  uint mblk_unhashed_cnt; /* ==len(unhashed) */
  ulong hashcnt;              /* How many hashes this block wants replay to do. A mixin/record counts as one hash. */
  ulong txn_pool_max_popcnt;  /* Peak transaction pool occupancy during the time this block was replaying. */
  ulong mblk_pool_max_popcnt; /* Peak mblk pool occupancy. */
  ulong block_pool_max_popcnt; /* Peak block pool occupancy. */
  ulong txn_idx[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */

  /* PoH verify. */
  fd_hash_t poh_hash[ 1 ]; /* running end_hash of last parsed mblk */
  int last_mblk_is_tick;
  mblk_slist_t mblks_unhashed[ 1 ]; /* A microblock, once parsed out, is in one of these queues. It
                                       generally progresses from unhashed to hashing to mixin. When a
                                       microblock is being hashed/in-flight, it'll be transiently out of
                                       any of the queues. Once a microblock progresses through all stages
                                       of work, it'll be immediately freed. */
  mblk_slist_t mblks_hashing_in_progress[ 1 ];
  mblk_slist_t mblks_mixin_in_progress[ 1 ];
  uchar bmtree_mem[ FD_BMTREE_COMMIT_FOOTPRINT(0) ] __attribute__((aligned(FD_BMTREE_COMMIT_ALIGN)));
  fd_bmtree_commit_t * bmtree;
  ulong max_tick_hashcnt;
  ulong curr_tick_hashcnt; /* Starts at 0, accumulates hashcnt, resets to 0 on the next tick. */
  ulong tick_height;       /* Block is built off of a parent block with this many ticks. */
  ulong max_tick_height;   /* Block should end with precisely this many ticks. */
  ulong hashes_per_tick;   /* Fixed per block, feature gated, known after bank clone. */
  int inconsistent_hashes_per_tick;

  /* Parser state. */
  uchar txn[ FD_TXN_MAX_SZ ] __attribute__((aligned(alignof(fd_txn_t))));
  ulong mblks_rem;    /* Number of microblocks remaining in the current batch. */
  ulong txns_rem;     /* Number of transactions remaining in the current microblock. */
  uint fec_buf_sz;    /* Size of the fec_buf in bytes. */
  uint fec_buf_soff;  /* Starting offset into fec_buf for unparsed transactions. */
  uint fec_buf_boff;  /* Byte offset into raw block data of the first byte currently in fec_buf */
  uint fec_eob:1;     /* FEC end-of-batch: set if the last FEC set in the batch is being
                         ingested. */
  uint fec_sob:1;     /* FEC start-of-batch: set if the parser expects to be receiving a new
                         batch. */

  /* Block state. */
  uint fec_eos:1; /* FEC end-of-stream: set if the last FEC set in the block has been
                     ingested. */
  uint rooted:1;  /* Set if the block is rooted. */
  uint dying:1;   /* Set if the block has been abandoned and no transactions should be
                     scheduled from it. */
  uint refcnt:1;  /* Starts at 1 when the block is added, set to 0 if caller has been
                     informed to decrement refcnt for sched. */
  uint in_sched:1; /* Set if the block is being tracked by the scheduler. */
  uint in_rdisp:1; /* Set if the block is being tracked by the dispatcher, either as staged
                      or unstaged. */
  uint block_start_signaled:1; /* Set if the start-of-block sentinel has been dispatched. */
  uint block_end_signaled:1;   /* Set if the end-of-block sentinel has been dispatched. */
  uint block_start_done:1;     /* Set if the start-of-block processing has been completed. */
  uint block_end_done:1;       /* Set if the end-of-block processing has been completed. */
  uint staged:1;               /* Set if the block is in a dispatcher staging lane; a staged block is
                                  tracked by the dispatcher. */
  ulong staging_lane; /* Ignored if staged==0. */
  ulong luf_depth;    /* Depth of longest unstaged fork starting from this node; only
                         stageable unstaged descendants are counted. */
  uchar fec_buf[ FD_SCHED_MAX_FEC_BUF_SZ ]; /* The previous FEC set could have some residual data that only becomes
                                               parseable after the next FEC set is ingested. */
  uint shred_blk_offs[ FD_SHRED_BLK_MAX ]; /* The byte offsets into block data of ingested shreds */
};
typedef struct fd_sched_block fd_sched_block_t;
170 :
171 : FD_STATIC_ASSERT( sizeof(fd_hash_t)==sizeof(((fd_microblock_hdr_t *)0)->hash), unexpected poh hash size );
172 :
173 :
/* fd_sched_metrics_t: monotonically increasing diagnostic counters,
   dumped verbatim by print_metrics().  uint counters track block /
   lane lifecycle events; ulong counters track per-txn / per-byte
   volumes that can realistically overflow 32 bits. */
struct fd_sched_metrics {
  uint block_added_cnt;
  uint block_added_staged_cnt;
  uint block_added_unstaged_cnt;
  uint block_added_dead_ood_cnt;
  uint block_removed_cnt;
  uint block_abandoned_cnt;
  uint block_bad_cnt;
  uint block_promoted_cnt;
  uint block_demoted_cnt;
  uint deactivate_no_child_cnt;
  uint deactivate_no_txn_cnt;
  uint deactivate_pruned_cnt;
  uint deactivate_abandoned_cnt;
  uint lane_switch_cnt;
  uint lane_promoted_cnt;
  uint lane_demoted_cnt;
  uint fork_observed_cnt;
  uint alut_success_cnt;
  uint alut_serializing_cnt;
  uint txn_abandoned_parsed_cnt;
  uint txn_abandoned_exec_done_cnt;
  uint txn_abandoned_done_cnt;
  uint txn_max_in_flight_cnt;
  ulong txn_weighted_in_flight_cnt;
  ulong txn_weighted_in_flight_tickcount;
  ulong txn_none_in_flight_tickcount;
  ulong txn_parsed_cnt;
  ulong txn_exec_done_cnt;
  ulong txn_sigverify_done_cnt;
  ulong txn_mixin_done_cnt;
  ulong txn_done_cnt;
  ulong mblk_parsed_cnt;
  ulong mblk_poh_hashed_cnt;
  ulong mblk_poh_done_cnt;
  ulong bytes_ingested_cnt;
  ulong bytes_ingested_unparsed_cnt;
  ulong bytes_dropped_cnt;
  ulong fec_cnt;
};
typedef struct fd_sched_metrics fd_sched_metrics_t;
215 :
216 : #define DEQUE_NAME ref_q
217 0 : #define DEQUE_T ulong
218 : #include "../../util/tmpl/fd_deque_dynamic.c"
219 :
/* fd_sched_t is the top-level scheduler object.  It owns the flat
   txn/mblk/block pools, the dispatcher (rdisp), per-task-type ready
   bitsets, staging-lane state, and done-bitsets indexed by txn pool
   index.  `canary` holds FD_SCHED_MAGIC and is used to detect
   corruption/uninitialized use. */
struct fd_sched {
  fd_acct_addr_t aluts[ 256 ]; /* Resolve ALUT accounts into this buffer for more parallelism. */
  char print_buf[ FD_SCHED_MAX_PRINT_BUF_SZ ]; /* Accumulation buffer for the print_* debug dump helpers. */
  ulong print_buf_sz;
  fd_chkdup_t chkdup[ 1 ];
  fd_sched_metrics_t metrics[ 1 ];
  ulong canary;        /* == FD_SCHED_MAGIC */
  ulong depth;         /* Immutable. */
  ulong block_cnt_max; /* Immutable. */
  ulong exec_cnt;      /* Immutable. */
  long txn_in_flight_last_tick;
  long next_ready_last_tick;
  ulong next_ready_last_bank_idx;
  ulong root_idx;
  fd_rdisp_t * rdisp;
  ulong txn_exec_ready_bitset[ 1 ];
  ulong sigverify_ready_bitset[ 1 ];
  ulong poh_ready_bitset[ 1 ];
  ulong active_bank_idx; /* Index of the actively replayed block, or ULONG_MAX if no block is
                            actively replayed; has to have a transaction to dispatch; staged
                            blocks that have no transactions to dispatch are not eligible for
                            being active. */
  ulong last_active_bank_idx;
  ulong staged_bitset; /* Bit i set if staging lane i is occupied. */
  ulong staged_head_bank_idx[ FD_SCHED_MAX_STAGING_LANES ]; /* Head of the linear chain in each staging lane, ignored if bit i is
                                                               not set in the bitset. */
  ulong staged_popcnt_wmk;
  ulong txn_pool_free_cnt;
  fd_txn_p_t * txn_pool;               /* Just a flat array. */
  fd_sched_txn_info_t * txn_info_pool; /* Just a flat array. */
  fd_sched_mblk_t * mblk_pool;         /* Just a flat array. */
  ulong mblk_pool_free_cnt;
  uint mblk_pool_free_head; /* Head of the mblk free list (uint indices, see free_mblk). */
  ulong tile_to_bank_idx[ FD_SCHED_MAX_EXEC_TILE_CNT ]; /* Index of the bank that the exec tile is executing against. */
  txn_bitset_t exec_done_set[ txn_bitset_word_cnt ];      /* Indexed by txn_idx. */
  txn_bitset_t sigverify_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
  txn_bitset_t poh_mixin_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
  fd_sched_block_t * block_pool; /* Just a flat array. */
  ulong block_pool_popcnt;
  ulong * ref_q;
};
typedef struct fd_sched fd_sched_t;
262 :
263 :
264 : /* Internal helpers. */
265 :
266 : static int
267 : verify_ticks_eager( fd_sched_block_t * block );
268 :
269 : static int
270 : verify_ticks_final( fd_sched_block_t * block );
271 :
272 : static void
273 : add_block( fd_sched_t * sched,
274 : ulong bank_idx,
275 : ulong parent_bank_idx );
276 :
277 : FD_WARN_UNUSED static int
278 : fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );
279 :
280 : FD_WARN_UNUSED static int
281 : fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );
282 :
283 : static void
284 : dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );
285 :
286 : static void
287 : dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );
288 :
289 : FD_WARN_UNUSED static int
290 : maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block );
291 :
292 : static void
293 0 : free_mblk( fd_sched_t * sched, fd_sched_block_t * block, uint mblk_idx ) {
294 0 : sched->mblk_pool[ mblk_idx ].next = sched->mblk_pool_free_head;
295 0 : sched->mblk_pool_free_head = mblk_idx;
296 0 : sched->mblk_pool_free_cnt++;
297 0 : block->mblk_freed_cnt++;
298 0 : }
299 :
300 : static void
301 0 : free_mblk_slist( fd_sched_t * sched, fd_sched_block_t * block, mblk_slist_t * list ) {
302 0 : while( !mblk_slist_is_empty( list, sched->mblk_pool ) ) {
303 0 : uint idx = (uint)mblk_slist_idx_pop_head( list, sched->mblk_pool );
304 0 : free_mblk( sched, block, idx );
305 0 : }
306 0 : }
307 :
308 : static void
309 : try_activate_block( fd_sched_t * sched );
310 :
311 : static void
312 : check_or_set_active_block( fd_sched_t * sched );
313 :
314 : static void
315 : subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block );
316 :
317 : static void
318 : subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx );
319 :
320 : static void
321 : maybe_switch_block( fd_sched_t * sched, ulong bank_idx );
322 :
323 : FD_FN_UNUSED static ulong
324 : find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx );
325 :
326 : static ulong
327 : compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx );
328 :
329 : static ulong
330 : stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx );
331 :
332 : static int
333 : lane_is_demotable( fd_sched_t * sched, int lane_idx );
334 :
335 : static ulong
336 : demote_lane( fd_sched_t * sched, int lane_idx );
337 :
338 : static inline fd_sched_block_t *
339 0 : block_pool_ele( fd_sched_t * sched, ulong idx ) {
340 0 : FD_TEST( idx<sched->block_cnt_max || idx==ULONG_MAX );
341 0 : return idx==ULONG_MAX ? NULL : sched->block_pool+idx;
342 0 : }
343 :
344 : FD_FN_UNUSED static inline int
345 0 : block_is_void( fd_sched_block_t * block ) {
346 0 : /* We've seen everything in the block and no transaction got parsed
347 0 : out. */
348 0 : return block->fec_eos && block->txn_parsed_cnt==0;
349 0 : }
350 :
/* block_should_signal_end returns 1 iff the end-of-block sentinel is
   due for dispatch: the block is fully ingested, all parsed txns are
   done, all microblocks have been hashed, start-of-block processing
   has completed, and the end sentinel hasn't already been sent. */
static inline int
block_should_signal_end( fd_sched_block_t * block ) {
  /* Under the current policy of eager synchronous PoH mixin, hashing
     done plus fec_eos imply that all mixins have been done. */
  /* Invariant check: if the block is at end-of-stream and every
     microblock has been hashed, every hash comparison must also have
     completed; otherwise abort loudly. */
  if( FD_UNLIKELY( !( !block->fec_eos || ((block->mblk_cnt==block->poh_hashing_done_cnt&&block->mblk_cnt==block->poh_hash_cmp_done_cnt)||block->mblk_cnt!=block->poh_hashing_done_cnt) ) ) ) FD_LOG_CRIT(( "invariant violation: slot %lu fec_eos %d mblk_cnt %u poh_hashing_done_cnt %u poh_hash_cmp_done_cnt %u", block->slot, block->fec_eos, block->mblk_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt ));
  return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hashing_done_cnt && block->block_start_done && !block->block_end_signaled;
}
358 :
359 : static inline int
360 0 : block_will_signal_end( fd_sched_block_t * block ) {
361 0 : return block->fec_eos && !block->block_end_signaled;
362 0 : }
363 :
364 : /* Is there something known to be dispatchable in the block? This is an
365 : important liveness property. A block that doesn't contain any known
366 : dispatchable tasks will be deactivated or demoted. */
367 : static inline int
368 0 : block_is_dispatchable( fd_sched_block_t * block ) {
369 0 : ulong exec_queued_cnt = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
370 0 : ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
371 0 : ulong poh_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
372 0 : return exec_queued_cnt>0UL ||
373 0 : sigverify_queued_cnt>0UL ||
374 0 : poh_queued_cnt>0UL ||
375 0 : !block->block_start_signaled ||
376 0 : block_will_signal_end( block );
377 0 : }
378 :
379 : static inline int
380 0 : block_is_in_flight( fd_sched_block_t * block ) {
381 0 : return block->txn_exec_in_flight_cnt || block->txn_sigverify_in_flight_cnt || block->poh_hashing_in_flight_cnt || (block->block_end_signaled && !block->block_end_done);
382 0 : }
383 :
384 : static inline int
385 0 : block_is_done( fd_sched_block_t * block ) {
386 0 : return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hash_cmp_done_cnt && block->block_start_done && block->block_end_done;
387 0 : }
388 :
389 : static inline int
390 0 : block_is_stageable( fd_sched_block_t * block ) {
391 0 : int rv = !block_is_done( block ) && !block->dying;
392 0 : if( FD_UNLIKELY( rv && !block->in_rdisp ) ) {
393 : /* Invariant: stageable blocks may be currently staged or unstaged,
394 : but must be in the dispatcher either way. When a block
395 : transitions to DONE, it will be immediately removed from the
396 : dispatcher. When a block transitions to DYING, it will be
397 : eventually abandoned from the dispatcher. */
398 0 : FD_LOG_CRIT(( "invariant violation: stageable block->in_rdisp==0, txn_parsed_cnt %u, txn_done_cnt %u, fec_eos %u,, slot %lu, parent slot %lu",
399 0 : block->txn_parsed_cnt, block->txn_done_cnt, (uint)block->fec_eos, block->slot, block->parent_slot ));
400 0 : }
401 0 : return rv;
402 0 : }
403 :
404 : static inline int
405 0 : block_is_promotable( fd_sched_block_t * block ) {
406 0 : return block_is_stageable( block ) && block_is_dispatchable( block ) && !block->staged;
407 0 : }
408 :
409 : static inline int
410 0 : block_is_demotable( fd_sched_block_t * block ) {
411 : /* A block can only be demoted from rdisp if it is empty, meaning no
412 : PENDING, READY, or DISPATCHED transactions. This is equivalent to
413 : having no in-flight transactions (DISPATCHED) and no queued
414 : transactions (PENDING or READY). This function actually implements
415 : a stronger requirement. We consider a block demotable only if
416 : there are no in-flight or queued tasks of any kind. */
417 0 : return !block_is_in_flight( block ) && !block_is_dispatchable( block ) && block->staged;
418 0 : }
419 :
420 : static inline int
421 0 : block_is_activatable( fd_sched_block_t * block ) {
422 0 : return block_is_stageable( block ) && block_is_dispatchable( block ) && block->staged;
423 0 : }
424 :
425 : static inline int
426 0 : block_should_deactivate( fd_sched_block_t * block ) {
427 : /* We allow a grace period, during which a block has nothing to
428 : dispatch, but has something in-flight. The block is allowed to
429 : stay activated and ingest FEC sets during this time. The block
430 : will be deactivated if there's still nothing to dispatch by the
431 : time all in-flight tasks are completed. */
432 0 : return !block_is_activatable( block ) && !block_is_in_flight( block );
433 0 : }
434 :
435 : static inline int
436 0 : block_is_prunable( fd_sched_block_t * block ) {
437 0 : return !block->in_rdisp && !block_is_in_flight( block );
438 0 : }
439 :
440 : static int
441 0 : subtree_is_prunable( fd_sched_t * sched, fd_sched_block_t * block ) {
442 0 : if( FD_UNLIKELY( !block_is_prunable( block ) ) ) return 0;
443 :
444 0 : ulong child_idx = block->child_idx;
445 0 : while( child_idx!=ULONG_MAX ) {
446 0 : fd_sched_block_t * child_block = block_pool_ele( sched, child_idx );
447 0 : if( FD_UNLIKELY( !subtree_is_prunable( sched, child_block ) ) ) {
448 0 : return 0;
449 0 : }
450 0 : child_idx = child_block->sibling_idx;
451 0 : }
452 :
453 0 : return 1;
454 0 : }
455 :
456 : static inline ulong
457 0 : block_to_idx( fd_sched_t * sched, fd_sched_block_t * block ) { return (ulong)(block-sched->block_pool); }
458 :
459 : __attribute__((format(printf,2,3)))
460 : static void
461 : fd_sched_printf( fd_sched_t * sched,
462 : char const * fmt,
463 0 : ... ) {
464 0 : va_list ap;
465 0 : ulong len;
466 0 : va_start( ap, fmt );
467 0 : int ret = vsnprintf( sched->print_buf+sched->print_buf_sz,
468 0 : FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz,
469 0 : fmt, ap );
470 0 : va_end( ap );
471 0 : len = fd_ulong_if( ret<0, 0UL, fd_ulong_min( (ulong)ret, FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz-1UL ) );
472 0 : sched->print_buf[ sched->print_buf_sz+len ] = '\0';
473 0 : sched->print_buf_sz += len;
474 0 : }
475 :
476 : FD_FN_UNUSED static void
477 0 : print_histogram( fd_sched_t * sched, fd_histf_t * hist, ulong converter, char * title ) {
478 0 : fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
479 0 : fd_sched_printf( sched, " | %-19s | | Count |\n", title );
480 0 : fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
481 0 :
482 0 : ulong total_count = 0;
483 0 : for( ulong i=0UL; i<fd_histf_bucket_cnt( hist ); i++ ) {
484 0 : total_count += fd_histf_cnt( hist, i );
485 0 : }
486 0 :
487 0 : for( ulong i=0UL; i< fd_histf_bucket_cnt( hist ); i++ ) {
488 0 : ulong bucket_count = fd_histf_cnt( hist, i );
489 0 :
490 0 : char * lt_str;
491 0 : char lt_buf[ 64 ];
492 0 : if( FD_UNLIKELY( i==fd_histf_bucket_cnt( hist )-1UL ) ) {
493 0 : lt_str = "+Inf";
494 0 : } else {
495 0 : ulong edge = fd_histf_right( hist, i );
496 0 : if( converter==FD_METRICS_CONVERTER_NANOSECONDS ) {
497 0 : edge = fd_metrics_convert_ticks_to_nanoseconds( edge-1UL );
498 0 : FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu nanos", edge ) );
499 0 : } else if( converter==FD_METRICS_CONVERTER_NONE ) {
500 0 : FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu", edge-1UL ) );
501 0 : }
502 0 : lt_str = lt_buf;
503 0 : }
504 0 :
505 0 : /* Create visual bar - scale to max 20 characters. */
506 0 : char bar_buf[ 22 ];
507 0 : if( bucket_count>0UL && total_count>0UL ) {
508 0 : ulong bar_length = (bucket_count*20UL)/total_count;
509 0 : if( !bar_length ) bar_length = 1;
510 0 : for( ulong j=0UL; j<bar_length; j++ ) { bar_buf[ j ] = '*'; }
511 0 : bar_buf[ bar_length ] = '\0';
512 0 : } else {
513 0 : bar_buf[ 0 ] = '\0';
514 0 : }
515 0 :
516 0 : fd_sched_printf( sched, " | %19s | %-20s | %12lu |\n", lt_str, bar_buf, bucket_count );
517 0 : }
518 0 : }
519 :
/* print_block_metrics appends a one-line summary of a block's progress
   counters and parser state to the sched print buffer (no lifecycle
   flags; see print_block_debug for the full dump). */
FD_FN_UNUSED static void
print_block_metrics( fd_sched_t * sched, fd_sched_block_t * block ) {
  fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, fec_eos %d, rooted %d, txn_parsed_cnt %u, txn_exec_done_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, mblk_cnt %u, mblk_freed_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, mblk_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
                   block_to_idx( sched, block ), block->slot, block->parent_slot, block->fec_eos, block->rooted, block->txn_parsed_cnt, block->txn_exec_done_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->mblk_cnt, block->mblk_freed_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->mblk_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
}
525 :
/* print_block_debug appends a one-line full debug dump of a block
   (lifecycle flags, staging state, all counters, PoH tick state, and
   parser state) to the sched print buffer. */
FD_FN_UNUSED static void
print_block_debug( fd_sched_t * sched, fd_sched_block_t * block ) {
  fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, staged %d (lane %lu), dying %d, in_rdisp %d, fec_eos %d, rooted %d, block_start_signaled %d, block_end_signaled %d, block_start_done %d, block_end_done %d, txn_parsed_cnt %u, txn_exec_in_flight_cnt %u, txn_exec_done_cnt %u, txn_sigverify_in_flight_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_in_flight_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, mblk_cnt %u, mblk_freed_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, mblk_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, max_tick_hashcnt %lu, curr_tick_hashcnt %lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
                   block_to_idx( sched, block ), block->slot, block->parent_slot, block->staged, block->staging_lane, block->dying, block->in_rdisp, block->fec_eos, block->rooted, block->block_start_signaled, block->block_end_signaled, block->block_start_done, block->block_end_done, block->txn_parsed_cnt, block->txn_exec_in_flight_cnt, block->txn_exec_done_cnt, block->txn_sigverify_in_flight_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_in_flight_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->mblk_cnt, block->mblk_freed_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->mblk_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->max_tick_hashcnt, block->curr_tick_hashcnt, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
}
531 :
532 : FD_FN_UNUSED static void
533 0 : print_block_and_parent( fd_sched_t * sched, fd_sched_block_t * block ) {
534 0 : print_block_debug( sched, block );
535 0 : fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
536 0 : if( FD_LIKELY( parent ) ) print_block_debug( sched, parent );
537 0 : }
538 :
/* print_metrics appends a one-line dump of every scheduler-wide
   counter in sched->metrics to the sched print buffer. */
FD_FN_UNUSED static void
print_metrics( fd_sched_t * sched ) {
  fd_sched_printf( sched, "metrics: block_added_cnt %u, block_added_staged_cnt %u, block_added_unstaged_cnt %u, block_added_dead_ood_cnt %u, block_removed_cnt %u, block_abandoned_cnt %u, block_bad_cnt %u, block_promoted_cnt %u, block_demoted_cnt %u, deactivate_no_child_cnt %u, deactivate_no_txn_cnt %u, deactivate_pruned_cnt %u, deactivate_abandoned_cnt %u, lane_switch_cnt %u, lane_promoted_cnt %u, lane_demoted_cnt %u, fork_observed_cnt %u, alut_success_cnt %u, alut_serializing_cnt %u, txn_abandoned_parsed_cnt %u, txn_abandoned_exec_done_cnt %u, txn_abandoned_done_cnt %u, txn_max_in_flight_cnt %u, txn_weighted_in_flight_cnt %lu, txn_weighted_in_flight_tickcount %lu, txn_none_in_flight_tickcount %lu, txn_parsed_cnt %lu, txn_exec_done_cnt %lu, txn_sigverify_done_cnt %lu, txn_mixin_done_cnt %lu, txn_done_cnt %lu, mblk_parsed_cnt %lu, mblk_poh_hashed_cnt %lu, mblk_poh_done_cnt %lu, bytes_ingested_cnt %lu, bytes_ingested_unparsed_cnt %lu, bytes_dropped_cnt %lu, fec_cnt %lu\n",
                   sched->metrics->block_added_cnt, sched->metrics->block_added_staged_cnt, sched->metrics->block_added_unstaged_cnt, sched->metrics->block_added_dead_ood_cnt, sched->metrics->block_removed_cnt, sched->metrics->block_abandoned_cnt, sched->metrics->block_bad_cnt, sched->metrics->block_promoted_cnt, sched->metrics->block_demoted_cnt, sched->metrics->deactivate_no_child_cnt, sched->metrics->deactivate_no_txn_cnt, sched->metrics->deactivate_pruned_cnt, sched->metrics->deactivate_abandoned_cnt, sched->metrics->lane_switch_cnt, sched->metrics->lane_promoted_cnt, sched->metrics->lane_demoted_cnt, sched->metrics->fork_observed_cnt, sched->metrics->alut_success_cnt, sched->metrics->alut_serializing_cnt, sched->metrics->txn_abandoned_parsed_cnt, sched->metrics->txn_abandoned_exec_done_cnt, sched->metrics->txn_abandoned_done_cnt, sched->metrics->txn_max_in_flight_cnt, sched->metrics->txn_weighted_in_flight_cnt, sched->metrics->txn_weighted_in_flight_tickcount, sched->metrics->txn_none_in_flight_tickcount, sched->metrics->txn_parsed_cnt, sched->metrics->txn_exec_done_cnt, sched->metrics->txn_sigverify_done_cnt, sched->metrics->txn_mixin_done_cnt, sched->metrics->txn_done_cnt, sched->metrics->mblk_parsed_cnt, sched->metrics->mblk_poh_hashed_cnt, sched->metrics->mblk_poh_done_cnt, sched->metrics->bytes_ingested_cnt, sched->metrics->bytes_ingested_unparsed_cnt, sched->metrics->bytes_dropped_cnt, sched->metrics->fec_cnt );

}
545 :
/* print_sched appends the scheduler's top-level state to the print
   buffer, then dumps the active block (if any) and the head block of
   every occupied staging lane. */
FD_FN_UNUSED static void
print_sched( fd_sched_t * sched ) {
  fd_sched_printf( sched, "sched canary 0x%lx, exec_cnt %lu, root_idx %lu, txn_exec_ready_bitset[ 0 ] 0x%lx, sigverify_ready_bitset[ 0 ] 0x%lx, poh_ready_bitset[ 0 ] 0x%lx, active_idx %lu, staged_bitset %lu, staged_head_idx[0] %lu, staged_head_idx[1] %lu, staged_head_idx[2] %lu, staged_head_idx[3] %lu, staged_popcnt_wmk %lu, txn_pool_free_cnt %lu/%lu, block_pool_popcnt %lu/%lu\n",
                   sched->canary, sched->exec_cnt, sched->root_idx, sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ], sched->active_bank_idx, sched->staged_bitset, sched->staged_head_bank_idx[ 0 ], sched->staged_head_bank_idx[ 1 ], sched->staged_head_bank_idx[ 2 ], sched->staged_head_bank_idx[ 3 ], sched->staged_popcnt_wmk, sched->txn_pool_free_cnt, sched->depth, sched->block_pool_popcnt, sched->block_cnt_max );
  fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
  if( active_block ) print_block_debug( sched, active_block );
  for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
    if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
      fd_sched_block_t * block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
      print_block_debug( sched, block );
    }
  }
}
559 :
/* print_all appends a full debug dump to the print buffer: global
   metrics, scheduler state, then the block of interest plus its
   parent. */
FD_FN_UNUSED static void
print_all( fd_sched_t * sched, fd_sched_block_t * block ) {
  print_metrics( sched );
  print_sched( sched );
  print_block_and_parent( sched, block );
}
566 :
/* handle_bad_block is the common path for a block that failed
   verification/parsing: dump full debug state at debug level, abandon
   the entire subtree rooted at the bad block, bump the bad-block
   counter, and re-establish an active block if possible.  Note the
   print buffer is reset and fully rendered BEFORE subtree_abandon
   mutates the state being printed. */
static void
handle_bad_block( fd_sched_t * sched, fd_sched_block_t * block ) {
  sched->print_buf_sz = 0UL;
  print_all( sched, block );
  FD_LOG_DEBUG(( "%s", sched->print_buf ));
  subtree_abandon( sched, block );
  sched->metrics->block_bad_cnt++;
  check_or_set_active_block( sched );
}
576 :
577 :
578 : /* Public functions. */
579 :
580 : ulong
581 0 : fd_sched_align( void ) {
582 0 : return fd_ulong_max( alignof(fd_sched_t),
583 0 : fd_ulong_max( fd_rdisp_align(),
584 0 : fd_ulong_max( alignof(fd_sched_block_t), 64UL ))); /* Minimally cache line aligned. */
585 0 : }
586 :
/* fd_sched_footprint returns the number of bytes required for a
   scheduler with the given txn pool depth and max block count, or 0UL
   on invalid parameters.  The FD_LAYOUT_APPEND sequence here MUST stay
   in lockstep with the FD_SCRATCH_ALLOC_APPEND sequences in
   fd_sched_new and fd_sched_join. */
ulong
fd_sched_footprint( ulong depth,
                    ulong block_cnt_max ) {
  if( FD_UNLIKELY( depth<FD_SCHED_MIN_DEPTH || depth>FD_SCHED_MAX_DEPTH ) ) return 0UL; /* bad depth */
  if( FD_UNLIKELY( !block_cnt_max ) ) return 0UL; /* bad block_cnt_max */
  if( FD_UNLIKELY( depth>UINT_MAX-1UL ) ) return 0UL; /* mblk_pool use uint as pointers */

  ulong l = FD_LAYOUT_INIT;
  l = FD_LAYOUT_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) );
  l = FD_LAYOUT_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( depth, block_cnt_max ) ); /* dispatcher */
  l = FD_LAYOUT_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) ); /* block pool */
  l = FD_LAYOUT_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) );
  l = FD_LAYOUT_APPEND( l, alignof(fd_txn_p_t), depth*sizeof(fd_txn_p_t) ); /* txn_pool */
  l = FD_LAYOUT_APPEND( l, alignof(fd_sched_txn_info_t), depth*sizeof(fd_sched_txn_info_t) ); /* txn_info_pool */
  l = FD_LAYOUT_APPEND( l, alignof(fd_sched_mblk_t), depth*sizeof(fd_sched_mblk_t) ); /* mblk_pool */
  return FD_LAYOUT_FINI( l, fd_sched_align() );
}
604 :
/* fd_sched_new formats the memory region mem (which must satisfy
   fd_sched_align / fd_sched_footprint for the given depth and
   block_cnt_max) as a scheduler, seeding the dispatcher and duplicate
   checker from rng, and sizing the ready bitsets for exec_cnt exec
   tiles.  Returns mem on success, NULL (with a warning log) on bad
   parameters.  The FD_SCRATCH_ALLOC_APPEND sequence MUST stay in
   lockstep with fd_sched_footprint and fd_sched_join. */
void *
fd_sched_new( void * mem,
              fd_rng_t * rng,
              ulong depth,
              ulong block_cnt_max,
              ulong exec_cnt ) {

  if( FD_UNLIKELY( !mem ) ) {
    FD_LOG_WARNING(( "NULL mem" ));
    return NULL;
  }

  if( FD_UNLIKELY( !rng ) ) {
    FD_LOG_WARNING(( "NULL rng" ));
    return NULL;
  }

  if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, fd_sched_align() ) ) ) {
    FD_LOG_WARNING(( "misaligned mem (%p)", mem ));
    return NULL;
  }

  if( FD_UNLIKELY( depth<FD_SCHED_MIN_DEPTH || depth>FD_SCHED_MAX_DEPTH ) ) {
    FD_LOG_WARNING(( "bad depth (%lu)", depth ));
    return NULL;
  }

  if( FD_UNLIKELY( !block_cnt_max ) ) {
    FD_LOG_WARNING(( "bad block_cnt_max (%lu)", block_cnt_max ));
    return NULL;
  }

  /* mblk_pool links elements with uint indices and reserves UINT_MAX
     as the end-of-list sentinel, so depth must fit below UINT_MAX. */
  if( FD_UNLIKELY( depth>UINT_MAX-1UL ) ) {
    FD_LOG_WARNING(( "bad depth (%lu)", depth ));
    return NULL;
  }

  if( FD_UNLIKELY( !exec_cnt || exec_cnt>FD_SCHED_MAX_EXEC_TILE_CNT ) ) {
    FD_LOG_WARNING(( "bad exec_cnt (%lu)", exec_cnt ));
    return NULL;
  }

  /* Carve the region into its sub-structures.  Order mirrors
     fd_sched_footprint exactly. */
  FD_SCRATCH_ALLOC_INIT( l, mem );
  fd_sched_t * sched = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) );
  void * _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( depth, block_cnt_max ) );
  void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) );
  void * _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) );
  fd_txn_p_t * _txn_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_txn_p_t), depth*sizeof(fd_txn_p_t) );
  fd_sched_txn_info_t * _txn_info_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_txn_info_t), depth*sizeof(fd_sched_txn_info_t) );
  fd_sched_mblk_t * _mblk_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_mblk_t), depth*sizeof(fd_sched_mblk_t) );
  FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );

  sched->txn_pool = _txn_pool;
  sched->txn_info_pool = _txn_info_pool;
  sched->mblk_pool = _mblk_pool;

  fd_rdisp_new( _rdisp, depth, block_cnt_max, fd_rng_ulong( rng ) );

  /* Initialize every block pool element's intrusive microblock lists. */
  fd_sched_block_t * bpool = (fd_sched_block_t *)_bpool;
  for( ulong i=0; i<block_cnt_max; i++ ) {
    bpool[ i ].in_sched = 0;
    mblk_slist_new( bpool[ i ].mblks_unhashed );
    mblk_slist_new( bpool[ i ].mblks_hashing_in_progress );
    mblk_slist_new( bpool[ i ].mblks_mixin_in_progress );
  }

  FD_TEST( fd_chkdup_new( sched->chkdup, rng ) );

  fd_memset( sched->metrics, 0, sizeof(fd_sched_metrics_t) );
  /* LONG_MAX / ULONG_MAX mark "no prior event" for timing metrics. */
  sched->txn_in_flight_last_tick = LONG_MAX;
  sched->next_ready_last_tick = LONG_MAX;
  sched->next_ready_last_bank_idx = ULONG_MAX;

  sched->canary = FD_SCHED_MAGIC;
  sched->depth = depth;
  sched->block_cnt_max = block_cnt_max;
  sched->exec_cnt = exec_cnt;
  sched->root_idx = ULONG_MAX;
  sched->active_bank_idx = ULONG_MAX;
  sched->last_active_bank_idx = ULONG_MAX;
  sched->staged_bitset = 0UL;
  sched->staged_popcnt_wmk = 0UL;

  /* All exec tiles start out idle/ready for all three task types. */
  sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt );
  sched->sigverify_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt );
  sched->poh_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt );

  sched->txn_pool_free_cnt = depth-1UL; /* -1 because index 0 is unusable as a sentinel reserved by the dispatcher */

  /* Thread all mblk pool elements onto the free list. */
  for( ulong i=0UL; i<depth-1UL; i++ ) sched->mblk_pool[ i ].next = (uint)(i+1UL);
  sched->mblk_pool[ depth-1UL ].next = UINT_MAX;
  sched->mblk_pool_free_head = 0U;
  sched->mblk_pool_free_cnt = depth;

  txn_bitset_new( sched->exec_done_set );
  txn_bitset_new( sched->sigverify_done_set );
  txn_bitset_new( sched->poh_mixin_done_set );

  sched->block_pool_popcnt = 0UL;

  ref_q_new( _ref_q, block_cnt_max );

  return sched;
}
709 :
/* fd_sched_join joins the caller to a scheduler region previously
   formatted by fd_sched_new, rebuilding the local pointers (dispatcher
   join, ref queue join, block pool base, bitset joins) that are not
   position independent.  Crashes via FD_TEST if the canary does not
   match.  The layout walk MUST stay in lockstep with fd_sched_new /
   fd_sched_footprint. */
fd_sched_t *
fd_sched_join( void * mem ) {

  if( FD_UNLIKELY( !mem ) ) {
    FD_LOG_WARNING(( "NULL mem" ));
    return NULL;
  }

  fd_sched_t * sched = (fd_sched_t *)mem;
  FD_TEST( sched->canary==FD_SCHED_MAGIC );
  ulong depth = sched->depth;
  ulong block_cnt_max = sched->block_cnt_max;

  /* Re-derive the sub-structure addresses from the region base.  The
     unused APPENDs only advance the cursor. */
  FD_SCRATCH_ALLOC_INIT( l, mem );
  /* */ FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) );
  void * _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( depth, block_cnt_max ) );
  void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) );
  void * _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) );
  /* */ FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_txn_p_t), depth*sizeof(fd_txn_p_t) );
  /* */ FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_txn_info_t), depth*sizeof(fd_sched_txn_info_t) );
  /* */ FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_mblk_t), depth*sizeof(fd_sched_mblk_t) );
  FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );

  sched->rdisp = fd_rdisp_join( _rdisp );
  sched->ref_q = ref_q_join( _ref_q );
  sched->block_pool = _bpool;

  /* Rejoin each block's intrusive microblock lists. */
  for( ulong i=0; i<block_cnt_max; i++ ) {
    mblk_slist_join( sched->block_pool[ i ].mblks_unhashed );
    mblk_slist_join( sched->block_pool[ i ].mblks_hashing_in_progress );
    mblk_slist_join( sched->block_pool[ i ].mblks_mixin_in_progress );
  }

  txn_bitset_join( sched->exec_done_set );
  txn_bitset_join( sched->sigverify_done_set );
  txn_bitset_join( sched->poh_mixin_done_set );

  return sched;
}
749 :
750 : int
751 0 : fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec ) {
752 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
753 0 : FD_TEST( fec->bank_idx<sched->block_cnt_max );
754 0 : FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );
755 :
756 0 : if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
757 0 : sched->print_buf_sz = 0UL;
758 0 : print_metrics( sched );
759 0 : print_sched( sched );
760 0 : FD_LOG_NOTICE(( "%s", sched->print_buf ));
761 0 : FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
762 0 : }
763 :
764 0 : ulong fec_buf_sz = 0UL;
765 0 : fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );
766 0 : if( FD_LIKELY( !fec->is_first_in_block ) ) {
767 0 : fec_buf_sz += block->fec_buf_sz-block->fec_buf_soff;
768 0 : } else {
769 : /* No residual data as this is a fresh new block. */
770 0 : }
771 : /* Addition is safe and won't overflow because we checked the FEC set
772 : size above. */
773 0 : fec_buf_sz += fec->fec->data_sz;
774 : /* Assuming every transaction is min size, do we have enough free
775 : entries in the txn pool? For a more precise txn count, we would
776 : have to do some parsing. */
777 0 : return sched->txn_pool_free_cnt>=fec_buf_sz/FD_TXN_MIN_SERIALIZED_SZ && sched->mblk_pool_free_cnt>=fec_buf_sz/sizeof(fd_microblock_hdr_t);
778 0 : }
779 :
780 : ulong
781 0 : fd_sched_can_ingest_cnt( fd_sched_t * sched ) {
782 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
783 : /* Worst case, we need one byte from the incoming data to extract a
784 : transaction out of the residual data, and the rest of the incoming
785 : data contributes toward min sized transactions. */
786 0 : return fd_ulong_min( sched->txn_pool_free_cnt/FD_SCHED_MAX_TXN_PER_FEC, sched->mblk_pool_free_cnt/FD_SCHED_MAX_MBLK_PER_FEC );
787 0 : }
788 :
789 : int
790 0 : fd_sched_is_drained( fd_sched_t * sched ) {
791 0 : int nothing_inflight = sched->exec_cnt==(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ]&sched->sigverify_ready_bitset[ 0 ]&sched->poh_ready_bitset[ 0 ] );
792 0 : int nothing_queued = sched->active_bank_idx==ULONG_MAX;
793 0 : return nothing_inflight && nothing_queued;
794 0 : }
795 :
/* fd_sched_fec_ingest ingests one FEC set into the scheduler: on the
   first FEC of a block it creates the block and assigns it a staging
   lane (inheriting the parent's lane when free, allocating a new one,
   or falling back to unstaged); it then appends the payload to the
   block's parse buffer, records shred boundaries, parses out
   transactions/microblocks, and drains any completed PoH mixins.

   Returns 1 on success (including the benign case of dropping data for
   a dying block), 0 if the block was detected to be malformed and was
   abandoned.  Crashes via FD_LOG_CRIT on invariant violations that
   indicate upstream bugs (oversized FEC, out-of-order delivery, data
   after end-of-block). */
FD_WARN_UNUSED int
fd_sched_fec_ingest( fd_sched_t * sched,
                     fd_sched_fec_t * fec ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );
  FD_TEST( fec->bank_idx<sched->block_cnt_max );
  FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );
  FD_TEST( ref_q_empty( sched->ref_q ) );

  fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );

  if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
  }

  sched->metrics->fec_cnt++;

  if( FD_UNLIKELY( fec->is_first_in_block ) ) {
    /* This is a new block. */
    add_block( sched, fec->bank_idx, fec->parent_bank_idx );
    block->slot = fec->slot;
    block->parent_slot = fec->parent_slot;

    if( FD_UNLIKELY( block->dying ) ) {
      /* The child of a dead block is also dead. We added it to our
         fork tree just so we could track an entire lineage of dead
         children and propagate the dead property to the entire lineage,
         in case there were frags for more than one dead children
         in-flight at the time the parent was abandoned. That being
         said, we shouldn't need to add the dead child to the
         dispatcher. */
      sched->metrics->block_added_dead_ood_cnt++;

      /* Ignore the FEC set for a dead block. */
      sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
      return 1;
    }

    /* Try to find a staging lane for this block. */
    int alloc_lane = 0;
    fd_sched_block_t * parent_block = block_pool_ele( sched, fec->parent_bank_idx );
    if( FD_LIKELY( parent_block->staged ) ) {
      /* Parent is staged. So see if we can continue down the same
         staging lane. */
      ulong staging_lane = parent_block->staging_lane;
      ulong child_idx = parent_block->child_idx;
      /* Walk the parent's children; if any sibling already occupies
         the parent's lane, the lane is taken (marked by setting
         staging_lane to FD_RDISP_UNSTAGED). */
      while( child_idx!=ULONG_MAX ) {
        fd_sched_block_t * child = block_pool_ele( sched, child_idx );
        if( child->staged && child->staging_lane==staging_lane ) {
          /* Found a child on the same lane. So we're done. */
          staging_lane = FD_RDISP_UNSTAGED;
          break;
        }
        child_idx = child->sibling_idx;
      }
      /* No child is staged on the same lane as the parent. So stage
         this block. This is the common case. */
      if( FD_LIKELY( staging_lane!=FD_RDISP_UNSTAGED ) ) {
        block->in_rdisp = 1;
        block->staged = 1;
        block->staging_lane = staging_lane;
        fd_rdisp_add_block( sched->rdisp, fec->bank_idx, staging_lane );
        sched->metrics->block_added_cnt++;
        sched->metrics->block_added_staged_cnt++;
        FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, staging_lane ));
      } else {
        alloc_lane = 1;
      }
    } else {
      if( block_is_stageable( parent_block ) ) {
        /* Parent is unstaged but stageable. So let's be unstaged too.
           This is not only a policy decision to be lazy and not promote
           the parent at the moment, but also an important invariant
           that we maintain for deadlock freeness in the face of staging
           lane shortage. See the comments in lane eviction for how
           this invariant is relevant. */
        block->in_rdisp = 1;
        block->staged = 0;
        fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
        sched->metrics->block_added_cnt++;
        sched->metrics->block_added_unstaged_cnt++;
        FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
      } else {
        alloc_lane = 1;
      }
    }
    if( FD_UNLIKELY( alloc_lane ) ) {
      /* We weren't able to inherit the parent's staging lane. So try
         to find a new staging lane. */
      if( FD_LIKELY( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) { /* Optimize for lane available. */
        int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset );
        if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
          FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
                        lane_idx, sched->staged_bitset ));
        }
        sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
        sched->staged_head_bank_idx[ lane_idx ] = fec->bank_idx;
        sched->staged_popcnt_wmk = fd_ulong_max( sched->staged_popcnt_wmk, (ulong)fd_ulong_popcnt( sched->staged_bitset ) );
        block->in_rdisp = 1;
        block->staged = 1;
        block->staging_lane = (ulong)lane_idx;
        fd_rdisp_add_block( sched->rdisp, fec->bank_idx, block->staging_lane );
        sched->metrics->block_added_cnt++;
        sched->metrics->block_added_staged_cnt++;
        FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, block->staging_lane ));
      } else {
        /* No lanes available. */
        block->in_rdisp = 1;
        block->staged = 0;
        fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
        sched->metrics->block_added_cnt++;
        sched->metrics->block_added_unstaged_cnt++;
        FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
      }
    }
  }

  /* Track pool occupancy high-water marks for diagnostics.  txn pool
     subtracts one extra for the dispatcher's sentinel at index 0. */
  block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt - 1UL );
  block->mblk_pool_max_popcnt = fd_ulong_max( block->mblk_pool_max_popcnt, sched->depth - sched->mblk_pool_free_cnt );
  block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );

  if( FD_UNLIKELY( block->dying ) ) {
    /* Ignore the FEC set for a dead block. */
    sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
    return 1;
  }

  if( FD_UNLIKELY( !block->in_rdisp ) ) {
    /* Invariant: block must be in the dispatcher at this point. */
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }

  if( FD_UNLIKELY( block->fec_eos ) ) {
    /* This means something is wrong upstream. We're getting more FEC
       sets for a block that has already ended, or so we were told. */
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invariant violation: block->fec_eos set but getting more FEC sets, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
  }
  if( FD_UNLIKELY( block->fec_eob ) ) {
    /* If the previous FEC set ingestion and parse was successful,
       block->fec_eob should be cleared. The fact that fec_eob is set
       means that the previous batch didn't parse properly. So this is
       a bad block. We should refuse to replay down the fork. */
    FD_LOG_INFO(( "bad block: failed to parse, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
    handle_bad_block( sched, block );
    sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
    return 0;
  }
  if( FD_UNLIKELY( block->child_idx!=ULONG_MAX ) ) {
    /* This means something is wrong upstream. FEC sets are not being
       delivered in replay order. We got a child block FEC set before
       this block was completely delivered. */
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    fd_sched_block_t * child_block = block_pool_ele( sched, block->child_idx );
    print_block_debug( sched, child_block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invariant violation: block->child_idx %lu, slot %lu, parent slot %lu", block->child_idx, fec->slot, fec->parent_slot ));
  }

  FD_TEST( block->fec_buf_sz>=block->fec_buf_soff );
  if( FD_LIKELY( block->fec_buf_sz>block->fec_buf_soff ) ) {
    /* If there is residual data from the previous FEC set within the
       same batch, we move it to the beginning of the buffer and append
       the new FEC set. */
    memmove( block->fec_buf, block->fec_buf+block->fec_buf_soff, block->fec_buf_sz-block->fec_buf_soff );
  }
  /* Compact: advance the block-relative byte offset past the consumed
     prefix and rebase the buffer so unparsed bytes start at offset 0. */
  block->fec_buf_boff += block->fec_buf_soff;
  block->fec_buf_sz -= block->fec_buf_soff;
  block->fec_buf_soff = 0;
  /* Addition is safe and won't overflow because we checked the FEC
     set size above. */
  if( FD_UNLIKELY( block->fec_buf_sz+fec->fec->data_sz>FD_SCHED_MAX_FEC_BUF_SZ ) ) {
    /* In a conformant block, there shouldn't be more than a
       transaction's worth of residual data left over from the previous
       FEC set within the same batch. So if this condition doesn't
       hold, it's a bad block. Instead of crashing, we should refuse to
       replay down the fork. */
    FD_LOG_INFO(( "bad block: fec_buf_sz %u, fec->data_sz %lu, slot %lu, parent slot %lu", block->fec_buf_sz, fec->fec->data_sz, fec->slot, fec->parent_slot ));
    handle_bad_block( sched, block );
    sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
    return 0;
  }

  /* Append the new FEC set to the end of the buffer. */
  fd_memcpy( block->fec_buf+block->fec_buf_sz, fec->data, fec->fec->data_sz );
  block->fec_buf_sz += (uint)fec->fec->data_sz;
  sched->metrics->bytes_ingested_cnt += fec->fec->data_sz;

  block->fec_eob = fec->is_last_in_batch;
  block->fec_eos = fec->is_last_in_block;

  /* Record block-relative end offsets for the incoming shreds.  Only
     the first 32 per-FEC offsets are tracked exactly; beyond that,
     boundaries are interpolated assuming uniformly sized shreds, with
     the final shred pinned to the FEC payload end. */
  ulong block_sz = block->shred_cnt>0 ? block->shred_blk_offs[ block->shred_cnt-1 ] : 0UL;
  for( ulong i=0; i<fec->shred_cnt; i++ ) {
    if( FD_LIKELY( i<32UL ) ) {
      block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->shred_offs[ i ];
    } else if( FD_UNLIKELY( i!=fec->shred_cnt-1UL ) ) {
      /* We don't track shred boundaries after 32 shreds, assume they're
         sized uniformly */
      ulong num_overflow_shreds = fec->shred_cnt-32UL;
      ulong overflow_idx = i-32UL;
      ulong overflow_data_sz = fec->fec->data_sz-fec->fec->shred_offs[ 31 ];
      block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->shred_offs[ 31 ] + (uint)(overflow_data_sz / num_overflow_shreds * (overflow_idx + 1UL));
    } else {
      block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + (uint)fec->fec->data_sz;
    }
  }

  int err = fd_sched_parse( sched, block, fec->alut_ctx );

  if( FD_UNLIKELY( err==FD_SCHED_BAD_BLOCK ) ) {
    handle_bad_block( sched, block );
    sched->metrics->bytes_dropped_cnt += block->fec_buf_sz-block->fec_buf_soff;
    return 0;
  }

  if( FD_UNLIKELY( (fec->is_last_in_batch||fec->is_last_in_block) && (block->txns_rem||block->mblks_rem||block->fec_eob) ) ) {
    /* A malformed block that fails to parse out exactly as many
       transactions and microblocks as it should.

       Upon getting a last-in-batch FEC, everything in the ongoing batch
       should completely parse, and the eob flag should be reset. There
       should be no go around for a last-in-batch FEC. */
    FD_LOG_INFO(( "bad block: bytes_rem %u, txns_rem %lu, mblks_rem %lu, fec_eob %d, slot %lu, parent slot %lu", block->fec_buf_sz-block->fec_buf_soff, block->txns_rem, block->mblks_rem, block->fec_eob, block->slot, block->parent_slot ));
    handle_bad_block( sched, block );
    return 0;
  }

  if( FD_UNLIKELY( block->fec_eos && !block->last_mblk_is_tick ) ) {
    /* The last microblock should be a tick.

       Note that this early parse-time detection could cause us to throw
       a slightly different error from Agave, in the case that there are
       too few ticks, since the tick count check precedes the trailing
       entry check in Agave. That being said, ultimately a
       TRAILING_ENTRY renders a block invalid, regardless of anything
       else. */
    FD_LOG_INFO(( "bad block: TRAILING_ENTRY, slot %lu, parent slot %lu, mblk_cnt %u", block->slot, block->parent_slot, block->mblk_cnt ));
    handle_bad_block( sched, block );
    return 0;
  }

  /* We just received a FEC set, which may have made all transactions in
     a partially parsed microblock available. If this were a malformed
     block that ends in a non-tick microblock, there's not going to be a
     hashing task from the missing ending tick to drain the mixin queue.
     So we try to drain the mixin queue right here. Another option is
     to drain it at dispatch time, when we are about to dispatch the end
     of block signal, right before the check for whether block should
     end. */
  int mixin_res;
  while( (mixin_res=maybe_mixin( sched, block )) ) {
    if( FD_UNLIKELY( mixin_res==-1 ) ) {
      handle_bad_block( sched, block );
      return 0;
    }
    FD_TEST( mixin_res==1||mixin_res==2 );
  }

  /* Check if we need to set the active block. */
  check_or_set_active_block( sched );

  return 1;
}
1068 :
1069 : ulong
1070 0 : fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out ) {
1071 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1072 0 : FD_TEST( ref_q_empty( sched->ref_q ) );
1073 :
1074 0 : ulong exec_ready_bitset0 = sched->txn_exec_ready_bitset[ 0 ];
1075 0 : ulong exec_fully_ready_bitset = sched->sigverify_ready_bitset[ 0 ] & sched->poh_ready_bitset[ 0 ] & exec_ready_bitset0;
1076 0 : if( FD_UNLIKELY( !exec_fully_ready_bitset ) ) {
1077 : /* Early exit if no exec tiles available. */
1078 0 : return 0UL;
1079 0 : }
1080 :
1081 0 : if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
1082 : /* No need to try activating a block. If we're in this state,
1083 : there's truly nothing to execute. We will activate something
1084 : when we ingest a FEC set with transactions. */
1085 0 : return 0UL;
1086 0 : }
1087 :
1088 0 : out->task_type = FD_SCHED_TT_NULL;
1089 :
1090 : /* We could in theory reevaluate staging lane allocation here and do
1091 : promotion/demotion as needed. It's a policy decision to minimize
1092 : fork churn for now and just execute down the same active fork. */
1093 :
1094 0 : ulong bank_idx = sched->active_bank_idx;
1095 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1096 0 : if( FD_UNLIKELY( block_should_deactivate( block ) ) ) {
1097 0 : sched->print_buf_sz = 0UL;
1098 0 : print_all( sched, block );
1099 0 : FD_LOG_NOTICE(( "%s", sched->print_buf ));
1100 0 : FD_LOG_CRIT(( "invariant violation: active_bank_idx %lu is not activatable nor has anything in-flight", sched->active_bank_idx ));
1101 0 : }
1102 :
1103 0 : block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt - 1UL );
1104 0 : block->mblk_pool_max_popcnt = fd_ulong_max( block->mblk_pool_max_popcnt, sched->depth - sched->mblk_pool_free_cnt );
1105 0 : block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );
1106 :
1107 0 : if( FD_UNLIKELY( !block->block_start_signaled ) ) {
1108 0 : out->task_type = FD_SCHED_TT_BLOCK_START;
1109 0 : out->block_start->bank_idx = bank_idx;
1110 0 : out->block_start->parent_bank_idx = block->parent_idx;
1111 0 : out->block_start->slot = block->slot;
1112 0 : block->block_start_signaled = 1;
1113 0 : sched->next_ready_last_tick = fd_tickcount();
1114 0 : sched->next_ready_last_bank_idx = bank_idx;
1115 0 : return 1UL;
1116 0 : }
1117 :
1118 0 : ulong exec_tile_idx0 = fd_ulong_if( !!exec_fully_ready_bitset, (ulong)fd_ulong_find_lsb( exec_fully_ready_bitset ), ULONG_MAX );
1119 0 : ulong exec_queued_cnt = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
1120 0 : if( FD_LIKELY( exec_queued_cnt>0UL && fd_ulong_popcnt( exec_fully_ready_bitset ) ) ) { /* Optimize for no fork switching. */
1121 : /* Transaction execution has the highest priority. Current mainnet
1122 : block times are very much dominated by critical path transaction
1123 : execution. To achieve the fastest block replay speed, we can't
1124 : afford to make any mistake in critical path dispatching. Any
1125 : deviation from perfect critical path dispatching is basically
1126 : irrecoverable. As such, we try to keep all the exec tiles busy
1127 : with transaction execution, but we allow at most one transaction
1128 : to be in-flight per exec tile. This is to ensure that whenever a
1129 : critical path transaction completes, we have at least one exec
1130 : tile, e.g. the one that just completed said transaction, readily
1131 : available to continue executing down the critical path. */
1132 0 : out->txn_exec->txn_idx = fd_rdisp_get_next_ready( sched->rdisp, bank_idx );
1133 0 : if( FD_UNLIKELY( out->txn_exec->txn_idx==0UL ) ) {
1134 : /* There are transactions queued but none ready for execution.
1135 : This implies that there must be in-flight transactions on whose
1136 : completion the queued transactions depend. So we return and
1137 : wait for those in-flight transactions to retire. This is a
1138 : policy decision to execute as much as we can down the current
1139 : fork. */
1140 0 : if( FD_UNLIKELY( !block->txn_exec_in_flight_cnt ) ) {
1141 0 : sched->print_buf_sz = 0UL;
1142 0 : print_all( sched, block );
1143 0 : FD_LOG_NOTICE(( "%s", sched->print_buf ));
1144 0 : FD_LOG_CRIT(( "invariant violation: no ready transaction found but block->txn_exec_in_flight_cnt==0" ));
1145 0 : }
1146 :
1147 : /* Next up are PoH tasks. Same dispatching policy as sigverify
1148 : tasks. */
1149 0 : ulong poh_ready_bitset = exec_fully_ready_bitset;
1150 0 : ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
1151 0 : if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
1152 0 : dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
1153 0 : sched->next_ready_last_tick = fd_tickcount();
1154 0 : sched->next_ready_last_bank_idx = bank_idx;
1155 0 : return 1UL;
1156 0 : }
1157 :
1158 : /* Dispatch more sigverify tasks only if at least one exec tile is
1159 : executing transactions or completely idle. Allow at most one
1160 : sigverify task in-flight per tile, and only dispatch to
1161 : completely idle tiles. */
1162 0 : ulong sigverify_ready_bitset = exec_fully_ready_bitset;
1163 0 : ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
1164 0 : if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
1165 0 : dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
1166 0 : sched->next_ready_last_tick = sched->txn_info_pool[ out->txn_sigverify->txn_idx ].tick_sigverify_disp = fd_tickcount();
1167 0 : sched->next_ready_last_bank_idx = bank_idx;
1168 0 : return 1UL;
1169 0 : }
1170 0 : return 0UL;
1171 0 : }
1172 0 : out->task_type = FD_SCHED_TT_TXN_EXEC;
1173 0 : out->txn_exec->bank_idx = bank_idx;
1174 0 : out->txn_exec->slot = block->slot;
1175 0 : out->txn_exec->exec_idx = exec_tile_idx0;
1176 0 : FD_TEST( out->txn_exec->exec_idx!=ULONG_MAX );
1177 :
1178 0 : long now = fd_tickcount();
1179 0 : ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
1180 0 : ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( exec_ready_bitset0 );
1181 0 : sched->metrics->txn_none_in_flight_tickcount += fd_ulong_if( txn_exec_busy_cnt==0UL && sched->txn_in_flight_last_tick!=LONG_MAX, delta, 0UL );
1182 0 : sched->metrics->txn_weighted_in_flight_tickcount += fd_ulong_if( txn_exec_busy_cnt!=0UL, delta, 0UL );
1183 0 : sched->metrics->txn_weighted_in_flight_cnt += delta*txn_exec_busy_cnt;
1184 0 : sched->txn_in_flight_last_tick = now;
1185 :
1186 0 : sched->txn_info_pool[ out->txn_exec->txn_idx ].tick_exec_disp = now;
1187 :
1188 0 : sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_clear_bit( exec_ready_bitset0, (int)exec_tile_idx0);
1189 0 : sched->tile_to_bank_idx[ exec_tile_idx0 ] = bank_idx;
1190 :
1191 0 : block->txn_exec_in_flight_cnt++;
1192 0 : sched->metrics->txn_max_in_flight_cnt = fd_uint_max( sched->metrics->txn_max_in_flight_cnt, block->txn_exec_in_flight_cnt );
1193 :
1194 0 : ulong total_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ]&sched->sigverify_ready_bitset[ 0 ]&sched->poh_ready_bitset[ 0 ] );
1195 0 : if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
1196 0 : if( FD_UNLIKELY( block->txn_exec_in_flight_cnt+block->txn_sigverify_in_flight_cnt+block->poh_hashing_in_flight_cnt!=total_exec_busy_cnt ) ) {
1197 : /* Ideally we'd simply assert that the two sides of the equation
1198 : are equal. But abandoned blocks throw a wrench into this. We
1199 : allow abandoned blocks to have in-flight transactions that are
1200 : naturally drained while we try to dispatch from another block.
1201 : In such cases, the total number of in-flight transactions
1202 : should include the abandoned blocks too. The contract is that
1203 : blocks with in-flight transactions cannot be abandoned or
1204 : demoted from rdisp. So a dying block has to be the head of one
1205 : of the staging lanes. */
1206 : // FIXME This contract no longer true if we implement immediate
1207 : // demotion of abandoned blocks.
1208 0 : ulong total_in_flight = 0UL;
1209 0 : for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
1210 0 : if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
1211 0 : fd_sched_block_t * staged_block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
1212 0 : if( FD_UNLIKELY( block_is_in_flight( staged_block )&&!(staged_block==block||staged_block->dying) ) ) {
1213 0 : sched->print_buf_sz = 0UL;
1214 0 : print_all( sched, staged_block );
1215 0 : FD_LOG_NOTICE(( "%s", sched->print_buf ));
1216 0 : FD_LOG_CRIT(( "invariant violation: in-flight block is neither active nor dying" ));
1217 0 : }
1218 0 : total_in_flight += staged_block->txn_exec_in_flight_cnt;
1219 0 : total_in_flight += staged_block->txn_sigverify_in_flight_cnt;
1220 0 : total_in_flight += staged_block->poh_hashing_in_flight_cnt;
1221 0 : }
1222 0 : }
1223 0 : if( FD_UNLIKELY( total_in_flight!=total_exec_busy_cnt ) ) {
1224 0 : sched->print_buf_sz = 0UL;
1225 0 : print_all( sched, block );
1226 0 : FD_LOG_NOTICE(( "%s", sched->print_buf ));
1227 0 : FD_LOG_CRIT(( "invariant violation: total_in_flight %lu != total_exec_busy_cnt %lu", total_in_flight, total_exec_busy_cnt ));
1228 0 : }
1229 0 : FD_LOG_DEBUG(( "exec_busy_cnt %lu checks out", total_exec_busy_cnt ));
1230 0 : }
1231 0 : sched->next_ready_last_tick = now;
1232 0 : sched->next_ready_last_bank_idx = bank_idx;
1233 0 : return 1UL;
1234 0 : }
1235 :
1236 : /* At this point txn_queued_cnt==0 */
1237 :
1238 : /* Next up are PoH tasks. Same dispatching policy as sigverify. */
1239 0 : ulong poh_ready_bitset = exec_fully_ready_bitset;
1240 0 : ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
1241 0 : if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
1242 0 : dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
1243 0 : sched->next_ready_last_tick = fd_tickcount();
1244 0 : sched->next_ready_last_bank_idx = bank_idx;
1245 0 : return 1UL;
1246 0 : }
1247 :
1248 : /* Try to dispatch a sigverify task, but leave one exec tile idle for
1249 : critical path execution, unless there's not going to be any more
1250 : real transactions for the critical path. In the degenerate case of
1251 : only one exec tile, keep it busy. */
1252 0 : ulong sigverify_ready_bitset = exec_fully_ready_bitset;
1253 0 : ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
1254 0 : if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
1255 0 : dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
1256 0 : sched->next_ready_last_tick = sched->txn_info_pool[ out->txn_sigverify->txn_idx ].tick_sigverify_disp = fd_tickcount();
1257 0 : sched->next_ready_last_bank_idx = bank_idx;
1258 0 : return 1UL;
1259 0 : }
1260 :
1261 0 : if( FD_UNLIKELY( block_should_signal_end( block ) ) ) {
1262 0 : FD_TEST( block->block_start_signaled );
1263 0 : if( FD_UNLIKELY( verify_ticks_final( block ) ) ) {
1264 : /* Tick verification can't be done at parse time (except for
1265 : TRAILING_ENTRY), because we may not know the expected number of
1266 : hashes yet. It can't be driven by transaction dispatch or
 1267 : completion, because the block may be empty. Similarly, it can't
1268 : be driven by PoH hashing, because a bad block may simply not
1269 : have any microblocks. */
1270 0 : handle_bad_block( sched, block );
1271 0 : out->task_type = FD_SCHED_TT_MARK_DEAD;
1272 0 : out->mark_dead->bank_idx = bank_idx;
1273 0 : sched->next_ready_last_tick = fd_tickcount();
1274 0 : sched->next_ready_last_bank_idx = bank_idx;
1275 0 : return 1UL;
1276 0 : }
1277 0 : out->task_type = FD_SCHED_TT_BLOCK_END;
1278 0 : out->block_end->bank_idx = bank_idx;
1279 0 : block->block_end_signaled = 1;
1280 0 : FD_TEST( block->refcnt );
1281 0 : block->refcnt = 0;
1282 0 : FD_TEST( ref_q_avail( sched->ref_q ) );
1283 0 : ref_q_push_tail( sched->ref_q, bank_idx );
1284 0 : sched->next_ready_last_tick = fd_tickcount();
1285 0 : sched->next_ready_last_bank_idx = bank_idx;
1286 0 : return 1UL;
1287 0 : }
1288 :
1289 : /* Nothing queued for the active block. If we haven't received all
1290 : the FEC sets for it, then return and wait for more FEC sets, while
1291 : there are in-flight transactions. This is a policy decision to
1292 : minimize fork churn and allow for executing down the current fork
1293 : as much as we can. If we have received all the FEC sets for it,
1294 : then we'd still like to return and wait for the in-flight
1295 : transactions to retire, before switching to a different block.
1296 :
1297 : Either way, there should be in-flight transactions. We deactivate
1298 : the active block the moment we exhausted transactions from it. */
1299 0 : if( FD_UNLIKELY( !block_is_in_flight( block ) ) ) {
1300 0 : sched->print_buf_sz = 0UL;
1301 0 : print_all( sched, block );
1302 0 : FD_LOG_NOTICE(( "%s", sched->print_buf ));
1303 0 : FD_LOG_CRIT(( "invariant violation: expected in-flight transactions but none" ));
1304 0 : }
1305 :
1306 0 : return 0UL;
1307 0 : }
1308 :
/* fd_sched_task_done notifies the scheduler that a previously
   dispatched task of the given task_type has completed on exec tile
   exec_idx.  txn_idx identifies the transaction for TXN_EXEC and
   TXN_SIGVERIFY tasks and is ignored otherwise.  data carries a
   task-type-specific completion message; currently only POH_HASH uses
   it (an fd_execrp_poh_hash_done_msg_t).  Returns 0 on success, -1 if
   completion processing discovered a bad block (PoH hash mismatch,
   failed mixin, or bad tick counts). */
int
fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx, void * data ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );

  /* Map the completed task back to the block (bank) it belongs to. */
  ulong bank_idx = ULONG_MAX;
  switch( task_type ) {
    case FD_SCHED_TT_BLOCK_START:
    case FD_SCHED_TT_BLOCK_END: {
      (void)txn_idx;
      (void)data;
      /* Block start/end tasks are only ever in flight for the active
         block. */
      bank_idx = sched->active_bank_idx;
      break;
    }
    case FD_SCHED_TT_TXN_EXEC:
    case FD_SCHED_TT_TXN_SIGVERIFY: {
      (void)data;
      FD_TEST( txn_idx < sched->depth );
      /* tile_to_bank_idx was recorded at dispatch time. */
      bank_idx = sched->tile_to_bank_idx[ exec_idx ];
      break;
    }
    case FD_SCHED_TT_POH_HASH: {
      (void)txn_idx;
      bank_idx = sched->tile_to_bank_idx[ exec_idx ];
      break;
    }
    default: FD_LOG_CRIT(( "unsupported task_type %lu", task_type ));
  }
  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );

  /* Sanity-check the block's lifecycle state before mutating it. */
  if( FD_UNLIKELY( !block->in_sched ) ) {
    FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
                  block->slot, block->parent_slot, bank_idx ));
  }
  if( FD_UNLIKELY( !block->staged ) ) {
    /* Invariant: only staged blocks can have in-flight transactions. */
    FD_LOG_CRIT(( "invariant violation: block->staged==0, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }
  if( FD_UNLIKELY( !block->in_rdisp ) ) {
    /* Invariant: staged blocks must be in the dispatcher. */
    FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }

  /* Update high-water marks of pool occupancy for this block's
     metrics.  The -1UL mirrors the txn pool reserving one element. */
  block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt - 1UL );
  block->mblk_pool_max_popcnt = fd_ulong_max( block->mblk_pool_max_popcnt, sched->depth - sched->mblk_pool_free_cnt );
  block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );

  int exec_tile_idx = (int)exec_idx;

  switch( task_type ) {
    case FD_SCHED_TT_BLOCK_START: {
      FD_TEST( !block->block_start_done );
      block->block_start_done = 1;
      break;
    }
    case FD_SCHED_TT_BLOCK_END: {
      /* It may seem redundant to be invoking task_done() on these
         somewhat fake tasks.  But these are necessary to drive state
         transition for empty blocks or slow blocks. */
      FD_TEST( !block->block_end_done );
      block->block_end_done = 1;
      sched->print_buf_sz = 0UL;
      print_block_metrics( sched, block );
      FD_LOG_DEBUG(( "block %lu:%lu replayed fully: %s", block->slot, bank_idx, sched->print_buf ));
      break;
    }
    case FD_SCHED_TT_TXN_EXEC: {
      /* Accumulate in-flight time-weighted metrics: busy count is
         taken BEFORE this tile's ready bit is restored, so the
         interval just ended is weighted with this txn still counted as
         in flight. */
      long now = fd_tickcount();
      ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
      ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ] );
      sched->metrics->txn_weighted_in_flight_tickcount += delta;
      sched->metrics->txn_weighted_in_flight_cnt += delta*txn_exec_busy_cnt;
      sched->txn_in_flight_last_tick = now;

      sched->txn_info_pool[ txn_idx ].tick_exec_done = now;

      block->txn_exec_done_cnt++;
      block->txn_exec_in_flight_cnt--;
      /* The tile must have been busy with exec work; give its slot
         back. */
      FD_TEST( !fd_ulong_extract_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx ) );
      sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx );
      sched->metrics->txn_exec_done_cnt++;
      txn_bitset_insert( sched->exec_done_set, txn_idx );
      sched->txn_info_pool[ txn_idx ].flags |= FD_SCHED_TXN_EXEC_DONE;
      if( txn_bitset_test( sched->sigverify_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
        /* Release the txn_idx if all tasks on it are done.  This is
           guaranteed to only happen once per transaction because
           whichever one completed first would not release. */
        fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
        sched->txn_pool_free_cnt++;
        block->txn_done_cnt++;
        sched->metrics->txn_done_cnt++;
      } else {
        /* Execution is done but sigverify/mixin still pending: tell
           rdisp execution completed without freeing the txn. */
        fd_rdisp_complete_txn( sched->rdisp, txn_idx, 0 );
      }
      break;
    }
    case FD_SCHED_TT_TXN_SIGVERIFY: {
      sched->txn_info_pool[ txn_idx ].tick_sigverify_done = fd_tickcount();
      block->txn_sigverify_done_cnt++;
      block->txn_sigverify_in_flight_cnt--;
      FD_TEST( !fd_ulong_extract_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx ) );
      sched->sigverify_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx );
      sched->metrics->txn_sigverify_done_cnt++;
      txn_bitset_insert( sched->sigverify_done_set, txn_idx );
      sched->txn_info_pool[ txn_idx ].flags |= FD_SCHED_TXN_SIGVERIFY_DONE;
      if( txn_bitset_test( sched->exec_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
        /* Release the txn_idx if all tasks on it are done.  This is
           guaranteed to only happen once per transaction because
           whichever one completed first would not release.  (No
           else-branch here: exec completion already notified rdisp
           with free==0.) */
        fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
        sched->txn_pool_free_cnt++;
        block->txn_done_cnt++;
        sched->metrics->txn_done_cnt++;
      }
      break;
    }
    case FD_SCHED_TT_POH_HASH: {
      block->poh_hashing_in_flight_cnt--;
      FD_TEST( !fd_ulong_extract_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx ) );
      sched->poh_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
      /* Fold the partial hash result into the microblock's running PoH
         state.  Hashing may be split across multiple tasks. */
      fd_execrp_poh_hash_done_msg_t * msg = fd_type_pun( data );
      fd_sched_mblk_t * mblk = sched->mblk_pool+msg->mblk_idx;
      mblk->curr_hashcnt += msg->hashcnt;
      memcpy( mblk->curr_hash, msg->hash, sizeof(fd_hash_t) );
      ulong hashcnt_todo = mblk->hashcnt-mblk->curr_hashcnt;
      if( !hashcnt_todo ) {
        block->poh_hashing_done_cnt++;
        sched->metrics->mblk_poh_hashed_cnt++;
        if( FD_LIKELY( !mblk->is_tick ) ) {
          /* This is not a tick.  Enqueue for mixin. */
          mblk_slist_idx_push_tail( block->mblks_mixin_in_progress, msg->mblk_idx, sched->mblk_pool );
        } else {
          /* This is a tick.  No need to mixin.  Check the hash value
             right away.  NOTE(review): mblk fields are read after
             free_mblk below; assumes freeing only relinks the pool
             element without clobbering its contents -- confirm. */
          block->poh_hash_cmp_done_cnt++;
          sched->metrics->mblk_poh_done_cnt++;
          free_mblk( sched, block, (uint)msg->mblk_idx );
          if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk->end_hash, sizeof(fd_hash_t) ) ) ) {
            FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
            FD_BASE58_ENCODE_32_BYTES( mblk->end_hash->hash, ref_str );
            FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %lu, ours %s, claimed %s, hashcnt %lu, is_tick, slot %lu, parent slot %lu", msg->mblk_idx, our_str, ref_str, mblk->hashcnt, block->slot, block->parent_slot ));
            handle_bad_block( sched, block );
            return -1;
          }
        }
        /* Try to drain the mixin queue. */
        int mixin_res;
        while( (mixin_res=maybe_mixin( sched, block )) ) {
          if( FD_UNLIKELY( mixin_res==-1 ) ) {
            handle_bad_block( sched, block );
            return -1;
          }
          FD_TEST( mixin_res==1||mixin_res==2 );
        }
      } else {
        /* More hashing to do for this microblock: requeue it. */
        mblk_slist_idx_push_tail( block->mblks_hashing_in_progress, msg->mblk_idx, sched->mblk_pool );
      }
      if( FD_UNLIKELY( verify_ticks_eager( block ) ) ) {
        handle_bad_block( sched, block );
        return -1;
      }
      break;
    }
  }

  /* A dying block whose last in-flight task just drained can now be
     fully abandoned. */
  if( FD_UNLIKELY( block->dying && !block_is_in_flight( block ) ) ) {
    if( FD_UNLIKELY( sched->active_bank_idx==bank_idx ) ) {
      FD_LOG_CRIT(( "invariant violation: active block shouldn't be dying, bank_idx %lu, slot %lu, parent slot %lu",
                    bank_idx, block->slot, block->parent_slot ));
    }
    FD_LOG_DEBUG(( "dying block %lu drained", block->slot ));
    subtree_abandon( sched, block );
    try_activate_block( sched );
    return 0;
  }

  if( FD_UNLIKELY( !block->dying && sched->active_bank_idx!=bank_idx ) ) {
    /* Block is not dead. So we should be actively replaying it. */
    fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
    FD_LOG_CRIT(( "invariant violation: sched->active_bank_idx %lu, slot %lu, parent slot %lu, bank_idx %lu, slot %lu, parent slot %lu",
                  sched->active_bank_idx, active_block->slot, active_block->parent_slot,
                  bank_idx, block->slot, block->parent_slot ));
  }

  maybe_switch_block( sched, bank_idx );

  return 0;
}
1498 :
1499 : void
1500 0 : fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx ) {
1501 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1502 0 : FD_TEST( bank_idx<sched->block_cnt_max );
1503 :
1504 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1505 0 : if( FD_UNLIKELY( !block->in_sched ) ) {
1506 0 : FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
1507 0 : block->slot, block->parent_slot, bank_idx ));
1508 0 : }
1509 :
1510 0 : FD_LOG_INFO(( "abandoning block %lu slot %lu", bank_idx, block->slot ));
1511 0 : sched->print_buf_sz = 0UL;
1512 0 : print_all( sched, block );
1513 0 : FD_LOG_DEBUG(( "%s", sched->print_buf ));
1514 :
1515 0 : subtree_abandon( sched, block );
1516 0 : try_activate_block( sched );
1517 0 : }
1518 :
1519 : void
1520 0 : fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx, ulong slot ) {
1521 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1522 0 : FD_TEST( bank_idx<sched->block_cnt_max );
1523 :
1524 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1525 0 : add_block( sched, bank_idx, parent_bank_idx );
1526 0 : block->slot = slot;
1527 0 : block->fec_eos = 1;
1528 0 : block->block_start_signaled = 1;
1529 0 : block->block_end_signaled = 1;
1530 0 : block->block_start_done = 1;
1531 0 : block->block_end_done = 1;
1532 0 : block->refcnt = 0;
1533 0 : if( FD_LIKELY( parent_bank_idx!=ULONG_MAX ) ) {
1534 0 : fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
1535 0 : block->parent_slot = parent_block->slot;
1536 0 : }
1537 0 : if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) {
1538 : /* Assumes that a NULL parent implies the snapshot slot. */
1539 0 : block->parent_slot = ULONG_MAX;
1540 0 : block->rooted = 1;
1541 0 : sched->root_idx = bank_idx;
1542 0 : }
1543 0 : }
1544 :
1545 : void
1546 0 : fd_sched_advance_root( fd_sched_t * sched, ulong root_idx ) {
1547 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1548 0 : FD_TEST( root_idx<sched->block_cnt_max );
1549 0 : FD_TEST( sched->root_idx<sched->block_cnt_max );
1550 0 : FD_TEST( ref_q_empty( sched->ref_q ) );
1551 :
1552 0 : fd_sched_block_t * new_root = block_pool_ele( sched, root_idx );
1553 0 : fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
1554 0 : if( FD_UNLIKELY( !old_root->rooted ) ) {
1555 0 : FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
1556 0 : old_root->slot, old_root->parent_slot ));
1557 0 : }
1558 :
1559 : /* Early exit if the new root is the same as the old root. */
1560 0 : if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
1561 0 : FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
1562 0 : new_root->slot, new_root->parent_slot ));
1563 0 : return;
1564 0 : }
1565 :
1566 0 : subtree_prune( sched, sched->root_idx, root_idx );
1567 :
1568 0 : new_root->parent_idx = ULONG_MAX;
1569 0 : sched->root_idx = root_idx;
1570 0 : }
1571 :
/* fd_sched_root_notify marks root_idx and all its ancestors up to the
   current root as rooted, abandons every minority fork hanging off
   that rooted path, and reactivates a block if the active one got
   abandoned along the way.  The old root itself is not pruned here.
   No-op if root_idx is already the root. */
void
fd_sched_root_notify( fd_sched_t * sched, ulong root_idx ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );
  FD_TEST( root_idx<sched->block_cnt_max );
  FD_TEST( sched->root_idx<sched->block_cnt_max );
  FD_TEST( ref_q_empty( sched->ref_q ) );

  fd_sched_block_t * block = block_pool_ele( sched, root_idx );
  fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
  if( FD_UNLIKELY( !old_root->rooted ) ) {
    FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
                  old_root->slot, old_root->parent_slot ));
  }

  /* Early exit if the new root is the same as the old root. */
  if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
    FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
    return;
  }

  /* Mark every node from the new root up through its parents to the
     old root as being rooted.  A rootable block must be fully done and
     fully detached from the dispatcher/staging machinery. */
  fd_sched_block_t * curr = block;
  fd_sched_block_t * prev = NULL;
  while( curr ) {
    if( FD_UNLIKELY( !block_is_done( curr ) ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is not done, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    if( FD_UNLIKELY( curr->dying ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is dying, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    if( FD_UNLIKELY( curr->staged ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is staged, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    if( FD_UNLIKELY( curr->in_rdisp ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is in the dispatcher, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    curr->rooted = 1;
    prev = curr;
    curr = block_pool_ele( sched, curr->parent_idx );
  }

  /* If we didn't reach the old root, the new root is not a descendant. */
  if( FD_UNLIKELY( prev!=old_root ) ) {
    FD_LOG_CRIT(( "invariant violation: new root is not a descendant of old root, new root slot %lu, parent slot %lu, old root slot %lu, parent slot %lu",
                  block->slot, block->parent_slot, old_root->slot, old_root->parent_slot ));
  }

  /* Remember the active block so we can tell below whether the
     minority-fork abandonment deactivated it. */
  ulong old_active_bank_idx = sched->active_bank_idx;

  /* Now traverse from old root towards new root, and abandon all
     minority forks. */
  curr = old_root;
  while( curr && curr->rooted && curr!=block ) { /* curr!=block to avoid abandoning good forks. */
    fd_sched_block_t * rooted_child_block = NULL;
    ulong child_idx = curr->child_idx;
    while( child_idx!=ULONG_MAX ) {
      fd_sched_block_t * child = block_pool_ele( sched, child_idx );
      child_idx = child->sibling_idx;
      if( child->rooted ) {
        rooted_child_block = child;
      } else {
        /* This is a minority fork. */
        ulong abandoned_cnt = sched->metrics->block_abandoned_cnt;
        subtree_abandon( sched, child );
        abandoned_cnt = sched->metrics->block_abandoned_cnt-abandoned_cnt;
        if( FD_UNLIKELY( abandoned_cnt ) ) FD_LOG_DEBUG(( "abandoned %lu blocks on minority fork starting at block %lu:%lu", abandoned_cnt, child->slot, block_to_idx( sched, child ) ));
      }
    }
    curr = rooted_child_block;
  }

  /* If the active block got abandoned, we need to reset it. */
  if( sched->active_bank_idx==ULONG_MAX ) {
    sched->metrics->deactivate_pruned_cnt += fd_uint_if( old_active_bank_idx!=ULONG_MAX, 1U, 0U );
    try_activate_block( sched );
  }
}
1655 :
1656 : ulong
1657 0 : fd_sched_pruned_block_next( fd_sched_t * sched ) {
1658 0 : if( !ref_q_empty( sched->ref_q ) ) {
1659 0 : ulong bank_idx = ref_q_pop_head( sched->ref_q );
1660 0 : return bank_idx;
1661 0 : }
1662 0 : return ULONG_MAX;
1663 0 : }
1664 :
/* fd_sched_set_poh_params sets the PoH tick parameters for the block
   at bank_idx: its starting/ending tick heights (max_tick_height must
   exceed tick_height), the expected hashes per tick, and the PoH hash
   value the block's chain starts from. */
void
fd_sched_set_poh_params( fd_sched_t * sched, ulong bank_idx, ulong tick_height, ulong max_tick_height, ulong hashes_per_tick, fd_hash_t const * start_poh ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );
  FD_TEST( bank_idx<sched->block_cnt_max );
  FD_TEST( max_tick_height>tick_height );
  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
  block->tick_height = tick_height;
  block->max_tick_height = max_tick_height;
  block->hashes_per_tick = hashes_per_tick;
#if FD_SCHED_SKIP_POH
  /* No-op. */
#else
  if( FD_LIKELY( block->mblk_cnt ) ) {
    /* Fix up the first mblk's curr_hash.  Microblocks may have been
       ingested before the starting hash was known; no mblk may have
       been freed yet or the head would be gone. */
    FD_TEST( block->mblk_unhashed_cnt );
    FD_TEST( !mblk_slist_is_empty( block->mblks_unhashed, sched->mblk_pool ) );
    FD_TEST( !block->mblk_freed_cnt );
    fd_sched_mblk_t * first_mblk = sched->mblk_pool + mblk_slist_idx_peek_head( block->mblks_unhashed, sched->mblk_pool );
    memcpy( first_mblk->curr_hash, start_poh, sizeof(fd_hash_t) );
  } else {
    /* No microblocks ingested yet: stash the start hash on the block
       itself. */
    memcpy( block->poh_hash, start_poh, sizeof(fd_hash_t) );
  }
#endif
}
1689 :
1690 : fd_txn_p_t *
1691 0 : fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx ) {
1692 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1693 0 : if( FD_UNLIKELY( txn_idx>=sched->depth ) ) {
1694 0 : return NULL;
1695 0 : }
1696 0 : return sched->txn_pool+txn_idx;
1697 0 : }
1698 :
1699 : fd_sched_txn_info_t *
1700 0 : fd_sched_get_txn_info( fd_sched_t * sched, ulong txn_idx ) {
1701 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1702 0 : if( FD_UNLIKELY( txn_idx>=sched->depth ) ) {
1703 0 : return NULL;
1704 0 : }
1705 0 : return sched->txn_info_pool+txn_idx;
1706 0 : }
1707 :
1708 : fd_hash_t *
1709 0 : fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx ) {
1710 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1711 0 : FD_TEST( bank_idx<sched->block_cnt_max );
1712 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1713 0 : FD_TEST( block->fec_eos );
1714 0 : FD_TEST( block->mblk_cnt );
1715 0 : return block->poh_hash;
1716 0 : }
1717 :
1718 : uint
1719 0 : fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx ) {
1720 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1721 0 : FD_TEST( bank_idx<sched->block_cnt_max );
1722 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1723 0 : return block->shred_cnt;
1724 0 : }
1725 :
/* fd_sched_metrics_write publishes the scheduler's gauges and counters
   into the REPLAY metrics namespace.  Gauges snapshot current state;
   counters copy the monotonically increasing tallies accumulated in
   sched->metrics. */
void
fd_sched_metrics_write( fd_sched_t * sched ) {
  /* Dispatch-state gauges.  NOTE(review): LAST_DISPATCH_TIME_NANOS is
     fed a raw tickcount, not nanoseconds -- presumably converted
     downstream; confirm. */
  FD_MGAUGE_SET( REPLAY, SCHED_ACTIVE_BANK_IDX, sched->active_bank_idx );
  FD_MGAUGE_SET( REPLAY, SCHED_LAST_DISPATCH_BANK_IDX, sched->next_ready_last_bank_idx );
  FD_MGAUGE_SET( REPLAY, SCHED_LAST_DISPATCH_TIME_NANOS, fd_ulong_if( sched->next_ready_last_tick!=LONG_MAX, (ulong)sched->next_ready_last_tick, ULONG_MAX ) );
  /* Staging lane occupancy; a lane's head bank idx reads ULONG_MAX
     when the lane is unoccupied. */
  FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_POPCNT, (ulong)fd_ulong_popcnt( sched->staged_bitset ) );
  FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_POPCNT_WMK, sched->staged_popcnt_wmk );
  FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_HEAD_BANK_IDX0, fd_ulong_if( fd_ulong_extract_bit( sched->staged_bitset, 0 ), sched->staged_head_bank_idx[ 0 ], ULONG_MAX ) );
  FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_HEAD_BANK_IDX1, fd_ulong_if( fd_ulong_extract_bit( sched->staged_bitset, 1 ), sched->staged_head_bank_idx[ 1 ], ULONG_MAX ) );
  FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_HEAD_BANK_IDX2, fd_ulong_if( fd_ulong_extract_bit( sched->staged_bitset, 2 ), sched->staged_head_bank_idx[ 2 ], ULONG_MAX ) );
  FD_MGAUGE_SET( REPLAY, SCHED_STAGING_LANE_HEAD_BANK_IDX3, fd_ulong_if( fd_ulong_extract_bit( sched->staged_bitset, 3 ), sched->staged_head_bank_idx[ 3 ], ULONG_MAX ) );
  /* Pool occupancy; the txn pool reserves one element (hence -1UL). */
  FD_MGAUGE_SET( REPLAY, SCHED_TXN_POOL_POPCNT, sched->depth-sched->txn_pool_free_cnt-1UL );
  FD_MGAUGE_SET( REPLAY, SCHED_TXN_POOL_SIZE, sched->depth-1UL );
  FD_MGAUGE_SET( REPLAY, SCHED_MBLK_POOL_POPCNT, sched->depth-sched->mblk_pool_free_cnt );
  FD_MGAUGE_SET( REPLAY, SCHED_MBLK_POOL_SIZE, sched->depth );
  FD_MGAUGE_SET( REPLAY, SCHED_BLOCK_POOL_POPCNT, sched->block_pool_popcnt );
  FD_MGAUGE_SET( REPLAY, SCHED_BLOCK_POOL_SIZE, sched->block_cnt_max );

  /* Block lifecycle counters. */
  FD_MCNT_SET( REPLAY, SCHED_BLOCK_ADDED_STAGED, sched->metrics->block_added_staged_cnt );
  FD_MCNT_SET( REPLAY, SCHED_BLOCK_ADDED_UNSTAGED, sched->metrics->block_added_unstaged_cnt );
  FD_MCNT_SET( REPLAY, SCHED_BLOCK_REPLAYED, sched->metrics->block_removed_cnt );
  FD_MCNT_SET( REPLAY, SCHED_BLOCK_ABANDONED, sched->metrics->block_abandoned_cnt );
  FD_MCNT_SET( REPLAY, SCHED_BLOCK_BAD, sched->metrics->block_bad_cnt );
  FD_MCNT_SET( REPLAY, SCHED_BLOCK_PROMOTED, sched->metrics->block_promoted_cnt );
  FD_MCNT_SET( REPLAY, SCHED_BLOCK_DEMOTED, sched->metrics->block_demoted_cnt );
  /* Deactivation / lane management counters. */
  FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_NO_CHILD, sched->metrics->deactivate_no_child_cnt );
  FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_NO_WORK, sched->metrics->deactivate_no_txn_cnt );
  FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_ABANDONED, sched->metrics->deactivate_abandoned_cnt );
  FD_MCNT_SET( REPLAY, SCHED_DEACTIVATE_MINORITY, sched->metrics->deactivate_pruned_cnt );
  FD_MCNT_SET( REPLAY, SCHED_LANE_SWITCH, sched->metrics->lane_switch_cnt );
  FD_MCNT_SET( REPLAY, SCHED_LANE_PROMOTE, sched->metrics->lane_promoted_cnt );
  FD_MCNT_SET( REPLAY, SCHED_LANE_DEMOTE, sched->metrics->lane_demoted_cnt );
  FD_MCNT_SET( REPLAY, SCHED_FORK_OBSERVED, sched->metrics->fork_observed_cnt );
  FD_MCNT_SET( REPLAY, SCHED_ALUT_SUCCESS, sched->metrics->alut_success_cnt );
  FD_MCNT_SET( REPLAY, SCHED_ALUT_FAILURE, sched->metrics->alut_serializing_cnt );
  /* Transaction progress counters. */
  FD_MCNT_SET( REPLAY, SCHED_TXN_ABANDONED_PARSED, sched->metrics->txn_abandoned_parsed_cnt );
  FD_MCNT_SET( REPLAY, SCHED_TXN_ABANDONED_EXEC, sched->metrics->txn_abandoned_exec_done_cnt );
  FD_MCNT_SET( REPLAY, SCHED_TXN_ABANDONED_DONE, sched->metrics->txn_abandoned_done_cnt );
  FD_MCNT_SET( REPLAY, SCHED_WEIGHTED_IN_FLIGHT, sched->metrics->txn_weighted_in_flight_cnt );
  FD_MCNT_SET( REPLAY, SCHED_WEIGHTED_IN_FLIGHT_DURATION, sched->metrics->txn_weighted_in_flight_tickcount );
  FD_MCNT_SET( REPLAY, SCHED_NONE_IN_FLIGHT_DURATION, sched->metrics->txn_none_in_flight_tickcount );
  FD_MCNT_SET( REPLAY, SCHED_TXN_PARSED, sched->metrics->txn_parsed_cnt );
  FD_MCNT_SET( REPLAY, SCHED_TXN_EXEC, sched->metrics->txn_exec_done_cnt );
  FD_MCNT_SET( REPLAY, SCHED_TXN_SIGVERIFY, sched->metrics->txn_sigverify_done_cnt );
  FD_MCNT_SET( REPLAY, SCHED_TXN_MIXIN, sched->metrics->txn_mixin_done_cnt );
  FD_MCNT_SET( REPLAY, SCHED_TXN_DONE, sched->metrics->txn_done_cnt );
  /* Microblock / ingest counters. */
  FD_MCNT_SET( REPLAY, SCHED_MBLK_PARSED, sched->metrics->mblk_parsed_cnt );
  FD_MCNT_SET( REPLAY, SCHED_MBLK_HASHED, sched->metrics->mblk_poh_hashed_cnt );
  FD_MCNT_SET( REPLAY, SCHED_MBLK_DONE, sched->metrics->mblk_poh_done_cnt );
  FD_MCNT_SET( REPLAY, SCHED_BYTES_INGESTED, sched->metrics->bytes_ingested_cnt );
  FD_MCNT_SET( REPLAY, SCHED_BYTES_INGESTED_PADDING, sched->metrics->bytes_ingested_unparsed_cnt );
  FD_MCNT_SET( REPLAY, SCHED_BYTES_DROPPED, sched->metrics->bytes_dropped_cnt );
  FD_MCNT_SET( REPLAY, SCHED_FEC, sched->metrics->fec_cnt );
}
1780 :
1781 : char *
1782 0 : fd_sched_get_state_cstr( fd_sched_t * sched ) {
1783 0 : sched->print_buf_sz = 0UL;
1784 0 : print_metrics( sched );
1785 0 : print_sched( sched );
1786 0 : return sched->print_buf;
1787 0 : }
1788 :
1789 0 : void * fd_sched_leave ( fd_sched_t * sched ) { return sched; }
/* No-op unformat: hands the underlying memory region back to the
   caller. */
void *
fd_sched_delete( void * mem ) {
  return mem;
}
1791 :
1792 :
1793 : /* Internal helpers. */
1794 :
/* add_block initializes the block at bank_idx to a fresh just-added
   state and links it into the fork tree under parent_bank_idx
   (ULONG_MAX means no parent, i.e. a tree root).  A child of a dying
   parent is itself born dying. */
static void
add_block( fd_sched_t * sched,
           ulong bank_idx,
           ulong parent_bank_idx ) {
  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
  FD_TEST( !block->in_sched );
  sched->block_pool_popcnt++;

  /* Reset per-block progress counters. */
  block->txn_parsed_cnt = 0U;
  block->txn_exec_in_flight_cnt = 0U;
  block->txn_exec_done_cnt = 0U;
  block->txn_sigverify_in_flight_cnt = 0U;
  block->txn_sigverify_done_cnt = 0U;
  block->poh_hashing_in_flight_cnt = 0U;
  block->poh_hashing_done_cnt = 0U;
  block->poh_hash_cmp_done_cnt = 0U;
  block->txn_done_cnt = 0U;
  block->shred_cnt = 0U;
  block->mblk_cnt = 0U;
  block->mblk_freed_cnt = 0U;
  block->mblk_tick_cnt = 0U;
  block->mblk_unhashed_cnt = 0U;
  block->hashcnt = 0UL;
  /* Seed the high-water marks from current pool occupancy (the txn
     pool reserves one element, hence -1UL). */
  block->txn_pool_max_popcnt = sched->depth - sched->txn_pool_free_cnt - 1UL;
  block->mblk_pool_max_popcnt = sched->depth - sched->mblk_pool_free_cnt;
  block->block_pool_max_popcnt = sched->block_pool_popcnt;

  /* Reset the microblock work queues and PoH bookkeeping. */
  mblk_slist_remove_all( block->mblks_unhashed, sched->mblk_pool );
  mblk_slist_remove_all( block->mblks_hashing_in_progress, sched->mblk_pool );
  mblk_slist_remove_all( block->mblks_mixin_in_progress, sched->mblk_pool );
  block->last_mblk_is_tick = 0;
  block->max_tick_hashcnt = 0UL;
  block->curr_tick_hashcnt = 0UL;
  block->tick_height = ULONG_MAX;
  block->max_tick_height = ULONG_MAX;
  block->hashes_per_tick = ULONG_MAX;
  block->inconsistent_hashes_per_tick = 0;

  /* Reset the FEC-set ingest/parse state. */
  block->mblks_rem = 0UL;
  block->txns_rem = 0UL;
  block->fec_buf_sz = 0U;
  block->fec_buf_boff = 0U;
  block->fec_buf_soff = 0U;
  block->fec_eob = 0;
  block->fec_sob = 1;

  /* Reset lifecycle flags; the new block starts with one reference and
     is owned by the scheduler but not yet staged or in the
     dispatcher. */
  block->fec_eos = 0;
  block->rooted = 0;
  block->dying = 0;
  block->refcnt = 1;
  block->in_sched = 1;
  block->in_rdisp = 0;
  block->block_start_signaled = 0;
  block->block_end_signaled = 0;
  block->block_start_done = 0;
  block->block_end_done = 0;
  block->staged = 0;

  block->luf_depth = 0UL;

  /* New leaf node, no child, no sibling. */
  block->child_idx = ULONG_MAX;
  block->sibling_idx = ULONG_MAX;
  block->parent_idx = ULONG_MAX;

  if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) {
    return;
  }

  /* node->parent link */
  fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
  block->parent_idx = parent_bank_idx;

  /* parent->node and sibling->node links */
  ulong child_idx = bank_idx;
  if( FD_LIKELY( parent_block->child_idx==ULONG_MAX ) ) { /* Optimize for no forking. */
    parent_block->child_idx = child_idx;
  } else {
    /* Parent already has children: append to the end of the sibling
       list; a second child means a fork was observed. */
    fd_sched_block_t * curr_block = block_pool_ele( sched, parent_block->child_idx );
    while( curr_block->sibling_idx!=ULONG_MAX ) {
      curr_block = block_pool_ele( sched, curr_block->sibling_idx );
    }
    curr_block->sibling_idx = child_idx;
    sched->metrics->fork_observed_cnt++;
  }

  if( FD_UNLIKELY( parent_block->dying ) ) {
    block->dying = 1;
  }
}
1885 :
1886 : /* Agave invokes verify_ticks() anywhere between once per slot and once
1887 : per entry batch, before tranactions are parsed or dispatched for
1888 : execution. We can't do quite the same thing due to out-of-order
1889 : scheduling and the fact that we allow parsing to run well ahead of
1890 : block boundaries. Out-of-order scheduling is good, so is overlapping
1891 : parsing with execution. The easiest thing for us would be to just
1892 : delay verify_ticks() wholesale till the end of a slot, except that
1893 : this opens us up to bogus tick and hash counts, potentially causing
1894 : runaway consumption of compute cycles. Of all the checks that are
1895 : performed in verify_ticks(), two types are relevant to mitigating
1896 : this risk. One is constraining the number of ticks, and the other is
1897 : constraining the number of hashes per tick. So we implement these
1898 : checks here, and perform them on the fly as eagerly as possible.
1899 :
1900 : Returns 0 on success. */
1901 : static int
1902 0 : verify_ticks_eager( fd_sched_block_t * block ) {
1903 0 : FD_TEST( block->hashes_per_tick!=ULONG_MAX ); /* PoH params initialized. */
1904 :
1905 0 : if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height>block->max_tick_height ) ) {
1906 0 : FD_LOG_INFO(( "bad block: TOO_MANY_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
1907 0 : return -1;
1908 0 : }
1909 0 : if( FD_UNLIKELY( block->hashes_per_tick>1UL && block->mblk_tick_cnt && (block->hashes_per_tick!=block->max_tick_hashcnt||block->inconsistent_hashes_per_tick) ) ) {
1910 0 : FD_LOG_INFO(( "bad block: INVALID_TICK_HASH_COUNT, slot %lu, parent slot %lu, expected %lu, got %lu", block->slot, block->parent_slot, block->hashes_per_tick, block->max_tick_hashcnt ));
1911 0 : return -1;
1912 0 : }
1913 0 : if( FD_UNLIKELY( block->hashes_per_tick>1UL && block->curr_tick_hashcnt>block->hashes_per_tick ) ) { /* >1 to ignore low power hashing or no hashing cases */
1914 : /* We couldn't really check this at parse time because we may not
1915 : have the expected hashes per tick value yet. We couldn't delay
1916 : this till after all PoH hashing is done, because this would be a
1917 : DoS vector. This can't be merged with the above check, because a
1918 : malformed block might not end with a tick. As in, a block might
1919 : end with a non-tick microblock with a high hashcnt. Note that
1920 : checking the hashcnt between ticks transitively places an upper
1921 : bound on the hashcnt of individual microblocks, thus mitigating
1922 : the DoS vector. */
1923 0 : FD_LOG_INFO(( "bad block: INVALID_TICK_HASH_COUNT, observed cumulative tick_hashcnt %lu, expected %lu, slot %lu, parent slot %lu", block->curr_tick_hashcnt, block->hashes_per_tick, block->slot, block->parent_slot ));
1924 0 : return -1;
1925 0 : }
1926 :
1927 0 : return 0;
1928 0 : }
1929 :
1930 : /* https://github.com/anza-xyz/agave/blob/v3.0.6/ledger/src/blockstore_processor.rs#L1057
1931 :
1932 : The only check we don't do here is TRAILING_ENTRY, which can be done
1933 : independently when we parse the final FEC set of a block.
1934 :
1935 : Returns 0 on success. */
1936 : static int
1937 0 : verify_ticks_final( fd_sched_block_t * block ) {
1938 0 : FD_TEST( block->fec_eos );
1939 :
1940 0 : if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height<block->max_tick_height ) ) {
1941 0 : FD_LOG_INFO(( "bad block: TOO_FEW_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
1942 0 : return -1;
1943 0 : }
1944 :
1945 0 : return verify_ticks_eager( block );
1946 0 : }
1947 :
1948 0 : #define CHECK( cond ) do { \
1949 0 : if( FD_UNLIKELY( !(cond) ) ) { \
1950 0 : return FD_SCHED_AGAIN_LATER; \
1951 0 : } \
1952 0 : } while( 0 )
1953 :
1954 : /* CHECK that it is safe to read at least n more bytes. */
1955 0 : #define CHECK_LEFT( n ) CHECK( (n)<=(block->fec_buf_sz-block->fec_buf_soff) )
1956 :
/* Consume as much as possible from the buffer. By the end of this
   function, we will either have residual data that is unparseable only
   because it is a batch that straddles FEC set boundaries, or we will
   have reached the end of a batch. In the former case, any remaining
   bytes should be concatenated with the next FEC set for further
   parsing. In the latter case, any remaining bytes should be thrown
   away.

   Returns FD_SCHED_OK when parsing can make no further progress
   without more data (or the batch is fully consumed),
   FD_SCHED_AGAIN_LATER (via CHECK/fd_sched_parse_txn) when a
   minimally-parseable unit straddles the FEC boundary, or
   FD_SCHED_BAD_BLOCK when the block is provably invalid. */
FD_WARN_UNUSED static int
fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
  while( 1 ) {
    /* Drain all remaining transactions of the current microblock. */
    while( block->txns_rem>0UL ) {
      int err;
      if( FD_UNLIKELY( (err=fd_sched_parse_txn( sched, block, alut_ctx ))!=FD_SCHED_OK ) ) {
        return err;
      }
    }
    /* No transaction pending: if the batch has more microblocks, parse
       the next microblock header. */
    if( block->txns_rem==0UL && block->mblks_rem>0UL ) {
      if( FD_UNLIKELY( block->mblk_cnt>=FD_SCHED_MAX_MBLK_PER_SLOT ) ) {
        /* A valid block shouldn't contain more than this amount of
           microblocks. */
        FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, mblk_cnt %u (%u ticks) >= %lu", block->slot, block->parent_slot, block->mblk_cnt, block->mblk_tick_cnt, FD_SCHED_MAX_MBLK_PER_SLOT ));
        return FD_SCHED_BAD_BLOCK;
      }

      CHECK_LEFT( sizeof(fd_microblock_hdr_t) );
      fd_microblock_hdr_t * hdr = (fd_microblock_hdr_t *)fd_type_pun( block->fec_buf+block->fec_buf_soff );
      block->fec_buf_soff += (uint)sizeof(fd_microblock_hdr_t);

      /* Eagerly bound the claimed txn and hash counts so a malicious
         header can't cause runaway resource consumption (saturating
         subtraction handles counts already at/over the cap). */
      if( FD_UNLIKELY( hdr->txn_cnt>fd_ulong_sat_sub( FD_MAX_TXN_PER_SLOT, block->txn_parsed_cnt ) ) ) {
        FD_LOG_INFO(( "bad block: illegally many transactions specified in microblock header in slot %lu, parent slot %lu, txn_parsed_cnt %u, hdr->txn_cnt %lu", block->slot, block->parent_slot, block->txn_parsed_cnt, hdr->txn_cnt ));
        return FD_SCHED_BAD_BLOCK;
      }
      if( FD_UNLIKELY( hdr->hash_cnt>fd_ulong_sat_sub( FD_RUNTIME_MAX_HASHES_PER_TICK, block->curr_tick_hashcnt ) ) ) {
        FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, curr_tick_hashcnt %lu, hdr->hash_cnt %lu", block->slot, block->parent_slot, block->curr_tick_hashcnt, hdr->hash_cnt ));
        return FD_SCHED_BAD_BLOCK;
      }

      block->mblks_rem--;
      block->txns_rem = hdr->txn_cnt;

      /* Allocate a microblock tracking element from the free list. */
      FD_TEST( sched->mblk_pool_free_cnt ); /* can_ingest should have guaranteed sufficient free capacity. */
      uint mblk_idx = sched->mblk_pool_free_head;
      sched->mblk_pool_free_head = sched->mblk_pool[ mblk_idx ].next;
      sched->mblk_pool_free_cnt--;

      fd_sched_mblk_t * mblk = sched->mblk_pool+mblk_idx;
      mblk->start_txn_idx = block->txn_parsed_cnt;
      mblk->end_txn_idx   = mblk->start_txn_idx+hdr->txn_cnt;
      /* One might think that every microblock needs to have at least
         one hash, otherwise the block should be considered invalid. A
         vanilla validator certainly produces microblocks that conform
         to this. But a modded validator could in theory produce zero
         hash microblocks. Agave's replay stage will happily take those
         microblocks. The Agave implementation-defined way of doing PoH
         verify is as follows:

         For a tick microblock, do the same number of hashes as
         specified by the microblock. Zero hashes are allowed, in which
         case this tick would have the same ending hash value as the
         previous microblock.

         For a transaction microblock, if the number of hashes specified
         by the microblock is <= 1, then do zero pure hashes, and simply
         do a mixin/record. Otherwise, do (number of hashes-1) amount
         of pure hashing, and then do a mixin. However, note that for
         the purposes of tick_verify, the number of hashes specified by
         the microblock is taken verbatim.

         https://github.com/anza-xyz/agave/blob/v3.0.6/entry/src/entry.rs#L232

         We implement the above for consensus. */
      mblk->hashcnt = fd_ulong_sat_sub( hdr->hash_cnt, fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL ) ); /* For pure hashing, implement the above. */
      memcpy( mblk->end_hash, hdr->hash, sizeof(fd_hash_t) );
      memcpy( mblk->curr_hash, block->poh_hash, sizeof(fd_hash_t) ); /* PoH chains from the previous microblock's ending hash. */
      mblk->curr_txn_idx = mblk->start_txn_idx;
      mblk->curr_hashcnt = 0UL;
      mblk->curr_sig_cnt = 0U;
      mblk->is_tick      = !hdr->txn_cnt;

      /* Update block tracking. */
      block->curr_tick_hashcnt = fd_ulong_sat_add( hdr->hash_cnt, block->curr_tick_hashcnt ); /* For tick_verify, take the number of hashes verbatim. */
      block->hashcnt += mblk->hashcnt+fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL );
      memcpy( block->poh_hash, hdr->hash, sizeof(fd_hash_t) );
      block->last_mblk_is_tick = mblk->is_tick;
      block->mblk_cnt++;
      sched->metrics->mblk_parsed_cnt++;
      if( FD_UNLIKELY( !hdr->txn_cnt ) ) {
        /* This is a tick microblock.  Check that its cumulative
           hashcnt agrees with all previous ticks in this block; when
           the expected hashes_per_tick is not yet known (or hashing is
           low power/disabled), just record the inconsistency for the
           later verify_ticks_eager() check. */
        if( FD_UNLIKELY( block->mblk_tick_cnt && block->max_tick_hashcnt!=block->curr_tick_hashcnt ) ) {
          block->inconsistent_hashes_per_tick = 1;
          if( FD_LIKELY( block->hashes_per_tick!=ULONG_MAX && block->hashes_per_tick>1UL ) ) {
            /* >1 to ignore low power hashing or hashing disabled */
            FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, tick idx %u, max hashcnt %lu, curr hashcnt %lu, hashes_per_tick %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->max_tick_hashcnt, block->curr_tick_hashcnt, block->hashes_per_tick ));
            return FD_SCHED_BAD_BLOCK;
          }
        }
        block->max_tick_hashcnt  = fd_ulong_max( block->curr_tick_hashcnt, block->max_tick_hashcnt );
        block->curr_tick_hashcnt = 0UL; /* New tick starts accumulating from zero. */
        block->mblk_tick_cnt++;
      }
#if FD_SCHED_SKIP_POH
      /* PoH disabled at compile time: mark the microblock as fully
         hashed/verified and release it immediately. */
      block->poh_hashing_done_cnt++;
      block->poh_hash_cmp_done_cnt++;
      free_mblk( sched, block, mblk_idx );
#else
      mblk_slist_idx_push_tail( block->mblks_unhashed, mblk_idx, sched->mblk_pool );
      block->mblk_unhashed_cnt++;
#endif
      continue;
    }
    /* At the start of a batch: the first 8 bytes encode the number of
       microblocks in the batch. */
    if( block->txns_rem==0UL && block->mblks_rem==0UL && block->fec_sob ) {
      CHECK_LEFT( sizeof(ulong) );
      FD_TEST( block->fec_buf_soff==0U );
      block->mblks_rem = FD_LOAD( ulong, block->fec_buf );
      block->fec_buf_soff += (uint)sizeof(ulong);

      if( FD_UNLIKELY( block->mblks_rem>fd_ulong_sat_sub( FD_SCHED_MAX_MBLK_PER_SLOT, block->mblk_cnt ) ) ) {
        FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, mblk_cnt %u (%u ticks), hdr->mblk_cnt %lu >= %lu", block->slot, block->parent_slot, block->mblk_cnt, block->mblk_tick_cnt, block->mblks_rem, FD_SCHED_MAX_MBLK_PER_SLOT ));
        return FD_SCHED_BAD_BLOCK;
      }

      block->fec_sob = 0;
      continue;
    }
    /* Batch fully parsed: nothing left to do until more data (or a new
       batch) arrives. */
    if( block->txns_rem==0UL && block->mblks_rem==0UL ) {
      break;
    }
  }
  if( block->fec_eob ) {
    /* Ignore trailing bytes at the end of a batch, and reset buffer
       state so the next FEC set starts a fresh batch. */
    sched->metrics->bytes_ingested_unparsed_cnt += block->fec_buf_sz-block->fec_buf_soff;
    block->fec_buf_boff += block->fec_buf_sz;
    block->fec_buf_soff  = 0U;
    block->fec_buf_sz    = 0U;
    block->fec_sob       = 1;
    block->fec_eob       = 0;
  }
  return FD_SCHED_OK;
}
2096 :
/* Parse a single transaction at the current offset in the FEC buffer,
   resolve its address lookup tables, validate it for duplicate
   accounts, and insert it into the dispatcher and txn pool.

   Returns FD_SCHED_OK on success, FD_SCHED_AGAIN_LATER if the
   transaction straddles into data not yet ingested, or
   FD_SCHED_BAD_BLOCK if the block is provably invalid. */
FD_WARN_UNUSED static int
fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
  fd_txn_t * txn = fd_type_pun( block->txn );

  uchar * payload = block->fec_buf+block->fec_buf_soff;
  ulong pay_sz = 0UL;
  ulong txn_sz = fd_txn_parse_core( payload,
                                    fd_ulong_min( FD_TXN_MTU, block->fec_buf_sz-block->fec_buf_soff ),
                                    txn,
                                    NULL,
                                    &pay_sz );

  if( FD_UNLIKELY( !pay_sz || !txn_sz ) ) {
    /* Can't parse out a full transaction.  Presumably the remainder
       straddles into the next FEC set; retry after more ingest. */
    return FD_SCHED_AGAIN_LATER;
  }

  if( FD_UNLIKELY( block->txn_parsed_cnt>=FD_MAX_TXN_PER_SLOT ) ) {
    /* The block contains more transactions than a valid block would.
       Mark the block dead instead of keep processing it. */
    FD_LOG_INFO(( "bad block: illegally many transactions in slot %lu, parent slot %lu, txn_parsed_cnt %u", block->slot, block->parent_slot, block->txn_parsed_cnt ));
    return FD_SCHED_BAD_BLOCK;
  }

  /* Immediate (in-payload) vs address-lookup-table account counts. */
  ulong imm_cnt = fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_IMM );
  ulong alt_cnt = fd_txn_account_cnt( txn, FD_TXN_ACCT_CAT_ALT );

  /* Try to expand ALUTs.  If the slot hashes sysvar is unreadable, its
     contents are malformed, or table resolution fails, fall back to
     `serializing` mode rather than failing the block.  NOTE(review):
     serializing is forwarded to fd_rdisp_add_txn below — presumably it
     forces conservative serialized scheduling when the full account
     list is unknown; confirm against the fd_rdisp API. */
  int serializing = 0;
  if( alt_cnt>0UL ) {
    fd_accdb_ro_t ro[1];
    if( FD_LIKELY( fd_accdb_open_ro( alut_ctx->accdb, ro, alut_ctx->xid, &fd_sysvar_slot_hashes_id ) ) ) {
      fd_slot_hashes_t slot_hashes_view[1];
      if( FD_LIKELY( fd_sysvar_slot_hashes_view( slot_hashes_view,
                                                 fd_accdb_ref_data_const( ro ),
                                                 fd_accdb_ref_data_sz( ro ) ) ) ) {
        serializing = !!fd_runtime_load_txn_address_lookup_tables( NULL, txn, payload, alut_ctx->accdb, alut_ctx->xid, alut_ctx->els, slot_hashes_view, sched->aluts );
        sched->metrics->alut_success_cnt += (uint)!serializing;
      } else {
        serializing = 1;
      }
      fd_accdb_close_ro( alut_ctx->accdb, ro );
    } else {
      serializing = 1;
    }
  }

  /* Transactions should not have duplicate accounts.
     https://github.com/anza-xyz/agave/blob/v3.1.11/ledger/src/blockstore_processor.rs#L778-L790
     When ALUTs couldn't be resolved, only the immediate accounts are
     checked (alts is NULL and alt_cnt is zeroed). */
  fd_acct_addr_t const * imms = fd_txn_get_acct_addrs( txn, payload );
  fd_acct_addr_t * alts = (!alt_cnt||serializing) ? NULL : sched->aluts;
  alt_cnt = alts ? alt_cnt : 0UL;
  if( FD_UNLIKELY( fd_chkdup_check( sched->chkdup, imms, imm_cnt, alts, alt_cnt ) ) ) {
    FD_LOG_INFO(( "bad block: duplicate accounts in slot %lu, parent slot %lu, txn_parsed_cnt %u", block->slot, block->parent_slot, block->txn_parsed_cnt ));
    return FD_SCHED_BAD_BLOCK;
  }

  /* Hand the transaction to the dispatcher; txn_idx doubles as the
     index into the scheduler's txn pool and tracking bitsets. */
  ulong bank_idx = (ulong)(block-sched->block_pool);
  ulong txn_idx = fd_rdisp_add_txn( sched->rdisp, bank_idx, txn, payload, alts, serializing );
  FD_TEST( txn_idx!=0UL );
  sched->metrics->txn_parsed_cnt++;
  sched->metrics->alut_serializing_cnt += (uint)serializing;
  sched->txn_pool_free_cnt--;
  fd_txn_p_t * txn_p = sched->txn_pool + txn_idx;
  txn_p->payload_sz = pay_sz;

  /* Map the transaction's byte range within the block to the covering
     [start,end) shred index range via sorted search over the per-shred
     starting block offsets; the -1 adjustment selects the shred whose
     range contains the transaction's first byte. */
  txn_p->start_shred_idx = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff );
  txn_p->start_shred_idx = fd_ushort_if( txn_p->start_shred_idx>0U, (ushort)(txn_p->start_shred_idx-1U), txn_p->start_shred_idx );
  txn_p->end_shred_idx   = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff+(uint)pay_sz );

  /* Stash payload and parsed form; reset per-txn completion tracking
     and timestamps for this (reused) pool slot. */
  fd_memcpy( txn_p->payload, payload, pay_sz );
  fd_memcpy( TXN(txn_p), txn, txn_sz );
  txn_bitset_remove( sched->exec_done_set, txn_idx );
  txn_bitset_remove( sched->sigverify_done_set, txn_idx );
  txn_bitset_remove( sched->poh_mixin_done_set, txn_idx );
  sched->txn_info_pool[ txn_idx ].flags = 0UL;
  sched->txn_info_pool[ txn_idx ].txn_err = 0;
  sched->txn_info_pool[ txn_idx ].tick_parsed = fd_tickcount();
  sched->txn_info_pool[ txn_idx ].tick_sigverify_disp = LONG_MAX;
  sched->txn_info_pool[ txn_idx ].tick_sigverify_done = LONG_MAX;
  sched->txn_info_pool[ txn_idx ].tick_exec_disp = LONG_MAX;
  sched->txn_info_pool[ txn_idx ].tick_exec_done = LONG_MAX;
  block->txn_idx[ block->txn_parsed_cnt ] = txn_idx;
  block->fec_buf_soff += (uint)pay_sz;
  block->txn_parsed_cnt++;
#if FD_SCHED_SKIP_SIGVERIFY
  /* Sigverify disabled at compile time: mark it done immediately. */
  txn_bitset_insert( sched->sigverify_done_set, txn_idx );
  block->txn_sigverify_done_cnt++;
#endif
#if FD_SCHED_SKIP_POH
  /* PoH disabled at compile time: mark the mixin done immediately. */
  txn_bitset_insert( sched->poh_mixin_done_set, txn_idx );
#endif
  block->txns_rem--;
  return FD_SCHED_OK;
}
2192 :
2193 : #undef CHECK
2194 : #undef CHECK_LEFT
2195 :
2196 : static void
2197 0 : dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
2198 : /* Dispatch transactions for sigverify in parse order. */
2199 0 : out->task_type = FD_SCHED_TT_TXN_SIGVERIFY;
2200 0 : out->txn_sigverify->bank_idx = bank_idx;
2201 0 : out->txn_sigverify->txn_idx = block->txn_idx[ block->txn_sigverify_done_cnt+block->txn_sigverify_in_flight_cnt ];
2202 0 : out->txn_sigverify->exec_idx = (ulong)exec_tile_idx;
2203 0 : sched->sigverify_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx );
2204 0 : sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
2205 0 : block->txn_sigverify_in_flight_cnt++;
2206 0 : if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
2207 0 : }
2208 :
2209 : /* Assumes there is a PoH task available for dispatching. */
2210 : static void
2211 0 : dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
2212 0 : fd_sched_mblk_t * mblk = NULL;
2213 0 : uint mblk_idx;
2214 0 : if( FD_LIKELY( !mblk_slist_is_empty( block->mblks_hashing_in_progress, sched->mblk_pool ) ) ) {
2215 : /* There's a PoH task in progress, just continue working on that. */
2216 0 : mblk_idx = (uint)mblk_slist_idx_pop_head( block->mblks_hashing_in_progress, sched->mblk_pool );
2217 0 : mblk = sched->mblk_pool+mblk_idx;
2218 0 : } else {
2219 : /* No in progress PoH task, so start a new one. */
2220 0 : FD_TEST( block->mblk_unhashed_cnt );
2221 0 : mblk_idx = (uint)mblk_slist_idx_pop_head( block->mblks_unhashed, sched->mblk_pool );
2222 0 : mblk = sched->mblk_pool+mblk_idx;
2223 0 : block->mblk_unhashed_cnt--;
2224 0 : }
2225 0 : out->task_type = FD_SCHED_TT_POH_HASH;
2226 0 : out->poh_hash->bank_idx = bank_idx;
2227 0 : out->poh_hash->mblk_idx = mblk_idx;
2228 0 : out->poh_hash->exec_idx = (ulong)exec_tile_idx;
2229 0 : ulong hashcnt_todo = mblk->hashcnt-mblk->curr_hashcnt;
2230 0 : out->poh_hash->hashcnt = fd_ulong_min( hashcnt_todo, FD_SCHED_MAX_POH_HASHES_PER_TASK );
2231 0 : memcpy( out->poh_hash->hash, mblk->curr_hash, sizeof(fd_hash_t) );
2232 0 : sched->poh_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
2233 0 : sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
2234 0 : block->poh_hashing_in_flight_cnt++;
2235 0 : if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
2236 0 : }
2237 :
/* Does up to one transaction mixin. Returns 1 if one mixin was done, 2
   if that mixin also completed a microblock, 0 if no transaction mixin
   was available, -1 if there is a PoH verify error.

   A "mixin" appends the signatures of one transaction into the
   per-block bmtree; when the last transaction of a microblock has been
   mixed in, the tree root is hashed together with the running PoH hash
   and compared against the hash claimed by the microblock header. */
FD_WARN_UNUSED static int
maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block ) {
  if( FD_UNLIKELY( mblk_slist_is_empty( block->mblks_mixin_in_progress, sched->mblk_pool ) ) ) return 0;
  /* Non-empty mixin queue implies at least one microblock finished
     hashing but hasn't had its final hash compared yet. */
  FD_TEST( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt>0 );

  /* The microblock we would like to do mixin on is at the head of the
     queue. It may have had some mixin, it may have never had any
     mixin. In the case of the former, we should continue to mixin the
     same head microblock until it's done, lest the per-block bmtree
     gets clobbered when we start a new one. */
  ulong mblk_idx = mblk_slist_idx_pop_head( block->mblks_mixin_in_progress, sched->mblk_pool );
  fd_sched_mblk_t * mblk = sched->mblk_pool+mblk_idx;

  if( FD_UNLIKELY( mblk->end_txn_idx>block->txn_parsed_cnt ) ) {
    /* A partially parsed microblock is by definition at the end of the
       FEC stream. If such a microblock is in progress, there should be
       no other microblock in this block so far that hasn't been
       dispatched, because microblocks are dispatched in parse order. */
    if( FD_UNLIKELY( block->mblk_unhashed_cnt ) ) {
      sched->print_buf_sz = 0UL;
      print_all( sched, block );
      FD_LOG_CRIT(( "invariant violation end_txn_idx %lu: %s", mblk->end_txn_idx, sched->print_buf ));
    }

    /* If we've decided to start mixin on a partially parsed microblock,
       there better be nothing else in-progress. Otherwise, they might
       clobber the per-block bmtree for mixin. */
    if( FD_UNLIKELY( mblk->curr_txn_idx!=mblk->start_txn_idx && (block->poh_hashing_in_flight_cnt||!mblk_slist_is_empty( block->mblks_hashing_in_progress, sched->mblk_pool )||!mblk_slist_is_empty( block->mblks_mixin_in_progress, sched->mblk_pool )) ) ) {
      sched->print_buf_sz = 0UL;
      print_all( sched, block );
      FD_LOG_CRIT(( "invariant violation end_txn_idx %lu start_txn_idx %lu curr_txn_idx %lu: %s", mblk->end_txn_idx, mblk->start_txn_idx, mblk->curr_txn_idx, sched->print_buf ));
    }
  }

  /* Very rarely, we've finished hashing, but not all transactions in
     the microblock have been parsed out. This can happen if we haven't
     received all the FEC sets for this microblock. We can't yet fully
     mixin the microblock. So we'll stick it back into the end of the
     queue, and try to see if there's a fully parsed microblock.
     Unless, there's truly nothing else to mixin. Then we would start
     mixin with the partially parsed microblock. We do this because the
     txn pool is meant to be an OOO scheduling window not tied to
     max_live_slots sizing requirements, so there shouldn't be a way for
     external input to tie up txn pool entries for longer than
     necessary. */
  if( FD_UNLIKELY( mblk->curr_txn_idx>=block->txn_parsed_cnt || /* Nothing more to mixin for this microblock. */
                   (mblk->end_txn_idx>block->txn_parsed_cnt && /* There is something to mixin, but the microblock isn't fully parsed yet ... */
                    mblk->curr_txn_idx==mblk->start_txn_idx && /* ... and we haven't started mixin on it yet ... */
                    (block->poh_hashing_in_flight_cnt || /* ... and another microblock is in-progress and might preempt this microblock and clobber the bmtree, so we shouldn't start the partial microblock just yet. */
                     !mblk_slist_is_empty( block->mblks_hashing_in_progress, sched->mblk_pool ) ||
                     !mblk_slist_is_empty( block->mblks_mixin_in_progress, sched->mblk_pool ))) ) ) {
    mblk_slist_idx_push_tail( block->mblks_mixin_in_progress, mblk_idx, sched->mblk_pool );

    /* No other microblock in the mixin queue. */
    if( FD_UNLIKELY( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt==1 ) ) return 0;

    /* At this point, there's at least one more microblock in the mixin
       queue we could try. It's a predecessor (in parse order) that
       finished hashing later than the partially parsed microblock at
       the head of the mixin queue. */

    /* It should never clobber the bmtree for a microblock that has had some mixin done on it. */
    if( FD_UNLIKELY( mblk->curr_txn_idx!=mblk->start_txn_idx ) ) {
      sched->print_buf_sz = 0UL;
      print_all( sched, block );
      FD_LOG_CRIT(( "invariant violation curr_txn_idx %lu start_txn_idx %lu: %s", mblk->curr_txn_idx, mblk->start_txn_idx, sched->print_buf ));
    }

    mblk_idx = mblk_slist_idx_pop_head( block->mblks_mixin_in_progress, sched->mblk_pool );
    mblk = sched->mblk_pool+mblk_idx;

    /* It should be a fresh microblock for mixin. */
    FD_TEST( mblk->curr_txn_idx==mblk->start_txn_idx );
    /* Invariant: at any given point in time, there can be at most one
       microblock that hasn't been fully parsed yet, due to the nature
       of sequential parsing. So this microblock has to be fully
       parsed. */
    FD_TEST( mblk->end_txn_idx<=block->txn_parsed_cnt );
  }

  FD_TEST( mblk->curr_txn_idx<mblk->end_txn_idx );

  /* Now mixin. */
  if( FD_LIKELY( mblk->curr_txn_idx==mblk->start_txn_idx ) ) block->bmtree = fd_bmtree_commit_init( block->bmtree_mem, 32UL, 1UL, 0UL ); /* Optimize for single-transaction microblocks, which are the majority. */

  /* Append every signature of the current transaction as a leaf. */
  ulong txn_gidx = block->txn_idx[ mblk->curr_txn_idx ];
  fd_txn_p_t * _txn = sched->txn_pool+txn_gidx;
  fd_txn_t * txn = TXN(_txn);
  for( ulong j=0; j<txn->signature_cnt; j++ ) {
    fd_bmtree_node_t node[ 1 ];
    fd_bmtree_hash_leaf( node, _txn->payload+txn->signature_off+FD_TXN_SIGNATURE_SZ*j, 64UL, 1UL );
    fd_bmtree_commit_append( block->bmtree, node, 1UL );
    mblk->curr_sig_cnt++;
  }

  /* Release the txn_idx.  A txn pool slot is freed only once exec,
     sigverify, and mixin have all completed for it. */
  txn_bitset_insert( sched->poh_mixin_done_set, txn_gidx );
  sched->metrics->txn_mixin_done_cnt++;
  if( txn_bitset_test( sched->exec_done_set, txn_gidx ) && txn_bitset_test( sched->sigverify_done_set, txn_gidx ) ) {
    fd_rdisp_complete_txn( sched->rdisp, txn_gidx, 1 );
    sched->txn_pool_free_cnt++;
    block->txn_done_cnt++;
    sched->metrics->txn_done_cnt++;
  }

  mblk->curr_txn_idx++;
  int rv = 2;
  if( FD_LIKELY( mblk->curr_txn_idx==mblk->end_txn_idx ) ) {
    /* Ready to compute the final hash for this microblock: mix the
       bmtree root into the running PoH hash and compare against the
       hash claimed in the microblock header. */
    block->poh_hash_cmp_done_cnt++;
    sched->metrics->mblk_poh_done_cnt++;
    uchar * root = fd_bmtree_commit_fini( block->bmtree );
    uchar mixin_buf[ 64 ];
    fd_memcpy( mixin_buf, mblk->curr_hash, 32UL );
    fd_memcpy( mixin_buf+32UL, root, 32UL );
    fd_sha256_hash( mixin_buf, 64UL, mblk->curr_hash );
    /* NOTE(review): mblk is released back to the pool here, but its
       curr_hash/end_hash fields are still read by the memcmp and log
       below.  This relies on free_mblk leaving those fields intact
       until the element is reallocated — confirm, or consider moving
       the free after the comparison. */
    free_mblk( sched, block, (uint)mblk_idx );
    if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk->end_hash, sizeof(fd_hash_t) ) ) ) {
      FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
      FD_BASE58_ENCODE_32_BYTES( mblk->end_hash->hash, ref_str );
      FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %lu, ours %s, claimed %s, hashcnt %lu, txns [%lu,%lu), %u sigs, slot %lu, parent slot %lu", mblk_idx, our_str, ref_str, mblk->hashcnt, mblk->start_txn_idx, mblk->end_txn_idx, mblk->curr_sig_cnt, block->slot, block->parent_slot ));
      return -1;
    }
  } else {
    /* There are more transactions to mixin in this microblock. */
    mblk_slist_idx_push_head( block->mblks_mixin_in_progress, mblk_idx, sched->mblk_pool );
    rv = 1;
  }

  return rv;
}
2372 :
2373 : static void
2374 0 : try_activate_block( fd_sched_t * sched ) {
2375 : /* Early return if there's already an active block. */
2376 0 : if( FD_LIKELY( sched->active_bank_idx!=ULONG_MAX ) ) return;
2377 :
2378 : /* See if there are any allocated staging lanes that we can activate
2379 : for scheduling ... */
2380 0 : ulong staged_bitset = sched->staged_bitset;
2381 0 : while( staged_bitset ) {
2382 0 : int lane_idx = fd_ulong_find_lsb( staged_bitset );
2383 0 : staged_bitset = fd_ulong_pop_lsb( staged_bitset );
2384 :
2385 0 : ulong head_idx = sched->staged_head_bank_idx[ lane_idx ];
2386 0 : fd_sched_block_t * head_block = block_pool_ele( sched, head_idx );
2387 0 : fd_sched_block_t * parent_block = block_pool_ele( sched, head_block->parent_idx );
2388 0 : if( FD_UNLIKELY( parent_block->dying ) ) {
2389 : /* Invariant: no child of a dying block should be staged. */
2390 0 : FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, parent slot %lu on lane %d has parent_block->dying set, slot %lu, parent slot %lu",
2391 0 : head_idx, head_block->slot, head_block->parent_slot, lane_idx, parent_block->slot, parent_block->parent_slot ));
2392 0 : }
2393 : //FIXME: restore this invariant check when we have immediate demotion of dying blocks
2394 : // if( FD_UNLIKELY( head_block->dying ) ) {
2395 : // /* Invariant: no dying block should be staged. */
2396 : // FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, prime %lu on lane %u has head_block->dying set",
2397 : // head_idx, (ulong)head_block->block_id.slot, (ulong)head_block->block_id.prime, lane_idx ));
2398 : // }
2399 0 : if( block_is_done( parent_block ) && block_is_activatable( head_block ) ) {
2400 : /* ... Yes, on this staging lane the parent block is done. So we
2401 : can activate the staged child. */
2402 0 : if( FD_UNLIKELY( head_idx!=sched->last_active_bank_idx ) ) { /* Unlikely because only possible under forking or on slot boundary. */
2403 0 : if( FD_UNLIKELY( sched->last_active_bank_idx!=head_block->parent_idx ) ) { /* Forking is rare. */
2404 0 : FD_LOG_DEBUG(( "activating block %lu:%lu: lane switch to %d", head_block->slot, head_idx, lane_idx ));
2405 0 : sched->metrics->lane_switch_cnt++;
2406 0 : } else {
2407 0 : FD_LOG_DEBUG(( "activating block %lu:%lu: lane %d waking up on slot boundary", head_block->slot, head_idx, lane_idx ));
2408 0 : }
2409 0 : }
2410 0 : sched->active_bank_idx = head_idx;
2411 0 : return;
2412 0 : }
2413 0 : }
2414 :
2415 : /* ... No, promote unstaged blocks. */
2416 0 : ulong root_idx = sched->root_idx;
2417 0 : if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
2418 0 : FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
2419 0 : }
2420 : /* Find and stage the longest stageable unstaged fork. This is a
2421 : policy decision. */
2422 0 : ulong depth = compute_longest_unstaged_fork( sched, root_idx );
2423 0 : if( FD_LIKELY( depth>0UL ) ) {
2424 0 : if( FD_UNLIKELY( sched->staged_bitset==fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) {
2425 : /* No more staging lanes available. All of them are occupied by
2426 : slow squatters. Only empty blocks can be demoted, and so
2427 : blocks with in-flight transactions, including dying in-flight
2428 : blocks, shouldn't be demoted. We demote all demotable lanes.
2429 : Demotion isn't all that expensive, since demotable blocks have
2430 : no transactions in them. If a demoted block proves to be
2431 : active still, it'll naturally promote back into a staging lane.
2432 :
2433 : In fact, all lanes should be demotable at this point. None of
2434 : the lanes have anything dispatchable, otherwise we would have
2435 : simply activated one of the dispatchable lanes. None of the
2436 : lanes have anything in-flight either, as we allow for a grace
2437 : period while something is in-flight, before we deactivate any
2438 : block. In principle, we could get rid of the grace period and
2439 : deactivate right away. In that case, it's okay if nothing is
2440 : demotable at the moment, as that simply implies that all lanes
2441 : have in-flight tasks. We would get another chance to try to
2442 : demote when the last in-flight task on any lane completes.
2443 :
2444 : Another interesting side effect of the current dispatching and
2445 : lane switching policy is that each lane should have exactly one
2446 : block in it at this point. A parent block by definition can't
2447 : be partially ingested. Any parent block that is fully ingested
2448 : and dispatchable would have made the lane dispatchable, and we
2449 : wouldn't be here. Any parent that is fully ingested and fully
2450 : dispatched would be fully done after the grace period. So
2451 : there could only be one block per lane, and it is
2452 : simultaneously the head and the tail of the lane.
2453 :
2454 : A note on why this whole thing does not deadlock:
2455 :
2456 : One might reasonably wonder what happens if all the lanes are
2457 : non-empty, non-dead, but for some reason couldn't be activated
2458 : for dispatching. We would deadlock in this case, as no lane
2459 : dispatches to the point of being demotable, and no unstaged
2460 : block can be promoted. Such is not in fact possible. The only
2461 : way a dispatchable lane can be ineligible for activation is if
2462 : it has a parent block that isn't done yet. So a deadlock
2463 : happens when this parent block, or any of its dispatchable
2464 : ancestors, is unstaged. An important invariant we maintain is
2465 : that a staged block can't have an unstaged stageable parent.
2466 : This invariant, by induction, gives us the guarantee that at
2467 : least one of the lanes can be activated. */
2468 0 : for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
2469 : /* We would be able to assert that all lanes are demotable,
2470 : except that abandoned blocks are given no grace period for
2471 : deactivation. So there could be lanes transiently occupied
2472 : by dying blocks that are neither demotable (due to in-flight
2473 : tasks) nor activatable (due to being dying). If
2474 : rdisp_demote() supported non-empty blocks, then we could
2475 : probably restore the assertion. */
2476 0 : if( FD_UNLIKELY( !lane_is_demotable( sched, l ) ) ) continue;
2477 0 : ulong demoted_cnt = demote_lane( sched, l );
2478 0 : if( FD_UNLIKELY( demoted_cnt!=1UL ) ) {
2479 0 : FD_LOG_CRIT(( "invariant violation: %lu blocks demoted from lane %d, expected 1 demotion", demoted_cnt, l ));
2480 0 : }
2481 0 : sched->metrics->lane_demoted_cnt++;
2482 0 : }
2483 : /* We weren't able to successfully demote anything. This is
2484 : likely because all lanes are occupied by dying blocks with
2485 : in-flight tasks. We would get another chance to try to demote
2486 : when the last in-flight task on any lane completes. */
2487 0 : if( FD_UNLIKELY( sched->staged_bitset==fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) return;
2488 0 : }
2489 0 : FD_TEST( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) );
2490 0 : int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset );
2491 0 : if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
2492 0 : FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
2493 0 : lane_idx, sched->staged_bitset ));
2494 0 : }
2495 0 : ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
2496 0 : if( FD_UNLIKELY( head_bank_idx==ULONG_MAX ) ) {
2497 : /* We found a promotable fork depth>0. This should not happen. */
2498 0 : FD_LOG_CRIT(( "invariant violation: head_bank_idx==ULONG_MAX" ));
2499 0 : }
2500 : /* We don't bother with promotion unless the block is immediately
2501 : dispatchable. So it's okay to set the active block here. This
2502 : doesn't cause out-of-order block replay because any parent block
2503 : must be fully done. If the parent block were dead, this fork
2504 : would be marked dead too and ineligible for promotion. If the
2505 : parent block were not dead and not done and staged, we wouldn't
2506 : be trying to promote an unstaged fork. If the parent block were
2507 : not dead and not done and unstaged, it would've been part of this
2508 : unstaged fork. */
2509 0 : fd_sched_block_t * head_block = block_pool_ele( sched, head_bank_idx );
2510 0 : FD_LOG_DEBUG(( "activating block %lu:%lu: unstaged promotion to lane %d", head_block->slot, head_bank_idx, lane_idx ));
2511 0 : sched->active_bank_idx = head_bank_idx;
2512 0 : return;
2513 0 : }
2514 : /* No unstaged blocks to promote. So we're done. Yay. */
2515 0 : }
2516 :
2517 : static void
2518 0 : check_or_set_active_block( fd_sched_t * sched ) {
2519 0 : if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
2520 0 : try_activate_block( sched );
2521 0 : } else {
2522 0 : fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
2523 0 : if( FD_UNLIKELY( block_should_deactivate( active_block ) ) ) {
2524 0 : sched->print_buf_sz = 0UL;
2525 0 : print_all( sched, active_block );
2526 0 : FD_LOG_NOTICE(( "%s", sched->print_buf ));
2527 0 : FD_LOG_CRIT(( "invariant violation: should have been deactivated" ));
2528 0 : }
2529 0 : }
2530 0 : }
2531 :
/* This function has two main jobs:
   - Mark everything on the fork subtree rooted at block as dying.
   - Take blocks out of the dispatcher (rdisp) if possible, releasing
     their staging lane and returning their txn pool slots.

   Recurses over every descendant of block.  Safe to call more than
   once on the same subtree: the dying flag is idempotent and removal
   from rdisp is guarded by in_rdisp. */
static void
subtree_mark_and_maybe_prune_rdisp( fd_sched_t * sched, fd_sched_block_t * block ) {
  if( FD_UNLIKELY( block->rooted ) ) {
    /* Abandoning a rooted block would discard confirmed state. */
    FD_LOG_CRIT(( "invariant violation: rooted block should not be abandoned, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }
  /* All minority fork nodes pass through this function eventually.  So
     this is a good point to check per-node invariants for minority
     forks. */
  if( FD_UNLIKELY( block->staged && !block->in_rdisp ) ) {
    FD_LOG_CRIT(( "invariant violation: staged block is not in the dispatcher, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }

  /* Setting the flag is non-optional and can happen more than once. */
  block->dying = 1;

  /* Removal from dispatcher should only happen once. */
  if( block->in_rdisp ) {
    fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
    if( FD_UNLIKELY( !parent ) ) {
      /* Only the root has no parent.  Abandon should never be called on
         the root.  So any block we are trying to abandon should have a
         parent. */
      FD_LOG_CRIT(( "invariant violation: parent not found slot %lu, parent slot %lu",
                    block->slot, block->parent_slot ));
    }

    /* The dispatcher expects blocks to be abandoned in the same order
       that they were added on each lane.  There are no requirements on
       the order of abandoning if two blocks are not on the same lane,
       or if a block is unstaged.  This means that in general we
       shouldn't abandon a child block if the parent hasn't been
       abandoned yet, if and only if they are on the same lane.  So wait
       until we can abandon the parent, and then descend down the fork
       tree to ensure orderly abandoning. */
    int in_order = !parent->in_rdisp ||                     /* parent is not in the dispatcher */
                   !parent->staged ||                       /* parent is in the dispatcher but not staged */
                   !block->staged ||                        /* parent is in the dispatcher and staged but this block is unstaged */
                   block->staging_lane!=parent->staging_lane; /* this block is on a different staging lane than its parent */

    /* If the block being abandoned is the head of its lane and also the
       currently active block, deactivate it so the scheduler doesn't
       keep dispatching from a dying block. */
    if( FD_UNLIKELY( in_order && block->staged && sched->active_bank_idx==sched->staged_head_bank_idx[ block->staging_lane ] && sched->active_bank_idx!=ULONG_MAX ) ) {
      FD_TEST( block_pool_ele( sched, sched->active_bank_idx )==block );
      FD_LOG_DEBUG(( "reset active_bank_idx %lu: abandon", sched->active_bank_idx ));
      sched->last_active_bank_idx = sched->active_bank_idx;
      sched->active_bank_idx = ULONG_MAX;
      sched->metrics->deactivate_abandoned_cnt++;
    }

    /* We inform the dispatcher of an abandon only when there are no
       more in-flight transactions.  Otherwise, if the dispatcher
       recycles the same txn_id that was just abandoned, and we receive
       completion of an in-flight transaction whose txn_id was just
       recycled. */
    // FIXME The recycling might be fine now that we no longer use
    // txn_id to index into anything.  We might be able to just drop
    // txn_id on abandoned blocks.  Though would this leak transaction
    // content if the txn_id is recycled?
    // Note that subtree pruning from sched isn't dependent on the
    // in-flight check being present here, as is_prunable already checks
    // for in-flight==0.
    int abandon = in_order && !block_is_in_flight( block );

    if( abandon ) {
      block->in_rdisp = 0;
      fd_rdisp_abandon_block( sched->rdisp, (ulong)(block-sched->block_pool) );
      /* Reclaim pool slots for parsed-but-not-done transactions. */
      sched->txn_pool_free_cnt += block->txn_parsed_cnt-block->txn_done_cnt; /* in_flight_cnt==0 */

      sched->metrics->block_abandoned_cnt++;
      sched->metrics->txn_abandoned_parsed_cnt += block->txn_parsed_cnt;
      sched->metrics->txn_abandoned_exec_done_cnt += block->txn_exec_done_cnt;
      sched->metrics->txn_abandoned_done_cnt += block->txn_done_cnt;

      //FIXME when demote supports non-empty blocks, we should demote
      //the block from the lane unconditionally and immediately,
      //regardles of whether it's safe to abandon or not.  So a block
      //would go immediately from staged to unstaged and eventually to
      //abandoned.
      if( FD_LIKELY( block->staged ) ) {
        FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: abandon", block->slot, block_to_idx( sched, block ), block->staging_lane ));
        block->staged = 0;
        /* Now release the staging lane.  This will release the lane as
           soon as we abandon the head block on a lane.  Technically a
           release should only happen when we remove the tail block on a
           lane.  This is fine though.  The way we abandon guarantees by
           induction that an entire lane will be abandoned.  Only the
           head block on a lane can possibly have in-flight
           transactions, and so once a head block becomes eligible for
           abandoning, the entire lane all the way to the tail block,
           will be eligible. */
        sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
        sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
      }
    }
  }

  /* Abandon the entire fork chaining off of this block. */
  ulong child_idx = block->child_idx;
  while( child_idx!=ULONG_MAX ) {
    fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    subtree_mark_and_maybe_prune_rdisp( sched, child );
    child_idx = child->sibling_idx;
  }
}
2639 :
2640 : /* It's safe to call this function more than once on the same block.
2641 : The final call is when there are no more in-flight tasks for this
2642 : block, at which point the block will be pruned from sched. */
2643 : static void
2644 0 : subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block ) {
2645 0 : subtree_mark_and_maybe_prune_rdisp( sched, block );
2646 : /* subtree_abandon can happen as a result of one of the following
2647 : - Bad block at head of lane
2648 : - Bad block at tail of lane
2649 : - Root advance
2650 : - Root notify
2651 :
2652 : In the case of bad blocks, it suffices to check the in-flight task
2653 : count of the block that is bad. In the case of root advance, the
2654 : refcnt on the bank is checked and that includes the in-flight task
2655 : count. It is only in the case of root notify that there could
2656 : potentially be a non-zero in-flight count in the middle of a
2657 : subtree. To be safe, we check the entire subtree here before
2658 : pruning. */
2659 0 : if( subtree_is_prunable( sched, block ) ) {
2660 0 : fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
2661 0 : if( FD_LIKELY( parent ) ) {
2662 : /* Splice the block out of its parent's children list. */
2663 0 : ulong block_idx = block_to_idx( sched, block );
2664 0 : ulong * idx_p = &parent->child_idx;
2665 0 : while( *idx_p!=block_idx ) {
2666 0 : idx_p = &(block_pool_ele( sched, *idx_p )->sibling_idx);
2667 0 : }
2668 0 : *idx_p = block->sibling_idx;
2669 0 : }
2670 0 : subtree_prune( sched, block_to_idx( sched, block ), ULONG_MAX );
2671 0 : }
2672 0 : }
2673 :
/* subtree_prune releases every block in the subtree rooted at bank_idx
   back to the scheduler, skipping (and detaching) the subtree rooted at
   except_idx (pass ULONG_MAX to prune unconditionally).  Traversal is
   an iterative BFS: visited blocks are chained into a work list by
   repurposing their parent_idx fields, which are no longer needed once
   a block is condemned.  Assumes the subtree has already been removed
   from the dispatcher and has no in-flight transactions (crashes
   otherwise). */
static void
subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx ) {
  fd_sched_block_t * head = block_pool_ele( sched, bank_idx );
  head->parent_idx = ULONG_MAX;  /* terminate the work list */
  fd_sched_block_t * tail = head;

  while( head ) {
    FD_TEST( head->in_sched );
    head->in_sched = 0;
    if( head->refcnt ) {
      /* A block with external references can't be fully done; hand it
         off to the ref queue so the owner can drop its references. */
      FD_TEST( !head->block_end_done );
      head->refcnt = 0;
      if( FD_UNLIKELY( !ref_q_avail( sched->ref_q ) ) ) FD_LOG_CRIT(( "ref_q full" ));
      ref_q_push_tail( sched->ref_q, block_to_idx( sched, head ) );
    }

    ulong child_idx = head->child_idx;
    while( child_idx!=ULONG_MAX ) {
      fd_sched_block_t * child = block_pool_ele( sched, child_idx );
      /* Add children to be visited.  We abuse the parent_idx field to
         link up the next block to visit. */
      if( child_idx!=except_idx ) {
        tail->parent_idx = child_idx;
        tail = child;
        tail->parent_idx = ULONG_MAX;
      }
      child_idx = child->sibling_idx;
    }

    /* Prune the current block.  We will never publish halfway into a
       staging lane, because anything on the rooted fork should have
       finished replaying gracefully and be out of the dispatcher.  In
       fact, anything that we are publishing away should be out of the
       dispatcher at this point.  And there should be no more in-flight
       transactions. */
    if( FD_UNLIKELY( block_is_in_flight( head ) ) ) {
      FD_LOG_CRIT(( "invariant violation: block has transactions in flight (%u exec %u sigverify %u poh), slot %lu, parent slot %lu",
                    head->txn_exec_in_flight_cnt, head->txn_sigverify_in_flight_cnt, head->poh_hashing_in_flight_cnt, head->slot, head->parent_slot ));
    }
    if( FD_UNLIKELY( head->in_rdisp ) ) {
      /* We should have removed it from the dispatcher when we were
         notified of the new root, or when in-flight transactions were
         drained. */
      FD_LOG_CRIT(( "invariant violation: block is in the dispatcher, slot %lu, parent slot %lu", head->slot, head->parent_slot ));
    }

    /* Return remaining mblk descriptors to the shared pool. */
    free_mblk_slist( sched, head, head->mblks_unhashed );
    free_mblk_slist( sched, head, head->mblks_hashing_in_progress );
    free_mblk_slist( sched, head, head->mblks_mixin_in_progress );

    if( FD_UNLIKELY( !head->block_end_done ) ) {
      /* Pruned before replay completed; log how far it got. */
      sched->print_buf_sz = 0UL;
      print_block_metrics( sched, head );
      if( FD_LIKELY( head->block_start_done ) ) FD_LOG_DEBUG(( "block %lu:%lu replayed partially, pruning without full replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
      else FD_LOG_DEBUG(( "block %lu:%lu replayed nothing, pruning without any replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
    }

    sched->block_pool_popcnt--;

    /* Grab the next work-list entry before clobbering parent_idx. */
    fd_sched_block_t * next = block_pool_ele( sched, head->parent_idx );

    /* We don't have to clear the indices here since no one should be
       accessing them.  Defensive programming. */
    head->parent_idx  = ULONG_MAX;
    head->child_idx   = ULONG_MAX;
    head->sibling_idx = ULONG_MAX;

    head = next;
  }
}
2745 :
/* maybe_switch_block is called after progress on bank_idx to decide
   whether the active block should change.  If the block is fully done,
   it is removed from the dispatcher and the scheduler tries to hand
   the lane to a child on the same staging lane (the common case) or
   falls back to general activation.  If the block merely ran out of
   dispatchable work, it is deactivated so another lane can run. */
static void
maybe_switch_block( fd_sched_t * sched, ulong bank_idx ) {
  /* This only happens rarely when there are dying in-flight blocks.
     Early exit and don't let dying blocks affect replay. */
  if( FD_UNLIKELY( bank_idx!=sched->active_bank_idx ) ) return;

  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
  if( FD_UNLIKELY( block_is_done( block ) ) ) {
    /* Block fully replayed: retire it from the dispatcher and clear
       the active slot before picking a successor. */
    fd_rdisp_remove_block( sched->rdisp, bank_idx );
    FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: remove", block->slot, bank_idx, block->staging_lane ));
    block->in_rdisp = 0;
    block->staged = 0;
    sched->metrics->block_removed_cnt++;
    FD_LOG_DEBUG(( "reset active_bank_idx %lu: remove", sched->active_bank_idx ));
    sched->last_active_bank_idx = sched->active_bank_idx;
    sched->active_bank_idx = ULONG_MAX;

    /* See if there is a child block down the same staging lane.  This
       is a policy decision to minimize fork churn.  We could in theory
       reevaluate staging lane allocation here and do promotion/demotion
       as needed. */
    ulong child_idx = block->child_idx;
    while( child_idx!=ULONG_MAX ) {
      fd_sched_block_t * child = block_pool_ele( sched, child_idx );
      if( FD_LIKELY( child->staged && child->staging_lane==block->staging_lane ) ) {
        /* There is a child block down the same staging lane ... */
        if( FD_LIKELY( !child->dying ) ) {
          /* ... and the child isn't dead */
          if( FD_UNLIKELY( !block_is_activatable( child ) ) ) {
            /* ... but the child is not activatable, likely because
               there are no transactions available yet. */
            sched->metrics->deactivate_no_txn_cnt++;
            try_activate_block( sched );
            return;
          }
          /* ... and it's immediately dispatchable, so switch the active
             block to it, and have the child inherit the head status of
             the lane.  This is the common case. */
          FD_LOG_DEBUG(( "activating block %lu:%lu: child inheritance on lane %lu", child->slot, child_idx, child->staging_lane ));
          sched->active_bank_idx = child_idx;
          sched->staged_head_bank_idx[ block->staging_lane ] = child_idx;
          if( FD_UNLIKELY( !fd_ulong_extract_bit( sched->staged_bitset, (int)block->staging_lane ) ) ) {
            FD_LOG_CRIT(( "invariant violation: staged_bitset 0x%lx bit %lu is not set, slot %lu, parent slot %lu, child slot %lu, parent slot %lu",
                          sched->staged_bitset, block->staging_lane, block->slot, block->parent_slot, child->slot, child->parent_slot ));
          }
          return;
        } else {
          /* ... but the child block is considered dead, likely because
             the parser considers it invalid. */
          FD_LOG_INFO(( "child block %lu is already dead", child->slot ));
          subtree_abandon( sched, child );
          break;
        }
      }
      child_idx = child->sibling_idx;
    }
    /* There isn't a child block down the same staging lane.  This is
       the last block in the staging lane.  Release the staging lane. */
    sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
    sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
    sched->metrics->deactivate_no_child_cnt++;
    try_activate_block( sched );
  } else if( block_should_deactivate( block ) ) {
    /* We exhausted the active block, but it's not fully done yet.  We
       are just not getting FEC sets for it fast enough.  This could
       happen when the network path is congested, or when the leader
       simply went down.  Reset the active block. */
    sched->last_active_bank_idx = sched->active_bank_idx;
    sched->active_bank_idx = ULONG_MAX;
    sched->metrics->deactivate_no_txn_cnt++;
    try_activate_block( sched );
  }
}
2819 :
2820 : FD_FN_UNUSED static ulong
2821 0 : find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx ) {
2822 0 : ulong root_idx = sched->root_idx;
2823 0 :
2824 0 : if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
2825 0 : FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
2826 0 : }
2827 0 :
2828 0 : /* First pass: compute the longest unstaged fork depth for each node
2829 0 : in the fork tree. */
2830 0 : ulong depth = compute_longest_unstaged_fork( sched, root_idx );
2831 0 :
2832 0 : /* Second pass: stage blocks on the longest unstaged fork. */
2833 0 : ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
2834 0 :
2835 0 : if( FD_UNLIKELY( (depth>0UL && head_bank_idx==ULONG_MAX) || (depth==0UL && head_bank_idx!=ULONG_MAX) ) ) {
2836 0 : FD_LOG_CRIT(( "invariant violation: depth %lu, head_bank_idx %lu",
2837 0 : depth, head_bank_idx ));
2838 0 : }
2839 0 :
2840 0 : return head_bank_idx;
2841 0 : }
2842 :
2843 : /* Returns length of the longest stageable unstaged fork, if there is
2844 : one, and 0 otherwise. */
2845 : static ulong
2846 0 : compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx ) {
2847 0 : if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
2848 0 : FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
2849 0 : }
2850 :
2851 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
2852 :
2853 0 : ulong max_child_depth = 0UL;
2854 0 : ulong child_idx = block->child_idx;
2855 0 : while( child_idx!=ULONG_MAX ) {
2856 0 : ulong child_depth = compute_longest_unstaged_fork( sched, child_idx );
2857 0 : if( child_depth > max_child_depth ) {
2858 0 : max_child_depth = child_depth;
2859 0 : }
2860 0 : fd_sched_block_t * child = block_pool_ele( sched, child_idx );
2861 0 : child_idx = child->sibling_idx;
2862 0 : }
2863 :
2864 0 : block->luf_depth = max_child_depth + fd_ulong_if( block_is_promotable( block ), 1UL, 0UL );
2865 0 : return block->luf_depth;
2866 0 : }
2867 :
2868 : static ulong
2869 0 : stage_longest_unstaged_fork_helper( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
2870 0 : if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
2871 0 : FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
2872 0 : }
2873 :
2874 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
2875 :
2876 0 : int stage_it = fd_int_if( block_is_promotable( block ), 1, 0 );
2877 0 : ulong rv = fd_ulong_if( stage_it, bank_idx, ULONG_MAX );
2878 0 : if( FD_LIKELY( stage_it ) ) {
2879 0 : block->staged = 1;
2880 0 : block->staging_lane = (ulong)lane_idx;
2881 0 : fd_rdisp_promote_block( sched->rdisp, bank_idx, block->staging_lane );
2882 0 : sched->metrics->block_promoted_cnt++;
2883 0 : FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: promote", block->slot, bank_idx, block->staging_lane ));
2884 0 : }
2885 :
2886 : /* Base case: leaf node. */
2887 0 : if( block->child_idx==ULONG_MAX ) return rv;
2888 :
2889 0 : ulong max_depth = 0UL;
2890 0 : ulong best_child_idx = ULONG_MAX;
2891 0 : ulong child_idx = block->child_idx;
2892 0 : while( child_idx!=ULONG_MAX ) {
2893 0 : fd_sched_block_t * child = block_pool_ele( sched, child_idx );
2894 0 : if( child->luf_depth>max_depth ) {
2895 0 : max_depth = child->luf_depth;
2896 0 : best_child_idx = child_idx;
2897 0 : }
2898 0 : child_idx = child->sibling_idx;
2899 0 : }
2900 :
2901 : /* Recursively stage descendants. */
2902 0 : if( best_child_idx!=ULONG_MAX ) {
2903 0 : ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, best_child_idx, lane_idx );
2904 0 : rv = fd_ulong_if( rv!=ULONG_MAX, rv, head_bank_idx );
2905 0 : }
2906 :
2907 0 : return rv;
2908 0 : }
2909 :
2910 : /* Returns idx of head block of staged lane on success, idx_null
2911 : otherwise. */
2912 : static ulong
2913 0 : stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
2914 0 : ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, bank_idx, lane_idx );
2915 0 : if( FD_LIKELY( head_bank_idx!=ULONG_MAX ) ) {
2916 0 : sched->metrics->lane_promoted_cnt++;
2917 0 : sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
2918 : /* No need to update staged_popcnt_wmk because the fact that there
2919 : are unstaged blocks implies we already maxed out lanes at one
2920 : point. */
2921 0 : sched->staged_head_bank_idx[ lane_idx ] = head_bank_idx;
2922 0 : }
2923 0 : return head_bank_idx;
2924 0 : }
2925 :
2926 : /* Check if an entire staging lane can be demoted. Returns 1 if all
2927 : blocks in the lane are demotable, 0 otherwise. */
2928 : static int
2929 0 : lane_is_demotable( fd_sched_t * sched, int lane_idx ) {
2930 0 : ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
2931 :
2932 0 : while( bank_idx!=ULONG_MAX ) {
2933 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
2934 0 : FD_TEST( block->staged );
2935 0 : FD_TEST( block->staging_lane==(ulong)lane_idx );
2936 :
2937 0 : if( FD_UNLIKELY( !block_is_demotable( block ) ) ) {
2938 : /* Found a non-demotable block. Early exit. */
2939 0 : return 0;
2940 0 : }
2941 :
2942 : /* Find the child in the same staging lane. */
2943 0 : ulong child_idx = block->child_idx;
2944 0 : ulong next_bank_idx = ULONG_MAX;
2945 0 : while( child_idx!=ULONG_MAX ) {
2946 0 : fd_sched_block_t * child = block_pool_ele( sched, child_idx );
2947 0 : if( child->staged && child->staging_lane==(ulong)lane_idx ) {
2948 0 : next_bank_idx = child_idx;
2949 0 : break;
2950 0 : }
2951 0 : child_idx = child->sibling_idx;
2952 0 : }
2953 0 : bank_idx = next_bank_idx;
2954 0 : }
2955 :
2956 0 : return 1;
2957 0 : }
2958 :
2959 : /* Demote all blocks in a staging lane. Assumes that all blocks in the
2960 : lane are demotable. Returns the number of blocks demoted. */
2961 : static ulong
2962 0 : demote_lane( fd_sched_t * sched, int lane_idx ) {
2963 0 : ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
2964 0 : uint demoted_cnt = 0U;
2965 :
2966 0 : while( bank_idx!=ULONG_MAX ) {
2967 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
2968 0 : FD_TEST( block->staged );
2969 0 : FD_TEST( block->staging_lane==(ulong)lane_idx );
2970 :
2971 0 : int ret = fd_rdisp_demote_block( sched->rdisp, bank_idx );
2972 0 : if( FD_UNLIKELY( ret!=0 ) ) {
2973 0 : FD_LOG_CRIT(( "fd_rdisp_demote_block failed for slot %lu, bank_idx %lu, lane %d", block->slot, bank_idx, lane_idx ));
2974 0 : }
2975 0 : FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: demote", block->slot, bank_idx, block->staging_lane ));
2976 0 : block->staged = 0;
2977 0 : demoted_cnt++;
2978 :
2979 : /* Find the child in the same staging lane. */
2980 0 : ulong child_idx = block->child_idx;
2981 0 : ulong next_bank_idx = ULONG_MAX;
2982 0 : while( child_idx!=ULONG_MAX ) {
2983 0 : fd_sched_block_t * child = block_pool_ele( sched, child_idx );
2984 0 : if( child->staged && child->staging_lane==(ulong)lane_idx ) {
2985 0 : next_bank_idx = child_idx;
2986 0 : break;
2987 0 : }
2988 0 : child_idx = child->sibling_idx;
2989 0 : }
2990 0 : bank_idx = next_bank_idx;
2991 0 : }
2992 :
2993 : /* Clear the lane. */
2994 0 : sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, lane_idx );
2995 0 : sched->staged_head_bank_idx[ lane_idx ] = ULONG_MAX;
2996 :
2997 0 : sched->metrics->block_demoted_cnt += demoted_cnt;
2998 0 : FD_LOG_DEBUG(( "demoted %u blocks in lane %d", demoted_cnt, lane_idx ));
2999 0 : return demoted_cnt;
3000 0 : }
|