Line data Source code
1 : #include <stdio.h> /* for vsnprintf */
2 : #include <stdarg.h> /* for va_list */
3 :
4 : #include "fd_sched.h"
5 : #include "fd_execrp.h" /* for poh hash value */
6 : #include "../../util/math/fd_stat.h" /* for sorted search */
7 : #include "../../disco/fd_disco_base.h" /* for FD_MAX_TXN_PER_SLOT */
8 : #include "../../disco/metrics/fd_metrics.h" /* for fd_metrics_convert_seconds_to_ticks and etc. */
9 : #include "../../discof/poh/fd_poh.h" /* for MAX_SKIPPED_TICKS */
10 : #include "../../flamenco/runtime/fd_runtime.h" /* for fd_runtime_load_txn_address_lookup_tables */
11 : #include "../../flamenco/runtime/sysvar/fd_sysvar_slot_hashes.h" /* for ALUTs */
12 :
/* Compile-time limits.  The staging lane count is kept in log form so
   it is trivially a power of two. */
#define FD_SCHED_MAX_STAGING_LANES_LOG (2)
#define FD_SCHED_MAX_STAGING_LANES (1UL<<FD_SCHED_MAX_STAGING_LANES_LOG)
#define FD_SCHED_MAX_EXEC_TILE_CNT (64UL)
#define FD_SCHED_MAX_PRINT_BUF_SZ (2UL<<20) /* 2 MiB scratch for the diagnostic printers below */

#define FD_SCHED_MAX_MBLK_PER_SLOT (MAX_SKIPPED_TICKS)
#define FD_SCHED_MAX_MBLK_IN_PROGRESS_PER_SLOT (2UL*FD_SCHED_MAX_EXEC_TILE_CNT)
#define FD_SCHED_MAX_POH_HASHES_PER_TASK (4096UL) /* This seems to be the sweet spot. */

/* 64 ticks per slot, and a single gigantic microblock containing min
   size transactions. */
FD_STATIC_ASSERT( FD_MAX_TXN_PER_SLOT_SHRED==((FD_SHRED_DATA_PAYLOAD_MAX_PER_SLOT-65UL*sizeof(fd_microblock_hdr_t))/FD_TXN_MIN_SERIALIZED_SZ), max_txn_per_slot_shred );

/* We size the buffer to be able to hold residual data from the previous
   FEC set that only becomes parseable after the next FEC set is
   ingested, as well as the incoming FEC set.  The largest minimally
   parseable unit of data is a transaction.  So that much data may
   straddle FEC set boundaries.  Other minimally parseable units of data
   include the microblock header and the microblock count within a
   batch. */
#define FD_SCHED_MAX_PAYLOAD_PER_FEC (FD_STORE_DATA_MAX)
#define FD_SCHED_MAX_FEC_BUF_SZ (FD_SCHED_MAX_PAYLOAD_PER_FEC+FD_TXN_MTU)
FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(fd_microblock_hdr_t), resize buffer for residual data );
FD_STATIC_ASSERT( FD_TXN_MTU>=sizeof(ulong), resize buffer for residual data );

#define FD_SCHED_MAX_TXN_PER_FEC ((FD_SCHED_MAX_PAYLOAD_PER_FEC-1UL)/FD_TXN_MIN_SERIALIZED_SZ+1UL) /* 478 */

FD_STATIC_ASSERT( FD_SCHED_MIN_DEPTH>=FD_SCHED_MAX_TXN_PER_FEC, limits );
FD_STATIC_ASSERT( FD_SCHED_MAX_DEPTH<=FD_RDISP_MAX_DEPTH, limits );

#define FD_SCHED_MAGIC (0xace8a79c181f89b6UL) /* echo -n "fd_sched_v0" | sha512sum | head -c 16 */

/* Internal status codes. */
#define FD_SCHED_OK (0)
#define FD_SCHED_AGAIN_LATER (1)
#define FD_SCHED_BAD_BLOCK (2)
48 :
49 :
50 : /* Structs. */
51 :
/* Per-microblock descriptor recorded at parse time, one per microblock
   in a block, stored in parse order (see fd_sched_block.mblk_pool). */
struct fd_sched_mblk {
  ulong end_txn_idx; /* non-inclusive parse idx */
  ulong hashcnt;     /* number of hashes to do, excluding the final mixin */
  fd_hash_t end_hash[ 1 ]; /* presumably the expected PoH hash at the end of this microblock -- TODO confirm against the PoH verify path */
};
typedef struct fd_sched_mblk fd_sched_mblk_t;
58 :
/* State for a microblock whose PoH hashing/mixin is currently in
   progress.  Elements live in fd_sched_block.mblk_in_progress_pool and
   are chained through `next` on the per-block slists. */
struct fd_sched_mblk_in_progress {
  ulong curr_txn_idx;  /* next up to be added into bmtree, parse idx */
  ulong curr_hashcnt;  /* hashes completed so far for this microblock -- TODO confirm */
  fd_hash_t curr_hash[ 1 ]; /* running PoH hash value */
  uint curr_sig_cnt;
  uint mblk_idx;       /* pointer back to the original mblk descriptor */
  uint next;           /* slist */
  int is_tick;         /* nonzero if this microblock is a tick */
};
typedef struct fd_sched_mblk_in_progress fd_sched_mblk_in_progress_t;
69 :
/* mblk_in_progress_slist: intrusive singly-linked list over
   fd_sched_mblk_in_progress_t pool elements, chained via `next` using
   uint indices. */
#define SLIST_NAME mblk_in_progress_slist
#define SLIST_ELE_T fd_sched_mblk_in_progress_t
#define SLIST_IDX_T uint
#define SLIST_NEXT next
#include "../../util/tmpl/fd_slist.c"

/* txn_bitset: bitset wide enough to index any txn in the txn pool. */
#define SET_NAME txn_bitset
#define SET_MAX FD_SCHED_MAX_DEPTH
#include "../../util/tmpl/fd_set.c"

/* mblk_in_progress_bitset: bitset over the per-block in-progress
   microblock pool (see mblk_in_progress_pool_free_bitset below). */
#define SET_NAME mblk_in_progress_bitset
#define SET_MAX FD_SCHED_MAX_MBLK_IN_PROGRESS_PER_SLOT
#include "../../util/tmpl/fd_set.c"
83 :
/* Per-block (per-bank) state: fork-tree links, per-task-type progress
   counters, PoH verification state, and the incremental FEC-set parser
   state.  One element per block being replayed, held in a flat pool
   indexed by bank idx (see fd_sched.block_pool). */
struct fd_sched_block {
  ulong slot;
  ulong parent_slot;
  ulong parent_idx;  /* Index of the parent in the pool. */
  ulong child_idx;   /* Index of the left-child in the pool. */
  ulong sibling_idx; /* Index of the right-sibling in the pool. */

  /* Counters. */
  uint txn_parsed_cnt;
  /* txn_queued_cnt = txn_parsed_cnt-txn_in_flight_cnt-txn_done_cnt */
  uint txn_exec_in_flight_cnt;
  uint txn_exec_done_cnt;
  uint txn_sigverify_in_flight_cnt;
  uint txn_sigverify_done_cnt;
  uint poh_hashing_in_flight_cnt;
  uint poh_hashing_done_cnt;
  uint poh_hash_cmp_done_cnt; /* poh_hashing_done_cnt==poh_hash_cmp_done_cnt+len(mixin_in_progress) */
  uint txn_done_cnt; /* A transaction is considered done when all types of tasks associated with it are done. */
  uint shred_cnt;
  uint fec_cnt;
  uint mblk_cnt;          /* Total number of microblocks, including ticks and non ticks. */
  uint mblk_tick_cnt;     /* Total number of tick microblocks. */
  uint mblk_unhashed_cnt; /* mblk_cnt==mblk_unhashed_cnt+len(hashing_in_progress)+len(mixin_in_progress)+poh_hash_cmp_done_cnt */
  ulong hashcnt;          /* How many hashes this block wants replay to do. A mixin/record counts as one hash. */
  ulong txn_pool_max_popcnt;   /* Peak transaction pool occupancy during the time this block was replaying. */
  ulong block_pool_max_popcnt; /* Peak block pool occupancy. */
  ulong txn_idx[ FD_MAX_TXN_PER_SLOT ];       /* Indexed by parse order. */
  long txn_disp_ticks[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */
  long txn_done_ticks[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */
  fd_ed25519_sig_t txn_sigs[ FD_MAX_TXN_PER_SLOT ]; /* Indexed by parse order. */

  /* PoH verify. */
  fd_hash_t start_hash[ 1 ]; /* PoH hash the block's hash chain starts from -- presumably the parent's end hash; TODO confirm */
  fd_sched_mblk_t mblk_pool[ FD_SCHED_MAX_MBLK_PER_SLOT ]; /* Indexed by parse order. */
  mblk_in_progress_slist_t mblks_hashing_in_progress[ 1 ]; /* Microblocks progress from the unhashed state into hashing ... */
  mblk_in_progress_slist_t mblks_mixin_in_progress[ 1 ];   /* ... and then from hashing into the mixin queue. FIFO. */
  mblk_in_progress_bitset_t mblk_in_progress_pool_free_bitset[ mblk_in_progress_bitset_word_cnt ];
  fd_sched_mblk_in_progress_t mblk_in_progress_pool[ FD_SCHED_MAX_MBLK_IN_PROGRESS_PER_SLOT ];
  uchar bmtree_mem[ FD_BMTREE_COMMIT_FOOTPRINT(0) ] __attribute__((aligned(FD_BMTREE_COMMIT_ALIGN))); /* backing memory for bmtree */
  fd_bmtree_commit_t * bmtree;
  ulong prev_tick_hashcnt;
  ulong curr_tick_hashcnt; /* Starts at 0, accumulates hashcnt, resets to 0 on the next tick. */
  ulong tick_height;       /* Block is built off of a parent block with this many ticks. */
  ulong max_tick_height;   /* Block should end with precisely this many ticks. */
  ulong hashes_per_tick;   /* Fixed per block, feature gated, known after bank clone. */
  int inconsistent_hashes_per_tick;

  /* Parser state. */
  uchar txn[ FD_TXN_MAX_SZ ] __attribute__((aligned(alignof(fd_txn_t)))); /* scratch for the txn currently being parsed */
  ulong mblks_rem; /* Number of microblocks remaining in the current batch. */
  ulong txns_rem;  /* Number of transactions remaining in the current microblock. */
  fd_acct_addr_t aluts[ 256 ]; /* Resolve ALUT accounts into this buffer for more parallelism. */
  uint fec_buf_sz;   /* Size of the fec_buf in bytes. */
  uint fec_buf_soff; /* Starting offset into fec_buf for unparsed transactions. */
  uint fec_buf_boff; /* Byte offset into raw block data of the first byte currently in fec_buf */
  uint fec_eob:1; /* FEC end-of-batch: set if the last FEC set in the batch is being
                     ingested. */
  uint fec_sob:1; /* FEC start-of-batch: set if the parser expects to be receiving a new
                     batch. */

  /* Block state. */
  uint fec_eos:1; /* FEC end-of-stream: set if the last FEC set in the block has been
                     ingested. */
  uint rooted:1;  /* Set if the block is rooted. */
  uint dying:1;   /* Set if the block has been abandoned and no transactions should be
                     scheduled from it. */
  uint refcnt:1;  /* Starts at 1 when the block is added, set to 0 if caller has been
                     informed to decrement refcnt for sched. */
  uint in_sched:1; /* Set if the block is being tracked by the scheduler. */
  uint in_rdisp:1; /* Set if the block is being tracked by the dispatcher, either as staged
                      or unstaged. */
  uint block_start_signaled:1; /* Set if the start-of-block sentinel has been dispatched. */
  uint block_end_signaled:1;   /* Set if the end-of-block sentinel has been dispatched. */
  uint block_start_done:1;     /* Set if the start-of-block processing has been completed. */
  uint block_end_done:1;       /* Set if the end-of-block processing has been completed. */
  uint staged:1;               /* Set if the block is in a dispatcher staging lane; a staged block is
                                  tracked by the dispatcher. */
  ulong staging_lane; /* Ignored if staged==0. */
  ulong luf_depth;    /* Depth of longest unstaged fork starting from this node; only
                         stageable unstaged descendants are counted. */
  uchar fec_buf[ FD_SCHED_MAX_FEC_BUF_SZ ]; /* The previous FEC set could have some residual data that only becomes
                                               parseable after the next FEC set is ingested. */
  uint shred_blk_offs[ FD_SHRED_BLK_MAX ]; /* The byte offsets into block data of ingested shreds */
};
typedef struct fd_sched_block fd_sched_block_t;
169 :
170 : FD_STATIC_ASSERT( sizeof(fd_hash_t)==sizeof(((fd_microblock_hdr_t *)0)->hash), unexpected poh hash size );
171 :
172 :
/* Scheduler-wide diagnostic counters, dumped by print_metrics below.
   uint fields count discrete events; ulong fields are larger running
   totals (bytes, txns) or accumulated tick counts. */
struct fd_sched_metrics {
  uint block_added_cnt;
  uint block_added_staged_cnt;
  uint block_added_unstaged_cnt;
  uint block_added_dead_ood_cnt;
  uint block_removed_cnt;
  uint block_abandoned_cnt;
  uint block_bad_cnt;
  uint block_promoted_cnt;
  uint block_demoted_cnt;
  uint deactivate_no_child_cnt;
  uint deactivate_no_txn_cnt;
  uint deactivate_pruned_cnt;
  uint deactivate_abandoned_cnt;
  uint lane_switch_cnt;
  uint lane_promoted_cnt;
  uint lane_demoted_cnt;
  uint fork_observed_cnt;
  uint alut_success_cnt;
  uint alut_serializing_cnt;
  uint txn_abandoned_parsed_cnt;
  uint txn_abandoned_exec_done_cnt;
  uint txn_abandoned_done_cnt;
  uint txn_max_in_flight_cnt;
  ulong txn_weighted_in_flight_cnt;
  ulong txn_weighted_in_flight_tickcount;
  ulong txn_none_in_flight_tickcount;
  ulong txn_parsed_cnt;
  ulong txn_exec_done_cnt;
  ulong txn_sigverify_done_cnt;
  ulong txn_done_cnt;
  ulong bytes_ingested_cnt;
  ulong bytes_ingested_unparsed_cnt;
  ulong bytes_dropped_cnt;
  ulong fec_cnt;
};
typedef struct fd_sched_metrics fd_sched_metrics_t;
210 :
/* ref_q: dynamically sized deque of ulong (bank indices, judging by
   fd_sched.ref_q usage -- TODO confirm against the enqueue sites). */
#define DEQUE_NAME ref_q
#define DEQUE_T ulong
#include "../../util/tmpl/fd_deque_dynamic.c"
214 :
/* Top-level scheduler object.  Lives at the head of a single
   contiguous region laid out by fd_sched_footprint/fd_sched_new; the
   pointer fields (rdisp, txn_pool, txn_idx_to_parse_idx, block_pool,
   ref_q) reference trailing sub-regions of that same allocation. */
struct fd_sched {
  char print_buf[ FD_SCHED_MAX_PRINT_BUF_SZ ]; /* shared scratch for the diagnostic printers */
  ulong print_buf_sz; /* current length of print_buf (excluding NUL) */
  fd_sched_metrics_t metrics[ 1 ];
  ulong canary; /* == FD_SCHED_MAGIC */
  ulong depth;         /* Immutable. */
  ulong block_cnt_max; /* Immutable. */
  ulong exec_cnt;      /* Immutable. */
  long txn_in_flight_last_tick;
  ulong root_idx;
  fd_rdisp_t * rdisp;
  ulong txn_exec_ready_bitset[ 1 ];
  ulong sigverify_ready_bitset[ 1 ];
  ulong poh_ready_bitset[ 1 ];
  ulong active_bank_idx; /* Index of the actively replayed block, or ULONG_MAX if no block is
                            actively replayed; has to have a transaction to dispatch; staged
                            blocks that have no transactions to dispatch are not eligible for
                            being active. */
  ulong last_active_bank_idx;
  ulong staged_bitset; /* Bit i set if staging lane i is occupied. */
  ulong staged_head_bank_idx[ FD_SCHED_MAX_STAGING_LANES ]; /* Head of the linear chain in each staging lane, ignored if bit i is
                                                               not set in the bitset. */
  ulong txn_pool_free_cnt;
  fd_txn_p_t * txn_pool;
  uint * txn_idx_to_parse_idx;
  ulong tile_to_bank_idx[ FD_SCHED_MAX_EXEC_TILE_CNT ]; /* Index of the bank that the exec tile is executing against. */
  txn_bitset_t exec_done_set[ txn_bitset_word_cnt ];      /* Indexed by txn_idx. */
  txn_bitset_t sigverify_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
  txn_bitset_t poh_mixin_done_set[ txn_bitset_word_cnt ]; /* Indexed by txn_idx. */
  fd_sched_block_t * block_pool; /* Just a flat array. */
  ulong block_pool_popcnt;
  ulong * ref_q;
};
typedef struct fd_sched fd_sched_t;
249 :
250 :
251 : /* Internal helpers. */
252 :
/* Forward declarations for static helpers defined later in this file.
   NOTE(review): the grouping comments below are inferred from names --
   the definitions are authoritative. */

/* Block validation. */
static int
verify_ticks( fd_sched_block_t * block );

/* Fork-tree maintenance. */
static void
add_block( fd_sched_t * sched,
           ulong bank_idx,
           ulong parent_bank_idx );

/* Incremental FEC-set parsing. */
FD_WARN_UNUSED static int
fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );

FD_WARN_UNUSED static int
fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx );

/* Task dispatch to exec tiles. */
static void
dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );

static void
dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out );

FD_WARN_UNUSED static int
maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block );

/* Active-block selection. */
static void
try_activate_block( fd_sched_t * sched );

static void
check_or_set_active_block( fd_sched_t * sched );

/* Subtree teardown. */
static void
subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block );

static void
subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx );

static void
maybe_switch_block( fd_sched_t * sched, ulong bank_idx );

/* Staging lane management. */
FD_FN_UNUSED static ulong
find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx );

static ulong
compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx );

static ulong
stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx );

static int
lane_is_demotable( fd_sched_t * sched, int lane_idx );

static ulong
demote_lane( fd_sched_t * sched, int lane_idx );
305 :
306 : static inline fd_sched_block_t *
307 0 : block_pool_ele( fd_sched_t * sched, ulong idx ) {
308 0 : FD_TEST( idx<sched->block_cnt_max || idx==ULONG_MAX );
309 0 : return idx==ULONG_MAX ? NULL : sched->block_pool+idx;
310 0 : }
311 :
312 : FD_FN_UNUSED static inline int
313 0 : block_is_void( fd_sched_block_t * block ) {
314 0 : /* We've seen everything in the block and no transaction got parsed
315 0 : out. */
316 0 : return block->fec_eos && block->txn_parsed_cnt==0;
317 0 : }
318 :
/* Returns nonzero exactly once per block: when the block is fully
   ingested, all txns are retired, all microblocks are hashed, the
   start-of-block sentinel has been processed, and the end-of-block
   sentinel has not yet been dispatched. */
static inline int
block_should_signal_end( fd_sched_block_t * block ) {
  /* Under the current policy of eager synchronous PoH mixin, hashing
     done plus fec_eos imply that all mixins have been done. */
  /* The condition below simplifies (via De Morgan) to: crash iff
     fec_eos && mblk_cnt==poh_hashing_done_cnt &&
     mblk_cnt!=poh_hash_cmp_done_cnt -- i.e. every microblock finished
     hashing but not every hash got compared, violating the eager-mixin
     invariant stated above. */
  if( FD_UNLIKELY( !( !block->fec_eos || ((block->mblk_cnt==block->poh_hashing_done_cnt&&block->mblk_cnt==block->poh_hash_cmp_done_cnt)||block->mblk_cnt!=block->poh_hashing_done_cnt) ) ) ) FD_LOG_CRIT(( "invariant violation: slot %lu fec_eos %d mblk_cnt %u poh_hashing_done_cnt %u poh_hash_cmp_done_cnt %u", block->slot, block->fec_eos, block->mblk_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt ));
  return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hashing_done_cnt && block->block_start_done && !block->block_end_signaled;
}
326 :
327 : static inline int
328 0 : block_will_signal_end( fd_sched_block_t * block ) {
329 0 : return block->fec_eos && !block->block_end_signaled;
330 0 : }
331 :
332 : /* Is there something known to be dispatchable in the block? This is an
333 : important liveness property. A block that doesn't contain any known
334 : dispatchable tasks will be deactivated or demoted. */
335 : static inline int
336 0 : block_is_dispatchable( fd_sched_block_t * block ) {
337 0 : ulong exec_queued_cnt = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
338 0 : ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
339 0 : ulong poh_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
340 0 : return exec_queued_cnt>0UL ||
341 0 : sigverify_queued_cnt>0UL ||
342 0 : poh_queued_cnt>0UL ||
343 0 : !block->block_start_signaled ||
344 0 : block_will_signal_end( block );
345 0 : }
346 :
347 : static inline int
348 0 : block_is_in_flight( fd_sched_block_t * block ) {
349 0 : return block->txn_exec_in_flight_cnt || block->txn_sigverify_in_flight_cnt || block->poh_hashing_in_flight_cnt || (block->block_end_signaled && !block->block_end_done);
350 0 : }
351 :
352 : static inline int
353 0 : block_is_done( fd_sched_block_t * block ) {
354 0 : return block->fec_eos && block->txn_parsed_cnt==block->txn_done_cnt && block->mblk_cnt==block->poh_hash_cmp_done_cnt && block->block_start_done && block->block_end_done;
355 0 : }
356 :
357 : static inline int
358 0 : block_is_stageable( fd_sched_block_t * block ) {
359 0 : int rv = !block_is_done( block ) && !block->dying;
360 0 : if( FD_UNLIKELY( rv && !block->in_rdisp ) ) {
361 : /* Invariant: stageable blocks may be currently staged or unstaged,
362 : but must be in the dispatcher either way. When a block
363 : transitions to DONE, it will be immediately removed from the
364 : dispatcher. When a block transitions to DYING, it will be
365 : eventually abandoned from the dispatcher. */
366 0 : FD_LOG_CRIT(( "invariant violation: stageable block->in_rdisp==0, txn_parsed_cnt %u, txn_done_cnt %u, fec_eos %u,, slot %lu, parent slot %lu",
367 0 : block->txn_parsed_cnt, block->txn_done_cnt, (uint)block->fec_eos, block->slot, block->parent_slot ));
368 0 : }
369 0 : return rv;
370 0 : }
371 :
372 : static inline int
373 0 : block_is_promotable( fd_sched_block_t * block ) {
374 0 : return block_is_stageable( block ) && block_is_dispatchable( block ) && !block->staged;
375 0 : }
376 :
377 : static inline int
378 0 : block_is_demotable( fd_sched_block_t * block ) {
379 : /* A block can only be demoted from rdisp if it is empty, meaning no
380 : PENDING, READY, or DISPATCHED transactions. This is equivalent to
381 : having no in-flight transactions (DISPATCHED) and no queued
382 : transactions (PENDING or READY). This function actually implements
383 : a stronger requirement. We consider a block demotable only if
384 : there are no in-flight or queued tasks of any kind. */
385 0 : return !block_is_in_flight( block ) && !block_is_dispatchable( block ) && block->staged;
386 0 : }
387 :
388 : static inline int
389 0 : block_is_activatable( fd_sched_block_t * block ) {
390 0 : return block_is_stageable( block ) && block_is_dispatchable( block ) && block->staged;
391 0 : }
392 :
393 : static inline int
394 0 : block_should_deactivate( fd_sched_block_t * block ) {
395 : /* We allow a grace period, during which a block has nothing to
396 : dispatch, but has something in-flight. The block is allowed to
397 : stay activated and ingest FEC sets during this time. The block
398 : will be deactivated if there's still nothing to dispatch by the
399 : time all in-flight tasks are completed. */
400 0 : return !block_is_activatable( block ) && !block_is_in_flight( block );
401 0 : }
402 :
403 : static inline int
404 0 : block_is_prunable( fd_sched_block_t * block ) {
405 0 : return !block->in_rdisp && !block_is_in_flight( block );
406 0 : }
407 :
408 : static inline ulong
409 0 : block_to_idx( fd_sched_t * sched, fd_sched_block_t * block ) { return (ulong)(block-sched->block_pool); }
410 :
/* Append a formatted string to sched's shared print buffer.  Output
   that does not fit is silently truncated; the buffer always remains
   NUL-terminated.  A negative vsnprintf return (encoding error) is
   treated as a zero-length append. */
__attribute__((format(printf,2,3)))
static void
fd_sched_printf( fd_sched_t * sched,
                 char const * fmt,
                 ... ) {
  va_list ap;
  ulong len;
  va_start( ap, fmt );
  int ret = vsnprintf( sched->print_buf+sched->print_buf_sz,
                       FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz,
                       fmt, ap );
  va_end( ap );
  /* Cap the advance one byte short of the buffer end so the explicit
     NUL write below stays in bounds.  Because len is always at most
     FD_SCHED_MAX_PRINT_BUF_SZ-print_buf_sz-1UL, print_buf_sz can never
     reach FD_SCHED_MAX_PRINT_BUF_SZ, so the unsigned subtraction here
     cannot underflow. */
  len = fd_ulong_if( ret<0, 0UL, fd_ulong_min( (ulong)ret, FD_SCHED_MAX_PRINT_BUF_SZ-sched->print_buf_sz-1UL ) );
  sched->print_buf[ sched->print_buf_sz+len ] = '\0';
  sched->print_buf_sz += len;
}
427 :
/* Emit one debug log line per parsed transaction in the block,
   formatted as a Python-style list entry:
   ['<base58 sig>',<dispatch tick>,<done tick>], where LONG_MAX acts as
   the "never recorded" sentinel and is rendered as None. */
FD_FN_UNUSED static void
log_block_txns( fd_sched_t * sched, fd_sched_block_t * block ) {
  for( ulong i=0UL; i<block->txn_parsed_cnt; i++ ) {
    sched->print_buf_sz = 0UL; /* each txn gets a fresh buffer / log line */
    FD_BASE58_ENCODE_64_BYTES( block->txn_sigs[ i ], sig_str );
    long disp_tick = block->txn_disp_ticks[ i ];
    long done_tick = block->txn_done_ticks[ i ];
    if( FD_LIKELY( disp_tick!=LONG_MAX && done_tick!=LONG_MAX ) ) fd_sched_printf( sched, "['%s',%ld,%ld],", sig_str, disp_tick, done_tick );
    else if( FD_LIKELY( disp_tick!=LONG_MAX ) ) fd_sched_printf( sched, "['%s',%ld,None],", sig_str, disp_tick );
    else fd_sched_printf( sched, "['%s',None,None],", sig_str );
    FD_LOG_DEBUG(( "%s", sched->print_buf ));
  }
}
441 :
442 : FD_FN_UNUSED static void
443 0 : print_histogram( fd_sched_t * sched, fd_histf_t * hist, ulong converter, char * title ) {
444 0 : fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
445 0 : fd_sched_printf( sched, " | %-19s | | Count |\n", title );
446 0 : fd_sched_printf( sched, " +---------------------+----------------------+--------------+\n" );
447 0 :
448 0 : ulong total_count = 0;
449 0 : for( ulong i=0UL; i<fd_histf_bucket_cnt( hist ); i++ ) {
450 0 : total_count += fd_histf_cnt( hist, i );
451 0 : }
452 0 :
453 0 : for( ulong i=0UL; i< fd_histf_bucket_cnt( hist ); i++ ) {
454 0 : ulong bucket_count = fd_histf_cnt( hist, i );
455 0 :
456 0 : char * lt_str;
457 0 : char lt_buf[ 64 ];
458 0 : if( FD_UNLIKELY( i==fd_histf_bucket_cnt( hist )-1UL ) ) {
459 0 : lt_str = "+Inf";
460 0 : } else {
461 0 : ulong edge = fd_histf_right( hist, i );
462 0 : if( converter==FD_METRICS_CONVERTER_NANOSECONDS ) {
463 0 : edge = fd_metrics_convert_ticks_to_nanoseconds( edge-1UL );
464 0 : FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu nanos", edge ) );
465 0 : } else if( converter==FD_METRICS_CONVERTER_NONE ) {
466 0 : FD_TEST( fd_cstr_printf_check( lt_buf, sizeof( lt_buf ), NULL, "<= %lu", edge-1UL ) );
467 0 : }
468 0 : lt_str = lt_buf;
469 0 : }
470 0 :
471 0 : /* Create visual bar - scale to max 20 characters. */
472 0 : char bar_buf[ 22 ];
473 0 : if( bucket_count>0UL && total_count>0UL ) {
474 0 : ulong bar_length = (bucket_count*20UL)/total_count;
475 0 : if( !bar_length ) bar_length = 1;
476 0 : for( ulong j=0UL; j<bar_length; j++ ) { bar_buf[ j ] = '*'; }
477 0 : bar_buf[ bar_length ] = '\0';
478 0 : } else {
479 0 : bar_buf[ 0 ] = '\0';
480 0 : }
481 0 :
482 0 : fd_sched_printf( sched, " | %19s | %-20s | %12lu |\n", lt_str, bar_buf, bucket_count );
483 0 : }
484 0 : }
485 :
/* Append a one-line summary of a block's parse/progress counters to
   sched's print buffer (a subset of print_block_debug's fields). */
FD_FN_UNUSED static void
print_block_metrics( fd_sched_t * sched, fd_sched_block_t * block ) {
  fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, fec_eos %d, rooted %d, txn_parsed_cnt %u, txn_exec_done_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, fec_cnt %u, mblk_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
                    block_to_idx( sched, block ), block->slot, block->parent_slot, block->fec_eos, block->rooted, block->txn_parsed_cnt, block->txn_exec_done_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->fec_cnt, block->mblk_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
}
491 :
/* Append a full one-line debug dump of a block (state flags, all
   progress counters, PoH and parser state) to sched's print buffer. */
FD_FN_UNUSED static void
print_block_debug( fd_sched_t * sched, fd_sched_block_t * block ) {
  fd_sched_printf( sched, "block idx %lu, block slot %lu, parent_slot %lu, staged %d (lane %lu), dying %d, in_rdisp %d, fec_eos %d, rooted %d, block_start_signaled %d, block_end_signaled %d, block_start_done %d, block_end_done %d, txn_parsed_cnt %u, txn_exec_in_flight_cnt %u, txn_exec_done_cnt %u, txn_sigverify_in_flight_cnt %u, txn_sigverify_done_cnt %u, poh_hashing_in_flight_cnt %u, poh_hashing_done_cnt %u, poh_hash_cmp_done_cnt %u, txn_done_cnt %u, shred_cnt %u, fec_cnt %u, mblk_cnt %u, mblk_tick_cnt %u, mblk_unhashed_cnt %u, hashcnt %lu, txn_pool_max_popcnt %lu/%lu, block_pool_max_popcnt %lu/%lu, prev_tick_hashcnt %lu, curr_tick_hashcnt %lu, mblks_rem %lu, txns_rem %lu, fec_buf_sz %u, fec_buf_boff %u, fec_buf_soff %u, fec_eob %d, fec_sob %d\n",
                    block_to_idx( sched, block ), block->slot, block->parent_slot, block->staged, block->staging_lane, block->dying, block->in_rdisp, block->fec_eos, block->rooted, block->block_start_signaled, block->block_end_signaled, block->block_start_done, block->block_end_done, block->txn_parsed_cnt, block->txn_exec_in_flight_cnt, block->txn_exec_done_cnt, block->txn_sigverify_in_flight_cnt, block->txn_sigverify_done_cnt, block->poh_hashing_in_flight_cnt, block->poh_hashing_done_cnt, block->poh_hash_cmp_done_cnt, block->txn_done_cnt, block->shred_cnt, block->fec_cnt, block->mblk_cnt, block->mblk_tick_cnt, block->mblk_unhashed_cnt, block->hashcnt, block->txn_pool_max_popcnt, sched->depth, block->block_pool_max_popcnt, sched->block_cnt_max, block->prev_tick_hashcnt, block->curr_tick_hashcnt, block->mblks_rem, block->txns_rem, block->fec_buf_sz, block->fec_buf_boff, block->fec_buf_soff, block->fec_eob, block->fec_sob );
}
497 :
498 : FD_FN_UNUSED static void
499 0 : print_block_and_parent( fd_sched_t * sched, fd_sched_block_t * block ) {
500 0 : print_block_debug( sched, block );
501 0 : fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
502 0 : if( FD_LIKELY( parent ) ) print_block_debug( sched, parent );
503 0 : }
504 :
/* Append a one-line dump of every fd_sched_metrics counter to sched's
   print buffer. */
FD_FN_UNUSED static void
print_metrics( fd_sched_t * sched ) {
  fd_sched_printf( sched, "metrics: block_added_cnt %u, block_added_staged_cnt %u, block_added_unstaged_cnt %u, block_added_dead_ood_cnt %u, block_removed_cnt %u, block_abandoned_cnt %u, block_bad_cnt %u, block_promoted_cnt %u, block_demoted_cnt %u, deactivate_no_child_cnt %u, deactivate_no_txn_cnt %u, deactivate_pruned_cnt %u, deactivate_abandoned_cnt %u, lane_switch_cnt %u, lane_promoted_cnt %u, lane_demoted_cnt %u, fork_observed_cnt %u, alut_success_cnt %u, alut_serializing_cnt %u, txn_abandoned_parsed_cnt %u, txn_abandoned_exec_done_cnt %u, txn_abandoned_done_cnt %u, txn_max_in_flight_cnt %u, txn_weighted_in_flight_cnt %lu, txn_weighted_in_flight_tickcount %lu, txn_none_in_flight_tickcount %lu, txn_parsed_cnt %lu, txn_exec_done_cnt %lu, txn_sigverify_done_cnt %lu, txn_done_cnt %lu, bytes_ingested_cnt %lu, bytes_ingested_unparsed_cnt %lu, bytes_dropped_cnt %lu, fec_cnt %lu\n",
                    sched->metrics->block_added_cnt, sched->metrics->block_added_staged_cnt, sched->metrics->block_added_unstaged_cnt, sched->metrics->block_added_dead_ood_cnt, sched->metrics->block_removed_cnt, sched->metrics->block_abandoned_cnt, sched->metrics->block_bad_cnt, sched->metrics->block_promoted_cnt, sched->metrics->block_demoted_cnt, sched->metrics->deactivate_no_child_cnt, sched->metrics->deactivate_no_txn_cnt, sched->metrics->deactivate_pruned_cnt, sched->metrics->deactivate_abandoned_cnt, sched->metrics->lane_switch_cnt, sched->metrics->lane_promoted_cnt, sched->metrics->lane_demoted_cnt, sched->metrics->fork_observed_cnt, sched->metrics->alut_success_cnt, sched->metrics->alut_serializing_cnt, sched->metrics->txn_abandoned_parsed_cnt, sched->metrics->txn_abandoned_exec_done_cnt, sched->metrics->txn_abandoned_done_cnt, sched->metrics->txn_max_in_flight_cnt, sched->metrics->txn_weighted_in_flight_cnt, sched->metrics->txn_weighted_in_flight_tickcount, sched->metrics->txn_none_in_flight_tickcount, sched->metrics->txn_parsed_cnt, sched->metrics->txn_exec_done_cnt, sched->metrics->txn_sigverify_done_cnt, sched->metrics->txn_done_cnt, sched->metrics->bytes_ingested_cnt, sched->metrics->bytes_ingested_unparsed_cnt, sched->metrics->bytes_dropped_cnt, sched->metrics->fec_cnt );

}
511 :
/* Append a dump of scheduler-global state to the print buffer,
   followed by the active block (if any) and the head block of every
   occupied staging lane.  NOTE(review): the format string hard-codes
   staged_head_idx[0..3] and so assumes FD_SCHED_MAX_STAGING_LANES==4
   (currently true: 1UL<<2). */
FD_FN_UNUSED static void
print_sched( fd_sched_t * sched ) {
  fd_sched_printf( sched, "sched canary 0x%lx, exec_cnt %lu, root_idx %lu, txn_exec_ready_bitset[ 0 ] 0x%lx, sigverify_ready_bitset[ 0 ] 0x%lx, poh_ready_bitset[ 0 ] 0x%lx, active_idx %lu, staged_bitset %lu, staged_head_idx[0] %lu, staged_head_idx[1] %lu, staged_head_idx[2] %lu, staged_head_idx[3] %lu, txn_pool_free_cnt %lu/%lu, block_pool_popcnt %lu/%lu\n",
                    sched->canary, sched->exec_cnt, sched->root_idx, sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ], sched->active_bank_idx, sched->staged_bitset, sched->staged_head_bank_idx[ 0 ], sched->staged_head_bank_idx[ 1 ], sched->staged_head_bank_idx[ 2 ], sched->staged_head_bank_idx[ 3 ], sched->txn_pool_free_cnt, sched->depth, sched->block_pool_popcnt, sched->block_cnt_max );
  fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
  if( active_block ) print_block_debug( sched, active_block );
  for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
    if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
      fd_sched_block_t * block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
      print_block_debug( sched, block );
    }
  }
}
525 :
526 : FD_FN_UNUSED static void
527 0 : print_all( fd_sched_t * sched, fd_sched_block_t * block ) {
528 0 : print_metrics( sched );
529 0 : print_sched( sched );
530 0 : print_block_and_parent( sched, block );
531 0 : }
532 :
/* Handle a block that failed validation: dump full diagnostics at
   debug level, abandon the block's entire subtree, bump the bad-block
   metric, and re-evaluate which block should be active. */
static void
handle_bad_block( fd_sched_t * sched, fd_sched_block_t * block ) {
  sched->print_buf_sz = 0UL; /* reset the shared print buffer before dumping */
  print_all( sched, block );
  FD_LOG_DEBUG(( "%s", sched->print_buf ));
  subtree_abandon( sched, block );
  sched->metrics->block_bad_cnt++;
  check_or_set_active_block( sched );
}
542 :
543 :
544 : /* Public functions. */
545 :
546 : ulong
547 0 : fd_sched_align( void ) {
548 0 : return fd_ulong_max( alignof(fd_sched_t),
549 0 : fd_ulong_max( fd_rdisp_align(),
550 0 : fd_ulong_max( alignof(fd_sched_block_t), 64UL ))); /* Minimally cache line aligned. */
551 0 : }
552 :
/* Memory footprint of an fd_sched with the given txn pool depth and
   block pool capacity.  Returns 0 on invalid parameters.  The APPEND
   sequence below must stay in sync with the identical sequence in
   fd_sched_new.  NOTE(review): this validates depth against
   FD_SCHED_MIN_DEPTH while fd_sched_new checks depth<32UL -- confirm
   whether the two bounds are intentionally different. */
ulong
fd_sched_footprint( ulong depth,
                    ulong block_cnt_max ) {
  if( FD_UNLIKELY( depth<FD_SCHED_MIN_DEPTH || depth>FD_SCHED_MAX_DEPTH ) ) return 0UL; /* bad depth */
  if( FD_UNLIKELY( !block_cnt_max ) ) return 0UL; /* bad block_cnt_max */
  ulong l = FD_LAYOUT_INIT;
  l = FD_LAYOUT_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) );
  l = FD_LAYOUT_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( depth, block_cnt_max ) ); /* dispatcher */
  l = FD_LAYOUT_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) ); /* block pool */
  l = FD_LAYOUT_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) );
  l = FD_LAYOUT_APPEND( l, alignof(fd_txn_p_t), depth*sizeof(fd_txn_p_t) ); /* txn_pool */
  l = FD_LAYOUT_APPEND( l, sizeof(uint), depth*sizeof(uint) ); /* txn_idx_to_parse_idx */
  return FD_LAYOUT_FINI( l, fd_sched_align() );
}
567 :
568 : void *
569 : fd_sched_new( void * mem,
570 : ulong depth,
571 : ulong block_cnt_max,
572 0 : ulong exec_cnt ) {
573 :
574 0 : if( FD_UNLIKELY( !mem ) ) {
575 0 : FD_LOG_WARNING(( "NULL mem" ));
576 0 : return NULL;
577 0 : }
578 :
579 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, fd_sched_align() ) ) ) {
580 0 : FD_LOG_WARNING(( "misaligned mem (%p)", mem ));
581 0 : return NULL;
582 0 : }
583 :
584 0 : if( FD_UNLIKELY( depth<32UL || depth>FD_SCHED_MAX_DEPTH ) ) {
585 0 : FD_LOG_WARNING(( "bad depth (%lu)", depth ));
586 0 : return NULL;
587 0 : }
588 :
589 0 : if( FD_UNLIKELY( !block_cnt_max ) ) {
590 0 : FD_LOG_WARNING(( "bad block_cnt_max (%lu)", block_cnt_max ));
591 0 : return NULL;
592 0 : }
593 :
594 0 : if( FD_UNLIKELY( !exec_cnt || exec_cnt>FD_SCHED_MAX_EXEC_TILE_CNT ) ) {
595 0 : FD_LOG_WARNING(( "bad exec_cnt (%lu)", exec_cnt ));
596 0 : return NULL;
597 0 : }
598 :
599 0 : FD_SCRATCH_ALLOC_INIT( l, mem );
600 0 : fd_sched_t * sched = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) );
601 0 : void * _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( depth, block_cnt_max ) );
602 0 : void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) );
603 0 : void * _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) );
604 0 : fd_txn_p_t * txn_pool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_txn_p_t), depth*sizeof(fd_txn_p_t) );
605 0 : uint * txn_idx_to_parse_idx = FD_SCRATCH_ALLOC_APPEND( l, sizeof(uint), depth*sizeof(uint) );
606 0 : FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );
607 :
608 0 : sched->txn_pool = txn_pool;
609 0 : sched->txn_idx_to_parse_idx = txn_idx_to_parse_idx;
610 :
611 0 : ulong seed = ((ulong)fd_tickcount()) ^ FD_SCHED_MAGIC;
612 0 : fd_rdisp_new( _rdisp, depth, block_cnt_max, seed );
613 :
614 0 : fd_sched_block_t * bpool = (fd_sched_block_t *)_bpool;
615 0 : for( ulong i=0; i<block_cnt_max; i++ ) {
616 0 : bpool[ i ].in_sched = 0;
617 0 : mblk_in_progress_slist_new( bpool[ i ].mblks_hashing_in_progress );
618 0 : mblk_in_progress_slist_new( bpool[ i ].mblks_mixin_in_progress );
619 0 : mblk_in_progress_bitset_new( bpool[ i ].mblk_in_progress_pool_free_bitset );
620 0 : }
621 :
622 0 : fd_memset( sched->metrics, 0, sizeof(fd_sched_metrics_t) );
623 0 : sched->txn_in_flight_last_tick = LONG_MAX;
624 :
625 0 : sched->canary = FD_SCHED_MAGIC;
626 0 : sched->depth = depth;
627 0 : sched->block_cnt_max = block_cnt_max;
628 0 : sched->exec_cnt = exec_cnt;
629 0 : sched->root_idx = ULONG_MAX;
630 0 : sched->active_bank_idx = ULONG_MAX;
631 0 : sched->last_active_bank_idx = ULONG_MAX;
632 0 : sched->staged_bitset = 0UL;
633 :
634 0 : sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt );
635 0 : sched->sigverify_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt );
636 0 : sched->poh_ready_bitset[ 0 ] = fd_ulong_mask_lsb( (int)exec_cnt );
637 :
638 0 : sched->txn_pool_free_cnt = depth-1UL; /* -1 because index 0 is unusable as a sentinel reserved by the dispatcher */
639 :
640 0 : txn_bitset_new( sched->exec_done_set );
641 0 : txn_bitset_new( sched->sigverify_done_set );
642 0 : txn_bitset_new( sched->poh_mixin_done_set );
643 :
644 0 : sched->block_pool_popcnt = 0UL;
645 :
646 0 : ref_q_new( _ref_q, block_cnt_max );
647 :
648 0 : return sched;
649 0 : }
650 :
/* Join a caller to a scheduler memory region previously formatted by
   fd_sched_new.  Recomputes the interior region pointers (dispatcher,
   block pool, ref queue) via the same scratch walk as fd_sched_new;
   the txn_pool / txn_idx_to_parse_idx pointers were already cached in
   the struct by fd_sched_new and are not recomputed here.  Returns the
   joined handle or NULL on NULL mem; aborts if the canary does not
   match (region not formatted or corrupted). */
fd_sched_t *
fd_sched_join( void * mem ) {

  if( FD_UNLIKELY( !mem ) ) {
    FD_LOG_WARNING(( "NULL mem" ));
    return NULL;
  }

  fd_sched_t * sched = (fd_sched_t *)mem;
  ulong depth = sched->depth;
  ulong block_cnt_max = sched->block_cnt_max;

  FD_TEST( sched->canary==FD_SCHED_MAGIC );

  /* Must mirror the layout in fd_sched_new/fd_sched_footprint. */
  FD_SCRATCH_ALLOC_INIT( l, mem );
  /* */ FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), sizeof(fd_sched_t) );
  void * _rdisp = FD_SCRATCH_ALLOC_APPEND( l, fd_rdisp_align(), fd_rdisp_footprint( depth, block_cnt_max ) );
  void * _bpool = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sched_block_t), block_cnt_max*sizeof(fd_sched_block_t) );
  void * _ref_q = FD_SCRATCH_ALLOC_APPEND( l, ref_q_align(), ref_q_footprint( block_cnt_max ) );
  FD_SCRATCH_ALLOC_FINI( l, fd_sched_align() );

  sched->rdisp = fd_rdisp_join( _rdisp );
  sched->ref_q = ref_q_join( _ref_q );
  sched->block_pool = _bpool;

  /* Re-join the per-block intrusive containers. */
  for( ulong i=0; i<block_cnt_max; i++ ) {
    mblk_in_progress_slist_join( sched->block_pool[ i ].mblks_hashing_in_progress );
    mblk_in_progress_slist_join( sched->block_pool[ i ].mblks_mixin_in_progress );
    mblk_in_progress_bitset_join( sched->block_pool[ i ].mblk_in_progress_pool_free_bitset );
  }

  txn_bitset_join( sched->exec_done_set );
  txn_bitset_join( sched->sigverify_done_set );
  txn_bitset_join( sched->poh_mixin_done_set );

  return sched;
}
688 :
/* Returns non-zero if the scheduler has enough free transaction pool
   entries to ingest the given FEC set without risk of overflow.  The
   bound is conservative: residual unparsed bytes from the previous FEC
   set (within the same batch) plus the incoming payload are assumed to
   be entirely min-sized transactions.  Aborts on an oversized FEC set
   (upstream contract violation). */
int
fd_sched_fec_can_ingest( fd_sched_t * sched, fd_sched_fec_t * fec ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );
  FD_TEST( fec->bank_idx<sched->block_cnt_max );
  FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );

  if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
    sched->print_buf_sz = 0UL;
    print_metrics( sched );
    print_sched( sched );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
  }

  ulong fec_buf_sz = 0UL;
  fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );
  if( FD_LIKELY( !fec->is_first_in_block ) ) {
    /* Account for residual unparsed bytes straddling the FEC boundary. */
    fec_buf_sz += block->fec_buf_sz-block->fec_buf_soff;
  } else {
    /* No residual data as this is a fresh new block. */
  }
  /* Addition is safe and won't overflow because we checked the FEC set
     size above. */
  fec_buf_sz += fec->fec->data_sz;
  /* Assuming every transaction is min size, do we have enough free
     entries in the txn pool?  For a more precise txn count, we would
     have to do some parsing. */
  return sched->txn_pool_free_cnt>=fec_buf_sz/FD_TXN_MIN_SERIALIZED_SZ;
}
718 :
719 : int
720 : fd_sched_can_ingest( fd_sched_t * sched,
721 0 : ulong fec_cnt ) {
722 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
723 : /* Worst case, we need one byte from the incoming data to extract a
724 : transaction out of the residual data, and the rest of the incoming
725 : data contributes toward min sized transactions. */
726 0 : return sched->txn_pool_free_cnt>=(FD_SCHED_MAX_TXN_PER_FEC*fec_cnt);
727 0 : }
728 :
/* Ingest one FEC set for a block.  On the first FEC set of a block,
   the block is added to the fork tree and either assigned a staging
   lane in the dispatcher (inheriting the parent's lane when possible,
   else allocating a free one) or added unstaged.  The FEC payload is
   then appended to the block's parse buffer (after compacting any
   residual bytes from the previous FEC set in the same batch), shred
   boundaries are recorded, transactions are parsed out, and the PoH
   mixin queue is drained.  Returns 1 on success (including FEC sets
   silently dropped for dead blocks), 0 if the block was determined to
   be bad and has been abandoned.  Aborts on upstream contract
   violations (oversized FEC set, out-of-order delivery, data after
   end-of-block). */
FD_WARN_UNUSED int
fd_sched_fec_ingest( fd_sched_t * sched,
                     fd_sched_fec_t * fec ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );
  FD_TEST( fec->bank_idx<sched->block_cnt_max );
  FD_TEST( fec->parent_bank_idx<sched->block_cnt_max );
  FD_TEST( ref_q_empty( sched->ref_q ) );

  fd_sched_block_t * block = block_pool_ele( sched, fec->bank_idx );

  if( FD_UNLIKELY( fec->fec->data_sz>FD_SCHED_MAX_PAYLOAD_PER_FEC ) ) {
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invalid FEC set: fec->data_sz %lu, slot %lu, parent slot %lu", fec->fec->data_sz, fec->slot, fec->parent_slot ));
  }

  if( FD_UNLIKELY( fec->is_first_in_block ) ) {
    /* This is a new block. */
    add_block( sched, fec->bank_idx, fec->parent_bank_idx );
    block->slot = fec->slot;
    block->parent_slot = fec->parent_slot;

    if( FD_UNLIKELY( block->dying ) ) {
      /* The child of a dead block is also dead.  We added it to our
         fork tree just so we could track an entire lineage of dead
         children and propagate the dead property to the entire lineage,
         in case there were frags for more than one dead child
         in-flight at the time the parent was abandoned.  That being
         said, we shouldn't need to add the dead child to the
         dispatcher. */
      sched->metrics->block_added_dead_ood_cnt++;

      /* Ignore the FEC set for a dead block. */
      sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
      return 1;
    }

    /* Try to find a staging lane for this block. */
    int alloc_lane = 0;
    fd_sched_block_t * parent_block = block_pool_ele( sched, fec->parent_bank_idx );
    if( FD_LIKELY( parent_block->staged ) ) {
      /* Parent is staged.  So see if we can continue down the same
         staging lane. */
      ulong staging_lane = parent_block->staging_lane;
      ulong child_idx = parent_block->child_idx;
      while( child_idx!=ULONG_MAX ) {
        fd_sched_block_t * child = block_pool_ele( sched, child_idx );
        if( child->staged && child->staging_lane==staging_lane ) {
          /* Found a sibling already on the parent's lane.  Sentinel the
             lane variable so we fall through to fresh allocation. */
          staging_lane = FD_RDISP_UNSTAGED;
          break;
        }
        child_idx = child->sibling_idx;
      }
      /* No child is staged on the same lane as the parent.  So stage
         this block.  This is the common case. */
      if( FD_LIKELY( staging_lane!=FD_RDISP_UNSTAGED ) ) {
        block->in_rdisp = 1;
        block->staged = 1;
        block->staging_lane = staging_lane;
        fd_rdisp_add_block( sched->rdisp, fec->bank_idx, staging_lane );
        sched->metrics->block_added_cnt++;
        sched->metrics->block_added_staged_cnt++;
        FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, staging_lane ));
      } else {
        alloc_lane = 1;
      }
    } else {
      if( block_is_stageable( parent_block ) ) {
        /* Parent is unstaged but stageable.  So let's be unstaged too.
           This is not only a policy decision to be lazy and not promote
           the parent at the moment, but also an important invariant
           that we maintain for deadlock freeness in the face of staging
           lane shortage.  See the comments in lane eviction for how
           this invariant is relevant. */
        block->in_rdisp = 1;
        block->staged = 0;
        fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
        sched->metrics->block_added_cnt++;
        sched->metrics->block_added_unstaged_cnt++;
        FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
      } else {
        alloc_lane = 1;
      }
    }
    if( FD_UNLIKELY( alloc_lane ) ) {
      /* We weren't able to inherit the parent's staging lane.  So try
         to find a new staging lane. */
      if( FD_LIKELY( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) { /* Optimize for lane available. */
        int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset );
        if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
          FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
                        lane_idx, sched->staged_bitset ));
        }
        sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
        sched->staged_head_bank_idx[ lane_idx ] = fec->bank_idx;
        block->in_rdisp = 1;
        block->staged = 1;
        block->staging_lane = (ulong)lane_idx;
        fd_rdisp_add_block( sched->rdisp, fec->bank_idx, block->staging_lane );
        sched->metrics->block_added_cnt++;
        sched->metrics->block_added_staged_cnt++;
        FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: add", block->slot, fec->bank_idx, block->staging_lane ));
      } else {
        /* No lanes available. */
        block->in_rdisp = 1;
        block->staged = 0;
        fd_rdisp_add_block( sched->rdisp, fec->bank_idx, FD_RDISP_UNSTAGED );
        sched->metrics->block_added_cnt++;
        sched->metrics->block_added_unstaged_cnt++;
        FD_LOG_DEBUG(( "block %lu:%lu entered lane unstaged: add", block->slot, fec->bank_idx ));
      }
    }
  }

  /* High-water-mark metrics for pool occupancy. */
  block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt );
  block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );

  if( FD_UNLIKELY( block->dying ) ) {
    /* Ignore the FEC set for a dead block. */
    sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
    return 1;
  }

  if( FD_UNLIKELY( !block->in_rdisp ) ) {
    /* Invariant: block must be in the dispatcher at this point. */
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }

  if( FD_UNLIKELY( block->fec_eos ) ) {
    /* This means something is wrong upstream.  We're getting more FEC
       sets for a block that has already ended, or so we were told. */
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invariant violation: block->fec_eos set but getting more FEC sets, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
  }
  if( FD_UNLIKELY( block->fec_eob && fec->is_last_in_batch ) ) {
    /* If the previous FEC set ingestion and parse was successful,
       block->fec_eob should be cleared.  The fact that fec_eob is set
       means that the previous batch didn't parse properly.  So this is
       a bad block.  We should refuse to replay down the fork. */
    FD_LOG_INFO(( "bad block: failed to parse, slot %lu, parent slot %lu", fec->slot, fec->parent_slot ));
    handle_bad_block( sched, block );
    sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
    return 0;
  }
  if( FD_UNLIKELY( block->child_idx!=ULONG_MAX ) ) {
    /* This means something is wrong upstream.  FEC sets are not being
       delivered in replay order.  We got a child block FEC set before
       this block was completely delivered. */
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    fd_sched_block_t * child_block = block_pool_ele( sched, block->child_idx );
    print_block_debug( sched, child_block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invariant violation: block->child_idx %lu, slot %lu, parent slot %lu", block->child_idx, fec->slot, fec->parent_slot ));
  }

  FD_TEST( block->fec_buf_sz>=block->fec_buf_soff );
  if( FD_LIKELY( block->fec_buf_sz>block->fec_buf_soff ) ) {
    /* If there is residual data from the previous FEC set within the
       same batch, we move it to the beginning of the buffer and append
       the new FEC set. */
    memmove( block->fec_buf, block->fec_buf+block->fec_buf_soff, block->fec_buf_sz-block->fec_buf_soff );
  }
  block->fec_buf_boff += block->fec_buf_soff;
  block->fec_buf_sz -= block->fec_buf_soff;
  block->fec_buf_soff = 0;
  /* Addition is safe and won't overflow because we checked the FEC
     set size above. */
  if( FD_UNLIKELY( block->fec_buf_sz+fec->fec->data_sz>FD_SCHED_MAX_FEC_BUF_SZ ) ) {
    /* In a conformant block, there shouldn't be more than a
       transaction's worth of residual data left over from the previous
       FEC set within the same batch.  So if this condition doesn't
       hold, it's a bad block.  Instead of crashing, we should refuse to
       replay down the fork. */
    FD_LOG_INFO(( "bad block: fec_buf_sz %u, fec->data_sz %lu, slot %lu, parent slot %lu", block->fec_buf_sz, fec->fec->data_sz, fec->slot, fec->parent_slot ));
    handle_bad_block( sched, block );
    sched->metrics->bytes_dropped_cnt += fec->fec->data_sz;
    return 0;
  }

  /* Append the new FEC set to the end of the buffer. */
  fd_memcpy( block->fec_buf+block->fec_buf_sz, fec->fec->data, fec->fec->data_sz );
  block->fec_buf_sz += (uint)fec->fec->data_sz;
  sched->metrics->bytes_ingested_cnt += fec->fec->data_sz;

  block->fec_eob = fec->is_last_in_batch;
  block->fec_eos = fec->is_last_in_block;

  /* Record cumulative shred end offsets for this FEC set. */
  ulong block_sz = block->shred_cnt>0 ? block->shred_blk_offs[ block->shred_cnt-1 ] : 0UL;
  for( ulong i=0; i<fec->shred_cnt; i++ ) {
    if( FD_LIKELY( i<32UL ) ) {
      block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->block_offs[ i ];
    } else if( FD_UNLIKELY( i!=fec->shred_cnt-1UL ) ) {
      /* We don't track shred boundaries after 32 shreds, assume they're
         sized uniformly */
      ulong num_overflow_shreds = fec->shred_cnt-32UL;
      ulong overflow_idx = i-32UL;
      ulong overflow_data_sz = fec->fec->data_sz-fec->fec->block_offs[ 31 ];
      block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + fec->fec->block_offs[ 31 ] + (uint)(overflow_data_sz / num_overflow_shreds * (overflow_idx + 1UL));
    } else {
      /* Last shred always ends at the end of the FEC payload. */
      block->shred_blk_offs[ block->shred_cnt++ ] = (uint)block_sz + (uint)fec->fec->data_sz;
    }
  }

  int err = fd_sched_parse( sched, block, fec->alut_ctx );

  block->fec_cnt++;
  sched->metrics->fec_cnt++;

  if( FD_UNLIKELY( err==FD_SCHED_BAD_BLOCK ) ) {
    handle_bad_block( sched, block );
    sched->metrics->bytes_dropped_cnt += block->fec_buf_sz-block->fec_buf_soff;
    return 0;
  }

  /* We just received a FEC set, which may have made all transactions in
     a partially parsed microblock available.  If this were a malformed
     block that ends in a non-tick microblock, there's not going to be a
     hashing task from the missing ending tick to drain the mixin queue.
     So we try to drain the mixin queue right here.  Another option is
     to drain it at dispatch time, when we are about to dispatch the end
     of block signal, right before the check for whether block should
     end. */
  int mixin_res;
  while( (mixin_res=maybe_mixin( sched, block )) ) {
    if( FD_UNLIKELY( mixin_res==-1 ) ) {
      handle_bad_block( sched, block );
      return 0;
    }
    FD_TEST( mixin_res==1||mixin_res==2 );
  }

  /* Check if we need to set the active block. */
  check_or_set_active_block( sched );

  return 1;
}
974 :
/* Select the next ready task for an idle exec tile on the active
   block, in priority order: block start signal, transaction execution
   (the critical path), PoH hashing, sigverify, then block end signal.
   Writes the task into out and returns the number of tasks produced
   (0 or 1).  Aborts on internal invariant violations. */
ulong
fd_sched_task_next_ready( fd_sched_t * sched, fd_sched_task_t * out ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );
  FD_TEST( ref_q_empty( sched->ref_q ) );

  /* A tile is "fully ready" only if it is idle for exec, sigverify and
     PoH simultaneously. */
  ulong exec_ready_bitset0 = sched->txn_exec_ready_bitset[ 0 ];
  ulong exec_fully_ready_bitset = sched->sigverify_ready_bitset[ 0 ] & sched->poh_ready_bitset[ 0 ] & exec_ready_bitset0;
  if( FD_UNLIKELY( !exec_fully_ready_bitset ) ) {
    /* Early exit if no exec tiles available. */
    return 0UL;
  }

  if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
    /* No need to try activating a block.  If we're in this state,
       there's truly nothing to execute.  We will activate something
       when we ingest a FEC set with transactions. */
    return 0UL;
  }

  out->task_type = FD_SCHED_TT_NULL;

  /* We could in theory reevaluate staging lane allocation here and do
     promotion/demotion as needed.  It's a policy decision to minimize
     fork churn for now and just execute down the same active fork. */

  ulong bank_idx = sched->active_bank_idx;
  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
  if( FD_UNLIKELY( block_should_deactivate( block ) ) ) {
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invariant violation: active_bank_idx %lu is not activatable nor has anything in-flight", sched->active_bank_idx ));
  }

  /* High-water-mark metrics for pool occupancy. */
  block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt );
  block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );

  if( FD_UNLIKELY( !block->block_start_signaled ) ) {
    /* Block start must be signaled exactly once before any other task
       for the block. */
    out->task_type = FD_SCHED_TT_BLOCK_START;
    out->block_start->bank_idx = bank_idx;
    out->block_start->parent_bank_idx = block->parent_idx;
    out->block_start->slot = block->slot;
    block->block_start_signaled = 1;
    return 1UL;
  }

  ulong exec_tile_idx0 = fd_ulong_if( !!exec_fully_ready_bitset, (ulong)fd_ulong_find_lsb( exec_fully_ready_bitset ), ULONG_MAX );
  ulong exec_queued_cnt = block->txn_parsed_cnt-block->txn_exec_in_flight_cnt-block->txn_exec_done_cnt;
  if( FD_LIKELY( exec_queued_cnt>0UL && fd_ulong_popcnt( exec_fully_ready_bitset ) ) ) { /* Optimize for no fork switching. */
    /* Transaction execution has the highest priority.  Current mainnet
       block times are very much dominated by critical path transaction
       execution.  To achieve the fastest block replay speed, we can't
       afford to make any mistake in critical path dispatching.  Any
       deviation from perfect critical path dispatching is basically
       irrecoverable.  As such, we try to keep all the exec tiles busy
       with transaction execution, but we allow at most one transaction
       to be in-flight per exec tile.  This is to ensure that whenever a
       critical path transaction completes, we have at least one exec
       tile, e.g. the one that just completed said transaction, readily
       available to continue executing down the critical path. */
    out->txn_exec->txn_idx = fd_rdisp_get_next_ready( sched->rdisp, bank_idx );
    if( FD_UNLIKELY( out->txn_exec->txn_idx==0UL ) ) {
      /* There are transactions queued but none ready for execution.
         This implies that there must be in-flight transactions on whose
         completion the queued transactions depend.  So we return and
         wait for those in-flight transactions to retire.  This is a
         policy decision to execute as much as we can down the current
         fork. */
      if( FD_UNLIKELY( !block->txn_exec_in_flight_cnt ) ) {
        sched->print_buf_sz = 0UL;
        print_all( sched, block );
        FD_LOG_NOTICE(( "%s", sched->print_buf ));
        FD_LOG_CRIT(( "invariant violation: no ready transaction found but block->txn_exec_in_flight_cnt==0" ));
      }

      /* Next up are PoH tasks.  Same dispatching policy as sigverify
         tasks. */
      ulong poh_ready_bitset = exec_fully_ready_bitset;
      ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
      if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
        dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
        return 1UL;
      }

      /* Dispatch more sigverify tasks only if at least one exec tile is
         executing transactions or completely idle.  Allow at most one
         sigverify task in-flight per tile, and only dispatch to
         completely idle tiles. */
      ulong sigverify_ready_bitset = exec_fully_ready_bitset;
      ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
      if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->txn_exec_in_flight_cnt>0U, 0, 1 ) ) ) {
        dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
        return 1UL;
      }
      return 0UL;
    }
    out->task_type = FD_SCHED_TT_TXN_EXEC;
    out->txn_exec->bank_idx = bank_idx;
    out->txn_exec->slot = block->slot;
    out->txn_exec->exec_idx = exec_tile_idx0;
    FD_TEST( out->txn_exec->exec_idx!=ULONG_MAX );

    /* Update the in-flight occupancy metrics using the elapsed ticks
       since the last dispatch observation. */
    long now = fd_tickcount();
    ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
    ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( exec_ready_bitset0 );
    sched->metrics->txn_none_in_flight_tickcount += fd_ulong_if( txn_exec_busy_cnt==0UL && sched->txn_in_flight_last_tick!=LONG_MAX, delta, 0UL );
    sched->metrics->txn_weighted_in_flight_tickcount += fd_ulong_if( txn_exec_busy_cnt!=0UL, delta, 0UL );
    sched->metrics->txn_weighted_in_flight_cnt += delta*txn_exec_busy_cnt;
    sched->txn_in_flight_last_tick = now;

    block->txn_disp_ticks[ sched->txn_idx_to_parse_idx[ out->txn_exec->txn_idx ] ] = now;

    /* Mark the chosen tile busy and remember which bank it serves. */
    sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_clear_bit( exec_ready_bitset0, (int)exec_tile_idx0);
    sched->tile_to_bank_idx[ exec_tile_idx0 ] = bank_idx;

    block->txn_exec_in_flight_cnt++;
    sched->metrics->txn_max_in_flight_cnt = fd_uint_max( sched->metrics->txn_max_in_flight_cnt, block->txn_exec_in_flight_cnt );

    ulong total_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ]&sched->sigverify_ready_bitset[ 0 ]&sched->poh_ready_bitset[ 0 ] );
    if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
    if( FD_UNLIKELY( block->txn_exec_in_flight_cnt+block->txn_sigverify_in_flight_cnt+block->poh_hashing_in_flight_cnt!=total_exec_busy_cnt ) ) {
      /* Ideally we'd simply assert that the two sides of the equation
         are equal.  But abandoned blocks throw a wrench into this.  We
         allow abandoned blocks to have in-flight transactions that are
         naturally drained while we try to dispatch from another block.
         In such cases, the total number of in-flight transactions
         should include the abandoned blocks too.  The contract is that
         blocks with in-flight transactions cannot be abandoned or
         demoted from rdisp.  So a dying block has to be the head of one
         of the staging lanes. */
      // FIXME This contract no longer true if we implement immediate
      // demotion of abandoned blocks.
      ulong total_in_flight = 0UL;
      for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
        if( fd_ulong_extract_bit( sched->staged_bitset, l ) ) {
          fd_sched_block_t * staged_block = block_pool_ele( sched, sched->staged_head_bank_idx[ l ] );
          if( FD_UNLIKELY( block_is_in_flight( staged_block )&&!(staged_block==block||staged_block->dying) ) ) {
            sched->print_buf_sz = 0UL;
            print_all( sched, staged_block );
            FD_LOG_NOTICE(( "%s", sched->print_buf ));
            FD_LOG_CRIT(( "invariant violation: in-flight block is neither active nor dying" ));
          }
          total_in_flight += staged_block->txn_exec_in_flight_cnt;
          total_in_flight += staged_block->txn_sigverify_in_flight_cnt;
          total_in_flight += staged_block->poh_hashing_in_flight_cnt;
        }
      }
      if( FD_UNLIKELY( total_in_flight!=total_exec_busy_cnt ) ) {
        sched->print_buf_sz = 0UL;
        print_all( sched, block );
        FD_LOG_NOTICE(( "%s", sched->print_buf ));
        FD_LOG_CRIT(( "invariant violation: total_in_flight %lu != total_exec_busy_cnt %lu", total_in_flight, total_exec_busy_cnt ));
      }
      FD_LOG_DEBUG(( "exec_busy_cnt %lu checks out", total_exec_busy_cnt ));
    }
    return 1UL;
  }

  /* At this point txn_queued_cnt==0 */

  /* Next up are PoH tasks.  Same dispatching policy as sigverify. */
  ulong poh_ready_bitset = exec_fully_ready_bitset;
  ulong poh_hashing_queued_cnt = block->mblk_cnt-block->poh_hashing_in_flight_cnt-block->poh_hashing_done_cnt;
  if( FD_LIKELY( poh_hashing_queued_cnt>0UL && fd_ulong_popcnt( poh_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
    dispatch_poh( sched, block, bank_idx, fd_ulong_find_lsb( poh_ready_bitset ), out );
    return 1UL;
  }

  /* Try to dispatch a sigverify task, but leave one exec tile idle for
     critical path execution, unless there's not going to be any more
     real transactions for the critical path.  In the degenerate case of
     only one exec tile, keep it busy. */
  ulong sigverify_ready_bitset = exec_fully_ready_bitset;
  ulong sigverify_queued_cnt = block->txn_parsed_cnt-block->txn_sigverify_in_flight_cnt-block->txn_sigverify_done_cnt;
  if( FD_LIKELY( sigverify_queued_cnt>0UL && fd_ulong_popcnt( sigverify_ready_bitset )>fd_int_if( block->fec_eos||block->txn_exec_in_flight_cnt>0U||sched->exec_cnt==1UL, 0, 1 ) ) ) {
    dispatch_sigverify( sched, block, bank_idx, fd_ulong_find_lsb( sigverify_ready_bitset ), out );
    return 1UL;
  }

  if( FD_UNLIKELY( block_should_signal_end( block ) ) ) {
    FD_TEST( block->block_start_signaled );
    if( FD_UNLIKELY( verify_ticks( block ) ) ) {
      /* Tick verification can't be done at parse time, because we may
         not know the expected number of hashes yet.  It can't be driven
         by transaction dispatch/completion, because the block may be
         empty.  Similarly, it can't be driven by PoH hashing, because a
         bad block may simply not have any microblocks. */
      handle_bad_block( sched, block );
      out->task_type = FD_SCHED_TT_MARK_DEAD;
      out->mark_dead->bank_idx = bank_idx;
      return 1UL;
    }
    out->task_type = FD_SCHED_TT_BLOCK_END;
    out->block_end->bank_idx = bank_idx;
    block->block_end_signaled = 1;
    FD_TEST( block->refcnt );
    block->refcnt = 0;
    if( FD_UNLIKELY( !ref_q_avail( sched->ref_q ) ) ) FD_LOG_CRIT(( "ref_q full" ));
    ref_q_push_tail( sched->ref_q, bank_idx );
    return 1UL;
  }

  /* Nothing queued for the active block.  If we haven't received all
     the FEC sets for it, then return and wait for more FEC sets, while
     there are in-flight transactions.  This is a policy decision to
     minimize fork churn and allow for executing down the current fork
     as much as we can.  If we have received all the FEC sets for it,
     then we'd still like to return and wait for the in-flight
     transactions to retire, before switching to a different block.

     Either way, there should be in-flight transactions.  We deactivate
     the active block the moment we exhausted transactions from it. */
  if( FD_UNLIKELY( !block_is_in_flight( block ) ) ) {
    sched->print_buf_sz = 0UL;
    print_all( sched, block );
    FD_LOG_NOTICE(( "%s", sched->print_buf ));
    FD_LOG_CRIT(( "invariant violation: expected in-flight transactions but none" ));
  }

  return 0UL;
}
1196 :
/* fd_sched_task_done: completion callback invoked when an exec tile
   finishes a task previously handed out by the scheduler.  Updates
   per-block and global accounting for the finished task, marks the
   owning exec tile ready again, releases the transaction index back to
   the pool once all three task kinds on it (exec, sigverify, PoH mixin)
   have retired, and drives block state transitions: bad-block handling
   on PoH mismatch, draining/abandoning dying blocks, and possibly
   switching the active block.

   task_type is one of FD_SCHED_TT_*.  txn_idx/exec_idx/data are
   interpreted per task type (data is only used for FD_SCHED_TT_POH_HASH,
   where it carries an fd_execrp_poh_hash_done_msg_t).

   Returns 0 on success, -1 if the block was detected to be invalid
   (PoH hash mismatch or bad tick hash count) and has been marked bad. */
int
fd_sched_task_done( fd_sched_t * sched, ulong task_type, ulong txn_idx, ulong exec_idx, void * data ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );

  /* Resolve which block (bank) this completion belongs to.  Block
     start/end tasks are only ever issued for the active block; txn and
     PoH tasks are attributed via the tile->bank mapping recorded when
     the task was dispatched. */
  ulong bank_idx = ULONG_MAX;
  switch( task_type ) {
    case FD_SCHED_TT_BLOCK_START:
    case FD_SCHED_TT_BLOCK_END: {
      (void)txn_idx;
      (void)data;
      bank_idx = sched->active_bank_idx;
      break;
    }
    case FD_SCHED_TT_TXN_EXEC:
    case FD_SCHED_TT_TXN_SIGVERIFY: {
      (void)data;
      FD_TEST( txn_idx < sched->depth );
      bank_idx = sched->tile_to_bank_idx[ exec_idx ];
      break;
    }
    case FD_SCHED_TT_POH_HASH: {
      (void)txn_idx;
      bank_idx = sched->tile_to_bank_idx[ exec_idx ];
      break;
    }
    default: FD_LOG_CRIT(( "unsupported task_type %lu", task_type ));
  }
  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );

  if( FD_UNLIKELY( !block->in_sched ) ) {
    FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
                  block->slot, block->parent_slot, bank_idx ));
  }
  if( FD_UNLIKELY( !block->staged ) ) {
    /* Invariant: only staged blocks can have in-flight transactions. */
    FD_LOG_CRIT(( "invariant violation: block->staged==0, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }
  if( FD_UNLIKELY( !block->in_rdisp ) ) {
    /* Invariant: staged blocks must be in the dispatcher. */
    FD_LOG_CRIT(( "invariant violation: block->in_rdisp==0, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }

  /* High-water marks for pool occupancy, tracked per block for the
     debug/metrics printouts. */
  block->txn_pool_max_popcnt = fd_ulong_max( block->txn_pool_max_popcnt, sched->depth - sched->txn_pool_free_cnt );
  block->block_pool_max_popcnt = fd_ulong_max( block->block_pool_max_popcnt, sched->block_pool_popcnt );

  int exec_tile_idx = (int)exec_idx;

  switch( task_type ) {
    case FD_SCHED_TT_BLOCK_START: {
      FD_TEST( !block->block_start_done );
      block->block_start_done = 1;
      break;
    }
    case FD_SCHED_TT_BLOCK_END: {
      /* It may seem redundant to be invoking task_done() on these
         somewhat fake tasks.  But these are necessary to drive state
         transition for empty blocks or slow blocks. */
      FD_TEST( !block->block_end_done );
      block->block_end_done = 1;
      sched->print_buf_sz = 0UL;
      print_block_metrics( sched, block );
      FD_LOG_DEBUG(( "block %lu:%lu replayed fully: %s", block->slot, bank_idx, sched->print_buf ));
      break;
    }
    case FD_SCHED_TT_TXN_EXEC: {
      /* Accumulate a time-weighted count of busy exec tiles, used to
         derive average exec utilization from the metrics. */
      long now = fd_tickcount();
      ulong delta = (ulong)(now-sched->txn_in_flight_last_tick);
      ulong txn_exec_busy_cnt = sched->exec_cnt-(ulong)fd_ulong_popcnt( sched->txn_exec_ready_bitset[ 0 ] );
      sched->metrics->txn_weighted_in_flight_tickcount += delta;
      sched->metrics->txn_weighted_in_flight_cnt += delta*txn_exec_busy_cnt;
      sched->txn_in_flight_last_tick = now;

      block->txn_done_ticks[ sched->txn_idx_to_parse_idx[ txn_idx ] ] = now;

      block->txn_exec_done_cnt++;
      block->txn_exec_in_flight_cnt--;
      /* The tile must have been marked busy when the task was issued. */
      FD_TEST( !fd_ulong_extract_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx ) );
      sched->txn_exec_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->txn_exec_ready_bitset[ 0 ], exec_tile_idx );
      sched->metrics->txn_exec_done_cnt++;
      txn_bitset_insert( sched->exec_done_set, txn_idx );
      if( txn_bitset_test( sched->sigverify_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
        /* Release the txn_idx if all tasks on it are done.  This is
           guaranteed to only happen once per transaction because
           whichever one completed first would not release. */
        fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
        sched->txn_pool_free_cnt++;
        block->txn_done_cnt++;
        sched->metrics->txn_done_cnt++;
      } else {
        fd_rdisp_complete_txn( sched->rdisp, txn_idx, 0 );
      }
      break;
    }
    case FD_SCHED_TT_TXN_SIGVERIFY: {
      block->txn_sigverify_done_cnt++;
      block->txn_sigverify_in_flight_cnt--;
      FD_TEST( !fd_ulong_extract_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx ) );
      sched->sigverify_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx );
      sched->metrics->txn_sigverify_done_cnt++;
      txn_bitset_insert( sched->sigverify_done_set, txn_idx );
      if( txn_bitset_test( sched->exec_done_set, txn_idx ) && txn_bitset_test( sched->poh_mixin_done_set, txn_idx ) ) {
        /* Release the txn_idx if all tasks on it are done.  This is
           guaranteed to only happen once per transaction because
           whichever one completed first would not release. */
        fd_rdisp_complete_txn( sched->rdisp, txn_idx, 1 );
        sched->txn_pool_free_cnt++;
        block->txn_done_cnt++;
        sched->metrics->txn_done_cnt++;
      }
      break;
    }
    case FD_SCHED_TT_POH_HASH: {
      block->poh_hashing_in_flight_cnt--;
      FD_TEST( !fd_ulong_extract_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx ) );
      sched->poh_ready_bitset[ 0 ] = fd_ulong_set_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
      /* Fold the completed hashing chunk into the in-progress microblock
         state.  PoH hashing for one microblock may span multiple tasks
         (chunked by FD_SCHED_MAX_POH_HASHES_PER_TASK). */
      fd_execrp_poh_hash_done_msg_t * msg = fd_type_pun( data );
      fd_sched_mblk_in_progress_t * mblk = block->mblk_in_progress_pool+msg->mblk_idx;
      mblk->curr_hashcnt += msg->hashcnt;
      memcpy( mblk->curr_hash, msg->hash, sizeof(fd_hash_t) );
      fd_sched_mblk_t * mblk_desc = block->mblk_pool+mblk->mblk_idx;
      ulong hashcnt_todo = mblk_desc->hashcnt-mblk->curr_hashcnt;
      if( !hashcnt_todo ) {
        block->poh_hashing_done_cnt++;
        if( FD_LIKELY( !mblk->is_tick ) ) {
          /* This is not a tick.  Enqueue for mixin. */
          mblk_in_progress_slist_idx_push_tail( block->mblks_mixin_in_progress, msg->mblk_idx, block->mblk_in_progress_pool );
        } else {
          /* This is a tick.  No need to mixin.  Check the hash value
             right away. */
          if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk_desc->end_hash, sizeof(fd_hash_t) ) ) ) {
            FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
            FD_BASE58_ENCODE_32_BYTES( mblk_desc->end_hash->hash, ref_str );
            FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %u, ours %s, claimed %s, hashcnt %lu, is_tick, slot %lu, parent slot %lu", mblk->mblk_idx, our_str, ref_str, mblk_desc->hashcnt, block->slot, block->parent_slot ));
            handle_bad_block( sched, block );
            return -1;
          }
          block->poh_hash_cmp_done_cnt++;
          mblk_in_progress_bitset_insert( block->mblk_in_progress_pool_free_bitset, msg->mblk_idx );
        }
        /* Try to drain the mixin queue. */
        int mixin_res;
        while( (mixin_res=maybe_mixin( sched, block )) ) {
          if( FD_UNLIKELY( mixin_res==-1 ) ) {
            handle_bad_block( sched, block );
            return -1;
          }
          FD_TEST( mixin_res==1||mixin_res==2 );
        }
      } else {
        /* More hashing remains for this microblock; requeue it. */
        mblk_in_progress_slist_idx_push_tail( block->mblks_hashing_in_progress, msg->mblk_idx, block->mblk_in_progress_pool );
      }
      FD_TEST( block->hashes_per_tick!=ULONG_MAX );
      if( FD_UNLIKELY( block->curr_tick_hashcnt>block->hashes_per_tick && block->hashes_per_tick>1UL ) ) { /* >1 to ignore low power hashing or no hashing cases */
        /* We couldn't really check this at parse time because we may
           not have the expected hashes per tick value yet.  We couldn't
           delay this till after all PoH hashing is done, because this
           would be a DoS vector.  This is a good place to do the check.
           Note that checking the hashcnt between ticks transitively
           places an upper bound on the hashcnt of individual
           microblocks, thus mitigating the DoS vector. */
        FD_LOG_INFO(( "bad block: observed cumulative tick_hashcnt %lu, expected %lu, slot %lu, parent slot %lu", block->curr_tick_hashcnt, block->hashes_per_tick, block->slot, block->parent_slot ));
        handle_bad_block( sched, block );
        return -1;
      }
      break;
    }
  }

  /* A dying block that has fully drained its in-flight work can now be
     torn down along with its descendants. */
  if( FD_UNLIKELY( block->dying && !block_is_in_flight( block ) ) ) {
    if( FD_UNLIKELY( sched->active_bank_idx==bank_idx ) ) {
      FD_LOG_CRIT(( "invariant violation: active block shouldn't be dying, bank_idx %lu, slot %lu, parent slot %lu",
                    bank_idx, block->slot, block->parent_slot ));
    }
    FD_LOG_DEBUG(( "dying block %lu drained", block->slot ));
    subtree_abandon( sched, block );
    return 0;
  }

  if( FD_UNLIKELY( !block->dying && sched->active_bank_idx!=bank_idx ) ) {
    /* Block is not dead.  So we should be actively replaying it. */
    fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
    FD_LOG_CRIT(( "invariant violation: sched->active_bank_idx %lu, slot %lu, parent slot %lu, bank_idx %lu, slot %lu, parent slot %lu",
                  sched->active_bank_idx, active_block->slot, active_block->parent_slot,
                  bank_idx, block->slot, block->parent_slot ));
  }

  maybe_switch_block( sched, bank_idx );

  return 0;
}
1389 :
1390 : void
1391 0 : fd_sched_block_abandon( fd_sched_t * sched, ulong bank_idx ) {
1392 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1393 0 : FD_TEST( bank_idx<sched->block_cnt_max );
1394 :
1395 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1396 0 : if( FD_UNLIKELY( !block->in_sched ) ) {
1397 0 : FD_LOG_CRIT(( "invariant violation: block->in_sched==0, slot %lu, parent slot %lu, idx %lu",
1398 0 : block->slot, block->parent_slot, bank_idx ));
1399 0 : }
1400 :
1401 0 : FD_LOG_INFO(( "abandoning block %lu slot %lu", bank_idx, block->slot ));
1402 0 : sched->print_buf_sz = 0UL;
1403 0 : print_all( sched, block );
1404 0 : FD_LOG_DEBUG(( "%s", sched->print_buf ));
1405 0 : log_block_txns( sched, block );
1406 :
1407 0 : subtree_abandon( sched, block );
1408 0 : try_activate_block( sched );
1409 0 : }
1410 :
1411 : void
1412 0 : fd_sched_block_add_done( fd_sched_t * sched, ulong bank_idx, ulong parent_bank_idx, ulong slot ) {
1413 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1414 0 : FD_TEST( bank_idx<sched->block_cnt_max );
1415 :
1416 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1417 0 : add_block( sched, bank_idx, parent_bank_idx );
1418 0 : block->slot = slot;
1419 0 : block->txn_parsed_cnt = UINT_MAX;
1420 0 : block->txn_exec_done_cnt = UINT_MAX;
1421 0 : block->txn_sigverify_done_cnt = UINT_MAX;
1422 0 : block->poh_hashing_done_cnt = UINT_MAX;
1423 0 : block->poh_hash_cmp_done_cnt = UINT_MAX;
1424 0 : block->txn_done_cnt = UINT_MAX;
1425 0 : block->mblk_cnt = UINT_MAX;
1426 0 : block->fec_eos = 1;
1427 0 : block->block_start_signaled = 1;
1428 0 : block->block_end_signaled = 1;
1429 0 : block->block_start_done = 1;
1430 0 : block->block_end_done = 1;
1431 0 : block->refcnt = 0;
1432 0 : if( FD_LIKELY( parent_bank_idx!=ULONG_MAX ) ) {
1433 0 : fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
1434 0 : block->parent_slot = parent_block->slot;
1435 0 : }
1436 0 : if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) {
1437 : /* Assumes that a NULL parent implies the snapshot slot. */
1438 0 : block->parent_slot = ULONG_MAX;
1439 0 : block->rooted = 1;
1440 0 : sched->root_idx = bank_idx;
1441 0 : }
1442 0 : }
1443 :
1444 : void
1445 0 : fd_sched_advance_root( fd_sched_t * sched, ulong root_idx ) {
1446 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1447 0 : FD_TEST( root_idx<sched->block_cnt_max );
1448 0 : FD_TEST( sched->root_idx<sched->block_cnt_max );
1449 0 : FD_TEST( ref_q_empty( sched->ref_q ) );
1450 :
1451 0 : fd_sched_block_t * new_root = block_pool_ele( sched, root_idx );
1452 0 : fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
1453 0 : if( FD_UNLIKELY( !old_root->rooted ) ) {
1454 0 : FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
1455 0 : old_root->slot, old_root->parent_slot ));
1456 0 : }
1457 :
1458 : /* Early exit if the new root is the same as the old root. */
1459 0 : if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
1460 0 : FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
1461 0 : new_root->slot, new_root->parent_slot ));
1462 0 : return;
1463 0 : }
1464 :
1465 0 : subtree_prune( sched, sched->root_idx, root_idx );
1466 :
1467 0 : new_root->parent_idx = ULONG_MAX;
1468 0 : sched->root_idx = root_idx;
1469 0 : }
1470 :
/* fd_sched_root_notify: mark the chain of blocks from the current root
   down to root_idx as rooted, and abandon every minority fork branching
   off of that chain.  Unlike fd_sched_advance_root, this does not prune
   the tree or move sched->root_idx; it only sets rooted flags and
   discards competing forks.  Aborts on various invariant violations
   (rooting a block that is not done, dying, staged, in the dispatcher,
   or not a descendant of the old root). */
void
fd_sched_root_notify( fd_sched_t * sched, ulong root_idx ) {
  FD_TEST( sched->canary==FD_SCHED_MAGIC );
  FD_TEST( root_idx<sched->block_cnt_max );
  FD_TEST( sched->root_idx<sched->block_cnt_max );
  FD_TEST( ref_q_empty( sched->ref_q ) );

  fd_sched_block_t * block = block_pool_ele( sched, root_idx );
  fd_sched_block_t * old_root = block_pool_ele( sched, sched->root_idx );
  if( FD_UNLIKELY( !old_root->rooted ) ) {
    FD_LOG_CRIT(( "invariant violation: old_root is not rooted, slot %lu, parent slot %lu",
                  old_root->slot, old_root->parent_slot ));
  }

  /* Early exit if the new root is the same as the old root. */
  if( FD_UNLIKELY( root_idx==sched->root_idx ) ) {
    FD_LOG_INFO(( "new root is the same as the old root, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
    return;
  }

  /* Mark every node from the new root up through its parents to the
     old root as being rooted. */
  fd_sched_block_t * curr = block;
  fd_sched_block_t * prev = NULL;
  while( curr ) {
    if( FD_UNLIKELY( !block_is_done( curr ) ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is not done, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    if( FD_UNLIKELY( curr->dying ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is dying, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    if( FD_UNLIKELY( curr->staged ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is staged, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    if( FD_UNLIKELY( curr->in_rdisp ) ) {
      FD_LOG_CRIT(( "invariant violation: rooting a block that is in the dispatcher, slot %lu, parent slot %lu",
                    curr->slot, curr->parent_slot ));
    }
    curr->rooted = 1;
    prev = curr;
    /* Walk up; block_pool_ele( sched, ULONG_MAX ) terminates the loop
       at the tree root. */
    curr = block_pool_ele( sched, curr->parent_idx );
  }

  /* If we didn't reach the old root, the new root is not a descendant. */
  if( FD_UNLIKELY( prev!=old_root ) ) {
    FD_LOG_CRIT(( "invariant violation: new root is not a descendant of old root, new root slot %lu, parent slot %lu, old root slot %lu, parent slot %lu",
                  block->slot, block->parent_slot, old_root->slot, old_root->parent_slot ));
  }

  /* Remember whether a block was active before abandoning forks, so we
     can count a deactivation caused by this rooting below. */
  ulong old_active_bank_idx = sched->active_bank_idx;

  /* Now traverse from old root towards new root, and abandon all
     minority forks. */
  curr = old_root;
  while( curr && curr->rooted && curr!=block ) { /* curr!=block to avoid abandoning good forks. */
    fd_sched_block_t * rooted_child_block = NULL;
    ulong child_idx = curr->child_idx;
    while( child_idx!=ULONG_MAX ) {
      fd_sched_block_t * child = block_pool_ele( sched, child_idx );
      if( child->rooted ) {
        rooted_child_block = child;
      } else {
        /* This is a minority fork. */
        ulong abandoned_cnt = sched->metrics->block_abandoned_cnt;
        subtree_abandon( sched, child );
        abandoned_cnt = sched->metrics->block_abandoned_cnt-abandoned_cnt;
        if( FD_UNLIKELY( abandoned_cnt ) ) FD_LOG_DEBUG(( "abandoned %lu blocks on minority fork starting at block %lu:%lu", abandoned_cnt, child->slot, child_idx ));
      }
      child_idx = child->sibling_idx;
    }
    curr = rooted_child_block;
  }

  /* If the active block got abandoned, we need to reset it. */
  if( sched->active_bank_idx==ULONG_MAX ) {
    sched->metrics->deactivate_pruned_cnt += fd_uint_if( old_active_bank_idx!=ULONG_MAX, 1U, 0U );
    try_activate_block( sched );
  }
}
1554 :
1555 : ulong
1556 0 : fd_sched_pruned_block_next( fd_sched_t * sched ) {
1557 0 : if( !ref_q_empty( sched->ref_q ) ) {
1558 0 : ulong bank_idx = ref_q_pop_head( sched->ref_q );
1559 0 : return bank_idx;
1560 0 : }
1561 0 : return ULONG_MAX;
1562 0 : }
1563 :
1564 : void
1565 0 : fd_sched_set_poh_params( fd_sched_t * sched, ulong bank_idx, ulong tick_height, ulong max_tick_height, ulong hashes_per_tick, fd_hash_t const * start_poh ) {
1566 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1567 0 : FD_TEST( bank_idx<sched->block_cnt_max );
1568 0 : FD_TEST( max_tick_height>tick_height );
1569 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1570 0 : block->tick_height = tick_height;
1571 0 : block->max_tick_height = max_tick_height;
1572 0 : block->hashes_per_tick = hashes_per_tick;
1573 0 : memcpy( block->start_hash, start_poh, sizeof(block->start_hash[0]) );
1574 0 : }
1575 :
1576 : fd_txn_p_t *
1577 0 : fd_sched_get_txn( fd_sched_t * sched, ulong txn_idx ) {
1578 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1579 0 : if( FD_UNLIKELY( txn_idx>=FD_SCHED_MAX_DEPTH ) ) {
1580 0 : return NULL;
1581 0 : }
1582 0 : return sched->txn_pool+txn_idx;
1583 0 : }
1584 :
1585 : fd_hash_t *
1586 0 : fd_sched_get_poh( fd_sched_t * sched, ulong bank_idx ) {
1587 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1588 0 : FD_TEST( bank_idx<sched->block_cnt_max );
1589 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1590 0 : FD_TEST( block->fec_eos );
1591 0 : FD_TEST( block->mblk_cnt );
1592 0 : return block->mblk_pool[ block->mblk_cnt-1 ].end_hash;
1593 0 : }
1594 :
1595 : uint
1596 0 : fd_sched_get_shred_cnt( fd_sched_t * sched, ulong bank_idx ) {
1597 0 : FD_TEST( sched->canary==FD_SCHED_MAGIC );
1598 0 : FD_TEST( bank_idx<sched->block_cnt_max );
1599 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
1600 0 : return block->shred_cnt;
1601 0 : }
1602 :
1603 : char *
1604 0 : fd_sched_get_state_cstr( fd_sched_t * sched ) {
1605 0 : sched->print_buf_sz = 0UL;
1606 0 : print_metrics( sched );
1607 0 : print_sched( sched );
1608 0 : return sched->print_buf;
1609 0 : }
1610 :
1611 0 : void * fd_sched_leave ( fd_sched_t * sched ) { return sched; }
1612 0 : void * fd_sched_delete( void * mem ) { return mem; }
1613 :
1614 :
1615 : /* Internal helpers. */
1616 :
/* add_block: initialize the block pool element at bank_idx to a fresh,
   empty state and link it into the fork tree as a new leaf under
   parent_bank_idx (ULONG_MAX for no parent).  A child of a dying parent
   is itself born dying.  Aborts if the element is already in the
   scheduler. */
static void
add_block( fd_sched_t * sched,
           ulong bank_idx,
           ulong parent_bank_idx ) {
  fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
  FD_TEST( !block->in_sched );
  sched->block_pool_popcnt++;

  /* Reset all replay progress counters. */
  block->txn_parsed_cnt = 0U;
  block->txn_exec_in_flight_cnt = 0U;
  block->txn_exec_done_cnt = 0U;
  block->txn_sigverify_in_flight_cnt = 0U;
  block->txn_sigverify_done_cnt = 0U;
  block->poh_hashing_in_flight_cnt = 0U;
  block->poh_hashing_done_cnt = 0U;
  block->poh_hash_cmp_done_cnt = 0U;
  block->txn_done_cnt = 0U;
  block->shred_cnt = 0U;
  block->fec_cnt = 0U;
  block->mblk_cnt = 0U;
  block->mblk_tick_cnt = 0U;
  block->mblk_unhashed_cnt = 0U;
  block->hashcnt = 0UL;
  /* Seed the high-water marks with the current pool occupancy. */
  block->txn_pool_max_popcnt = sched->depth - sched->txn_pool_free_cnt;
  block->block_pool_max_popcnt = sched->block_pool_popcnt;

  /* All in-progress microblock slots start free, with both the hashing
     and mixin work queues empty. */
  mblk_in_progress_bitset_full( block->mblk_in_progress_pool_free_bitset );
  mblk_in_progress_slist_remove_all( block->mblks_hashing_in_progress, block->mblk_in_progress_pool );
  mblk_in_progress_slist_remove_all( block->mblks_mixin_in_progress, block->mblk_in_progress_pool );

  /* PoH parameters start unknown (ULONG_MAX) until
     fd_sched_set_poh_params is called. */
  block->prev_tick_hashcnt = ULONG_MAX;
  block->curr_tick_hashcnt = 0UL;
  block->tick_height = ULONG_MAX;
  block->max_tick_height = ULONG_MAX;
  block->hashes_per_tick = ULONG_MAX;
  block->inconsistent_hashes_per_tick = 0;

  /* Reset FEC/batch parser state; fec_sob==1 means the next bytes begin
     a new batch. */
  block->mblks_rem = 0UL;
  block->txns_rem = 0UL;
  block->fec_buf_sz = 0U;
  block->fec_buf_boff = 0U;
  block->fec_buf_soff = 0U;
  block->fec_eob = 0;
  block->fec_sob = 1;

  block->fec_eos = 0;
  block->rooted = 0;
  block->dying = 0;
  block->refcnt = 1;
  block->in_sched = 1;
  block->in_rdisp = 0;
  block->block_start_signaled = 0;
  block->block_end_signaled = 0;
  block->block_start_done = 0;
  block->block_end_done = 0;
  block->staged = 0;

  block->luf_depth = 0UL;

  /* New leaf node, no child, no sibling. */
  block->child_idx = ULONG_MAX;
  block->sibling_idx = ULONG_MAX;
  block->parent_idx = ULONG_MAX;

  if( FD_UNLIKELY( parent_bank_idx==ULONG_MAX ) ) {
    return;
  }

  /* node->parent link */
  fd_sched_block_t * parent_block = block_pool_ele( sched, parent_bank_idx );
  block->parent_idx = parent_bank_idx;

  /* parent->node and sibling->node links */
  ulong child_idx = bank_idx;
  if( FD_LIKELY( parent_block->child_idx==ULONG_MAX ) ) { /* Optimize for no forking. */
    parent_block->child_idx = child_idx;
  } else {
    /* Parent already has children: append to the end of the sibling
       list (linear in the number of existing forks). */
    fd_sched_block_t * curr_block = block_pool_ele( sched, parent_block->child_idx );
    while( curr_block->sibling_idx!=ULONG_MAX ) {
      curr_block = block_pool_ele( sched, curr_block->sibling_idx );
    }
    curr_block->sibling_idx = child_idx;
    sched->metrics->fork_observed_cnt++;
  }

  /* A child forked off a dying block is dead on arrival. */
  if( FD_UNLIKELY( parent_block->dying ) ) {
    block->dying = 1;
  }
}
1706 :
/* https://github.com/anza-xyz/agave/blob/v3.0.6/ledger/src/blockstore_processor.rs#L1057

   verify_ticks: check the tick-level invariants of a fully ingested
   block (the block must have seen its end-of-stream FEC set): exact
   tick count against max_tick_height, the block ending with a tick
   entry, and a consistent hash count per tick.

   Returns 0 on success, -1 if the block is invalid (reason logged at
   INFO level). */
static int
verify_ticks( fd_sched_block_t * block ) {
  FD_TEST( block->fec_eos );
  if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height>block->max_tick_height ) ) {
    FD_LOG_INFO(( "bad block: TOO_MANY_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
    return -1;
  }
  if( FD_UNLIKELY( block->mblk_tick_cnt+block->tick_height<block->max_tick_height ) ) {
    FD_LOG_INFO(( "bad block: TOO_FEW_TICKS, slot %lu, parent slot %lu, tick_cnt %u, tick_height %lu, max_tick_height %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->tick_height, block->max_tick_height ));
    return -1;
  }
  /* Number of ticks > 0 implies that number of microblocks > 0, so the
     subtraction won't underflow. */
  ulong last_mblk_idx = block->mblk_cnt-1UL;
  /* end_txn_idx is cumulative per microblock; equal start/end means the
     last microblock carries 0 transactions, i.e. it is a tick. */
  ulong start_txn_idx = last_mblk_idx==0UL ? 0UL : block->mblk_pool[ last_mblk_idx-1UL ].end_txn_idx;
  ulong end_txn_idx = block->mblk_pool[ last_mblk_idx ].end_txn_idx;
  if( FD_UNLIKELY( start_txn_idx!=end_txn_idx ) ) {
    /* The last microblock should be a tick (0 transactions). */
    FD_LOG_INFO(( "bad block: TRAILING_ENTRY, slot %lu, parent slot %lu", block->slot, block->parent_slot ));
    return -1;
  }
  if( FD_UNLIKELY( block->hashes_per_tick>1UL && (block->hashes_per_tick!=block->prev_tick_hashcnt||block->inconsistent_hashes_per_tick) ) ) {
    /* >1 since we don't care about checking low power hashing or
       hashing disabled. */
    FD_LOG_INFO(( "bad block: INVALID_TICK_HASH_COUNT, slot %lu, parent slot %lu, expected %lu, got %lu", block->slot, block->parent_slot, block->hashes_per_tick, block->prev_tick_hashcnt ));
    return -1;
  }

  return 0;
}
1740 :
/* CHECK bails out of the calling parser with FD_SCHED_AGAIN_LATER when
   cond doesn't hold, i.e. there isn't enough buffered data yet and
   parsing should resume once more FEC set data arrives. */
#define CHECK( cond ) do {           \
    if( FD_UNLIKELY( !(cond) ) ) {   \
      return FD_SCHED_AGAIN_LATER;   \
    }                                \
  } while( 0 )

/* CHECK that it is safe to read at least n more bytes. */
#define CHECK_LEFT( n ) CHECK( (n)<=(block->fec_buf_sz-block->fec_buf_soff) )
1749 :
/* Consume as much as possible from the buffer.  By the end of this
   function, we will either have residual data that is unparseable only
   because it is a batch that straddles FEC set boundaries, or we will
   have reached the end of a batch.  In the former case, any remaining
   bytes should be concatenated with the next FEC set for further
   parsing.  In the latter case, any remaining bytes should be thrown
   away.

   Returns FD_SCHED_OK when parsing consumed everything it could,
   FD_SCHED_AGAIN_LATER when more buffered data is needed to make
   progress, FD_SCHED_BAD_BLOCK when the block is provably invalid, or
   whatever error fd_sched_parse_txn reported. */
FD_WARN_UNUSED static int
fd_sched_parse( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
  while( 1 ) {
    /* Drain transactions of the current microblock first. */
    while( block->txns_rem>0UL ) {
      int err;
      if( FD_UNLIKELY( (err=fd_sched_parse_txn( sched, block, alut_ctx ))!=FD_SCHED_OK ) ) {
        return err;
      }
    }
    /* Next unit is a microblock header. */
    if( block->txns_rem==0UL && block->mblks_rem>0UL ) {
      if( FD_UNLIKELY( block->mblk_cnt>=FD_SCHED_MAX_MBLK_PER_SLOT ) ) {
        /* A valid block shouldn't contain more than this amount of
           microblocks. */
        FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, mblk_cnt %u (%u ticks) >= %lu", block->slot, block->parent_slot, block->mblk_cnt, block->mblk_tick_cnt, FD_SCHED_MAX_MBLK_PER_SLOT ));
        return FD_SCHED_BAD_BLOCK;
      }

      CHECK_LEFT( sizeof(fd_microblock_hdr_t) );
      fd_microblock_hdr_t * hdr = (fd_microblock_hdr_t *)fd_type_pun( block->fec_buf+block->fec_buf_soff );
      block->fec_buf_soff += (uint)sizeof(fd_microblock_hdr_t);

      block->mblks_rem--;
      block->txns_rem = hdr->txn_cnt;
      /* end_txn_idx is cumulative: previous microblock's end plus this
         microblock's transaction count. */
      fd_sched_mblk_t * mblk = block->mblk_pool+block->mblk_cnt;
      mblk->end_txn_idx = ( block->mblk_cnt ? block->mblk_pool[ block->mblk_cnt-1 ].end_txn_idx : 0U )+hdr->txn_cnt;
      /* One might think that every microblock needs to have at least
         one hash, otherwise the block should be considered invalid.  A
         vanilla validator certainly produces microblocks that conform
         to this.  But a modded validator could in theory produce zero
         hash microblocks.  Agave's replay stage will happily take those
         microblocks.  The Agave implementation-defined way of doing PoH
         verify is as follows:

         For a tick microblock, do the same number of hashes as
         specified by the microblock.  Zero hashes are allowed, in which
         case this tick would have the same ending hash value as the
         previous microblock.

         For a transaction microblock, if the number of hashes specified
         by the microblock is <= 1, then do zero pure hashes, and simply
         do a mixin/record.  Otherwise, do (number of hashes-1) amount
         of pure hashing, and then do a mixin.  However, note that for
         the purposes of tick_verify, the number of hashes specified by
         the microblock is taken verbatim.

         https://github.com/anza-xyz/agave/blob/v3.0.6/entry/src/entry.rs#L232

         We implement the above for consensus. */
      mblk->hashcnt = fd_ulong_sat_sub( hdr->hash_cnt, fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL ) ); /* For pure hashing, implement the above. */
      block->curr_tick_hashcnt += hdr->hash_cnt; /* For tick_verify, take the number of hashes verbatim. */
      block->hashcnt += mblk->hashcnt+fd_ulong_if( !hdr->txn_cnt, 0UL, 1UL );
      memcpy( mblk->end_hash, hdr->hash, sizeof(fd_hash_t) );
      block->mblk_cnt++;
      block->mblk_unhashed_cnt++;
      if( FD_UNLIKELY( !hdr->txn_cnt ) ) {
        /* This is a tick microblock. */
        if( FD_UNLIKELY( block->prev_tick_hashcnt!=ULONG_MAX && block->prev_tick_hashcnt!=block->curr_tick_hashcnt ) ) {
          /* Ticks disagree on hash count; only fatal here if we already
             know the expected hashes_per_tick, otherwise remembered for
             verify_ticks. */
          block->inconsistent_hashes_per_tick = 1;
          if( FD_LIKELY( block->hashes_per_tick!=ULONG_MAX && block->hashes_per_tick>1UL ) ) {
            /* >1 to ignore low power hashing or hashing disabled */
            FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, tick idx %u, prev hashcnt %lu, curr hashcnt %lu, hashes_per_tick %lu", block->slot, block->parent_slot, block->mblk_tick_cnt, block->prev_tick_hashcnt, block->curr_tick_hashcnt, block->hashes_per_tick ));
            return FD_SCHED_BAD_BLOCK;
          }
        }
        block->prev_tick_hashcnt = block->curr_tick_hashcnt;
        block->curr_tick_hashcnt = 0UL;
        block->mblk_tick_cnt++;
      }
#if FD_SCHED_SKIP_POH
      block->mblk_unhashed_cnt--;
      block->poh_hashing_done_cnt++;
      block->poh_hash_cmp_done_cnt++;
#endif
      continue;
    }
    /* Start of a new batch: the first 8 bytes are the microblock count. */
    if( block->txns_rem==0UL && block->mblks_rem==0UL && block->fec_sob ) {
      CHECK_LEFT( sizeof(ulong) );
      FD_TEST( block->fec_buf_soff==0U );
      block->mblks_rem = FD_LOAD( ulong, block->fec_buf );
      block->fec_buf_soff += (uint)sizeof(ulong);

      block->fec_sob = 0;
      continue;
    }
    /* Batch fully consumed. */
    if( block->txns_rem==0UL && block->mblks_rem==0UL ) {
      break;
    }
  }
  if( block->fec_eob ) {
    /* Ignore trailing bytes at the end of a batch. */
    sched->metrics->bytes_ingested_unparsed_cnt += block->fec_buf_sz-block->fec_buf_soff;
    block->fec_buf_boff += block->fec_buf_sz;
    block->fec_buf_soff = 0U;
    block->fec_buf_sz = 0U;
    block->fec_sob = 1;
    block->fec_eob = 0;
  }
  return FD_SCHED_OK;
}
1856 :
1857 : FD_WARN_UNUSED static int
1858 0 : fd_sched_parse_txn( fd_sched_t * sched, fd_sched_block_t * block, fd_sched_alut_ctx_t * alut_ctx ) {
1859 0 : fd_txn_t * txn = fd_type_pun( block->txn );
1860 :
1861 : /* FIXME: For the replay pipeline, we allow up to 128 instructions per
1862 : transaction. Note that we are not concomitantly bumping the size
1863 : of fd_txn_t. We allow this because transactions like that do get
1864 : packed by other validators, so we have to replay them. Those
1865 : transactions will eventually fail in the runtime, which imposes a
1866 : limit of 64 instructions, but unfortunately they are not tossed out
1867 : at parse time and they land on chain. static_instruction_limit is
1868 : going to enforece this limit at parse time, and transactions like
1869 : that would not land on chain. Then this short term change should
1870 : be rolled back. */
1871 0 : ulong pay_sz = 0UL;
1872 0 : ulong txn_sz = fd_txn_parse_core( block->fec_buf+block->fec_buf_soff,
1873 0 : fd_ulong_min( FD_TXN_MTU, block->fec_buf_sz-block->fec_buf_soff ),
1874 0 : txn,
1875 0 : NULL,
1876 0 : &pay_sz,
1877 0 : ULONG_MAX );
1878 :
1879 0 : if( FD_UNLIKELY( !pay_sz || !txn_sz ) ) {
1880 : /* Can't parse out a full transaction. */
1881 0 : return FD_SCHED_AGAIN_LATER;
1882 0 : }
1883 :
1884 : /* FIXME: remove after static_instruction_limit */
1885 0 : if( FD_UNLIKELY( txn->instr_cnt>FD_TXN_INSTR_MAX*2UL ) ) FD_LOG_CRIT(( "slot %lu txn idx %u has %u instructions, static_instruction_limit will resolve this", block->slot, block->txn_parsed_cnt, txn->instr_cnt ));
1886 :
1887 0 : if( FD_UNLIKELY( block->txn_parsed_cnt>=FD_MAX_TXN_PER_SLOT ) ) {
1888 : /* The block contains more transactions than a valid block would.
1889 : Mark the block dead instead of keep processing it. */
1890 0 : FD_LOG_INFO(( "bad block: slot %lu, parent slot %lu, txn_parsed_cnt %u", block->slot, block->parent_slot, block->txn_parsed_cnt ));
1891 0 : return FD_SCHED_BAD_BLOCK;
1892 0 : }
1893 :
1894 : /* Try to expand ALUTs. */
1895 0 : int has_aluts = txn->transaction_version==FD_TXN_V0 && txn->addr_table_adtl_cnt>0;
1896 0 : int serializing = 0;
1897 0 : if( has_aluts ) {
1898 0 : uchar __attribute__((aligned(FD_SLOT_HASHES_GLOBAL_ALIGN))) slot_hashes_mem[ FD_SYSVAR_SLOT_HASHES_FOOTPRINT ];
1899 0 : fd_slot_hashes_global_t const * slot_hashes_global = fd_sysvar_slot_hashes_read( alut_ctx->accdb, alut_ctx->xid, slot_hashes_mem );
1900 0 : if( FD_LIKELY( slot_hashes_global ) ) {
1901 0 : fd_slot_hash_t * slot_hash = deq_fd_slot_hash_t_join( (uchar *)slot_hashes_global + slot_hashes_global->hashes_offset );
1902 0 : serializing = !!fd_runtime_load_txn_address_lookup_tables( txn, block->fec_buf+block->fec_buf_soff, alut_ctx->accdb, alut_ctx->xid, alut_ctx->els, slot_hash, block->aluts );
1903 0 : sched->metrics->alut_success_cnt += (uint)!serializing;
1904 0 : } else {
1905 0 : serializing = 1;
1906 0 : }
1907 0 : }
1908 :
1909 0 : ulong bank_idx = (ulong)(block-sched->block_pool);
1910 0 : ulong txn_idx = fd_rdisp_add_txn( sched->rdisp, bank_idx, txn, block->fec_buf+block->fec_buf_soff, serializing ? NULL : block->aluts, serializing );
1911 0 : FD_TEST( txn_idx!=0UL );
1912 0 : sched->metrics->txn_parsed_cnt++;
1913 0 : sched->metrics->alut_serializing_cnt += (uint)serializing;
1914 0 : sched->txn_pool_free_cnt--;
1915 0 : fd_txn_p_t * txn_p = sched->txn_pool + txn_idx;
1916 0 : txn_p->payload_sz = pay_sz;
1917 :
1918 0 : txn_p->start_shred_idx = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff );
1919 0 : txn_p->start_shred_idx = fd_ushort_if( txn_p->start_shred_idx>0U, (ushort)(txn_p->start_shred_idx-1U), txn_p->start_shred_idx );
1920 0 : txn_p->end_shred_idx = (ushort)fd_sort_up_uint_split( block->shred_blk_offs, block->shred_cnt, block->fec_buf_boff+block->fec_buf_soff+(uint)pay_sz );
1921 :
1922 0 : fd_memcpy( txn_p->payload, block->fec_buf+block->fec_buf_soff, pay_sz );
1923 0 : fd_memcpy( TXN(txn_p), txn, txn_sz );
1924 0 : txn_bitset_remove( sched->exec_done_set, txn_idx );
1925 0 : txn_bitset_remove( sched->sigverify_done_set, txn_idx );
1926 0 : txn_bitset_remove( sched->poh_mixin_done_set, txn_idx );
1927 0 : sched->txn_idx_to_parse_idx[ txn_idx ] = block->txn_parsed_cnt;
1928 0 : memcpy( block->txn_sigs[ block->txn_parsed_cnt ], fd_txn_get_signatures( TXN(txn_p), txn_p->payload ), FD_TXN_SIGNATURE_SZ );
1929 0 : block->txn_idx[ block->txn_parsed_cnt ] = txn_idx;
1930 0 : block->txn_disp_ticks[ block->txn_parsed_cnt ] = LONG_MAX;
1931 0 : block->txn_done_ticks[ block->txn_parsed_cnt ] = LONG_MAX;
1932 0 : block->fec_buf_soff += (uint)pay_sz;
1933 0 : block->txn_parsed_cnt++;
1934 : #if FD_SCHED_SKIP_SIGVERIFY
1935 : txn_bitset_insert( sched->sigverify_done_set, txn_idx );
1936 : block->txn_sigverify_done_cnt++;
1937 : #endif
1938 : #if FD_SCHED_SKIP_POH
1939 : txn_bitset_insert( sched->poh_mixin_done_set, txn_idx );
1940 : #endif
1941 0 : block->txns_rem--;
1942 0 : return FD_SCHED_OK;
1943 0 : }
1944 :
1945 : #undef CHECK
1946 : #undef CHECK_LEFT
1947 :
1948 : static void
1949 0 : dispatch_sigverify( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
1950 : /* Dispatch transactions for sigverify in parse order. */
1951 0 : out->task_type = FD_SCHED_TT_TXN_SIGVERIFY;
1952 0 : out->txn_sigverify->bank_idx = bank_idx;
1953 0 : out->txn_sigverify->txn_idx = block->txn_idx[ block->txn_sigverify_done_cnt+block->txn_sigverify_in_flight_cnt ];
1954 0 : out->txn_sigverify->exec_idx = (ulong)exec_tile_idx;
1955 0 : sched->sigverify_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->sigverify_ready_bitset[ 0 ], exec_tile_idx );
1956 0 : sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
1957 0 : block->txn_sigverify_in_flight_cnt++;
1958 0 : if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
1959 0 : }
1960 :
/* Assumes there is a PoH task available for dispatching.

   dispatch_poh fills out with a PoH hashing task for block and assigns
   it to exec tile exec_tile_idx.  Hashing a microblock may span
   multiple tasks: we either resume the microblock at the head of the
   hashing-in-progress queue, or, if that queue is empty, start hashing
   the next unhashed microblock.  Each task is capped at
   FD_SCHED_MAX_POH_HASHES_PER_TASK hashes so that no single tile is
   tied up for too long. */
static void
dispatch_poh( fd_sched_t * sched, fd_sched_block_t * block, ulong bank_idx, int exec_tile_idx, fd_sched_task_t * out ) {
  fd_sched_mblk_in_progress_t * mblk = NULL;          /* in-progress hashing state */
  ulong mblk_idx = ULONG_MAX;                         /* index of mblk in the in-progress pool */
  fd_sched_mblk_t * mblk_desc = NULL;                 /* microblock descriptor */
  ulong mblk_desc_idx = ULONG_MAX;                    /* index of mblk_desc in the mblk pool */
  if( FD_LIKELY( !mblk_in_progress_slist_is_empty( block->mblks_hashing_in_progress, block->mblk_in_progress_pool ) ) ) {
    /* There's a PoH task in progress, just continue working on that. */
    mblk_idx = mblk_in_progress_slist_idx_pop_head( block->mblks_hashing_in_progress, block->mblk_in_progress_pool );
    FD_TEST( !mblk_in_progress_slist_private_idx_is_null( mblk_idx ) );
    mblk = block->mblk_in_progress_pool+mblk_idx;
    mblk_desc_idx = mblk->mblk_idx;
    mblk_desc = block->mblk_pool+mblk_desc_idx;
  } else {
    /* No in progress PoH task, so start a new one.  Microblocks are
       hashed in order: the next one to start is right after the ones
       already hashed. */
    FD_TEST( block->mblk_unhashed_cnt );
    mblk_desc_idx = block->mblk_cnt-block->mblk_unhashed_cnt;
    mblk_desc = block->mblk_pool+mblk_desc_idx;
    block->mblk_unhashed_cnt--;
    /* Grab a free in-progress slot from the pool bitset. */
    mblk_idx = mblk_in_progress_bitset_first( block->mblk_in_progress_pool_free_bitset );
    FD_TEST( mblk_idx<FD_SCHED_MAX_MBLK_IN_PROGRESS_PER_SLOT );
    mblk_in_progress_bitset_remove( block->mblk_in_progress_pool_free_bitset, mblk_idx );
    mblk = block->mblk_in_progress_pool+mblk_idx;
    /* The transaction range of this microblock starts where the
       previous microblock's range ended (0 for the first one). */
    ulong start_txn_idx = mblk_desc_idx==0UL ? 0UL : (mblk_desc-1)->end_txn_idx;
    mblk->curr_txn_idx = start_txn_idx;
    mblk->curr_hashcnt = 0UL;
    mblk->curr_sig_cnt = 0U;
    /* Chain off the previous microblock's end hash, or the block's
       start hash for the very first microblock. */
    memcpy( mblk->curr_hash, mblk_desc_idx==0UL ? block->start_hash : (mblk_desc-1)->end_hash, sizeof(fd_hash_t) );
    mblk->mblk_idx = (uint)mblk_desc_idx;
    /* A microblock with an empty transaction range is a tick. */
    mblk->is_tick = start_txn_idx==mblk_desc->end_txn_idx;
  }
  FD_TEST( mblk );
  FD_TEST( mblk_desc );
  out->task_type = FD_SCHED_TT_POH_HASH;
  out->poh_hash->bank_idx = bank_idx;
  out->poh_hash->mblk_idx = mblk_idx;
  out->poh_hash->exec_idx = (ulong)exec_tile_idx;
  /* Dispatch the remaining hashes, capped per task. */
  ulong hashcnt_todo = mblk_desc->hashcnt-mblk->curr_hashcnt;
  out->poh_hash->hashcnt = fd_ulong_min( hashcnt_todo, FD_SCHED_MAX_POH_HASHES_PER_TASK );
  memcpy( out->poh_hash->hash, mblk->curr_hash, sizeof(fd_hash_t) );
  /* Mark the tile busy with PoH and remember which bank it is working
     on. */
  sched->poh_ready_bitset[ 0 ] = fd_ulong_clear_bit( sched->poh_ready_bitset[ 0 ], exec_tile_idx );
  sched->tile_to_bank_idx[ exec_tile_idx ] = bank_idx;
  block->poh_hashing_in_flight_cnt++;
  /* Invariant: no exec tile should be marked busy in all three ready
     bitsets at once. */
  if( FD_UNLIKELY( (~sched->txn_exec_ready_bitset[ 0 ])&(~sched->sigverify_ready_bitset[ 0 ])&(~sched->poh_ready_bitset[ 0 ])&fd_ulong_mask_lsb( (int)sched->exec_cnt ) ) ) FD_LOG_CRIT(( "invariant violation: txn_exec_ready_bitset 0x%lx sigverify_ready_bitset 0x%lx poh_ready_bitset 0x%lx", sched->txn_exec_ready_bitset[ 0 ], sched->sigverify_ready_bitset[ 0 ], sched->poh_ready_bitset[ 0 ] ));
}
2007 :
/* Does up to one transaction mixin.  Returns 1 if one mixin was done, 2
   if that mixin also completed a microblock, 0 if no transaction mixin
   was available, -1 if there is a PoH verify error.

   A mixin appends one transaction's signatures to the microblock's
   merkle tree; when the last transaction of the microblock has been
   appended, the tree root is mixed into the running PoH hash and the
   result is compared against the hash claimed by the block producer. */
FD_WARN_UNUSED static int
maybe_mixin( fd_sched_t * sched, fd_sched_block_t * block ) {
  if( FD_UNLIKELY( mblk_in_progress_slist_is_empty( block->mblks_mixin_in_progress, block->mblk_in_progress_pool ) ) ) return 0;
  /* Queue non-empty implies at least one microblock is hashed but not
     yet hash-compared. */
  FD_TEST( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt>0 );

  /* The microblock we would like to do mixin on is at the head of the
     queue. */
  ulong in_progress_idx = mblk_in_progress_slist_idx_pop_head( block->mblks_mixin_in_progress, block->mblk_in_progress_pool );
  fd_sched_mblk_in_progress_t * mblk = block->mblk_in_progress_pool+in_progress_idx;
  fd_sched_mblk_t * mblk_desc = block->mblk_pool+mblk->mblk_idx;
  /* First transaction of this microblock: where the previous
     microblock's transaction range ended (0 for the first one). */
  ulong start_txn_idx = mblk->mblk_idx==0 ? 0UL : (mblk_desc-1)->end_txn_idx;

  if( FD_UNLIKELY( mblk_desc->end_txn_idx>block->txn_parsed_cnt ) ) {
    /* Very rarely, we've finished hashing, but not all transactions in
       the microblock have been parsed out.  This can happen if we
       haven't received all the FEC sets for this microblock.  We can't
       yet fully mixin the microblock.  So we'll stick it back into the
       end of the queue. */
    mblk_in_progress_slist_idx_push_tail( block->mblks_mixin_in_progress, in_progress_idx, block->mblk_in_progress_pool );
    /* Invariant: we wouldn't start to mixin unless the microblock is
       fully parsed. */
    FD_TEST( mblk->curr_txn_idx==start_txn_idx );

    /* No other microblock in the mixin queue. */
    if( FD_UNLIKELY( block->poh_hashing_done_cnt-block->poh_hash_cmp_done_cnt==1 ) ) return 0;

    /* At this point, there's at least one more microblock in the mixin
       queue we could try. */
    in_progress_idx = mblk_in_progress_slist_idx_pop_head( block->mblks_mixin_in_progress, block->mblk_in_progress_pool );
    mblk = block->mblk_in_progress_pool+in_progress_idx;
    mblk_desc = block->mblk_pool+mblk->mblk_idx;
    start_txn_idx = mblk->mblk_idx==0 ? 0UL : (mblk_desc-1)->end_txn_idx;
    /* Invariant: at any given point in time, there can be at most one
       microblock that hasn't been fully parsed yet, due to the nature
       of sequential parsing.  So this microblock has to be fully
       parsed. */
    FD_TEST( mblk_desc->end_txn_idx<=block->txn_parsed_cnt );
  }

  /* Now mixin. */
  if( FD_LIKELY( mblk->curr_txn_idx==start_txn_idx ) ) block->bmtree = fd_bmtree_commit_init( block->bmtree_mem, 32UL, 1UL, 0UL ); /* Optimize for single-transaction microblocks, which are the majority. */

  /* Append each of this transaction's signatures as a leaf of the
     microblock's merkle tree. */
  ulong txn_gidx = block->txn_idx[ mblk->curr_txn_idx ];
  fd_txn_p_t * _txn = sched->txn_pool+txn_gidx;
  fd_txn_t * txn = TXN(_txn);
  for( ulong j=0; j<txn->signature_cnt; j++ ) {
    fd_bmtree_node_t node[ 1 ];
    fd_bmtree_hash_leaf( node, _txn->payload+txn->signature_off+FD_TXN_SIGNATURE_SZ*j, 64UL, 1UL );
    fd_bmtree_commit_append( block->bmtree, node, 1UL );
    mblk->curr_sig_cnt++;
  }

  /* Release the txn_idx.  A transaction is fully done once exec,
     sigverify, and PoH mixin have all completed; mixin is the step we
     just finished here. */
  txn_bitset_insert( sched->poh_mixin_done_set, txn_gidx );
  if( txn_bitset_test( sched->exec_done_set, txn_gidx ) && txn_bitset_test( sched->sigverify_done_set, txn_gidx ) ) {
    fd_rdisp_complete_txn( sched->rdisp, txn_gidx, 1 );
    sched->txn_pool_free_cnt++;
    block->txn_done_cnt++;
    sched->metrics->txn_done_cnt++;
  }

  mblk->curr_txn_idx++;
  int rv = 2;
  if( FD_LIKELY( mblk->curr_txn_idx==mblk_desc->end_txn_idx ) ) {
    /* Ready to compute the final hash for this microblock: mix the
       merkle root into the running PoH hash and compare against the
       end hash claimed by the block producer. */
    uchar * root = fd_bmtree_commit_fini( block->bmtree );
    uchar mixin_buf[ 64 ];
    fd_memcpy( mixin_buf, mblk->curr_hash, 32UL );
    fd_memcpy( mixin_buf+32UL, root, 32UL );
    fd_sha256_hash( mixin_buf, 64UL, mblk->curr_hash );
    if( FD_UNLIKELY( memcmp( mblk->curr_hash, mblk_desc->end_hash, sizeof(fd_hash_t) ) ) ) {
      FD_BASE58_ENCODE_32_BYTES( mblk->curr_hash->hash, our_str );
      FD_BASE58_ENCODE_32_BYTES( mblk_desc->end_hash->hash, ref_str );
      FD_LOG_INFO(( "bad block: poh hash mismatch on mblk %u, ours %s, claimed %s, hashcnt %lu, txns [%lu,%lu), %u sigs, slot %lu, parent slot %lu", mblk->mblk_idx, our_str, ref_str, mblk_desc->hashcnt, start_txn_idx, mblk_desc->end_txn_idx, mblk->curr_sig_cnt, block->slot, block->parent_slot ));
      return -1;
    }
    block->poh_hash_cmp_done_cnt++;
    /* Return the in-progress slot to the free pool. */
    mblk_in_progress_bitset_insert( block->mblk_in_progress_pool_free_bitset, in_progress_idx );
  } else {
    /* There are more transactions to mixin in this microblock. */
    mblk_in_progress_slist_idx_push_head( block->mblks_mixin_in_progress, in_progress_idx, block->mblk_in_progress_pool );
    rv = 1;
  }

  return rv;
}
2097 :
/* try_activate_block attempts to select a new active block for
   scheduling.  It first scans the occupied staging lanes for a lane
   whose head block is immediately dispatchable (parent done, block
   activatable); failing that, it promotes the longest stageable
   unstaged fork into a free lane, demoting squatting lanes first if
   all lanes are occupied.  On success sched->active_bank_idx is set;
   otherwise it is left untouched. */
static void
try_activate_block( fd_sched_t * sched ) {

  /* See if there are any allocated staging lanes that we can activate
     for scheduling ... */
  ulong staged_bitset = sched->staged_bitset;
  while( staged_bitset ) {
    /* Iterate over the set bits, i.e. the occupied lanes. */
    int lane_idx = fd_ulong_find_lsb( staged_bitset );
    staged_bitset = fd_ulong_pop_lsb( staged_bitset );

    ulong head_idx = sched->staged_head_bank_idx[ lane_idx ];
    fd_sched_block_t * head_block = block_pool_ele( sched, head_idx );
    fd_sched_block_t * parent_block = block_pool_ele( sched, head_block->parent_idx );
    if( FD_UNLIKELY( parent_block->dying ) ) {
      /* Invariant: no child of a dying block should be staged. */
      FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, parent slot %lu on lane %d has parent_block->dying set, slot %lu, parent slot %lu",
                    head_idx, head_block->slot, head_block->parent_slot, lane_idx, parent_block->slot, parent_block->parent_slot ));
    }
    //FIXME: restore this invariant check when we have immediate demotion of dying blocks
    // if( FD_UNLIKELY( head_block->dying ) ) {
    //   /* Invariant: no dying block should be staged. */
    //   FD_LOG_CRIT(( "invariant violation: staged_head_bank_idx %lu, slot %lu, prime %lu on lane %u has head_block->dying set",
    //                 head_idx, (ulong)head_block->block_id.slot, (ulong)head_block->block_id.prime, lane_idx ));
    // }
    if( block_is_done( parent_block ) && block_is_activatable( head_block ) ) {
      /* ... Yes, on this staging lane the parent block is done.  So we
         can activate the staged child. */
      if( FD_UNLIKELY( head_idx!=sched->last_active_bank_idx ) ) { /* Unlikely because lane switching only possible under forking. */
        FD_LOG_DEBUG(( "activating block %lu:%lu: lane switch to %d", head_block->slot, head_idx, lane_idx ));
        sched->metrics->lane_switch_cnt++;
      }
      sched->active_bank_idx = head_idx;
      return;
    }
  }

  /* ... No, promote unstaged blocks. */
  ulong root_idx = sched->root_idx;
  if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
    FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
  }
  /* Find and stage the longest stageable unstaged fork.  This is a
     policy decision. */
  ulong depth = compute_longest_unstaged_fork( sched, root_idx );
  if( FD_LIKELY( depth>0UL ) ) {
    if( FD_UNLIKELY( sched->staged_bitset==fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) ) ) {
      /* No more staging lanes available.  All of them are occupied by
         slow squatters.  Only empty blocks can be demoted, and so
         blocks with in-flight transactions, including dying in-flight
         blocks, shouldn't be demoted.  We demote all demotable lanes.
         Demotion isn't all that expensive, since demotable blocks have
         no transactions in them.  If a demoted block proves to be
         active still, it'll naturally promote back into a staging lane.

         In fact, all lanes should be demotable at this point.  None of
         the lanes have anything dispatchable, otherwise we would have
         simply activated one of the dispatchable lanes.  None of the
         lanes have anything in-flight either, as we allow for a grace
         period while something is in-flight, before we deactivate any
         block.  In principle, we could get rid of the grace period and
         deactivate right away.  In that case, it's okay if nothing is
         demotable at the moment, as that simply implies that all lanes
         have in-flight tasks.  We would get another chance to try to
         demote when the last in-flight task on any lane completes.

         Another interesting side effect of the current dispatching and
         lane switching policy is that each lane should have exactly one
         block in it at this point.  A parent block by definition can't
         be partially ingested.  Any parent block that is fully ingested
         and dispatchable would have made the lane dispatchable, and we
         wouldn't be here.  Any parent that is fully ingested and fully
         dispatched would be fully done after the grace period.  So
         there could only be one block per lane, and it is
         simultaneously the head and the tail of the lane.

         A note on why this whole thing does not deadlock:

         One might reasonably wonder what happens if all the lanes are
         non-empty, non-dead, but for some reason couldn't be activated
         for dispatching.  We would deadlock in this case, as no lane
         dispatches to the point of being demotable, and no unstaged
         block can be promoted.  Such is not in fact possible.  The only
         way a dispatchable lane can be ineligible for activation is if
         it has a parent block that isn't done yet.  So a deadlock
         happens when this parent block, or any of its dispatchable
         ancestors, is unstaged.  An important invariant we maintain is
         that a staged block can't have an unstaged stageable parent.
         This invariant, by induction, gives us the guarantee that at
         least one of the lanes can be activated. */
      for( int l=0; l<(int)FD_SCHED_MAX_STAGING_LANES; l++ ) {
        if( FD_UNLIKELY( !lane_is_demotable( sched, l ) ) ) {
          FD_LOG_CRIT(( "invariant violation: lane %d is not demotable", l ));
        }
        ulong demoted_cnt = demote_lane( sched, l );
        if( FD_UNLIKELY( demoted_cnt!=1UL ) ) {
          FD_LOG_CRIT(( "invariant violation: %lu blocks demoted from lane %d, expected 1 demotion", demoted_cnt, l ));
        }
        sched->metrics->lane_demoted_cnt++;
      }
    }
    /* At least one lane must be free by now. */
    FD_TEST( sched->staged_bitset!=fd_ulong_mask_lsb( FD_SCHED_MAX_STAGING_LANES ) );
    int lane_idx = fd_ulong_find_lsb( ~sched->staged_bitset );
    if( FD_UNLIKELY( lane_idx>=(int)FD_SCHED_MAX_STAGING_LANES ) ) {
      FD_LOG_CRIT(( "invariant violation: lane_idx %d, sched->staged_bitset %lx",
                    lane_idx, sched->staged_bitset ));
    }
    ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
    if( FD_UNLIKELY( head_bank_idx==ULONG_MAX ) ) {
      /* We found a promotable fork depth>0.  This should not happen. */
      FD_LOG_CRIT(( "invariant violation: head_bank_idx==ULONG_MAX" ));
    }
    /* We don't bother with promotion unless the block is immediately
       dispatchable.  So it's okay to set the active block here.  This
       doesn't cause out-of-order block replay because any parent block
       must be fully done.  If the parent block were dead, this fork
       would be marked dead too and ineligible for promotion.  If the
       parent block were not dead and not done and staged, we wouldn't
       be trying to promote an unstaged fork.  If the parent block were
       not dead and not done and unstaged, it would've been part of this
       unstaged fork. */
    fd_sched_block_t * head_block = block_pool_ele( sched, head_bank_idx );
    FD_LOG_DEBUG(( "activating block %lu:%lu: unstaged promotion to lane %d", head_block->slot, head_bank_idx, lane_idx ));
    sched->active_bank_idx = head_bank_idx;
    return;
  }
  /* No unstaged blocks to promote.  So we're done.  Yay. */
}
2225 :
2226 : static void
2227 0 : check_or_set_active_block( fd_sched_t * sched ) {
2228 0 : if( FD_UNLIKELY( sched->active_bank_idx==ULONG_MAX ) ) {
2229 0 : try_activate_block( sched );
2230 0 : } else {
2231 0 : fd_sched_block_t * active_block = block_pool_ele( sched, sched->active_bank_idx );
2232 0 : if( FD_UNLIKELY( block_should_deactivate( active_block ) ) ) {
2233 0 : sched->print_buf_sz = 0UL;
2234 0 : print_all( sched, active_block );
2235 0 : FD_LOG_NOTICE(( "%s", sched->print_buf ));
2236 0 : FD_LOG_CRIT(( "invariant violation: should have been deactivated" ));
2237 0 : }
2238 0 : }
2239 0 : }
2240 :
/* This function has two main jobs:
   - Mark everything on the fork tree dying.
   - Take blocks out of rdisp if possible.

   Recurses over the subtree rooted at block; recursion depth is
   bounded by the height of the fork tree. */
static void
subtree_mark_and_maybe_prune_rdisp( fd_sched_t * sched, fd_sched_block_t * block ) {
  if( FD_UNLIKELY( block->rooted ) ) {
    FD_LOG_CRIT(( "invariant violation: rooted block should not be abandoned, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }
  /* All minority fork nodes pass through this function eventually.  So
     this is a good point to check per-node invariants for minority
     forks. */
  if( FD_UNLIKELY( block->staged && !block->in_rdisp ) ) {
    FD_LOG_CRIT(( "invariant violation: staged block is not in the dispatcher, slot %lu, parent slot %lu",
                  block->slot, block->parent_slot ));
  }

  /* Setting the flag is non-optional and can happen more than once. */
  block->dying = 1;

  /* Removal from dispatcher should only happen once. */
  if( block->in_rdisp ) {
    fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
    if( FD_UNLIKELY( !parent ) ) {
      /* Only the root has no parent.  Abandon should never be called on
         the root.  So any block we are trying to abandon should have a
         parent. */
      FD_LOG_CRIT(( "invariant violation: parent not found slot %lu, parent slot %lu",
                    block->slot, block->parent_slot ));
    }

    /* The dispatcher expects blocks to be abandoned in the same order
       that they were added on each lane.  There are no requirements on
       the order of abandoning if two blocks are not on the same lane,
       or if a block is unstaged.  This means that in general we
       shouldn't abandon a child block if the parent hasn't been
       abandoned yet, if and only if they are on the same lane.  So wait
       until we can abandon the parent, and then descend down the fork
       tree to ensure orderly abandoning. */
    int in_order = !parent->in_rdisp ||                    /* parent is not in the dispatcher */
                   !parent->staged ||                      /* parent is in the dispatcher but not staged */
                   !block->staged ||                       /* parent is in the dispatcher and staged but this block is unstaged */
                   block->staging_lane!=parent->staging_lane; /* this block is on a different staging lane than its parent */

    /* If the block being abandoned is the currently active block,
       deactivate it first. */
    if( FD_UNLIKELY( in_order && block->staged && sched->active_bank_idx==sched->staged_head_bank_idx[ block->staging_lane ] && sched->active_bank_idx!=ULONG_MAX ) ) {
      FD_TEST( block_pool_ele( sched, sched->active_bank_idx )==block );
      FD_LOG_DEBUG(( "reset active_bank_idx %lu: abandon", sched->active_bank_idx ));
      sched->last_active_bank_idx = sched->active_bank_idx;
      sched->active_bank_idx = ULONG_MAX;
      sched->metrics->deactivate_abandoned_cnt++;
    }

    /* We inform the dispatcher of an abandon only when there are no
       more in-flight transactions.  Otherwise, if the dispatcher
       recycles the same txn_id that was just abandoned, and we receive
       completion of an in-flight transaction whose txn_id was just
       recycled. */
    // FIXME The recycling might be fine now that we no longer use
    // txn_id to index into anything.  We might be able to just drop
    // txn_id on abandoned blocks.  Though would this leak transaction
    // content if the txn_id is recycled?
    // Note that subtree pruning from sched isn't dependent on the
    // in-flight check being present here, as is_prunable already checks
    // for in-flight==0.
    int abandon = in_order && !block_is_in_flight( block );

    if( abandon ) {
      block->in_rdisp = 0;
      fd_rdisp_abandon_block( sched->rdisp, (ulong)(block-sched->block_pool) );
      /* Reclaim the txn pool slots of all parsed-but-not-done
         transactions in this block. */
      sched->txn_pool_free_cnt += block->txn_parsed_cnt-block->txn_done_cnt; /* in_flight_cnt==0 */
      sched->metrics->block_abandoned_cnt++;
      sched->metrics->txn_abandoned_parsed_cnt += block->txn_parsed_cnt;
      sched->metrics->txn_abandoned_exec_done_cnt += block->txn_exec_done_cnt;
      sched->metrics->txn_abandoned_done_cnt += block->txn_done_cnt;

      //FIXME when demote supports non-empty blocks, we should demote
      //the block from the lane unconditionally and immediately,
      //regardless of whether it's safe to abandon or not.  So a block
      //would go immediately from staged to unstaged and eventually to
      //abandoned.
      if( FD_LIKELY( block->staged ) ) {
        FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: abandon", block->slot, block_to_idx( sched, block ), block->staging_lane ));
        block->staged = 0;
        /* Now release the staging lane.  This will release the lane as
           soon as we abandon the head block on a lane.  Technically a
           release should only happen when we remove the tail block on a
           lane.  This is fine though.  The way we abandon guarantees by
           induction that an entire lane will be abandoned.  Only the
           head block on a lane can possibly have in-flight
           transactions, and so once a head block becomes eligible for
           abandoning, the entire lane all the way to the tail block,
           will be eligible. */
        sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
        sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
      }
    }
  }

  /* Abandon the entire fork chaining off of this block. */
  ulong child_idx = block->child_idx;
  while( child_idx!=ULONG_MAX ) {
    fd_sched_block_t * child = block_pool_ele( sched, child_idx );
    subtree_mark_and_maybe_prune_rdisp( sched, child );
    child_idx = child->sibling_idx;
  }
}
2347 :
2348 : /* It's safe to call this function more than once on the same block.
2349 : The final call is when there are no more in-flight tasks for this
2350 : block, at which point the block will be pruned from sched. */
2351 : static void
2352 0 : subtree_abandon( fd_sched_t * sched, fd_sched_block_t * block ) {
2353 0 : subtree_mark_and_maybe_prune_rdisp( sched, block );
2354 0 : if( block_is_prunable( block ) ) {
2355 0 : fd_sched_block_t * parent = block_pool_ele( sched, block->parent_idx );
2356 0 : if( FD_LIKELY( parent ) ) {
2357 : /* Splice the block out of its parent's children list. */
2358 0 : ulong block_idx = block_to_idx( sched, block );
2359 0 : ulong * idx_p = &parent->child_idx;
2360 0 : while( *idx_p!=block_idx ) {
2361 0 : idx_p = &(block_pool_ele( sched, *idx_p )->sibling_idx);
2362 0 : }
2363 0 : *idx_p = block->sibling_idx;
2364 0 : }
2365 0 : subtree_prune( sched, block_to_idx( sched, block ), ULONG_MAX );
2366 0 : }
2367 0 : }
2368 :
/* subtree_prune removes from sched every block in the subtree rooted
   at bank_idx, except for the subtree rooted at except_idx (pass
   ULONG_MAX to prune unconditionally).  The traversal is iterative:
   the parent_idx field of visited blocks is repurposed as the "next"
   link of a FIFO worklist (safe because pruned blocks no longer need
   their tree links).  Crashes on invariant violations: blocks being
   pruned must have no in-flight tasks and must already be out of the
   dispatcher. */
static void
subtree_prune( fd_sched_t * sched, ulong bank_idx, ulong except_idx ) {
  fd_sched_block_t * head = block_pool_ele( sched, bank_idx );
  head->parent_idx = ULONG_MAX;
  fd_sched_block_t * tail = head;

  while( head ) {
    FD_TEST( head->in_sched );
    head->in_sched = 0;
    if( head->refcnt ) {
      /* Block is still externally referenced; transfer the reference
         to ref_q for deferred release. */
      FD_TEST( !head->block_end_done );
      head->refcnt = 0;
      if( FD_UNLIKELY( !ref_q_avail( sched->ref_q ) ) ) FD_LOG_CRIT(( "ref_q full" ));
      ref_q_push_tail( sched->ref_q, block_to_idx( sched, head ) );
    }

    /* Log blocks that are being pruned before replay completed. */
    if( FD_UNLIKELY( !head->block_end_done ) ) {
      sched->print_buf_sz = 0UL;
      print_block_metrics( sched, head );
      if( FD_LIKELY( head->block_start_done ) ) FD_LOG_DEBUG(( "block %lu:%lu replayed partially, pruning without full replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
      else FD_LOG_DEBUG(( "block %lu:%lu replayed nothing, pruning without any replay: %s", head->slot, block_to_idx( sched, head ), sched->print_buf ));
    }

    ulong child_idx = head->child_idx;
    while( child_idx!=ULONG_MAX ) {
      fd_sched_block_t * child = block_pool_ele( sched, child_idx );
      /* Add children to be visited.  We abuse the parent_idx field to
         link up the next block to visit. */
      if( child_idx!=except_idx ) {
        tail->parent_idx = child_idx;
        tail = child;
        tail->parent_idx = ULONG_MAX;
      }
      child_idx = child->sibling_idx;
    }

    /* Prune the current block.  We will never publish halfway into a
       staging lane, because anything on the rooted fork should have
       finished replaying gracefully and be out of the dispatcher.  In
       fact, anything that we are publishing away should be out of the
       dispatcher at this point.  And there should be no more in-flight
       transactions. */
    if( FD_UNLIKELY( block_is_in_flight( head ) ) ) {
      FD_LOG_CRIT(( "invariant violation: block has transactions in flight (%u exec %u sigverify %u poh), slot %lu, parent slot %lu",
                    head->txn_exec_in_flight_cnt, head->txn_sigverify_in_flight_cnt, head->poh_hashing_in_flight_cnt, head->slot, head->parent_slot ));
    }
    if( FD_UNLIKELY( head->in_rdisp ) ) {
      /* We should have removed it from the dispatcher when we were
         notified of the new root, or when in-flight transactions were
         drained. */
      FD_LOG_CRIT(( "invariant violation: block is in the dispatcher, slot %lu, parent slot %lu",
                    head->slot, head->parent_slot ));
    }
    sched->block_pool_popcnt--;

    /* Advance to the next worklist entry before scrubbing links. */
    fd_sched_block_t * next = block_pool_ele( sched, head->parent_idx );

    /* We don't have to clear the indices here since no one should be
       accessing them.  Defensive programming. */
    head->parent_idx = ULONG_MAX;
    head->child_idx = ULONG_MAX;
    head->sibling_idx = ULONG_MAX;

    head = next;
  }
}
2435 :
2436 : static void
2437 0 : maybe_switch_block( fd_sched_t * sched, ulong bank_idx ) {
2438 : /* This only happens rarely when there are dying in-flight blocks.
2439 : Early exit and don't let dying blocks affect replay. */
2440 0 : if( FD_UNLIKELY( bank_idx!=sched->active_bank_idx ) ) return;
2441 :
2442 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
2443 0 : if( FD_UNLIKELY( block_is_done( block ) ) ) {
2444 0 : fd_rdisp_remove_block( sched->rdisp, bank_idx );
2445 0 : FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: remove", block->slot, bank_idx, block->staging_lane ));
2446 0 : block->in_rdisp = 0;
2447 0 : block->staged = 0;
2448 0 : sched->metrics->block_removed_cnt++;
2449 0 : FD_LOG_DEBUG(( "reset active_bank_idx %lu: remove", sched->active_bank_idx ));
2450 0 : sched->last_active_bank_idx = sched->active_bank_idx;
2451 0 : sched->active_bank_idx = ULONG_MAX;
2452 :
2453 : /* See if there is a child block down the same staging lane. This
2454 : is a policy decision to minimize fork churn. We could in theory
2455 : reevaluate staging lane allocation here and do promotion/demotion
2456 : as needed. */
2457 0 : ulong child_idx = block->child_idx;
2458 0 : while( child_idx!=ULONG_MAX ) {
2459 0 : fd_sched_block_t * child = block_pool_ele( sched, child_idx );
2460 0 : if( FD_LIKELY( child->staged && child->staging_lane==block->staging_lane ) ) {
2461 : /* There is a child block down the same staging lane ... */
2462 0 : if( FD_LIKELY( !child->dying ) ) {
2463 : /* ... and the child isn't dead */
2464 0 : if( FD_UNLIKELY( !block_is_activatable( child ) ) ) {
2465 : /* ... but the child is not activatable, likely because
2466 : there are no transactions available yet. */
2467 0 : sched->metrics->deactivate_no_txn_cnt++;
2468 0 : try_activate_block( sched );
2469 0 : return;
2470 0 : }
2471 : /* ... and it's immediately dispatchable, so switch the active
2472 : block to it, and have the child inherit the head status of
2473 : the lane. This is the common case. */
2474 0 : FD_LOG_DEBUG(( "activating block %lu:%lu: child inheritance on lane %lu", child->slot, child_idx, child->staging_lane ));
2475 0 : sched->active_bank_idx = child_idx;
2476 0 : sched->staged_head_bank_idx[ block->staging_lane ] = child_idx;
2477 0 : if( FD_UNLIKELY( !fd_ulong_extract_bit( sched->staged_bitset, (int)block->staging_lane ) ) ) {
2478 0 : FD_LOG_CRIT(( "invariant violation: staged_bitset 0x%lx bit %lu is not set, slot %lu, parent slot %lu, child slot %lu, parent slot %lu",
2479 0 : sched->staged_bitset, block->staging_lane, block->slot, block->parent_slot, child->slot, child->parent_slot ));
2480 0 : }
2481 0 : return;
2482 0 : } else {
2483 : /* ... but the child block is considered dead, likely because
2484 : the parser considers it invalid. */
2485 0 : FD_LOG_INFO(( "child block %lu is already dead", child->slot ));
2486 0 : subtree_abandon( sched, child );
2487 0 : break;
2488 0 : }
2489 0 : }
2490 0 : child_idx = child->sibling_idx;
2491 0 : }
2492 : /* There isn't a child block down the same staging lane. This is
2493 : the last block in the staging lane. Release the staging lane. */
2494 0 : sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, (int)block->staging_lane );
2495 0 : sched->staged_head_bank_idx[ block->staging_lane ] = ULONG_MAX;
2496 0 : sched->metrics->deactivate_no_child_cnt++;
2497 0 : try_activate_block( sched );
2498 0 : } else if( block_should_deactivate( block ) ) {
2499 : /* We exhausted the active block, but it's not fully done yet. We
2500 : are just not getting FEC sets for it fast enough. This could
2501 : happen when the network path is congested, or when the leader
2502 : simply went down. Reset the active block. */
2503 0 : sched->last_active_bank_idx = sched->active_bank_idx;
2504 0 : sched->active_bank_idx = ULONG_MAX;
2505 0 : sched->metrics->deactivate_no_txn_cnt++;
2506 0 : try_activate_block( sched );
2507 0 : }
2508 0 : }
2509 :
2510 : FD_FN_UNUSED static ulong
2511 0 : find_and_stage_longest_unstaged_fork( fd_sched_t * sched, int lane_idx ) {
2512 0 : ulong root_idx = sched->root_idx;
2513 0 :
2514 0 : if( FD_UNLIKELY( root_idx==ULONG_MAX ) ) {
2515 0 : FD_LOG_CRIT(( "invariant violation: root_idx==ULONG_MAX indicating fd_sched is uninitialized" ));
2516 0 : }
2517 0 :
2518 0 : /* First pass: compute the longest unstaged fork depth for each node
2519 0 : in the fork tree. */
2520 0 : ulong depth = compute_longest_unstaged_fork( sched, root_idx );
2521 0 :
2522 0 : /* Second pass: stage blocks on the longest unstaged fork. */
2523 0 : ulong head_bank_idx = stage_longest_unstaged_fork( sched, root_idx, lane_idx );
2524 0 :
2525 0 : if( FD_UNLIKELY( (depth>0UL && head_bank_idx==ULONG_MAX) || (depth==0UL && head_bank_idx!=ULONG_MAX) ) ) {
2526 0 : FD_LOG_CRIT(( "invariant violation: depth %lu, head_bank_idx %lu",
2527 0 : depth, head_bank_idx ));
2528 0 : }
2529 0 :
2530 0 : return head_bank_idx;
2531 0 : }
2532 :
2533 : /* Returns length of the longest stageable unstaged fork, if there is
2534 : one, and 0 otherwise. */
2535 : static ulong
2536 0 : compute_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx ) {
2537 0 : if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
2538 0 : FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
2539 0 : }
2540 :
2541 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
2542 :
2543 0 : ulong max_child_depth = 0UL;
2544 0 : ulong child_idx = block->child_idx;
2545 0 : while( child_idx!=ULONG_MAX ) {
2546 0 : ulong child_depth = compute_longest_unstaged_fork( sched, child_idx );
2547 0 : if( child_depth > max_child_depth ) {
2548 0 : max_child_depth = child_depth;
2549 0 : }
2550 0 : fd_sched_block_t * child = block_pool_ele( sched, child_idx );
2551 0 : child_idx = child->sibling_idx;
2552 0 : }
2553 :
2554 0 : block->luf_depth = max_child_depth + fd_ulong_if( block_is_promotable( block ), 1UL, 0UL );
2555 0 : return block->luf_depth;
2556 0 : }
2557 :
2558 : static ulong
2559 0 : stage_longest_unstaged_fork_helper( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
2560 0 : if( FD_UNLIKELY( bank_idx==ULONG_MAX ) ) {
2561 0 : FD_LOG_CRIT(( "invariant violation: bank_idx==ULONG_MAX" ));
2562 0 : }
2563 :
2564 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
2565 :
2566 0 : int stage_it = fd_int_if( block_is_promotable( block ), 1, 0 );
2567 0 : ulong rv = fd_ulong_if( stage_it, bank_idx, ULONG_MAX );
2568 0 : if( FD_LIKELY( stage_it ) ) {
2569 0 : block->staged = 1;
2570 0 : block->staging_lane = (ulong)lane_idx;
2571 0 : fd_rdisp_promote_block( sched->rdisp, bank_idx, block->staging_lane );
2572 0 : sched->metrics->block_promoted_cnt++;
2573 0 : FD_LOG_DEBUG(( "block %lu:%lu entered lane %lu: promote", block->slot, bank_idx, block->staging_lane ));
2574 0 : }
2575 :
2576 : /* Base case: leaf node. */
2577 0 : if( block->child_idx==ULONG_MAX ) return rv;
2578 :
2579 0 : ulong max_depth = 0UL;
2580 0 : ulong best_child_idx = ULONG_MAX;
2581 0 : ulong child_idx = block->child_idx;
2582 0 : while( child_idx!=ULONG_MAX ) {
2583 0 : fd_sched_block_t * child = block_pool_ele( sched, child_idx );
2584 0 : if( child->luf_depth>max_depth ) {
2585 0 : max_depth = child->luf_depth;
2586 0 : best_child_idx = child_idx;
2587 0 : }
2588 0 : child_idx = child->sibling_idx;
2589 0 : }
2590 :
2591 : /* Recursively stage descendants. */
2592 0 : if( best_child_idx!=ULONG_MAX ) {
2593 0 : ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, best_child_idx, lane_idx );
2594 0 : rv = fd_ulong_if( rv!=ULONG_MAX, rv, head_bank_idx );
2595 0 : }
2596 :
2597 0 : return rv;
2598 0 : }
2599 :
2600 : /* Returns idx of head block of staged lane on success, idx_null
2601 : otherwise. */
2602 : static ulong
2603 0 : stage_longest_unstaged_fork( fd_sched_t * sched, ulong bank_idx, int lane_idx ) {
2604 0 : ulong head_bank_idx = stage_longest_unstaged_fork_helper( sched, bank_idx, lane_idx );
2605 0 : if( FD_LIKELY( head_bank_idx!=ULONG_MAX ) ) {
2606 0 : sched->metrics->lane_promoted_cnt++;
2607 0 : sched->staged_bitset = fd_ulong_set_bit( sched->staged_bitset, lane_idx );
2608 0 : sched->staged_head_bank_idx[ lane_idx ] = head_bank_idx;
2609 0 : }
2610 0 : return head_bank_idx;
2611 0 : }
2612 :
2613 : /* Check if an entire staging lane can be demoted. Returns 1 if all
2614 : blocks in the lane are demotable, 0 otherwise. */
2615 : static int
2616 0 : lane_is_demotable( fd_sched_t * sched, int lane_idx ) {
2617 0 : ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
2618 :
2619 0 : while( bank_idx!=ULONG_MAX ) {
2620 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
2621 0 : FD_TEST( block->staged );
2622 0 : FD_TEST( block->staging_lane==(ulong)lane_idx );
2623 :
2624 0 : if( FD_UNLIKELY( !block_is_demotable( block ) ) ) {
2625 : /* Found a non-demotable block. Early exit. */
2626 0 : return 0;
2627 0 : }
2628 :
2629 : /* Find the child in the same staging lane. */
2630 0 : ulong child_idx = block->child_idx;
2631 0 : ulong next_bank_idx = ULONG_MAX;
2632 0 : while( child_idx!=ULONG_MAX ) {
2633 0 : fd_sched_block_t * child = block_pool_ele( sched, child_idx );
2634 0 : if( child->staged && child->staging_lane==(ulong)lane_idx ) {
2635 0 : next_bank_idx = child_idx;
2636 0 : break;
2637 0 : }
2638 0 : child_idx = child->sibling_idx;
2639 0 : }
2640 0 : bank_idx = next_bank_idx;
2641 0 : }
2642 :
2643 0 : return 1;
2644 0 : }
2645 :
2646 : /* Demote all blocks in a staging lane. Assumes that all blocks in the
2647 : lane are demotable. Returns the number of blocks demoted. */
2648 : static ulong
2649 0 : demote_lane( fd_sched_t * sched, int lane_idx ) {
2650 0 : ulong bank_idx = sched->staged_head_bank_idx[ lane_idx ];
2651 0 : uint demoted_cnt = 0U;
2652 :
2653 0 : while( bank_idx!=ULONG_MAX ) {
2654 0 : fd_sched_block_t * block = block_pool_ele( sched, bank_idx );
2655 0 : FD_TEST( block->staged );
2656 0 : FD_TEST( block->staging_lane==(ulong)lane_idx );
2657 :
2658 0 : int ret = fd_rdisp_demote_block( sched->rdisp, bank_idx );
2659 0 : if( FD_UNLIKELY( ret!=0 ) ) {
2660 0 : FD_LOG_CRIT(( "fd_rdisp_demote_block failed for slot %lu, bank_idx %lu, lane %d", block->slot, bank_idx, lane_idx ));
2661 0 : }
2662 0 : FD_LOG_DEBUG(( "block %lu:%lu exited lane %lu: demote", block->slot, bank_idx, block->staging_lane ));
2663 0 : block->staged = 0;
2664 0 : demoted_cnt++;
2665 :
2666 : /* Find the child in the same staging lane. */
2667 0 : ulong child_idx = block->child_idx;
2668 0 : ulong next_bank_idx = ULONG_MAX;
2669 0 : while( child_idx!=ULONG_MAX ) {
2670 0 : fd_sched_block_t * child = block_pool_ele( sched, child_idx );
2671 0 : if( child->staged && child->staging_lane==(ulong)lane_idx ) {
2672 0 : next_bank_idx = child_idx;
2673 0 : break;
2674 0 : }
2675 0 : child_idx = child->sibling_idx;
2676 0 : }
2677 0 : bank_idx = next_bank_idx;
2678 0 : }
2679 :
2680 : /* Clear the lane. */
2681 0 : sched->staged_bitset = fd_ulong_clear_bit( sched->staged_bitset, lane_idx );
2682 0 : sched->staged_head_bank_idx[ lane_idx ] = ULONG_MAX;
2683 :
2684 0 : sched->metrics->block_demoted_cnt += demoted_cnt;
2685 0 : FD_LOG_DEBUG(( "demoted %u blocks in lane %d", demoted_cnt, lane_idx ));
2686 0 : return demoted_cnt;
2687 0 : }
|