Line data Source code
1 : #include "fd_replay_tile.h"
2 : #include "fd_sched.h"
3 : #include "fd_execrp.h"
4 : #include "fd_vote_tracker.h"
5 : #include "generated/fd_replay_tile_seccomp.h"
6 :
7 : #include "../genesis/fd_genesi_tile.h"
8 : #include "../poh/fd_poh.h"
9 : #include "../poh/fd_poh_tile.h"
10 : #include "../tower/fd_tower_tile.h"
11 : #include "../resolv/fd_resolv_tile.h"
12 : #include "../restore/utils/fd_ssload.h"
13 :
14 : #include "../../disco/tiles.h"
15 : #include "../../disco/fd_txn_m.h"
16 : #include "../../disco/store/fd_store.h"
17 : #include "../../disco/shred/fd_fec_set.h"
18 : #include "../../disco/pack/fd_pack.h"
19 : #include "../../discof/fd_accdb_topo.h"
20 : #include "../../discof/reasm/fd_reasm.h"
21 : #include "../../disco/keyguard/fd_keyload.h"
22 : #include "../../disco/keyguard/fd_keyswitch.h"
23 : #include "../../disco/genesis/fd_genesis_cluster.h"
24 : #include "../../discof/genesis/genesis_hash.h"
25 : #include "../../util/pod/fd_pod.h"
26 : #include "../../flamenco/accdb/fd_accdb_admin_v1.h"
27 : #include "../../flamenco/accdb/fd_accdb_admin_v2.h"
28 : #include "../../flamenco/accdb/fd_accdb_impl_v1.h"
29 : #include "../../flamenco/accdb/fd_accdb_sync.h"
30 : #include "../../flamenco/accdb/fd_vinyl_req_pool.h"
31 : #include "../../flamenco/rewards/fd_rewards.h"
32 : #include "../../flamenco/leaders/fd_multi_epoch_leaders.h"
33 : #include "../../flamenco/progcache/fd_progcache_admin.h"
34 : #include "../../disco/topo/fd_wksp_mon.h"
35 : #include "../../disco/metrics/fd_metrics.h"
36 : #include "../../disco/shred/fd_shred_tile.h"
37 : #include "../../flamenco/fd_flamenco_base.h"
38 : #include "../../flamenco/runtime/fd_runtime.h"
39 : #include "../../flamenco/runtime/fd_runtime_stack.h"
40 : #include "../../flamenco/genesis/fd_genesis_parse.h"
41 : #include "../../flamenco/runtime/sysvar/fd_sysvar_epoch_schedule.h"
42 : #include "../../flamenco/runtime/program/fd_precompiles.h"
43 : #include "../../flamenco/runtime/program/vote/fd_vote_state_versioned.h"
44 : #include "../../flamenco/runtime/program/vote/fd_vote_codec.h"
45 : #include "../../flamenco/runtime/tests/fd_dump_pb.h"
46 :
47 : #include <stdio.h>
48 :
49 : /* Replay concepts:
50 :
51 : - Blocks are aggregations of entries aka. microblocks which are
52 : groupings of txns and are constructed by the block producer (see
53 : fd_pack).
54 :
55 : - Entries are grouped into entry batches by the block producer (see
56 : fd_pack / fd_shredder).
57 :
58 : - Entry batches are divided into chunks known as shreds by the block
59 : producer (see fd_shredder).
60 :
61 : - Shreds are grouped into forward-error-correction sets (FEC sets) by
62 : the block producer (see fd_shredder).
63 :
64 : - Shreds are transmitted to the rest of the cluster via the Turbine
65 : protocol (see fd_shredder / fd_shred).
66 :
67 : - Once enough shreds within a FEC set are received to recover the
68 : entirety of the shred data encoded by that FEC set, the receiver
69 : can "complete" the FEC set (see fd_fec_resolver).
70 :
71 : - If shreds in the FEC set are missing such that it can't complete,
72 : the receiver can use the Repair protocol to request missing shreds
73 : in FEC set (see fd_repair).
74 :
75 : - The current Repair protocol does not support requesting coding
76 : shreds. As a result, some FEC sets might be actually complete
77 : (contain all data shreds). Repair currently hacks around this by
78 : forcing completion but the long-term solution is to add support for
79 : fec_repairing coding shreds via Repair.
80 :
81 : - FEC sets are delivered in partial-order to the Replay tile by the
82 : Repair tile. Currently Replay only supports replaying entry batches
 83 : so FEC sets need to be reassembled into an entry batch before they can
84 : be replayed. The new Dispatcher will change this by taking a FEC
85 : set as input instead. */
86 :
87 0 : #define IN_KIND_SNAP ( 0)
88 0 : #define IN_KIND_GENESIS ( 1)
89 0 : #define IN_KIND_IPECHO ( 2)
90 0 : #define IN_KIND_TOWER ( 3)
91 0 : #define IN_KIND_RESOLV ( 4)
92 0 : #define IN_KIND_POH ( 5)
93 0 : #define IN_KIND_EXECRP ( 6)
94 0 : #define IN_KIND_REPAIR ( 7)
95 0 : #define IN_KIND_TXSEND ( 8)
96 0 : #define IN_KIND_RPC ( 9)
97 0 : #define IN_KIND_GOSSIP_OUT (10)
98 :
99 : #define DEBUG_LOGGING 0
100 :
101 : /* The first bank that the replay tile produces either for genesis
102 : or the snapshot boot will always be at bank index 0. */
103 0 : #define FD_REPLAY_BOOT_BANK_IDX (0UL)
104 :
105 : struct fd_replay_in_link {
106 : fd_wksp_t * mem;
107 : ulong chunk0;
108 : ulong wmark;
109 : ulong mtu;
110 : };
111 :
112 : typedef struct fd_replay_in_link fd_replay_in_link_t;
113 :
114 : struct fd_replay_out_link {
115 : ulong idx;
116 : fd_wksp_t * mem;
117 : ulong chunk0;
118 : ulong wmark;
119 : ulong chunk;
120 : };
121 :
122 : typedef struct fd_replay_out_link fd_replay_out_link_t;
123 :
124 : /* fd_block_id_map is a simple map of block-ids to bank indices. The
125 : map sits on top of an array of fd_block_id_ele_t. This serves as a
126 : translation layer between block ids to bank indices. The data
127 : array is indexed by bank index and the latest observed merkle root
128 : for the bank index is stored in the array. Once the block id has
129 : been observed, the entry is keyed by the latest merkle root (aka the
130 : block id). */
131 :
132 : struct fd_block_id_ele {
133 : fd_hash_t latest_mr;
134 : uint latest_fec_idx;
135 : int block_id_seen;
136 : ulong slot;
137 : ulong next_;
138 : };
139 : typedef struct fd_block_id_ele fd_block_id_ele_t;
140 :
141 : #define MAP_NAME fd_block_id_map
142 : #define MAP_ELE_T fd_block_id_ele_t
143 : #define MAP_KEY_T fd_hash_t
144 0 : #define MAP_KEY latest_mr
145 0 : #define MAP_NEXT next_
146 0 : #define MAP_KEY_EQ(k0,k1) (!memcmp((k0),(k1), sizeof(fd_hash_t)))
147 0 : #define MAP_KEY_HASH(key,seed) (fd_hash((seed),(key),sizeof(fd_hash_t)))
148 : #include "../../util/tmpl/fd_map_chain.c"
149 :
150 : static inline ulong
151 0 : fd_block_id_ele_get_idx( fd_block_id_ele_t * ele_arr, fd_block_id_ele_t * ele ) {
152 0 : return (ulong)(ele - ele_arr);
153 0 : }
154 :
155 : struct fd_replay_tile {
156 : fd_wksp_t * wksp;
157 :
158 : uint rng_seed;
159 : fd_rng_t rng[ 1 ];
160 :
161 : fd_accdb_admin_t accdb_admin[1];
162 : fd_accdb_user_t accdb[1];
163 : fd_progcache_join_t progcache[1];
164 : fd_wksp_mon_t progcache_wksp_mon[1];
165 : fd_wksp_mon_t accdb_cache_wksp_mon[1];
166 :
167 : fd_txncache_t * txncache;
168 : fd_store_t * store;
169 : fd_banks_t * banks;
170 : ulong frontier_indices[ FD_BANKS_MAX_BANKS ];
171 : ulong frontier_cnt;
172 :
173 : /* This flag is 1 If we have seen a vote signature that our node has
174 : sent out get rooted at least one time. The value is 0 otherwise.
175 : We can't become leader and pack blocks until this flag has been
176 : set. This parallels the Agave 'has_new_vote_been_rooted'. */
177 : int identity_vote_rooted;
178 : int wait_for_vote_to_start_leader;
179 :
180 : /* wfs_enabled is 1 if the validator is booted in
181 : wait_for_supermajority mode. In this mode replay (and, by extension,
182 : downstream consumers) is not allowed to make progress until 80% of
183 : the cluster has published their ContactInfo in Gossip with a
184 : shred version matching expected_shred_version. When this happens,
185 : wfs_complete will be set to 1. */
186 : int wfs_enabled;
187 : int wfs_complete;
188 :
189 : fd_hash_t expected_bank_hash;
190 :
191 : ulong reasm_seed;
192 : fd_reasm_t * reasm;
193 : fd_reasm_fec_t * reasm_evicted; /* evicted FEC by reasm_insert must be stored in returnable_frag, and then drained in after_credit */
194 :
195 : fd_sched_t * sched;
196 : ulong in_cnt;
197 : ulong execrp_idle_cnt;
198 :
199 : ulong vote_tracker_seed;
200 : fd_vote_tracker_t * vote_tracker;
201 :
202 : int has_genesis_hash;
203 : char genesis_path[ PATH_MAX ];
204 : fd_hash_t genesis_hash[1];
205 : fd_genesis_t genesis[1];
206 : ulong cluster_type;
207 :
208 : int has_genesis_timestamp;
209 : ulong genesis_timestamp;
210 : int has_expected_genesis_timestamp;
211 : ulong expected_genesis_timestamp;
212 :
213 : #define FD_REPLAY_HARD_FORKS_MAX (64UL)
214 : ulong hard_forks_cnt;
215 : ulong hard_forks[ FD_REPLAY_HARD_FORKS_MAX ];
216 : ulong hard_forks_cnts[ FD_REPLAY_HARD_FORKS_MAX ];
217 :
218 : ushort expected_shred_version;
219 : ushort ipecho_shred_version;
220 :
221 : /* A note on publishing ...
222 :
223 : The watermarks are used to publish our fork-aware structures. For
224 : example, store, banks, and txncache need to be published to release
225 : resources occupied by rooted or dead blocks. In general,
226 : publishing has the effect of pruning forks in those structures,
227 : indicating that it is ok to release the memory being occupied by
228 : the blocks on said forks. Tower is responsible for informing us of
229 : the latest block on the consensus rooted fork. As soon as we can,
230 : we should move the published root as close as possible to the
231 : latest consensus root, publishing/pruning everything on the fork
232 : tree along the way. That is, all the blocks that directly descend
233 : from the current published root (inclusive) to the new published
234 : root (exclusive) on the rooted fork, as well as all the minority
235 : forks that branch from said blocks.
236 :
237 : Ideally, we'd move the published root to the consensus root
238 : immediately upon receiving a new consensus root. However, that's
239 : not always safe to do. One thing we need to be careful about is
240 : making sure that there are no more users/consumers of
241 : soon-to-be-pruned blocks, lest a use-after-free occurs. This can
242 : be done by using a reference counter for each block. Any
243 : concurrent activity, such as transaction execution in the exec
244 : tiles, should retain a refcnt on the block for as
245 : long as it needs access to the shared fork-aware structures related
246 : to that block. Eventually, refcnt on a given block will drop down
247 : to 0 as the block either finishes replaying or gets marked as dead,
248 : and any other tile that has retained a refcnt on the block releases
249 : it. At that point, it becomes a candidate for pruning. The key to
250 : safe publishing then becomes figuring out how far we could advance
251 : the published root, such that every minority fork branching off of
252 : blocks in between the current published root (inclusive) and the
253 : new published root (exclusive) is safe to be pruned. This is a
254 : straightforward tree traversal, where if a block B on the rooted
255 : fork has refcnt 0, and all minority forks branching off of B also
256 : have refcnt 0, then B is safe to be pruned. We advance the
257 : published root to the farthest consecutively prunable block on the
258 : rooted fork. Note that reasm presents the replay tile with a clean
259 : view of the world where every block is chained off of a parent
260 : block. So there are no orpahned/dangling tree nodes to worry
261 : about. The world is a nice single tree as far as replay is
262 : concerned.
263 :
264 : In the following fork tree, every node is a block and the number in
265 : parentheses is the refcnt on the block. The chain marked with
266 : double slashes is the rooted fork. Suppose the published root is
267 : at block P, and consensus root is at block T. We can't publish
268 : past block P because Q has refcnt 1.
269 :
270 :
271 : P(0)
272 : / \\
273 : Q(1) A(0)
274 : / || \
275 : X(0) B(0) C(0)
276 : / || \
277 : Y(0) M(0) R(0)
278 : / || / \
279 : D(2) T(0) J(0) L(0)
280 : ||
281 : ..
282 : ..
283 : ..
284 : ||
285 : blocks we might be actively replaying
286 :
287 :
288 : When refcnt on Q drops to 0, we would be able to advance the
289 : published root to block M, because blocks P, A, and B, as well as
290 : all subtrees branching off of them, have refcnt 0, and therefore
291 : can be pruned. Block M itself cannot be pruned yet because its
292 : child block D has refcnt 2. After publishing/pruning, the fork
293 : tree would be:
294 :
295 :
296 : M(0)
297 : / ||
298 : D(2) T(0)
299 : ||
300 : ..
301 : ..
302 : ..
303 : ||
304 : blocks we might be actively replaying
305 :
306 :
307 : As a result, the shared fork-aware structures can free resources
308 : for blocks P, A, B, and all subtrees branching off of them.
309 :
310 : For the reference counting part, the replay tile is the sole entity
311 : that can update the refcnt. This ensures that all refcnt increment
312 : and decrement attempts are serialized at the replay tile, and that
313 : there are no racy resurrection of a soon-to-be-pruned block. If a
314 : refcnt increment request arrives after a block has been pruned,
315 : replay simply rejects the request.
316 :
317 : A note on the implementation of the above ...
318 :
319 : Upon receiving a new consensus root, we descend down the rooted
320 : fork from the current published root to the new consensus root. On
321 : each node/block of the rooted fork, we do a summation of the refcnt
322 : on the block and all the minority fork blocks branching from the
323 : block. If the summation is 0, the block is safe for pruning. We
324 : advance the published root to the far end of the consecutive run of
325 : 0 refcnt sums originating from the current published root. On our
326 : descent down the minority forks, we also mark any block that hasn't
327 : finished replaying as dead, so we don't waste time executing them.
328 : No more transactions shall be dispatched for execution from dead
329 : blocks.
330 :
331 : Blocks start out with a refcnt of 0. Other tiles may send a
332 : request to the replay tile for a reference on a block. The
333 : transaction dispatcher is another source of refcnt updates. On
334 : every dispatch of a transaction for block B, we increment the
335 : refcnt for B. And on every transaction finalization, we decrement
336 : the refcnt for B. This means that whenever the refcnt on a block
337 : is 0, there is no more reference on that block from the execution
338 : pipeline. While it might be tempting to simply increment the
339 : refcnt once when we start replaying a block, and decrement the
340 : refcnt once when we finish a block, this more fine-grained refcnt
341 : update strategy allows for aborting and potentially immediate
342 : pruning of blocks under interleaved block replay. Upon receiving a
343 : new consensus root, we can simply look at the refcnt on minority
344 : fork blocks, and a refcnt of 0 would imply that the block is safe
345 : for pruning, even if we haven't finished replaying it. Without the
346 : fine-grained refcnt, we would need to first stop dispatching from
347 : the aborted block, and then wait for a full drain of the execution
348 : pipeline to know for sure that there are no more in-flight
349 : transactions executing on the aborted block. Note that this will
350 : allow the refcnt on any block to transiently drop down to 0. We
351 : will not mistakenly prune an actively replaying block, aka a leaf
352 : node, that is chaining off of the rooted fork, because the
353 : consensus root is always an ancestor of the actively replaying tip.
354 : */
355 : fd_hash_t consensus_root; /* The most recent block to have reached max lockout in the tower. */
356 : ulong consensus_root_slot; /* slot number of the above. */
357 : ulong consensus_root_bank_idx; /* bank index of the above. */
358 : ulong published_root_slot; /* slot number of the published root. */
359 : ulong published_root_bank_idx; /* bank index of the published root. */
360 :
361 : /* Randomly generated block id for the initial genesis/snapshot slot.
362 : To be replaced with block id in the snapshot manifest when SIMD-333
363 : is activated. */
364 :
365 : fd_hash_t initial_block_id;
366 :
367 : /* We need to maintain a tile-local mapping of block-ids to bank index
368 : and vice versa. This translation layer is needed for conversion
369 : since tower operates on block-ids and downstream consumers of FEC
370 : sets operate on bank indices. This mapping must happen both ways:
371 : 1. tower sends us block ids and we must map them to bank indices.
372 : 2. when a block is completed, we must map the bank index to a block
373 : id to send a slot complete message to tower. */
374 : ulong block_id_len;
375 : fd_block_id_ele_t * block_id_arr;
376 : ulong block_id_map_seed;
377 : fd_block_id_map_t * block_id_map;
378 :
379 : /* Capture-related configs */
380 : fd_capture_ctx_t * capture_ctx;
381 : FILE * capture_file;
382 : fd_capture_link_buf_t cap_repl_out[1];
383 :
384 : /* Protobuf dumping context for debugging runtime execution and
385 : collecting seed corpora. */
386 : fd_dump_proto_ctx_t * dump_proto_ctx;
387 :
388 : /* Whether the runtime has been booted either from snapshot loading
389 : or from genesis. */
390 : int is_booted;
391 :
392 : /* Buffer to store vote towers that need to be published to the Tower
393 : tile. */
394 :
395 : fd_multi_epoch_leaders_t * mleaders;
396 :
397 : int larger_max_cost_per_block;
398 :
399 : /* When we transition to becoming leader, we can only unbecome the
400 : leader if we have received a block id from the FEC reassembler, and
401 : a message from PoH that the leader slot has ended. After both of
402 : these conditions are met, then we are free to unbecome the leader.
403 : */
404 : uint is_leader : 1;
405 : uint supports_leader : 1;
406 : int recv_poh;
407 : ulong next_leader_slot;
408 : long next_leader_tickcount;
409 : ulong highwater_leader_slot;
410 : ulong reset_slot;
411 : fd_bank_t * reset_bank;
412 : fd_hash_t reset_block_id;
413 : long reset_timestamp_nanos;
414 : double slot_duration_nanos;
415 : double slot_duration_ticks;
416 : fd_bank_t * leader_bank;
417 :
418 : fd_pubkey_t identity_pubkey[1];
419 : ulong identity_idx;
420 :
421 : fd_keyswitch_t * keyswitch;
422 : int halt_leader;
423 :
424 : ulong resolv_tile_cnt;
425 :
426 : int in_kind[ 128 ];
427 : fd_replay_in_link_t in[ 128 ];
428 :
429 : fd_replay_out_link_t exec_out[ 1 ];
430 :
431 : fd_replay_out_link_t replay_out[1];
432 :
433 : fd_replay_out_link_t epoch_out[1];
434 :
435 : /* The rpc tile needs to occasionally own a reference to a live bank.
436 : Replay needs to know if the rpc as a consumer is enabled so it can
437 : increment the bank's refcnt before publishing bank_idx. */
438 : int rpc_enabled;
439 :
440 : # if FD_HAS_FLATCC
441 : /* For dumping blocks to protobuf. For backtest only. */
442 : fd_block_dump_ctx_t * block_dump_ctx;
443 : # endif
444 :
445 : /* We need a few pieces of information to compute the right addresses
446 : for bundle crank information that we need to send to pack. */
447 : struct {
448 : int enabled;
449 : fd_pubkey_t vote_account;
450 : fd_bundle_crank_gen_t gen[1];
451 : } bundle;
452 :
453 : struct {
454 : ulong store_query_acquire;
455 : ulong store_query_release;
456 : fd_histf_t store_query_wait[1];
457 : fd_histf_t store_query_work[1];
458 : ulong store_query_cnt;
459 : ulong store_query_missing_cnt;
460 : ulong store_query_mr;
461 : ulong store_query_missing_mr;
462 :
463 : ulong slots_total;
464 : ulong transactions_total;
465 :
466 : ulong reasm_latest_slot;
467 : ulong reasm_latest_fec_idx;
468 :
469 : ulong sched_full;
470 : ulong reasm_empty;
471 : ulong leader_bid_wait;
472 : ulong banks_full;
473 : ulong storage_root_behind;
474 :
475 : fd_histf_t root_slot_dur[1];
476 : fd_histf_t root_account_dur[1];
477 : } metrics;
478 :
479 : uchar __attribute__((aligned(FD_MULTI_EPOCH_LEADERS_ALIGN))) mleaders_mem[ FD_MULTI_EPOCH_LEADERS_FOOTPRINT ];
480 :
481 : ulong runtime_stack_seed;
482 : fd_runtime_stack_t * runtime_stack;
483 : };
484 :
485 : typedef struct fd_replay_tile fd_replay_tile_t;
486 :
487 : FD_FN_CONST static inline ulong
488 0 : scratch_align( void ) {
489 0 : return 128UL;
490 0 : }
491 : FD_FN_PURE static inline ulong
492 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
493 0 : ulong chain_cnt = fd_block_id_map_chain_cnt_est( tile->replay.max_live_slots );
494 :
495 0 : ulong l = FD_LAYOUT_INIT;
496 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
497 0 : l = FD_LAYOUT_APPEND( l, fd_runtime_stack_align(), fd_runtime_stack_footprint( FD_RUNTIME_MAX_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_STAKE_ACCOUNTS ) );
498 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_block_id_ele_t), sizeof(fd_block_id_ele_t) * tile->replay.max_live_slots );
499 0 : l = FD_LAYOUT_APPEND( l, fd_block_id_map_align(), fd_block_id_map_footprint( chain_cnt ) );
500 0 : l = FD_LAYOUT_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->replay.max_live_slots ) );
501 0 : l = FD_LAYOUT_APPEND( l, fd_reasm_align(), fd_reasm_footprint( tile->replay.fec_max ) );
502 0 : l = FD_LAYOUT_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.sched_depth, tile->replay.max_live_slots ) );
503 0 : l = FD_LAYOUT_APPEND( l, fd_vinyl_req_pool_align(), fd_vinyl_req_pool_footprint( 1UL, 1UL ) );
504 0 : l = FD_LAYOUT_APPEND( l, fd_vote_tracker_align(), fd_vote_tracker_footprint() );
505 0 : l = FD_LAYOUT_APPEND( l, fd_capture_ctx_align(), fd_capture_ctx_footprint() );
506 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_dump_proto_ctx_t), sizeof(fd_dump_proto_ctx_t) );
507 :
508 0 : # if FD_HAS_FLATCC
509 0 : if( FD_UNLIKELY( tile->replay.dump_block_to_pb ) ) {
510 0 : l = FD_LAYOUT_APPEND( l, fd_block_dump_context_align(), fd_block_dump_context_footprint() );
511 0 : }
512 0 : # endif
513 :
514 0 : l = FD_LAYOUT_FINI( l, scratch_align() );
515 :
516 0 : return l;
517 0 : }
518 :
519 : static inline void
520 0 : metrics_write( fd_replay_tile_t * ctx ) {
521 0 : FD_MCNT_SET ( REPLAY, STORE_QUERY_ACQUIRE, ctx->metrics.store_query_acquire );
522 0 : FD_MCNT_SET ( REPLAY, STORE_QUERY_RELEASE, ctx->metrics.store_query_release );
523 0 : FD_MHIST_COPY( REPLAY, STORE_QUERY_WAIT, ctx->metrics.store_query_wait );
524 0 : FD_MHIST_COPY( REPLAY, STORE_QUERY_WORK, ctx->metrics.store_query_work );
525 0 : FD_MCNT_SET ( REPLAY, STORE_QUERY_CNT, ctx->metrics.store_query_cnt );
526 0 : FD_MCNT_SET ( REPLAY, STORE_QUERY_MISSING_CNT, ctx->metrics.store_query_missing_cnt );
527 0 : FD_MGAUGE_SET( REPLAY, STORE_QUERY_MR, ctx->metrics.store_query_mr );
528 0 : FD_MGAUGE_SET( REPLAY, STORE_QUERY_MISSING_MR, ctx->metrics.store_query_missing_mr );
529 :
530 0 : FD_MGAUGE_SET( REPLAY, ROOT_SLOT, ctx->consensus_root_slot==ULONG_MAX ? 0UL : ctx->consensus_root_slot );
531 0 : ulong leader_slot = ctx->leader_bank ? ctx->leader_bank->f.slot : 0UL;
532 :
533 0 : if( FD_LIKELY( ctx->leader_bank ) ) {
534 0 : FD_MGAUGE_SET( REPLAY, NEXT_LEADER_SLOT, leader_slot );
535 0 : FD_MGAUGE_SET( REPLAY, LEADER_SLOT, leader_slot );
536 0 : } else {
537 0 : FD_MGAUGE_SET( REPLAY, NEXT_LEADER_SLOT, ctx->next_leader_slot==ULONG_MAX ? 0UL : ctx->next_leader_slot );
538 0 : FD_MGAUGE_SET( REPLAY, LEADER_SLOT, 0UL );
539 0 : }
540 0 : FD_MGAUGE_SET( REPLAY, RESET_SLOT, ctx->reset_slot==ULONG_MAX ? 0UL : ctx->reset_slot );
541 :
542 0 : FD_MGAUGE_SET( REPLAY, LIVE_BANKS, fd_banks_pool_used_cnt( ctx->banks ) );
543 :
544 0 : ulong reasm_free = fd_reasm_free( ctx->reasm );
545 0 : FD_MGAUGE_SET( REPLAY, REASM_FREE, reasm_free );
546 :
547 0 : FD_MCNT_SET( REPLAY, SLOTS_TOTAL, ctx->metrics.slots_total );
548 0 : FD_MCNT_SET( REPLAY, TRANSACTIONS_TOTAL, ctx->metrics.transactions_total );
549 :
550 0 : FD_MGAUGE_SET( REPLAY, REASM_LATEST_SLOT, ctx->metrics.reasm_latest_slot );
551 0 : FD_MGAUGE_SET( REPLAY, REASM_LATEST_FEC_IDX, ctx->metrics.reasm_latest_fec_idx );
552 :
553 0 : fd_sched_metrics_write( ctx->sched );
554 :
555 0 : FD_MCNT_SET( REPLAY, SCHED_FULL, ctx->metrics.sched_full );
556 0 : FD_MCNT_SET( REPLAY, REASM_EMPTY, ctx->metrics.reasm_empty );
557 0 : FD_MCNT_SET( REPLAY, LEADER_BID_WAIT, ctx->metrics.leader_bid_wait );
558 0 : FD_MCNT_SET( REPLAY, BANKS_FULL, ctx->metrics.banks_full );
559 0 : FD_MCNT_SET( REPLAY, STORAGE_ROOT_BEHIND, ctx->metrics.storage_root_behind );
560 :
561 0 : FD_MCNT_SET( REPLAY, ACCDB_CREATED, ctx->accdb->base.created_cnt );
562 0 : FD_MCNT_SET( REPLAY, ACCDB_REVERTED, ctx->accdb_admin->base.revert_cnt );
563 0 : FD_MCNT_SET( REPLAY, ACCDB_ROOTED, ctx->accdb_admin->base.root_cnt );
564 0 : FD_MCNT_SET( REPLAY, ACCDB_ROOTED_BYTES, ctx->accdb_admin->base.root_tot_sz );
565 0 : FD_MCNT_SET( REPLAY, ACCDB_GC_ROOT, ctx->accdb_admin->base.gc_root_cnt );
566 0 : FD_MCNT_SET( REPLAY, ACCDB_RECLAIMED, ctx->accdb_admin->base.reclaim_cnt );
567 0 : FD_MHIST_COPY( REPLAY, ROOT_SLOT_DURATION_SECONDS, ctx->metrics.root_slot_dur );
568 0 : FD_MHIST_COPY( REPLAY, ROOT_ACCOUNT_DURATION_SECONDS, ctx->metrics.root_account_dur );
569 0 : FD_MCNT_SET( REPLAY, ROOT_ELAPSED_SECONDS_DB, (ulong)ctx->accdb_admin->base.dt_vinyl );
570 0 : FD_MCNT_SET( REPLAY, ROOT_ELAPSED_SECONDS_COPY, (ulong)ctx->accdb_admin->base.dt_copy );
571 0 : FD_MCNT_SET( REPLAY, ROOT_ELAPSED_SECONDS_GC, (ulong)ctx->accdb_admin->base.dt_gc );
572 :
573 0 : fd_progcache_admin_metrics_t const * pcm = &fd_progcache_admin_metrics_g;
574 0 : FD_MCNT_SET( REPLAY, PROGCACHE_ROOTED, pcm->root_cnt );
575 0 : FD_MCNT_SET( REPLAY, PROGCACHE_GC_ROOT, pcm->gc_root_cnt );
576 :
577 0 : fd_wksp_mon_t * wm = fd_wksp_mon_tick( ctx->progcache_wksp_mon, fd_tickcount() );
578 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_FREE_PARTS, wm->free_cnt );
579 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_FREE_BYTES, wm->free_sz );
580 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_SIZE_BYTES, wm->wksp->data_max );
581 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_FREE_PART_MAX_BYTES, wm->free_max_sz );
582 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_USED_PART_MEDIAN_BYTES, wm->part_median_sz );
583 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_USED_PART_MEAN_BYTES, wm->part_mean_sz );
584 :
585 0 : fd_wksp_mon_t * am = fd_wksp_mon_tick( ctx->accdb_cache_wksp_mon, fd_tickcount() );
586 0 : FD_MGAUGE_SET( REPLAY, ACCDB_CACHE_FREE_PARTS, am->free_cnt );
587 0 : FD_MGAUGE_SET( REPLAY, ACCDB_CACHE_FREE_BYTES, am->free_sz );
588 0 : FD_MGAUGE_SET( REPLAY, ACCDB_CACHE_SIZE_BYTES, am->wksp->data_max );
589 0 : FD_MGAUGE_SET( REPLAY, ACCDB_CACHE_FREE_PART_MAX_BYTES, am->free_max_sz );
590 0 : FD_MGAUGE_SET( REPLAY, ACCDB_CACHE_USED_PART_MEDIAN_BYTES, am->part_median_sz );
591 0 : FD_MGAUGE_SET( REPLAY, ACCDB_CACHE_USED_PART_MEAN_BYTES, am->part_mean_sz );
592 0 : }
593 :
594 : static void
595 : publish_epoch_info( fd_replay_tile_t * ctx,
596 : fd_stem_context_t * stem,
597 : fd_bank_t * bank,
598 0 : int current_epoch ) {
599 0 : fd_epoch_schedule_t const * schedule = &bank->f.epoch_schedule;
600 0 : ulong epoch = fd_slot_to_epoch( schedule, bank->f.slot, NULL ) + fd_ulong_if( current_epoch, 1UL, 0UL );
601 :
602 0 : fd_features_t const * features = &bank->f.features;
603 :
604 0 : fd_runtime_stack_t * runtime_stack = ctx->runtime_stack;
605 :
606 0 : fd_epoch_info_msg_t * epoch_info_msg = fd_chunk_to_laddr( ctx->epoch_out->mem, ctx->epoch_out->chunk );
607 :
608 0 : epoch_info_msg->staked_vote_cnt = current_epoch ? runtime_stack->epoch_weights.next_stake_weights_cnt : runtime_stack->epoch_weights.stake_weights_cnt;
609 0 : epoch_info_msg->staked_id_cnt = current_epoch ? runtime_stack->epoch_weights.next_id_weights_cnt : runtime_stack->epoch_weights.id_weights_cnt;
610 0 : epoch_info_msg->epoch_schedule = *schedule;
611 0 : epoch_info_msg->features = *features;
612 0 : epoch_info_msg->epoch = epoch;
613 0 : epoch_info_msg->start_slot = fd_epoch_slot0( schedule, epoch );
614 0 : epoch_info_msg->slot_cnt = fd_epoch_slot_cnt( schedule, epoch );
615 0 : epoch_info_msg->excluded_id_stake = current_epoch ? runtime_stack->epoch_weights.next_id_weights_excluded : runtime_stack->epoch_weights.id_weights_excluded;
616 :
617 0 : fd_vote_stake_weight_t * stake_weights = fd_type_pun( epoch_info_msg + 1 );
618 0 : fd_vote_stake_weight_t * src_stake_weights = current_epoch ? runtime_stack->epoch_weights.next_stake_weights : runtime_stack->epoch_weights.stake_weights;
619 0 : memcpy( stake_weights, src_stake_weights, epoch_info_msg->staked_vote_cnt * sizeof(fd_vote_stake_weight_t) );
620 :
621 0 : fd_stake_weight_t * id_weights = fd_epoch_info_msg_id_weights( epoch_info_msg );
622 0 : fd_stake_weight_t * src_id_weights = current_epoch ? runtime_stack->epoch_weights.next_id_weights : runtime_stack->epoch_weights.id_weights;
623 0 : fd_memcpy( id_weights, src_id_weights, epoch_info_msg->staked_id_cnt * sizeof(fd_stake_weight_t) );
624 :
625 0 : ulong epoch_info_sz = fd_epoch_info_msg_sz( epoch_info_msg->staked_vote_cnt , epoch_info_msg->staked_id_cnt );
626 :
627 0 : ulong epoch_info_sig = 4UL;
628 0 : fd_stem_publish( stem, ctx->epoch_out->idx, epoch_info_sig, ctx->epoch_out->chunk, epoch_info_sz, 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
629 0 : ctx->epoch_out->chunk = fd_dcache_compact_next( ctx->epoch_out->chunk, epoch_info_sz, ctx->epoch_out->chunk0, ctx->epoch_out->wmark );
630 :
631 0 : fd_multi_epoch_leaders_epoch_msg_init( ctx->mleaders, epoch_info_msg );
632 0 : fd_multi_epoch_leaders_epoch_msg_fini( ctx->mleaders );
633 0 : }
634 :
635 : /**********************************************************************/
636 : /* Transaction execution state machine helpers */
637 : /**********************************************************************/
638 :
639 : static void
640 : replay_block_start( fd_replay_tile_t * ctx,
641 : fd_stem_context_t * stem,
642 : ulong bank_idx,
643 : ulong parent_bank_idx,
644 0 : ulong slot ) {
645 0 : long before = fd_log_wallclock();
646 :
647 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
648 0 : FD_CRIT( bank, "invariant violation: bank is NULL" );
649 0 : FD_CRIT( bank->state==FD_BANK_STATE_INIT, "invariant violation: bank is not in correct state" );
650 :
651 0 : bank->preparation_begin_nanos = before;
652 :
653 0 : fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, parent_bank_idx );
654 0 : FD_CRIT( parent_bank, "invariant violation: parent bank is NULL" );
655 0 : FD_CRIT( parent_bank->state==FD_BANK_STATE_FROZEN, "invariant violation: parent bank is not in correct state" );
656 :
657 0 : ulong parent_slot = parent_bank->f.slot;
658 :
659 : /* Clone the bank from the parent. We must special case the first
660 : slot that is executed as the snapshot does not provide a parent
661 : block id. */
662 :
663 0 : bank = fd_banks_clone_from_parent( ctx->banks, bank_idx );
664 0 : if( FD_UNLIKELY( !bank ) ) {
665 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", bank_idx ));
666 0 : }
667 0 : bank->f.slot = slot;
668 0 : bank->txncache_fork_id = fd_txncache_attach_child( ctx->txncache, parent_bank->txncache_fork_id );
669 :
670 : /* Create a new funk txn for the block. */
671 :
672 0 : fd_funk_txn_xid_t xid = { .ul = { slot, bank_idx } };
673 0 : fd_funk_txn_xid_t parent_xid = { .ul = { parent_slot, parent_bank_idx } };
674 0 : fd_accdb_attach_child ( ctx->accdb_admin, &parent_xid, &xid );
675 0 : fd_progcache_attach_child( ctx->progcache, &parent_xid, &xid );
676 :
677 : /* Update required runtime state and handle potential boundary. */
678 :
679 0 : int is_epoch_boundary = 0;
680 0 : fd_runtime_block_execute_prepare( ctx->banks, bank, ctx->accdb, ctx->runtime_stack, ctx->capture_ctx, &is_epoch_boundary );
681 0 : if( FD_UNLIKELY( is_epoch_boundary ) ) publish_epoch_info( ctx, stem, bank, 0 );
682 :
683 0 : ulong max_tick_height;
684 0 : if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS!=fd_runtime_compute_max_tick_height( parent_bank->f.ticks_per_slot, slot, &max_tick_height ) ) ) {
685 0 : FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", slot, parent_bank->f.ticks_per_slot ));
686 0 : }
687 0 : bank->f.max_tick_height = max_tick_height;
688 0 : fd_sched_set_poh_params( ctx->sched, bank->idx, bank->f.tick_height, bank->f.max_tick_height, bank->f.hashes_per_tick, &parent_bank->f.poh );
689 :
690 0 : FD_LOG_DEBUG(( "replay_block_start: bank_idx=%lu slot=%lu parent_bank_idx=%lu", bank_idx, slot, parent_bank_idx ));
691 0 : }
692 :
693 : static void
694 0 : cost_tracker_snap( fd_bank_t * bank, fd_replay_slot_completed_t * slot_info ) {
695 0 : if( FD_LIKELY( bank->cost_tracker_pool_idx!=ULONG_MAX ) ) {
696 0 : fd_cost_tracker_t const * cost_tracker = fd_bank_cost_tracker_query( bank );
697 0 : if( FD_UNLIKELY( cost_tracker->block_cost_limit==0UL ) ) {
698 0 : memset( &slot_info->cost_tracker, -1 /* ULONG_MAX */, sizeof(slot_info->cost_tracker) );
699 0 : } else {
700 0 : slot_info->cost_tracker.block_cost = cost_tracker->block_cost;
701 0 : slot_info->cost_tracker.vote_cost = cost_tracker->vote_cost;
702 0 : slot_info->cost_tracker.allocated_accounts_data_size = cost_tracker->allocated_accounts_data_size;
703 0 : slot_info->cost_tracker.block_cost_limit = cost_tracker->block_cost_limit;
704 0 : slot_info->cost_tracker.vote_cost_limit = cost_tracker->vote_cost_limit;
705 0 : slot_info->cost_tracker.account_cost_limit = cost_tracker->account_cost_limit;
706 0 : }
707 0 : } else {
708 0 : memset( &slot_info->cost_tracker, -1 /* ULONG_MAX */, sizeof(slot_info->cost_tracker) );
709 0 : }
710 0 : }
711 :
712 : static ulong
713 0 : get_identity_balance( fd_replay_tile_t * ctx, fd_funk_txn_xid_t xid ) {
714 0 : ulong identity_balance = ULONG_MAX;
715 0 : fd_accdb_ro_t identity_acc[1];
716 0 : if( FD_LIKELY( fd_accdb_open_ro( ctx->accdb, identity_acc, &xid, ctx->identity_pubkey ) ) ) {
717 0 : identity_balance = identity_acc->meta->lamports;
718 0 : fd_accdb_close_ro( ctx->accdb, identity_acc );
719 0 : }
720 0 : return identity_balance;
721 0 : }
722 :
723 : static void
724 : publish_slot_completed( fd_replay_tile_t * ctx,
725 : fd_stem_context_t * stem,
726 : fd_bank_t * bank,
727 : int is_initial,
728 0 : int is_leader ) {
729 :
730 0 : ulong slot = bank->f.slot;
731 :
732 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank->idx ];
733 :
734 : /* HACKY: hacky way of checking if we should send a null parent block
735 : id */
736 0 : fd_hash_t parent_block_id = {0};
737 0 : if( FD_UNLIKELY( !is_initial ) ) {
738 0 : parent_block_id = ctx->block_id_arr[ bank->parent_idx ].latest_mr;
739 0 : }
740 :
741 0 : fd_hash_t const * bank_hash = &bank->f.bank_hash;
742 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last_hash( &bank->f.block_hash_queue );
743 0 : FD_TEST( block_hash );
744 :
745 0 : if( FD_LIKELY( !is_initial ) ) fd_txncache_finalize_fork( ctx->txncache, bank->txncache_fork_id, 0UL, block_hash->uc );
746 :
747 0 : fd_epoch_schedule_t const * epoch_schedule = &bank->f.epoch_schedule;
748 0 : ulong slot_idx;
749 0 : ulong epoch = fd_slot_to_epoch( epoch_schedule, slot, &slot_idx );
750 :
751 0 : ctx->metrics.slots_total++;
752 0 : ctx->metrics.transactions_total = bank->f.txn_count;
753 :
754 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
755 0 : slot_info->slot = slot;
756 0 : slot_info->root_slot = ctx->consensus_root_slot;
757 0 : slot_info->storage_slot = ctx->published_root_slot;
758 0 : slot_info->epoch = epoch;
759 0 : slot_info->slot_in_epoch = slot_idx;
760 0 : slot_info->slots_per_epoch = fd_epoch_slot_cnt( epoch_schedule, epoch );
761 0 : slot_info->block_height = bank->f.block_height;
762 0 : slot_info->parent_slot = bank->f.parent_slot;
763 0 : slot_info->block_id = block_id_ele->latest_mr;
764 0 : slot_info->parent_block_id = parent_block_id;
765 0 : slot_info->bank_hash = *bank_hash;
766 0 : slot_info->block_hash = *block_hash;
767 0 : slot_info->transaction_count = bank->f.txn_count;
768 :
769 0 : fd_inflation_t inflation = bank->f.inflation;
770 0 : slot_info->inflation.foundation = inflation.foundation;
771 0 : slot_info->inflation.foundation_term = inflation.foundation_term;
772 0 : slot_info->inflation.terminal = inflation.terminal;
773 0 : slot_info->inflation.initial = inflation.initial;
774 0 : slot_info->inflation.taper = inflation.taper;
775 :
776 0 : fd_rent_t rent = bank->f.rent;
777 0 : slot_info->rent.burn_percent = rent.burn_percent;
778 0 : slot_info->rent.lamports_per_uint8_year = rent.lamports_per_uint8_year;
779 0 : slot_info->rent.exemption_threshold = rent.exemption_threshold;
780 :
781 0 : slot_info->first_fec_set_received_nanos = bank->first_fec_set_received_nanos;
782 0 : slot_info->preparation_begin_nanos = bank->preparation_begin_nanos;
783 0 : slot_info->first_transaction_scheduled_nanos = bank->first_transaction_scheduled_nanos;
784 0 : slot_info->last_transaction_finished_nanos = bank->last_transaction_finished_nanos;
785 0 : slot_info->completion_time_nanos = fd_log_wallclock();
786 0 : if( !slot_info->first_transaction_scheduled_nanos ) { /* edge case: empty slot */
787 0 : slot_info->first_transaction_scheduled_nanos = slot_info->last_transaction_finished_nanos;
788 0 : }
789 :
790 : /* refcnt should be incremented by 1 for each consumer that uses
791 : `bank_idx`. Each consumer should decrement the bank's refcnt once
792 : they are done using the bank. */
793 0 : bank->refcnt++; /* tower_tile */
794 0 : if( FD_LIKELY( ctx->rpc_enabled ) ) bank->refcnt++; /* rpc tile */
795 0 : slot_info->bank_idx = bank->idx;
796 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for tower, rpc", bank->idx, slot, bank->refcnt ));
797 :
798 0 : fd_bank_t * parent_bank = fd_banks_get_parent( ctx->banks, bank );
799 0 : if( FD_LIKELY( parent_bank ) ) {
800 0 : ulong total_txn_cnt = bank->f.txn_count;
801 0 : ulong nonvote_txn_cnt = bank->f.nonvote_txn_count;
802 0 : ulong failed_txn_cnt = bank->f.failed_txn_count;
803 0 : ulong nonvote_failed_txn_cnt = bank->f.nonvote_failed_txn_count;
804 :
805 0 : slot_info->nonvote_success = nonvote_txn_cnt - nonvote_failed_txn_cnt;
806 0 : slot_info->nonvote_failed = nonvote_failed_txn_cnt;
807 0 : slot_info->vote_failed = failed_txn_cnt - nonvote_failed_txn_cnt;
808 0 : slot_info->vote_success = total_txn_cnt - nonvote_txn_cnt - slot_info->vote_failed;
809 0 : } else {
810 0 : slot_info->vote_failed = ULONG_MAX;
811 0 : slot_info->vote_success = ULONG_MAX;
812 0 : slot_info->nonvote_success = ULONG_MAX;
813 0 : slot_info->nonvote_failed = ULONG_MAX;
814 0 : }
815 :
816 0 : slot_info->is_leader = is_leader;
817 0 : slot_info->transaction_fee = bank->f.execution_fees;
818 0 : slot_info->transaction_fee -= (slot_info->transaction_fee>>1); /* burn */
819 0 : slot_info->priority_fee = bank->f.priority_fees;
820 0 : slot_info->tips = bank->f.tips;
821 0 : slot_info->shred_cnt = bank->f.shred_cnt;
822 :
823 0 : FD_BASE58_ENCODE_32_BYTES( ctx->block_id_arr[ bank->idx ].latest_mr.uc, block_id_b58 );
824 0 : FD_BASE58_ENCODE_32_BYTES( bank->f.bank_hash.uc, bank_hash_b58 );
825 0 : FD_LOG_DEBUG(( "finished replaying slot %lu with (block id %s, bank hash %s, transactions %lu, votes %lu, shreds %lu, CUs used %lu, fees %lu)"
826 0 : "and timings [since parent fini %ld ns, started prepare %ld ns, started dispatching transactions %ld ns, finished executing transactions %ld ns, finished block %ld ns]",
827 0 : bank->f.slot, block_id_b58,
828 0 : bank_hash_b58,
829 0 : bank->f.transaction_count,
830 0 : bank->f.transaction_count - bank->f.nonvote_txn_count,
831 0 : bank->f.shred_cnt,
832 0 : bank->f.total_compute_units_used,
833 0 : bank->f.execution_fees + bank->f.priority_fees,
834 0 : !!parent_bank ? parent_bank->block_completed_nanos - bank->first_fec_set_received_nanos : LONG_MAX,
835 0 : bank->preparation_begin_nanos - bank->first_fec_set_received_nanos,
836 0 : bank->first_transaction_scheduled_nanos - bank->preparation_begin_nanos,
837 0 : bank->last_transaction_finished_nanos - bank->first_transaction_scheduled_nanos,
838 0 : bank->block_completed_nanos - bank->last_transaction_finished_nanos ));
839 :
840 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_SLOT_COMPLETED, ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
841 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
842 0 : }
843 :
844 : static void
845 : publish_slot_dead( fd_replay_tile_t * ctx,
846 : fd_stem_context_t * stem,
847 : ulong slot,
848 0 : fd_hash_t const * block_id ) {
849 0 : fd_replay_slot_dead_t * slot_dead = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
850 0 : slot_dead->slot = slot;
851 0 : slot_dead->block_id = *block_id;
852 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_SLOT_DEAD, ctx->replay_out->chunk, sizeof(fd_replay_slot_dead_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
853 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_slot_dead_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
854 0 : }
855 :
856 : static void
857 : publish_txn_executed( fd_replay_tile_t * ctx,
858 : fd_stem_context_t * stem,
859 0 : ulong txn_idx ) {
860 0 : fd_sched_txn_info_t * txn_info = fd_sched_get_txn_info( ctx->sched, txn_idx );
861 0 : fd_replay_txn_executed_t * txn_executed = fd_type_pun( fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk ) );
862 0 : *txn_executed->txn = *fd_sched_get_txn( ctx->sched, txn_idx );
863 0 : txn_executed->txn_err = txn_info->txn_err;
864 0 : txn_executed->is_committable = !!(txn_info->flags&FD_SCHED_TXN_IS_COMMITTABLE);
865 0 : txn_executed->is_fees_only = !!(txn_info->flags&FD_SCHED_TXN_IS_FEES_ONLY);
866 0 : txn_executed->tick_parsed = txn_info->tick_parsed;
867 0 : txn_executed->tick_sigverify_disp = txn_info->tick_sigverify_disp;
868 0 : txn_executed->tick_sigverify_done = txn_info->tick_sigverify_done;
869 0 : txn_executed->tick_exec_disp = txn_info->tick_exec_disp;
870 0 : txn_executed->tick_exec_done = txn_info->tick_exec_done;
871 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_TXN_EXECUTED, ctx->replay_out->chunk, sizeof(*txn_executed), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
872 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(*txn_executed), ctx->replay_out->chunk0, ctx->replay_out->wmark );
873 0 : }
874 :
/* replay_block_finalize performs end-of-block processing for a
   replayed (non-leader) block: records the finish timestamp, pulls
   the final PoH hash and shred count from the scheduler into the
   bank, runs the runtime finalize step, snapshots the cost tracker
   into the outgoing slot-completed frag, freezes the bank, and
   publishes the slot-completed message.  Ordering is significant:
   the cost tracker is snapped before the bank is frozen, and
   publish_slot_completed runs last so the completion timestamp is
   measured correctly. */
static void
replay_block_finalize( fd_replay_tile_t *  ctx,
                       fd_stem_context_t * stem,
                       fd_bank_t *         bank ) {
  bank->last_transaction_finished_nanos = fd_log_wallclock();

  /* Set poh hash in bank. */
  fd_hash_t * poh = fd_sched_get_poh( ctx->sched, bank->idx );
  bank->f.poh = *poh;

  /* Set shred count in bank. */
  bank->f.shred_cnt = fd_sched_get_shred_cnt( ctx->sched, bank->idx );

  /* Do hashing and other end-of-block processing. */
  fd_runtime_block_execute_finalize( bank, ctx->accdb, ctx->capture_ctx );

  /* Copy out cost tracker fields before freezing.  slot_info points
     into the outgoing dcache chunk; publish_slot_completed below fills
     the remaining fields of the same frag before publishing it. */
  fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
  cost_tracker_snap( bank, slot_info );

  /* fetch identity / vote balance updates infrequently (every 4096
     slots); ULONG_MAX otherwise marks the field as not sampled */
  ulong slot = bank->f.slot;
  fd_funk_txn_xid_t xid = { .ul = { slot, bank->idx } };
  slot_info->identity_balance = FD_UNLIKELY( slot%4096==0UL ) ? get_identity_balance( ctx, xid ) : ULONG_MAX;

  /* Mark the bank as frozen. */
  fd_banks_mark_bank_frozen( bank );
  bank->block_completed_nanos = fd_log_wallclock();

  /**********************************************************************/
  /* Bank hash comparison, and halt if there's a mismatch after replay */
  /**********************************************************************/

  /* Must be last so we can measure completion time correctly, even
     though we could technically do this before the hash cmp and vote
     tower stuff. */
  publish_slot_completed( ctx, stem, bank, 0, 0 /* is_leader */ );

# if FD_HAS_FLATCC
  /* If enabled, dump the block to a file and reset the dumping
     context state */
  if( FD_UNLIKELY( ctx->dump_proto_ctx && ctx->dump_proto_ctx->dump_block_to_pb ) ) {
    fd_dump_block_to_protobuf( ctx->block_dump_ctx, ctx->banks, bank, ctx->accdb, ctx->dump_proto_ctx, ctx->runtime_stack );
    fd_block_dump_context_reset( ctx->block_dump_ctx );
  }
# endif
}
922 :
923 : /**********************************************************************/
924 : /* Leader bank management */
925 : /**********************************************************************/
926 :
/* prepare_leader_bank creates and initializes the bank in which our
   upcoming leader slot will be built.  Resolves the parent block id to
   a bank, forks a child bank off of it (along with a txncache fork, a
   funk transaction, and a progcache transaction), runs the runtime
   prepare step (which may publish epoch info at an epoch boundary),
   and takes a refcnt on the new bank that is held until
   fini_leader_bank releases it.  Returns the leader bank, which is
   also stored in ctx->leader_bank.  Lookup failures are invariant
   violations and log CRIT. */
static fd_bank_t *
prepare_leader_bank( fd_replay_tile_t *  ctx,
                     fd_stem_context_t * stem,
                     ulong               slot,
                     long                now,
                     fd_hash_t const *   parent_block_id ) {
  long before = fd_log_wallclock();

  /* Make sure that we are not already leader. */
  FD_TEST( ctx->leader_bank==NULL );

  /* Resolve the parent block id (merkle root) to its bank index. */
  fd_block_id_ele_t * parent_ele = fd_block_id_map_ele_query( ctx->block_id_map, parent_block_id, NULL, ctx->block_id_arr );
  if( FD_UNLIKELY( !parent_ele ) ) {
    FD_BASE58_ENCODE_32_BYTES( parent_block_id->key, parent_block_id_b58 );
    FD_LOG_CRIT(( "invariant violation: parent bank index not found for merkle root %s", parent_block_id_b58 ));
  }
  ulong parent_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, parent_ele );

  fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, parent_bank_idx );
  if( FD_UNLIKELY( !parent_bank ) ) {
    FD_LOG_CRIT(( "invariant violation: parent bank not found for bank index %lu", parent_bank_idx ));
  }
  ulong parent_slot = parent_bank->f.slot;

  /* Allocate a fresh child bank off of the parent, then clone the
     parent's state into it. */
  ctx->leader_bank = fd_banks_new_bank( ctx->banks, parent_bank_idx, now );
  if( FD_UNLIKELY( !ctx->leader_bank ) ) {
    FD_LOG_CRIT(( "invariant violation: leader bank is NULL for slot %lu", slot ));
  }

  ctx->leader_bank = fd_banks_clone_from_parent( ctx->banks, ctx->leader_bank->idx );
  if( FD_UNLIKELY( !ctx->leader_bank ) ) {
    FD_LOG_CRIT(( "invariant violation: bank is NULL for slot %lu", slot ));
  }

  ctx->leader_bank->preparation_begin_nanos = before;

  ctx->leader_bank->f.slot = slot;
  ctx->leader_bank->txncache_fork_id = fd_txncache_attach_child( ctx->txncache, parent_bank->txncache_fork_id );
  /* prepare the funk transaction for the leader bank */
  fd_funk_txn_xid_t xid        = { .ul = { slot, ctx->leader_bank->idx } };
  fd_funk_txn_xid_t parent_xid = { .ul = { parent_slot, parent_bank_idx } };
  fd_accdb_attach_child    ( ctx->accdb_admin, &parent_xid, &xid );
  fd_progcache_attach_child( ctx->progcache,   &parent_xid, &xid );

  /* Runtime prepare may cross an epoch boundary, in which case the new
     epoch info is published downstream. */
  int is_epoch_boundary = 0;
  fd_runtime_block_execute_prepare( ctx->banks, ctx->leader_bank, ctx->accdb, ctx->runtime_stack, ctx->capture_ctx, &is_epoch_boundary );
  if( FD_UNLIKELY( is_epoch_boundary ) ) publish_epoch_info( ctx, stem, ctx->leader_bank, 0 );

  ulong max_tick_height;
  if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS!=fd_runtime_compute_max_tick_height( parent_bank->f.ticks_per_slot, slot, &max_tick_height ) ) ) {
    FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", slot, parent_bank->f.ticks_per_slot ));
  }
  ctx->leader_bank->f.max_tick_height = max_tick_height;

  /* Now that a bank has been created for the leader slot, increment the
     reference count until we are done with the leader slot. */
  ctx->leader_bank->refcnt++;

  return ctx->leader_bank;
}
987 :
988 : static inline void
989 0 : maybe_switch_identity( fd_replay_tile_t * ctx ) {
990 :
991 0 : if( FD_LIKELY( fd_keyswitch_state_query( ctx->keyswitch )!=FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) return;
992 :
993 : /* Switch identity */
994 :
995 0 : FD_LOG_DEBUG(( "keyswitch: switching identity" ));
996 :
997 0 : memcpy( ctx->identity_pubkey, ctx->keyswitch->bytes, 32UL );
998 0 : fd_keyswitch_state( ctx->keyswitch, FD_KEYSWITCH_STATE_COMPLETED );
999 :
1000 : /* The next leader slot will be incorrect now that the identity has
1001 : switched. The next leader slot normally gets updated based on the
1002 : reset slot returned by tower. */
1003 0 : ulong min_leader_slot = fd_ulong_max( ctx->reset_slot+1UL, fd_ulong_if( ctx->highwater_leader_slot==ULONG_MAX, 0UL, ctx->highwater_leader_slot+1UL ) );
1004 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, min_leader_slot, ctx->identity_pubkey );
1005 0 : if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
1006 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
1007 0 : } else {
1008 0 : ctx->next_leader_tickcount = LONG_MAX;
1009 0 : }
1010 :
1011 0 : ctx->identity_vote_rooted = 0;
1012 0 : ctx->identity_idx++;
1013 0 : fd_vote_tracker_reset( ctx->vote_tracker );
1014 0 : }
1015 :
/* fini_leader_bank finalizes our own (leader) block once PoH has
   signaled the slot ended and the block id has been observed.  Mirrors
   replay_block_finalize: notifies the scheduler the block is done,
   runs the runtime finalize step, snapshots the cost tracker into the
   outgoing frag, freezes the bank, publishes slot-completed, then
   drops the leader refcnt taken in prepare_leader_bank and clears the
   leader state.  Finally services any pending identity switch (safe
   to do now that we are no longer leader). */
static void
fini_leader_bank( fd_replay_tile_t *  ctx,
                  fd_stem_context_t * stem ) {

  /* Preconditions: we must be leader with a live leader bank, have
     seen the block id for our slot, and have received the PoH
     slot-ended message. */
  FD_TEST( ctx->leader_bank!=NULL );
  FD_TEST( ctx->is_leader );
  FD_TEST( ctx->block_id_arr[ ctx->leader_bank->idx ].block_id_seen );
  FD_TEST( ctx->recv_poh );

  ctx->leader_bank->last_transaction_finished_nanos = fd_log_wallclock();

  ulong curr_slot = ctx->leader_bank->f.slot;

  /* Inform the scheduler that this block is complete. */
  fd_sched_block_add_done( ctx->sched, ctx->leader_bank->idx, ctx->leader_bank->parent_idx, curr_slot );

  fd_runtime_block_execute_finalize( ctx->leader_bank, ctx->accdb, ctx->capture_ctx );

  /* Snap cost tracker fields into the outgoing frag before freezing;
     publish_slot_completed fills the rest of the same frag below. */
  fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
  cost_tracker_snap( ctx->leader_bank, slot_info );
  fd_funk_txn_xid_t xid = { .ul = { curr_slot, ctx->leader_bank->idx } };
  /* identity balance is only sampled every 4096 slots */
  slot_info->identity_balance = FD_UNLIKELY( curr_slot%4096==0UL ) ? get_identity_balance( ctx, xid ) : ULONG_MAX;

  fd_banks_mark_bank_frozen( ctx->leader_bank );
  ctx->leader_bank->block_completed_nanos = fd_log_wallclock();

  publish_slot_completed( ctx, stem, ctx->leader_bank, 0, 1 /* is_leader */ );

  /* The reference on the bank is finally no longer needed. */
  ctx->leader_bank->refcnt--;

  /* We are no longer leader so we can clear the bank index we use for
     being the leader. */
  ctx->leader_bank = NULL;
  ctx->recv_poh = 0;
  ctx->is_leader = 0;

  maybe_switch_identity( ctx );

}
1055 :
1056 : static void
1057 : publish_root_advanced( fd_replay_tile_t * ctx,
1058 0 : fd_stem_context_t * stem ) {
1059 :
1060 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, ctx->consensus_root_bank_idx );
1061 0 : if( FD_UNLIKELY( !bank ) ) {
1062 0 : FD_LOG_CRIT(( "invariant violation: consensus root bank is NULL at bank index %lu", ctx->consensus_root_bank_idx ));
1063 0 : }
1064 :
1065 0 : if( FD_UNLIKELY( bank->f.epoch>fd_slot_to_epoch( &bank->f.epoch_schedule, bank->f.parent_slot, NULL ) )) {
1066 0 : publish_epoch_info( ctx, stem, bank, 1 );
1067 0 : }
1068 :
1069 0 : if( ctx->rpc_enabled ) {
1070 0 : bank->refcnt++;
1071 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for rpc", bank->idx, bank->f.slot, bank->refcnt ));
1072 0 : }
1073 :
1074 : /* Increment the reference count on the consensus root bank to account
1075 : for the number of resolv tiles that are waiting on it. */
1076 0 : bank->refcnt += ctx->resolv_tile_cnt;
1077 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for resolv", bank->idx, bank->f.slot, bank->refcnt ));
1078 :
1079 0 : fd_replay_root_advanced_t * msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
1080 0 : msg->bank_idx = bank->idx;
1081 :
1082 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_ROOT_ADVANCED, ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1083 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
1084 0 : }
1085 :
1086 : /* init_funk performs pre-flight checks for the account database and
1087 : program cache. Ensures that the account database was set up
1088 : correctly by bootstrap components (e.g. genesis or snapshot loader).
1089 : Mirrors the account database's fork tree down to the program cache. */
1090 :
/* init_funk performs pre-flight checks for the account database and
   program cache (see the comment block above this function).  Each
   failed check is an unrecoverable startup bug and logs CRIT. */
static void
init_funk( fd_replay_tile_t * ctx,
           ulong              bank_slot ) {
  /* Ensure that the loaded bank root corresponds to the account
     database's root. */
  fd_funk_t * funk = fd_accdb_user_v1_funk( ctx->accdb );
  if( FD_UNLIKELY( !funk->shmem ) ) {
    FD_LOG_CRIT(( "failed to initialize account database: replay tile is not joined to database shared memory objects" ));
  }
  /* xid.ul[0] of the last published funk txn is the slot number. */
  fd_funk_txn_xid_t const * accdb_pub = fd_funk_last_publish( funk );
  if( FD_UNLIKELY( accdb_pub->ul[0]!=bank_slot ) ) {
    FD_LOG_CRIT(( "failed to initialize account database: accdb is at slot %lu, but chain state is at slot %lu\n"
                  "This is a bug in startup components.",
                  accdb_pub->ul[0], bank_slot ));
  }
  /* A frozen last-publish means in-progress child transactions exist,
     i.e. the fork graph was not cleanly collapsed to the root. */
  if( FD_UNLIKELY( fd_funk_last_publish_is_frozen( funk ) ) ) {
    FD_LOG_CRIT(( "failed to initialize account database: accdb fork graph is not clean.\n"
                  "The account database should only contain state for the root slot at this point,\n"
                  "but there are incomplete database transactions leftover.\n"
                  "This is a bug in startup components." ));
  }

  /* The program cache tracks the account database's fork graph at all
     times.  Perform initial synchronization: pivot from funk 'root' (a
     sentinel XID) to 'last publish' (the bootstrap root slot). */
  if( FD_UNLIKELY( !ctx->progcache->shmem ) ) {
    FD_LOG_CRIT(( "failed to initialize account database: replay tile is not joined to program cache" ));
  }
  fd_progcache_clear( ctx->progcache );

  /* Attach the bootstrap root under the sentinel root XID, then
     advance the progcache root onto it. */
  fd_funk_txn_xid_t last_publish = fd_accdb_root_get( ctx->accdb_admin );
  fd_funk_txn_xid_t root = { .ul = { ULONG_MAX, ULONG_MAX } };
  fd_progcache_attach_child( ctx->progcache, &root, &last_publish );
  fd_progcache_advance_root( ctx->progcache, &last_publish );
}
1126 :
/* init_after_snapshot completes boot-time initialization once the
   snapshot (or genesis) state has been loaded into the boot bank:
   validates funk, refreshes stake delegations and top votes,
   recalculates partitioned rewards, and (for genesis, slot 0) runs a
   one-off block execution to produce the initial PoH/bank state.
   Sets the RUNTIME_STATUS gauge to 1 when done, which unblocks
   fd_sleep_until_replay_started. */
static void
init_after_snapshot( fd_replay_tile_t * ctx ) {
  /* Now that the snapshot has been loaded in, we have to refresh the
     stake delegations since the manifest does not contain the full set
     of data required for the stake delegations.  See
     fd_stake_delegations.h for why this is required. */

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX );
  if( FD_UNLIKELY( !bank ) ) {
    FD_LOG_CRIT(( "invariant violation: replay bank is NULL at bank index %lu", FD_REPLAY_BOOT_BANK_IDX ));
  }

  fd_funk_txn_xid_t xid = { .ul = { bank->f.slot, bank->idx } };
  init_funk( ctx, bank->f.slot );

  bank->f.warmup_cooldown_rate_epoch = fd_slot_to_epoch( &bank->f.epoch_schedule, bank->f.features.reduce_stake_warmup_cooldown, NULL );
  fd_stake_delegations_t * root_delegations = fd_banks_stake_delegations_root_query( ctx->banks );
  fd_stake_delegations_refresh(
      root_delegations,
      bank->f.epoch,
      fd_sysvar_cache_stake_history_join_const( &bank->f.sysvar_cache ),
      &bank->f.warmup_cooldown_rate_epoch,
      ctx->accdb,
      &xid );

  /* Rebuild the top-votes structure from on-chain vote accounts. */
  fd_top_votes_t * top_votes_t_2 = fd_bank_top_votes_t_2_modify( bank );
  fd_top_votes_refresh( top_votes_t_2, ctx->accdb, &xid );

  /* After both snapshots have been loaded in, we can determine if we should
     start distributing rewards. */

  fd_rewards_recalculate_partitioned_rewards( ctx->banks, bank, ctx->accdb, &xid, ctx->runtime_stack, ctx->capture_ctx );

  ulong snapshot_slot = bank->f.slot;
  if( FD_UNLIKELY( !snapshot_slot ) ) {
    /* Genesis-specific setup. */
    /* FIXME: This branch does not set up a new block exec ctx
       properly.  Needs to do whatever prepare_new_block_execution
       does, but just hacking that in breaks stuff. */
    fd_runtime_update_leaders( bank, ctx->runtime_stack );

    /* Spin the PoH hash chain forward one full slot's worth of
       hashes. */
    ulong hashcnt_per_slot = bank->f.hashes_per_tick * bank->f.ticks_per_slot;
    fd_hash_t * poh = &bank->f.poh;
    while( hashcnt_per_slot-- ) {
      fd_sha256_hash( poh->hash, 32UL, poh->hash );
    }

    /* Execute an empty slot-0 block to establish the initial bank
       state; genesis cannot land on an epoch boundary. */
    int is_epoch_boundary = 0;
    fd_runtime_block_execute_prepare( ctx->banks, bank, ctx->accdb, ctx->runtime_stack, ctx->capture_ctx, &is_epoch_boundary );
    FD_TEST( !is_epoch_boundary );
    fd_runtime_block_execute_finalize( bank, ctx->accdb, ctx->capture_ctx );

    snapshot_slot = 0UL;
  }

  /* Signals fd_sleep_until_replay_started */
  FD_MGAUGE_SET( REPLAY, RUNTIME_STATUS, 1UL );
}
1185 :
/* maybe_become_leader checks whether it is time to start our next
   leader slot and, if so, transitions into the leader state: prepares
   the leader bank, gathers bundle tip-payment state (if bundles are
   enabled), and publishes a became-leader message downstream.  Returns
   1 if we became leader, 0 otherwise.  Several heuristics delay the
   start to let a prior leader finish publishing without risking being
   skipped ourselves. */
static inline int
maybe_become_leader( fd_replay_tile_t *  ctx,
                     fd_stem_context_t * stem ) {
  FD_TEST( ctx->is_booted );
  /* Bail out unless all preconditions hold: we know our next leader
     slot, are not already leader, our vote is rooted (if required),
     the replay out link exists, and startup is complete. */
  if( FD_LIKELY( ctx->next_leader_slot==ULONG_MAX || ctx->is_leader || (!ctx->identity_vote_rooted && ctx->wait_for_vote_to_start_leader) || ctx->replay_out->idx==ULONG_MAX || !ctx->wfs_complete ) ) return 0;
  if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) ) ) return 0;
  if( FD_UNLIKELY( ctx->halt_leader ) ) return 0;
  if( !ctx->supports_leader ) return 0;

  FD_TEST( ctx->next_leader_slot>ctx->reset_slot );
  long now = fd_tickcount();
  if( FD_LIKELY( now<ctx->next_leader_tickcount ) ) return 0;

  /* If a prior leader is still in the process of publishing their slot,
     delay ours to let them finish ... unless they are so delayed that
     we risk getting skipped by the leader following us. 1.2 seconds
     is a reasonable default here, although any value between 0 and 1.6
     seconds could be considered reasonable. This is arbitrary and
     chosen due to intuition. */
  if( FD_UNLIKELY( now<ctx->next_leader_tickcount+(long)(3.0*ctx->slot_duration_ticks) ) ) {
    FD_TEST( ctx->reset_bank );

    /* TODO: Make the max_active_descendant calculation more efficient
       by caching it in the bank structure and updating it as banks are
       created and completed. */
    /* Walk the reset bank's children (sibling-linked list) to find the
       highest in-progress descendant slot. */
    ulong max_active_descendant = 0UL;
    ulong child_idx = ctx->reset_bank->child_idx;
    while( child_idx!=ULONG_MAX ) {
      fd_bank_t * child_bank = fd_banks_bank_query( ctx->banks, child_idx );
      max_active_descendant = fd_ulong_max( max_active_descendant, child_bank->f.slot );
      child_idx = child_bank->sibling_idx;
    }

    /* If the max_active_descendant is >= next_leader_slot, we waited
       too long and a leader after us started publishing to try and skip
       us. Just start our leader slot immediately, we might win ... */
    if( FD_LIKELY( max_active_descendant>=ctx->reset_slot && max_active_descendant<ctx->next_leader_slot ) ) {
      /* If one of the leaders between the reset slot and our leader
         slot is in the process of publishing (they have a descendant
         bank that is in progress of being replayed), then keep waiting.
         We probably wouldn't get a leader slot out before they
         finished.

         Unless... we are past the deadline to start our slot by more
         than 1.2 seconds, in which case we should probably start it to
         avoid getting skipped by the leader behind us. */
      return 0;
    }
  }

  /* If we haven't started replaying the prior block, but we have
     finished replaying the second to last slot of the prior
     leader (and that leader is not us), we should give the prior leader
     a little more time. */
  if( FD_UNLIKELY( ctx->next_leader_slot==ctx->reset_slot+2UL && now<ctx->next_leader_tickcount+(long)(1.0*ctx->slot_duration_ticks) ) ) {

    fd_pubkey_t const * reset_leader = fd_multi_epoch_leaders_get_leader_for_slot( ctx->mleaders, ctx->reset_slot );
    if( FD_UNLIKELY( reset_leader && !fd_memeq( reset_leader, ctx->identity_pubkey, 32UL ) ) ) return 0;
  }


  long now_nanos = fd_log_wallclock();

  ctx->is_leader = 1;
  ctx->recv_poh = 0;

  /* Our leader slots must be strictly increasing. */
  FD_TEST( ctx->highwater_leader_slot==ULONG_MAX || ctx->highwater_leader_slot<ctx->next_leader_slot );
  ctx->highwater_leader_slot = ctx->next_leader_slot;

  FD_LOG_INFO(( "becoming leader for slot %lu, parent slot is %lu", ctx->next_leader_slot, ctx->reset_slot ));

  /* Acquires bank, sets up initial state, and refcnts it. */
  fd_bank_t * bank = prepare_leader_bank( ctx, stem, ctx->next_leader_slot, now_nanos, &ctx->reset_block_id );
  fd_funk_txn_xid_t xid = { .ul = { ctx->next_leader_slot, ctx->leader_bank->idx } };

  fd_bundle_crank_tip_payment_config_t config[1] = { 0 };
  fd_pubkey_t tip_receiver_owner = {0};

  /* When bundles are enabled, read the tip payment config and tip
     receiver owner from the account database so pack can crank the
     tip payment program. */
  if( FD_UNLIKELY( ctx->bundle.enabled ) ) {
    fd_acct_addr_t tip_payment_config[1];
    fd_acct_addr_t tip_receiver[1];
    fd_bundle_crank_get_addresses( ctx->bundle.gen, bank->f.epoch, tip_payment_config, tip_receiver );

    fd_accdb_ro_t tip_config_acc[1];
    if( FD_UNLIKELY( !fd_accdb_open_ro( ctx->accdb, tip_config_acc, &xid, tip_payment_config ) ) ) {
      FD_BASE58_ENCODE_32_BYTES( tip_payment_config->b, tip_config_acc_b58 );
      FD_LOG_WARNING(( "tip payment config account %s does not exist", tip_config_acc_b58 ));
    } else if( FD_UNLIKELY( fd_accdb_ref_data_sz( tip_config_acc )<sizeof(fd_bundle_crank_tip_payment_config_t) ) ) {
      FD_LOG_HEXDUMP_WARNING(( "invalid tip payment config account data", fd_accdb_ref_data_const( tip_config_acc ), fd_accdb_ref_data_sz( tip_config_acc ) ));
      fd_accdb_close_ro( ctx->accdb, tip_config_acc );
    } else {
      memcpy( config, fd_accdb_ref_data_const( tip_config_acc ), sizeof(fd_bundle_crank_tip_payment_config_t) );
      fd_accdb_close_ro( ctx->accdb, tip_config_acc );
    }

    /* It is possible that the tip receiver account does not exist yet
       if it is the first time in an epoch. */
    fd_accdb_ro_t tip_receiver_acc[1];
    if( FD_LIKELY( fd_accdb_open_ro( ctx->accdb, tip_receiver_acc, &xid, tip_receiver ) ) ) {
      tip_receiver_owner = *fd_accdb_ref_owner( tip_receiver_acc );
      fd_accdb_close_ro( ctx->accdb, tip_receiver_acc );
    }
  }


  /* Fill and publish the became-leader message. */
  fd_became_leader_t * msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
  msg->slot = ctx->next_leader_slot;
  msg->slot_start_ns = now_nanos;
  msg->slot_end_ns = now_nanos+(long)ctx->slot_duration_nanos;
  msg->bank = NULL;
  msg->bank_idx = bank->idx;
  msg->ticks_per_slot = bank->f.ticks_per_slot;
  msg->hashcnt_per_tick = bank->f.hashes_per_tick;
  msg->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)msg->ticks_per_slot);
  msg->bundle->config[0] = config[0];
  memcpy( msg->bundle->last_blockhash, bank->f.poh.hash, sizeof(fd_hash_t) );
  memcpy( msg->bundle->tip_receiver_owner, tip_receiver_owner.uc, sizeof(fd_pubkey_t) );

  if( FD_UNLIKELY( msg->hashcnt_per_tick==1UL ) ) {
    /* Low power producer, maximum of one microblock per tick in the slot */
    msg->max_microblocks_in_slot = msg->ticks_per_slot;
  } else {
    /* See the long comment in after_credit for this limit */
    msg->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, msg->ticks_per_slot*(msg->hashcnt_per_tick-1UL) );
  }

  msg->total_skipped_ticks = msg->ticks_per_slot*(ctx->next_leader_slot-ctx->reset_slot);
  msg->epoch = fd_slot_to_epoch( &bank->f.epoch_schedule, ctx->next_leader_slot, NULL );

  fd_cost_tracker_t const * cost_tracker = fd_bank_cost_tracker_query( bank );

  msg->limits.slot_max_cost = ctx->larger_max_cost_per_block ? LARGER_MAX_COST_PER_BLOCK : cost_tracker->block_cost_limit;
  msg->limits.slot_max_vote_cost = cost_tracker->vote_cost_limit;
  msg->limits.slot_max_write_cost_per_acct = cost_tracker->account_cost_limit;

  if( FD_UNLIKELY( msg->ticks_per_slot+msg->total_skipped_ticks>USHORT_MAX ) ) {
    /* There can be at most USHORT_MAX skipped ticks, because the
       parent_offset field in the shred data is only 2 bytes wide. */
    FD_LOG_ERR(( "too many skipped ticks %lu for slot %lu, chain must halt", msg->ticks_per_slot+msg->total_skipped_ticks, ctx->next_leader_slot ));
  }

  fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_BECAME_LEADER, ctx->replay_out->chunk, sizeof(fd_became_leader_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
  ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_became_leader_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );

  /* Consume the scheduled leader slot; it is recomputed when tower
     resets us onto a new fork. */
  ctx->next_leader_slot = ULONG_MAX;
  ctx->next_leader_tickcount = LONG_MAX;

  return 1;
}
1335 :
1336 : static void
1337 : process_poh_message( fd_replay_tile_t * ctx,
1338 0 : fd_poh_leader_slot_ended_t const * slot_ended ) {
1339 :
1340 0 : FD_TEST( ctx->is_booted );
1341 0 : FD_TEST( ctx->is_leader );
1342 0 : FD_TEST( ctx->leader_bank!=NULL );
1343 :
1344 0 : FD_TEST( ctx->highwater_leader_slot>=slot_ended->slot );
1345 0 : FD_TEST( ctx->next_leader_slot>ctx->highwater_leader_slot );
1346 :
1347 : /* Update the poh hash in the bank. We will want to maintain a refcnt
1348 : on the bank until we have recieved the block id for the block after
1349 : it has been shredded. */
1350 :
1351 0 : memcpy( &ctx->leader_bank->f.poh, slot_ended->blockhash, sizeof(fd_hash_t) );
1352 :
1353 0 : ctx->recv_poh = 1;
1354 0 : }
1355 :
/* publish_reset notifies downstream consumers (via the replay out
   link) that PoH should reset onto the given bank, i.e. resume hashing
   on top of that bank's last blockhash.  No-op if the replay out link
   is not wired (idx==ULONG_MAX). */
static void
publish_reset( fd_replay_tile_t *  ctx,
               fd_stem_context_t * stem,
               fd_bank_t *         bank ) {
  if( FD_UNLIKELY( ctx->replay_out->idx==ULONG_MAX ) ) return;

  /* The bank must have at least one blockhash to reset onto. */
  fd_hash_t const * block_hash = fd_blockhashes_peek_last_hash( &bank->f.block_hash_queue );
  FD_TEST( block_hash );

  fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );

  reset->bank_idx = bank->idx;
  reset->timestamp = fd_log_wallclock();
  reset->completed_slot = bank->f.slot;
  reset->hashcnt_per_tick = bank->f.hashes_per_tick;
  reset->ticks_per_slot = bank->f.ticks_per_slot;
  reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);
  fd_memcpy( reset->completed_block_id, ctx->reset_block_id.uc, sizeof(fd_hash_t) );
  fd_memcpy( reset->completed_blockhash, block_hash->uc, sizeof(fd_hash_t) );

  ulong ticks_per_slot = bank->f.ticks_per_slot;
  if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
    /* Low power producer, maximum of one microblock per tick in the slot */
    reset->max_microblocks_in_slot = ticks_per_slot;
  } else {
    /* See the long comment in after_credit for this limit */
    reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
  }
  reset->next_leader_slot = ctx->next_leader_slot;

  /* Publish the frag, then advance the dcache chunk for the next
     message. */
  fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_RESET, ctx->replay_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
  ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_poh_reset_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
}
1389 :
1390 : static void
1391 : store_xinsert( fd_store_t * store,
1392 0 : fd_hash_t const * merkle_root ) {
1393 0 : fd_store_pool_t pool = {
1394 0 : .pool = fd_wksp_laddr_fast( fd_store_wksp( store ), store->pool_mem_gaddr ),
1395 0 : .ele = fd_wksp_laddr_fast( fd_store_wksp( store ), store->pool_ele_gaddr ),
1396 0 : .ele_max = store->fec_max
1397 0 : };
1398 0 : fd_store_fec_t * fec = fd_store_pool_acquire( &pool );
1399 0 : if( FD_UNLIKELY( !fec ) ) FD_LOG_CRIT(( "fd_store_pool_acquire failed" ));
1400 0 : fec->key.merkle_root = *merkle_root;
1401 0 : fec->key.part_idx = 0;
1402 0 : fec->cmr = (fd_hash_t){ 0 };
1403 0 : fec->next = fd_store_pool_idx_null();
1404 0 : fec->data_sz = 0UL;
1405 :
1406 0 : FD_STORE_XLOCK_BEGIN( store ) {
1407 0 : fd_store_map_ele_insert( fd_wksp_laddr_fast( fd_store_wksp( store ), store->map_gaddr ), fec, pool.ele );
1408 0 : } FD_STORE_XLOCK_END;
1409 0 : }
1410 :
/* boot_genesis bootstraps the replay tile directly from a parsed
   genesis blob (no snapshot).  It reads genesis state into the boot
   bank inside a non-rooted accdb transaction and immediately roots it,
   wires up the txncache fork, publishes two epochs of epoch info,
   seeds reasm/store/block-id-map with the initial block id, and
   finally publishes slot-completed / root-advanced / reset messages. */
static void
boot_genesis( fd_replay_tile_t * ctx,
              fd_stem_context_t * stem,
              fd_genesis_meta_t const * meta ) {
  /* If we are bootstrapping, we can't wait for our identity vote to be
     rooted as this creates a circular dependency. */
  ctx->identity_vote_rooted = 1;

  /* The raw genesis blob immediately follows the meta header. */
  uchar const * genesis_blob = (uchar const *)( meta+1 );
  FD_TEST( meta->bootstrap && meta->has_lthash );
  FD_TEST( fd_genesis_parse( ctx->genesis, genesis_blob, meta->blob_sz ) );

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX );
  FD_TEST( bank );
  fd_funk_txn_xid_t xid = { .ul = { 0UL, FD_REPLAY_BOOT_BANK_IDX } };

  /* Do genesis-related processing in a non-rooted transaction, then
     advance the root past it so genesis state is durable. */
  fd_funk_txn_xid_t root_xid = { .ul = { LONG_MAX, LONG_MAX } };
  fd_funk_txn_xid_t target_xid = { .ul = { 0UL, 0UL } };
  fd_accdb_attach_child( ctx->accdb_admin, &root_xid, &target_xid );
  fd_runtime_read_genesis( ctx->banks, bank, ctx->accdb, &xid, NULL, &meta->genesis_hash, &meta->lthash, ctx->genesis, genesis_blob, ctx->runtime_stack );
  fd_accdb_advance_root( ctx->accdb_admin, &target_xid );

  /* Hang the boot bank's txncache fork off the sentinel root and
     finalize it at slot 0 with the genesis blockhash. */
  static const fd_txncache_fork_id_t txncache_root = { .val = USHORT_MAX };
  bank->txncache_fork_id = fd_txncache_attach_child( ctx->txncache, txncache_root );

  fd_hash_t const * block_hash = fd_blockhashes_peek_last_hash( &bank->f.block_hash_queue );
  fd_txncache_finalize_fork( ctx->txncache, bank->txncache_fork_id, 0UL, block_hash->uc );

  /* Publish both the previous and current epoch's info (see the
     matching comment in on_snapshot_message for why two epochs). */
  publish_epoch_info( ctx, stem, bank, 0 );
  publish_epoch_info( ctx, stem, bank, 1 );

  /* We call this after fd_runtime_read_genesis, which sets up the
     slot_bank needed in blockstore_init. */
  init_after_snapshot( ctx );

  ctx->published_root_slot = 0UL;
  fd_sched_block_add_done( ctx->sched, bank->idx, ULONG_MAX, 0UL );

  bank->f.block_height = 1UL;

  /* Slot 0 is both the consensus root and the published root. */
  ctx->consensus_root = ctx->initial_block_id;
  ctx->consensus_root_slot = 0UL;
  ctx->consensus_root_bank_idx = 0UL;
  ctx->published_root_slot = 0UL;
  ctx->published_root_bank_idx = 0UL;

  /* Reset onto slot 0 and compute when we next become leader. */
  ctx->reset_slot = 0UL;
  ctx->reset_bank = bank;
  ctx->reset_timestamp_nanos = fd_log_wallclock();
  ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, 1UL, ctx->identity_pubkey );
  if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
    ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
  } else {
    ctx->next_leader_tickcount = LONG_MAX;
  }

  ctx->is_booted = 1;
  maybe_become_leader( ctx, stem );

  /* Seed reasm and the store with the genesis block id. */
  fd_hash_t initial_block_id = ctx->initial_block_id;
  fd_reasm_fec_t * fec = fd_reasm_insert( ctx->reasm, &initial_block_id, NULL, 0 /* genesis slot */, 0, 0, 0, 0, 1, 0, ctx->store, &ctx->reasm_evicted ); /* FIXME manifest block_id */
  fec->bank_idx = bank->idx;
  fec->bank_seq = bank->bank_seq;
  store_xinsert( ctx->store, &initial_block_id );

  fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ 0 ];
  block_id_ele->latest_mr = initial_block_id;
  block_id_ele->slot = 0UL;

  FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );

  fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
  cost_tracker_snap( bank, slot_info );
  slot_info->identity_balance = get_identity_balance( ctx, xid );

  publish_slot_completed( ctx, stem, bank, 1, 0 /* is_leader */ );
  publish_root_advanced( ctx, stem );
  publish_reset( ctx, stem, bank );
}
1491 :
1492 : static inline void
1493 0 : maybe_verify_cluster_type( fd_replay_tile_t * ctx ) {
1494 0 : if( FD_UNLIKELY( !ctx->is_booted || !ctx->has_genesis_hash ) ) {
1495 0 : return;
1496 0 : }
1497 :
1498 0 : FD_BASE58_ENCODE_32_BYTES( ctx->genesis_hash->uc, hash_cstr );
1499 0 : ulong cluster = fd_genesis_cluster_identify( hash_cstr );
1500 : /* Map pyth-related clusters to unkwown. */
1501 0 : switch( cluster ) {
1502 0 : case FD_CLUSTER_PYTHNET:
1503 0 : case FD_CLUSTER_PYTHTEST:
1504 0 : cluster = FD_CLUSTER_UNKNOWN;
1505 0 : }
1506 :
1507 0 : if( FD_UNLIKELY( cluster!=ctx->cluster_type ) ) {
1508 0 : FD_LOG_ERR(( "Your genesis.bin file at `%s` has a genesis hash of `%s` which means the cluster is %s "
1509 0 : "but the snapshot you loaded is for a different cluster %s. If you are trying to join the "
1510 0 : "%s cluster, you can delete the genesis.bin file and restart the node to download the correct "
1511 0 : "genesis file automatically.",
1512 0 : ctx->genesis_path,
1513 0 : hash_cstr,
1514 0 : fd_genesis_cluster_name( cluster ),
1515 0 : fd_genesis_cluster_name( ctx->cluster_type ),
1516 0 : fd_genesis_cluster_name( cluster ) ));
1517 0 : }
1518 0 : }
1519 :
/* on_snapshot_message handles frags from the snapshot-loading path.
   Manifest messages (full/incremental) recover bank state from the
   manifest and record hard forks; the DONE message completes boot:
   restores features and sysvars, sets roots, seeds reasm/store and the
   block-id map, publishes epoch info and slot-completed messages, and
   kicks off normal operation. */
static void
on_snapshot_message( fd_replay_tile_t *  ctx,
                     fd_stem_context_t * stem,
                     ulong               in_idx,
                     ulong               chunk,
                     ulong               sig ) {
  ulong msg = fd_ssmsg_sig_message( sig );
  if( FD_LIKELY( msg==FD_SSMSG_DONE ) ) {
    /* An end of message notification indicates the snapshot is loaded.
       Replay is able to start executing from this point onwards. */
    /* TODO: replay should finish booting. Could make replay a
       state machine and set the state here accordingly. */
    ctx->is_booted = 1;

    fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX );
    if( FD_UNLIKELY( !bank ) ) {
      FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", FD_REPLAY_BOOT_BANK_IDX ));
    }

    ulong snapshot_slot = bank->f.slot;

    /* If wait-for-supermajority with a pinned bank hash is configured,
       the snapshot's bank hash must match exactly. */
    fd_hash_t bank_hash = bank->f.bank_hash;
    if( FD_UNLIKELY( ctx->wfs_enabled && memcmp( ctx->expected_bank_hash.uc, bank_hash.uc, sizeof(fd_hash_t) ) ) ) {
      FD_BASE58_ENCODE_32_BYTES( ctx->expected_bank_hash.uc, expected_bank_hash_cstr );
      FD_BASE58_ENCODE_32_BYTES( bank_hash.uc, actual_bank_hash_cstr );
      FD_LOG_ERR(( "[consensus.wait_for_supermajority_with_bank_hash] expected_bank_hash=%s does not match snapshot slot"
                   "=%lu bank_hash=%s. If you are loading a snapshot from the network, check that the slot matches the "
                   "cluster restart slot. ", expected_bank_hash_cstr, snapshot_slot, actual_bank_hash_cstr ));
    }
    if( FD_UNLIKELY( ctx->wfs_enabled ) ) {
      FD_LOG_NOTICE(( "waiting for supermajority at snapshot slot %lu", snapshot_slot ));
    }

    /* FIXME: This is a hack because the block id of the snapshot slot
       is not provided in the snapshot. A possible solution is to get
       the block id of the snapshot slot from repair. */
    fd_hash_t manifest_block_id = ctx->initial_block_id;

    /* Restore runtime feature set and sysvar cache from the accounts
       database at the snapshot slot. */
    fd_funk_txn_xid_t xid = { .ul = { snapshot_slot, FD_REPLAY_BOOT_BANK_IDX } };
    fd_features_restore( bank, ctx->accdb, &xid );

    FD_TEST( fd_sysvar_cache_restore( bank, ctx->accdb, &xid ) );

    /* The snapshot slot is both the consensus root and the published
       root at boot. */
    ctx->consensus_root = manifest_block_id;
    ctx->consensus_root_slot = snapshot_slot;
    ctx->consensus_root_bank_idx = 0UL;
    ctx->published_root_slot = ctx->consensus_root_slot;
    ctx->published_root_bank_idx = 0UL;

    ctx->reset_slot = snapshot_slot;
    ctx->reset_bank = bank;
    ctx->reset_timestamp_nanos = fd_log_wallclock();
    ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, 1UL, ctx->identity_pubkey );
    if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
      ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
    } else {
      ctx->next_leader_tickcount = LONG_MAX;
    }

    fd_sched_block_add_done( ctx->sched, bank->idx, ULONG_MAX, snapshot_slot );
    FD_TEST( bank->idx==0UL );

    fd_runtime_update_leaders( bank, ctx->runtime_stack );

    /* Typically, when we cross an epoch boundary during normal
       operation, we publish the stake weights for the new epoch. But
       since we are starting from a snapshot, we need to publish two
       epochs worth of stake weights: the previous epoch (which is
       needed for voting on the current epoch), and the current epoch
       (which is needed for voting on the next epoch). */
    publish_epoch_info( ctx, stem, bank, 0 );
    publish_epoch_info( ctx, stem, bank, 1 );

    /* Register the snapshot slot's block id (bank idx 0). */
    fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ 0 ];
    block_id_ele->latest_mr = manifest_block_id;
    block_id_ele->slot = snapshot_slot;
    block_id_ele->block_id_seen = 1;
    block_id_ele->latest_fec_idx = 0U;
    FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );

    /* NOTE(review): comment below mentions fd_runtime_read_genesis but
       this is the snapshot path (fd_ssload_recover) — presumably stale;
       confirm. */
    /* We call this after fd_runtime_read_genesis, which sets up the
       slot_bank needed in blockstore_init. */
    init_after_snapshot( ctx );

    fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
    cost_tracker_snap( bank, slot_info );
    slot_info->identity_balance = get_identity_balance( ctx, xid );

    publish_slot_completed( ctx, stem, bank, 1, 0 /* is_leader */ );
    publish_root_advanced( ctx, stem );

    /* Seed reasm and the store with the snapshot slot's block id. */
    fd_reasm_fec_t * fec = fd_reasm_insert( ctx->reasm, &manifest_block_id, NULL, snapshot_slot, 0, 0, 0, 0, 1, 0, ctx->store, &ctx->reasm_evicted ); /* FIXME manifest block_id */
    fec->bank_idx = bank->idx;
    fec->bank_seq = bank->bank_seq;
    store_xinsert( ctx->store, &manifest_block_id );

    ctx->cluster_type = bank->f.cluster_type;

    maybe_verify_cluster_type( ctx );

    return;
  }

  switch( msg ) {
    case FD_SSMSG_MANIFEST_FULL:
    case FD_SSMSG_MANIFEST_INCREMENTAL: {
      /* We may either receive a full snapshot manifest or an
         incremental snapshot manifest. Note that this external message
         id is only used temporarily because replay cannot yet receive
         the firedancer-internal snapshot manifest message. */
      if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark ) )
        FD_LOG_ERR(( "chunk %lu from in %d corrupt, not in range [%lu,%lu]", chunk, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));

      fd_ssload_recover( fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ),
                         ctx->banks,
                         fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX ),
                         msg==FD_SSMSG_MANIFEST_INCREMENTAL );

      /* Record hard forks and the genesis creation timestamp from the
         manifest for later validation. */
      fd_snapshot_manifest_t const * manifest = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
      ctx->hard_forks_cnt = manifest->hard_forks_len;
      for( ulong i=0UL; i<manifest->hard_forks_len; i++ ) {
        ctx->hard_forks[ i ] = manifest->hard_forks[ i ];
        ctx->hard_forks_cnts[ i ] = manifest->hard_forks_cnts[ i ];
      }
      ctx->has_expected_genesis_timestamp = 1;
      ctx->expected_genesis_timestamp = manifest->creation_time_seconds;
      break;
    }
    default: {
      FD_LOG_ERR(( "Received unknown snapshot message with msg %lu", msg ));
      return;
    }
  }

  return;
}
1656 :
/* dispatch_task forwards a ready scheduler task (txn exec, txn
   sigverify, or PoH hashing) to an exec tile over the exec out link.
   Each dispatched task takes a refcnt on its bank, which is presumably
   released when the completion message comes back — confirm against
   the completion handler. */
static void
dispatch_task( fd_replay_tile_t *  ctx,
               fd_stem_context_t * stem,
               fd_sched_task_t *   task ) {

  switch( task->task_type ) {
    case FD_SCHED_TT_TXN_EXEC: {
      fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, task->txn_exec->txn_idx );

      fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->txn_exec->bank_idx );
      FD_TEST( bank );

# if FD_HAS_FLATCC
      /* Add the transaction to the block dumper if necessary. This
         logic doesn't need to be fork-aware since it's only meant to
         be used in backtest. */
      if( FD_UNLIKELY( ctx->dump_proto_ctx && ctx->dump_proto_ctx->dump_block_to_pb ) ) {
        fd_dump_block_to_protobuf_collect_tx( ctx->block_dump_ctx, txn_p );
      }
# endif

      bank->refcnt++;

      /* First dispatch for this bank stamps the scheduling start time
         (used for slot timing metrics). */
      if( FD_UNLIKELY( !bank->first_transaction_scheduled_nanos ) ) bank->first_transaction_scheduled_nanos = fd_log_wallclock();

      /* The sig encodes the task type in the upper 32 bits and the
         target exec tile index in the lower bits. */
      fd_replay_out_link_t * exec_out = ctx->exec_out;
      fd_execrp_txn_exec_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
      memcpy( exec_msg->txn, txn_p, sizeof(fd_txn_p_t) );
      exec_msg->bank_idx = task->txn_exec->bank_idx;
      exec_msg->txn_idx = task->txn_exec->txn_idx;
      if( FD_UNLIKELY( ctx->capture_ctx ) ) {
        exec_msg->capture_txn_idx = ctx->capture_ctx->current_txn_idx++;
      }
      fd_stem_publish( stem, exec_out->idx, (FD_EXECRP_TT_TXN_EXEC<<32) | task->txn_exec->exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
      exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
      break;
    }
    case FD_SCHED_TT_TXN_SIGVERIFY: {
      fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, task->txn_sigverify->txn_idx );

      fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->txn_sigverify->bank_idx );
      FD_TEST( bank );
      bank->refcnt++;

      /* NOTE(review): this publish (and the POH_HASH one below) passes
         0UL as the tspub where TXN_EXEC uses fd_frag_meta_ts_comp —
         confirm the timestamp is intentionally omitted here. */
      fd_replay_out_link_t * exec_out = ctx->exec_out;
      fd_execrp_txn_sigverify_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
      memcpy( exec_msg->txn, txn_p, sizeof(fd_txn_p_t) );
      exec_msg->bank_idx = task->txn_sigverify->bank_idx;
      exec_msg->txn_idx = task->txn_sigverify->txn_idx;
      fd_stem_publish( stem, exec_out->idx, (FD_EXECRP_TT_TXN_SIGVERIFY<<32) | task->txn_sigverify->exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, 0UL );
      exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
      break;
    };
    case FD_SCHED_TT_POH_HASH: {
      fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->poh_hash->bank_idx );
      FD_TEST( bank );
      bank->refcnt++;

      fd_replay_out_link_t * exec_out = ctx->exec_out;
      fd_execrp_poh_hash_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
      exec_msg->bank_idx = task->poh_hash->bank_idx;
      exec_msg->mblk_idx = task->poh_hash->mblk_idx;
      exec_msg->hashcnt = task->poh_hash->hashcnt;
      memcpy( exec_msg->hash, task->poh_hash->hash, sizeof(fd_hash_t) );
      fd_stem_publish( stem, exec_out->idx, (FD_EXECRP_TT_POH_HASH<<32) | task->poh_hash->exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, 0UL );
      exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
      break;
    };
    default: {
      FD_LOG_CRIT(( "unexpected task type %lu", task->task_type ));
    }
  }
}
1730 :
1731 : static void
1732 : mark_bank_dead( fd_replay_tile_t * ctx,
1733 : fd_stem_context_t * stem,
1734 0 : ulong bank_idx ) {
1735 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
1736 0 : FD_TEST( bank );
1737 0 : fd_banks_mark_bank_dead( ctx->banks, bank_idx );
1738 :
1739 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank_idx ];
1740 0 : if( block_id_ele->block_id_seen ) publish_slot_dead( ctx, stem, block_id_ele->slot, &block_id_ele->latest_mr );
1741 :
1742 0 : fd_reasm_fec_t * fec = fd_reasm_query( ctx->reasm, &block_id_ele->latest_mr );
1743 0 : if( FD_UNLIKELY( !fec ) ) return;
1744 0 : fec->bank_dead = 1;
1745 :
1746 0 : }
1747 :
/* replay pops the next ready task from the scheduler and services it:
   block start/end bookkeeping, task dispatch to exec tiles, or
   marking a bank dead.  Returns 1 if work was done (charge_busy for
   the stem run loop), 0 if idle or not yet booted. */
static int
replay( fd_replay_tile_t *  ctx,
        fd_stem_context_t * stem ) {

  if( FD_UNLIKELY( !ctx->is_booted ) ) return 0;

  int charge_busy = 0;
  fd_sched_task_t task[ 1 ];
  if( FD_UNLIKELY( !fd_sched_task_next_ready( ctx->sched, task ) ) ) {
    return charge_busy; /* Nothing to execute or do. */
  }

  charge_busy = 1;

  switch( task->task_type ) {
    case FD_SCHED_TT_BLOCK_START: {
      replay_block_start( ctx, stem, task->block_start->bank_idx, task->block_start->parent_bank_idx, task->block_start->slot );
      fd_sched_task_done( ctx->sched, FD_SCHED_TT_BLOCK_START, ULONG_MAX, ULONG_MAX, NULL );
      break;
    }
    case FD_SCHED_TT_BLOCK_END: {
      /* Only finalize banks still in the replayable state; a bank
         marked dead in the meantime is skipped. */
      fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->block_end->bank_idx );
      if( FD_LIKELY( bank->state==FD_BANK_STATE_REPLAYABLE ) ) replay_block_finalize( ctx, stem, bank );
      fd_sched_task_done( ctx->sched, FD_SCHED_TT_BLOCK_END, ULONG_MAX, ULONG_MAX, NULL );
      break;
    }
    case FD_SCHED_TT_TXN_EXEC:
    case FD_SCHED_TT_TXN_SIGVERIFY:
    case FD_SCHED_TT_POH_HASH: {
      /* Common case: we have a transaction we need to execute.  These
         tasks are retired asynchronously when the exec tile responds. */
      dispatch_task( ctx, stem, task );
      break;
    }
    case FD_SCHED_TT_MARK_DEAD: {
      /* NOTE(review): unlike BLOCK_START/BLOCK_END, no fd_sched_task_done
         is issued here — confirm the scheduler retires MARK_DEAD tasks
         internally. */
      fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->mark_dead->bank_idx );
      FD_TEST( bank );
      mark_bank_dead( ctx, stem, task->mark_dead->bank_idx );
      break;
    }
    default: {
      FD_LOG_CRIT(( "unexpected task type %lu", task->task_type ));
    }
  }

  return charge_busy;
}
1795 :
/* can_process_fec checks whether the next FEC set at the head of reasm
   can be ingested right now.  Returns 1 if so, 0 otherwise (bumping
   the corresponding stall metric).  Sets *evict_banks_out to 1 when
   banks are full and the scheduler is drained, signalling the caller
   to evict; assumes the caller initialized *evict_banks_out — it is
   never cleared here. */
static int
can_process_fec( fd_replay_tile_t * ctx,
                 int *              evict_banks_out ) {
  fd_reasm_fec_t * fec;
  /* Scheduler must have room to ingest. */
  if( FD_UNLIKELY( fd_sched_can_ingest_cnt( ctx->sched )==0UL ) ) {
    ctx->metrics.sched_full++;
    return 0;
  }

  /* There must be a FEC set pending in reasm. */
  if( FD_UNLIKELY( (fec = fd_reasm_peek( ctx->reasm ))==NULL ) ) {
    ctx->metrics.reasm_empty++;
    return 0;
  }

  ctx->metrics.reasm_latest_slot = fec->slot;
  ctx->metrics.reasm_latest_fec_idx = fec->fec_set_idx;

  if( FD_UNLIKELY( ctx->is_leader && fec->fec_set_idx==0U && fd_reasm_parent( ctx->reasm, fec )->bank_idx==ctx->leader_bank->idx ) ) {
    /* This guards against a rare race where we receive the FEC set for
       the slot right after our leader rotation before we freeze the
       bank for the last slot in our leader rotation. Leader slot
       freezing happens only after if we've received the final PoH hash
       from the poh tile as well as the final FEC set for the leader
       slot. So the race happens when FEC sets are delivered and
       processed sooner than the PoH hash, aka when the
       poh=>shred=>replay path for the block id beats the poh=>replay
       path for the poh hash. To mitigate this race, we must block on
       ingesting the FEC set for the ensuing slot before the leader
       bank freezes, because that would violate ordering invariants in
       banks and sched. */
    FD_TEST( ctx->block_id_arr[ ctx->leader_bank->idx ].block_id_seen );
    FD_TEST( !ctx->recv_poh );
    ctx->metrics.leader_bid_wait++;
    return 0;
  }

  /* If fec_set_idx is 0, we need a new bank for a new slot. Banks must
     not be full in this case. */
  if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) && fec->fec_set_idx==0 ) ) {
    ctx->metrics.banks_full++;
    /* We only want to evict banks if sched is drained and banks is no
       longer making progress. Otherwise, sched might not release
       refcnts on the frontier/leaf banks immediately, and the eviction
       will have to wait for sched to drain anyways. */
    if( FD_UNLIKELY( fd_sched_is_drained( ctx->sched ) ) ) *evict_banks_out = 1;
    return 0;
  }

  /* Otherwise, banks may not be full, so we can always create a new
     bank if needed. Or, if banks are full, the current fec set's
     ancestor (idx 0) already created a bank for this slot.*/
  return 1;
}
1849 :
/* insert_fec_set ingests one FEC set from reasm into the scheduler:
   provisions (or reuses) a bank for the slot, maintains the block id
   map, fetches the FEC payload from the store under the shared lock,
   and hands it to fd_sched.  Returns 0 on successful FEC ingestion
   (including benign drops), 1 if the block got marked dead. */
static int
insert_fec_set( fd_replay_tile_t *  ctx,
                fd_stem_context_t * stem,
                fd_reasm_fec_t *    reasm_fec ) {

  long now = fd_log_wallclock();

  reasm_fec->parent_bank_idx = fd_reasm_parent( ctx->reasm, reasm_fec )->bank_idx;

  fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, reasm_fec->parent_bank_idx );
  FD_TEST( parent_bank );
  reasm_fec->parent_bank_seq = parent_bank->bank_seq;

  if( FD_UNLIKELY( reasm_fec->fec_set_idx==0U ) ) {
    /* If the first FEC set for a slot is observed, provision a new bank
       if you are not the leader. Remove any stale block id map entry
       and update the block id entry. */
    fd_bank_t * bank = NULL;
    if( FD_UNLIKELY( reasm_fec->is_leader ) ) {
      bank = ctx->leader_bank;
    } else {
      bank = fd_banks_new_bank( ctx->banks, reasm_fec->parent_bank_idx, now );
    }

    reasm_fec->bank_idx = bank->idx;
    reasm_fec->bank_seq = bank->bank_seq;

    /* At this point remove any stale entry in the block id map if it
       exists and set the block id as not having been seen yet. This is
       safe because we know that the old entry for this bank index has
       already been pruned away. */
    fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
    if( FD_LIKELY( fd_block_id_map_ele_query( ctx->block_id_map, &block_id_ele->latest_mr, NULL, ctx->block_id_arr )==block_id_ele ) ) {
      FD_TEST( fd_block_id_map_ele_remove( ctx->block_id_map, &block_id_ele->latest_mr, NULL, ctx->block_id_arr ) );
    }
    block_id_ele->block_id_seen = 0;
    block_id_ele->slot = reasm_fec->slot;
    block_id_ele->latest_fec_idx = 0U;
    block_id_ele->latest_mr = reasm_fec->key;
  } else {
    /* We are continuing to execute through a slot that we already have
       a bank index for. */
    reasm_fec->bank_idx = reasm_fec->parent_bank_idx;
    reasm_fec->bank_seq = reasm_fec->parent_bank_seq;

    FD_TEST( reasm_fec->bank_idx!=ULONG_MAX );

    /* Drop stale/duplicate FEC sets that do not advance the slot. */
    fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
    if( FD_UNLIKELY( block_id_ele->latest_fec_idx>=reasm_fec->fec_set_idx ) ) {
      FD_LOG_WARNING(( "dropping FEC set (slot=%lu, fec_set_idx=%u) because it is at least as old as the latest FEC set (slot=%lu, fec_set_idx=%u)", reasm_fec->slot, reasm_fec->fec_set_idx, block_id_ele->slot, block_id_ele->latest_fec_idx ));
      return 0;
    }
    block_id_ele->latest_fec_idx = reasm_fec->fec_set_idx;
    block_id_ele->latest_mr = reasm_fec->key;
  }

  /* The last FEC set's merkle root is the slot's block id; publish it
     into the block id map. */
  if( FD_UNLIKELY( reasm_fec->slot_complete ) ) {
    fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
    block_id_ele->block_id_seen = 1;
    block_id_ele->latest_mr = reasm_fec->key;
    block_id_ele->latest_fec_idx = reasm_fec->fec_set_idx;
    FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );
  }

  /* If we are the leader, we don't need to process the FEC set. */
  if( FD_UNLIKELY( reasm_fec->is_leader ) ) return 0;

  /* Forks form a partial ordering over FEC sets. The Repair tile
     delivers FEC sets in-order per fork, but FEC set ordering across
     forks is arbitrary */
  fd_sched_fec_t sched_fec[ 1 ];

# if DEBUG_LOGGING
  FD_BASE58_ENCODE_32_BYTES( reasm_fec->key.key, key_b58 );
  FD_BASE58_ENCODE_32_BYTES( reasm_fec->cmr.key, cmr_b58 );
  FD_LOG_INFO(( "replay processing FEC set for slot %lu fec_set_idx %u, mr %s cmr %s", reasm_fec->slot, reasm_fec->fec_set_idx, key_b58, cmr_b58 ));
# endif

  sched_fec->shred_cnt = reasm_fec->data_cnt;
  sched_fec->is_last_in_batch = !!reasm_fec->data_complete;
  sched_fec->is_last_in_block = !!reasm_fec->slot_complete;
  sched_fec->bank_idx = reasm_fec->bank_idx;
  sched_fec->parent_bank_idx = reasm_fec->parent_bank_idx;
  sched_fec->slot = reasm_fec->slot;
  sched_fec->parent_slot = reasm_fec->slot - reasm_fec->parent_off;
  sched_fec->is_first_in_block = reasm_fec->fec_set_idx==0U;
  /* Address lookup tables are resolved against the current accdb root. */
  fd_funk_txn_xid_t const root = fd_accdb_root_get( ctx->accdb_admin );
  fd_funk_txn_xid_copy( sched_fec->alut_ctx->xid, &root );
  sched_fec->alut_ctx->accdb[0] = ctx->accdb[0];
  sched_fec->alut_ctx->els = ctx->published_root_slot;

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, sched_fec->bank_idx );
  FD_TEST( bank );
  if( sched_fec->is_first_in_block ) {
    bank->refcnt++;
    FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for sched", bank->idx, sched_fec->slot, bank->refcnt ));
  }

  /* Read FEC set from the store. This should happen before we try to
     ingest the FEC set. This allows us to filter out frags that were
     in-flight when we published away minority forks that the frags land
     on. These frags would have no bank to execute against, because
     their corresponding banks, or parent banks, have also been pruned
     during publishing. A query against store will rightfully tell us
     that the underlying data is not found, implying that this is for a
     minority fork that we can safely ignore. */

  ulong wait = (ulong)fd_log_wallclock();
  ulong work = wait;
  FD_STORE_SLOCK_BEGIN( ctx->store ) {
    ctx->metrics.store_query_acquire++;
    work = (ulong)fd_log_wallclock();
    fd_histf_sample( ctx->metrics.store_query_wait, work - wait );

    fd_store_fec_t * store_fec = fd_store_query( ctx->store, &reasm_fec->key );
    ctx->metrics.store_query_cnt++;
    if( FD_UNLIKELY( !store_fec ) ) {

      /* The only case in which a FEC is not found in the store after
         repair has notified is if the FEC was on a minority fork that
         has already been published away. In this case we abandon the
         entire slice because it is no longer relevant. */

      ctx->metrics.store_query_missing_cnt++;
      ctx->metrics.store_query_missing_mr = reasm_fec->key.ul[0];
      FD_BASE58_ENCODE_32_BYTES( reasm_fec->key.key, key_b58 );
      FD_LOG_WARNING(( "store fec for slot: %lu is on minority fork already pruned by publish. abandoning slice. root: %lu. pruned merkle: %s", reasm_fec->slot, ctx->consensus_root_slot, key_b58 ));
      return 0;
    }
    sched_fec->fec = store_fec;
    sched_fec->data = fd_store_fec_data( ctx->store, store_fec );
    if( FD_UNLIKELY( !fd_sched_fec_ingest( ctx->sched, sched_fec ) ) ) { /* FIXME this critical section is unnecessarily complex. should refactor to just be held for the memcpy and shred_offs. */
      mark_bank_dead( ctx, stem, sched_fec->bank_idx );
      return 1;
    }
  } FD_STORE_SLOCK_END;

  ctx->metrics.store_query_release++;
  fd_histf_sample( ctx->metrics.store_query_work, (ulong)fd_log_wallclock() - work );
  return 0;
}
1993 :
/* process_fec_set routes one reassembled FEC set from reasm into the
   scheduler.  Three cases are handled: (1) the parent FEC still maps
   to a live bank whose bank_seq matches -> insert directly; (2) the
   parent's bank is marked dead -> inherit the dead flag and drop; (3)
   the parent's bank has been pruned or recycled -> walk up the reasm
   tree to the nearest ancestor slot that still has a valid bank and
   backfill every intervening slot's FEC sets from there. */
static void
process_fec_set( fd_replay_tile_t * ctx,
                 fd_stem_context_t * stem,
                 fd_reasm_fec_t * reasm_fec ) {

  fd_reasm_fec_t * parent = fd_reasm_parent( ctx->reasm, reasm_fec );
  if( FD_UNLIKELY( !parent ) ) {
    FD_LOG_WARNING(( "dropping FEC set (slot=%lu, fec_set_idx=%u) because it is unconnected in reasm", reasm_fec->slot, reasm_fec->fec_set_idx ));
    return;
  }

  if( FD_UNLIKELY( parent->bank_dead ) ) {
    /* Inherit the dead flag from the parent. If a dead slot is
       completed, we publish the slot as dead. Don't insert FECs for
       dead slots. */
    reasm_fec->bank_dead = 1;
    if( FD_UNLIKELY( reasm_fec->slot_complete ) ) publish_slot_dead( ctx, stem, reasm_fec->slot, &reasm_fec->key );
    FD_LOG_DEBUG(( "dropping FEC set (slot=%lu, fec_set_idx=%u) because parent bank is marked dead", reasm_fec->slot, reasm_fec->fec_set_idx ));
    return;
  }

  /* Standard case, the parent FEC has a valid corresponding bank. */
  fd_bank_t * parent_fec_bank = fd_banks_bank_query( ctx->banks, parent->bank_idx );
  if( FD_LIKELY( parent_fec_bank &&
                 parent_fec_bank->bank_seq==parent->bank_seq ) ) {
    insert_fec_set( ctx, stem, reasm_fec );
    return;
  }

  /* In the case the FEC doesn't directly connect, iterate up the reasm
     tree to find the closest valid slot complete that corresponds to a
     valid bank. */

  /* First keep track of all of the slot completes up to and including
     the fec we want to insert off of. */
  fd_reasm_fec_t * path[ FD_BANKS_MAX_BANKS ];
  ulong path_cnt = 0UL;
  path[ path_cnt++ ] = reasm_fec;

  /* Ascend the tree, recording each slot-complete FEC, until we reach
     one whose bank is still alive (non-NULL and bank_seq matches). */
  for( fd_reasm_fec_t * curr = reasm_fec;; ) {
    curr = fd_reasm_parent( ctx->reasm, curr );
    FD_TEST( curr );
    if( FD_LIKELY( !curr->slot_complete ) ) continue;

    fd_bank_t * curr_bank = fd_banks_bank_query( ctx->banks, curr->bank_idx );
    if( FD_LIKELY( curr_bank && curr_bank->bank_seq==curr->bank_seq ) ) break;

    FD_TEST( path_cnt<FD_BANKS_MAX_BANKS );
    path[ path_cnt++ ] = curr;
  }

  /* Replay the collected path oldest-first, one whole slot per
     iteration. */
  for( ulong i=path_cnt; i>0UL; i-- ) {
    fd_reasm_fec_t * leaf = path[ i-1 ];

    /* If there's not capacity in the sched or banks, return early and
       drop the FEC. We have inserted as much as we can for now. */
    if( FD_UNLIKELY( fd_sched_can_ingest_cnt( ctx->sched ) < (leaf->fec_set_idx/FD_FEC_SHRED_CNT + 1) ) ) return;
    if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) ) ) return;

    /* Gather all FECs for this slot; */
    fd_reasm_fec_t * slot_fecs[ FD_FEC_BLK_MAX ];
    fd_reasm_fec_t * curr = leaf;
    for(;;) {
      slot_fecs[ curr->fec_set_idx/FD_FEC_SHRED_CNT ] = curr;
      if( curr->fec_set_idx==0U ) break;
      curr = fd_reasm_parent( ctx->reasm, curr );
      FD_TEST( curr );
    }
    /* NOTE(review): curr->fec_set_idx is 0 here, so this log reads
       "from <leaf idx> to 0" even though insertion below proceeds in
       ascending order 0..leaf. */
    FD_LOG_NOTICE(( "backfilling FEC sets for slot %lu from fec_set_idx %u to fec_set_idx %u", leaf->slot, leaf->fec_set_idx, curr->fec_set_idx ));

    /* Insert in ascending fec_set_idx order; abort the whole backfill
       if any insert fails. */
    for( ulong j=0UL; j<=leaf->fec_set_idx/FD_FEC_SHRED_CNT; j++ ) {
      if( FD_UNLIKELY( insert_fec_set( ctx, stem, slot_fecs[ j ] ) ) ) return;
    }
  }
}
2069 :
2070 : /* accdb_advance_root moves account records from the unrooted to the
2071 : rooted database. */
2072 :
2073 : static inline ulong
2074 0 : accdb_root_op_total( fd_replay_tile_t const * ctx ) {
2075 0 : return ctx->accdb_admin->base.root_cnt +
2076 0 : ctx->accdb_admin->base.reclaim_cnt;
2077 0 : }
2078 :
2079 : static void
2080 : accdb_advance_root( fd_replay_tile_t * ctx,
2081 : ulong slot,
2082 0 : ulong bank_idx ) {
2083 0 : fd_funk_txn_xid_t xid = { .ul[0] = slot, .ul[1] = bank_idx };
2084 0 : FD_LOG_DEBUG(( "advancing root to slot=%lu", slot ));
2085 :
2086 0 : long rooted_accounts = -(long)accdb_root_op_total( ctx );
2087 0 : long t0 = fd_tickcount();
2088 0 : fd_accdb_advance_root( ctx->accdb_admin, &xid );
2089 0 : rooted_accounts += (long)accdb_root_op_total( ctx );
2090 0 : long t1 = fd_tickcount();
2091 0 : long root_accounts_dt = t1 - t0;
2092 0 : fd_histf_sample( ctx->metrics.root_slot_dur, (ulong)root_accounts_dt );
2093 0 : fd_histf_sample( ctx->metrics.root_account_dur, (ulong)root_accounts_dt / (ulong)fd_long_max( rooted_accounts, 1L ) );
2094 :
2095 0 : fd_progcache_advance_root( ctx->progcache, &xid );
2096 0 : long t2 = fd_tickcount();
2097 0 : FD_MCNT_INC( REPLAY, PROGCACHE_TIME_SECONDS, (ulong)( t2-t1 ) );
2098 0 : }
2099 :
/* advance_published_root attempts to move the locally published root
   (account db, program cache, txncache, scheduler, banks, and
   reasm/store) one step towards the consensus root chosen by tower.
   Returns 1 if the root advanced, 0 if the storage root is not yet
   ready to advance. */
static int
advance_published_root( fd_replay_tile_t * ctx ) {

  /* Map the consensus root block id to its bank index. */
  fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &ctx->consensus_root, NULL, ctx->block_id_arr );
  if( FD_UNLIKELY( !block_id_ele ) ) {
    FD_BASE58_ENCODE_32_BYTES( ctx->consensus_root.key, consensus_root_b58 );
    FD_LOG_CRIT(( "invariant violation: block id ele not found for consensus root %s", consensus_root_b58 ));
  }
  ulong target_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );

  /* If the identity vote has been seen on a bank that should be rooted,
     then we are now ready to produce blocks. */
  if( FD_UNLIKELY( !ctx->identity_vote_rooted ) ) {
    fd_bank_t * root_bank = fd_banks_bank_query( ctx->banks, target_bank_idx );
    if( FD_UNLIKELY( !root_bank ) ) FD_LOG_CRIT(( "invariant violation: root bank not found for bank index %lu", target_bank_idx ));
    if( root_bank->f.identity_vote_idx==ctx->identity_idx ) ctx->identity_vote_rooted = 1;
  }

  /* Ask banks how far we can actually advance right now; bail if the
     storage root is lagging behind the consensus root. */
  ulong advanceable_root_idx = ULONG_MAX;
  if( FD_UNLIKELY( !fd_banks_advance_root_prepare( ctx->banks, target_bank_idx, &advanceable_root_idx ) ) ) {
    ctx->metrics.storage_root_behind++;
    return 0;
  }

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, advanceable_root_idx );
  FD_TEST( bank );

  if( FD_UNLIKELY( advanceable_root_idx >= ctx->block_id_len ) ) {
    FD_LOG_CRIT(( "invariant violation: advanceable root ele out of bounds [0, %lu) index %lu", ctx->block_id_len, advanceable_root_idx ));
  }
  fd_block_id_ele_t * advanceable_root_ele = &ctx->block_id_arr[ advanceable_root_idx ];

  /* Root the account database and program cache first ... */
  ulong advanceable_root_slot = bank->f.slot;
  accdb_advance_root( ctx, advanceable_root_slot, bank->idx );

  /* ... then the txncache, scheduler, and banks. */
  fd_txncache_advance_root( ctx->txncache, bank->txncache_fork_id );
  fd_sched_advance_root( ctx->sched, advanceable_root_idx );
  fd_banks_advance_root( ctx->banks, advanceable_root_idx );

  /* Reasm also prunes from the store during its publish. */

  fd_reasm_publish( ctx->reasm, &advanceable_root_ele->latest_mr, ctx->store );

  ctx->published_root_slot = advanceable_root_slot;
  ctx->published_root_bank_idx = advanceable_root_idx;

  return 1;
}
2148 :
2149 : static void
2150 : after_credit( fd_replay_tile_t * ctx,
2151 : fd_stem_context_t * stem,
2152 : int * opt_poll_in,
2153 0 : int * charge_busy ) {
2154 0 : if( FD_UNLIKELY( !ctx->is_booted || !ctx->wfs_complete ) ) return;
2155 :
2156 0 : if( FD_UNLIKELY( maybe_become_leader( ctx, stem ) ) ) {
2157 0 : *charge_busy = 1;
2158 0 : *opt_poll_in = 0;
2159 0 : return;
2160 0 : }
2161 :
2162 : /* If we are leader, we can only unbecome the leader iff we have
2163 : received the poh hash from the poh tile and block id from reasm.
2164 : We have to do an additional check against the slot of the leader
2165 : bank because we lazily remove entries from the block id arr. */
2166 0 : if( FD_UNLIKELY( ctx->is_leader &&
2167 0 : ctx->recv_poh &&
2168 0 : ctx->block_id_arr[ ctx->leader_bank->idx ].block_id_seen &&
2169 0 : ctx->block_id_arr[ ctx->leader_bank->idx ].slot==ctx->leader_bank->f.slot ) ) {
2170 :
2171 0 : fini_leader_bank( ctx, stem );
2172 0 : *charge_busy = 1;
2173 0 : *opt_poll_in = 0;
2174 0 : return;
2175 0 : }
2176 :
2177 0 : ulong bank_idx;
2178 0 : while( (bank_idx=fd_sched_pruned_block_next( ctx->sched ))!=ULONG_MAX ) {
2179 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
2180 0 : FD_TEST( bank );
2181 0 : bank->refcnt--;
2182 0 : FD_LOG_DEBUG(( "bank (idx=%lu) refcnt decremented to %lu for sched", bank->idx, bank->refcnt ));
2183 0 : }
2184 :
2185 : /* If the published_root is not caught up to the consensus root, then
2186 : we should try to advance the published root. */
2187 0 : if( FD_UNLIKELY( ctx->consensus_root_bank_idx!=ctx->published_root_bank_idx && advance_published_root( ctx ) ) ) {
2188 0 : *charge_busy = 1;
2189 0 : *opt_poll_in = 0;
2190 0 : return;
2191 0 : }
2192 :
2193 0 : fd_banks_prune_cancel_info_t cancel_info[ 1 ];
2194 0 : int pruned = fd_banks_prune_one_dead_bank( ctx->banks, cancel_info );
2195 0 : if( FD_UNLIKELY( pruned==2 ) ) {
2196 0 : fd_txncache_cancel_fork( ctx->txncache, cancel_info->txncache_fork_id );
2197 0 : fd_funk_txn_xid_t xid = { .ul = { cancel_info->slot, cancel_info->bank_idx } };
2198 0 : fd_accdb_cancel ( ctx->accdb_admin, &xid );
2199 0 : fd_progcache_cancel( ctx->progcache, &xid );
2200 0 : *charge_busy = 1;
2201 0 : *opt_poll_in = 0;
2202 0 : return;
2203 0 : }
2204 :
2205 : /* if reasm evicted is set, publish starting from reasm_evicted down
2206 : to the leaf node to repair so repair can re-request for it */
2207 :
2208 0 : if( FD_UNLIKELY( ctx->reasm_evicted ) ) {
2209 0 : fd_replay_fec_evicted_t evicted = (fd_replay_fec_evicted_t){ .mr = ctx->reasm_evicted->key, .slot = ctx->reasm_evicted->slot, .fec_set_idx = ctx->reasm_evicted->fec_set_idx, .bank_idx = ctx->reasm_evicted->bank_idx };
2210 0 : fd_memcpy( fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk ), &evicted, sizeof(fd_replay_fec_evicted_t) );
2211 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_REASM_EVICTED, ctx->replay_out->chunk, sizeof(fd_replay_fec_evicted_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
2212 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_fec_evicted_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
2213 :
2214 : /* eviction policy only evicts chains of nodes until there is a
2215 : fork, so guaranteed that the evict path is always the left-child */
2216 0 : fd_reasm_pool_release( ctx->reasm, ctx->reasm_evicted );
2217 0 : ctx->reasm_evicted = fd_reasm_child( ctx->reasm, ctx->reasm_evicted ); /* indexes into pool, safe to use */
2218 :
2219 0 : *charge_busy = 1;
2220 0 : *opt_poll_in = 0;
2221 0 : return;
2222 0 : }
2223 :
2224 : /* Mark a frontier eviction victim bank as dead. As refcnts on said
2225 : banks are drained, they will be pruned away. */
2226 0 : if( FD_UNLIKELY( ctx->frontier_cnt ) ) {
2227 0 : *charge_busy = 1;
2228 0 : *opt_poll_in = 0;
2229 0 : bank_idx = ctx->frontier_indices[ --ctx->frontier_cnt ];
2230 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
2231 0 : FD_TEST( bank );
2232 0 : if( FD_UNLIKELY( ctx->is_leader && bank_idx==ctx->leader_bank->idx ) ) return;
2233 0 : mark_bank_dead( ctx, stem, bank->idx );
2234 0 : fd_sched_block_abandon( ctx->sched, bank->idx );
2235 :
2236 : /* evict it from reasm */
2237 :
2238 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank->idx ];
2239 0 : fd_reasm_fec_t * fec = fd_reasm_query( ctx->reasm, &block_id_ele->latest_mr );
2240 0 : FD_TEST( fec );
2241 0 : fd_reasm_fec_t * evicted_head = fd_reasm_remove( ctx->reasm, fec, ctx->store );
2242 0 : if( FD_UNLIKELY( ctx->reasm_evicted ) ) {
2243 : /* already have a chain we are evicting. Prepend the new chain to the existing chain */
2244 0 : fec->child = fd_reasm_pool_idx( ctx->reasm, ctx->reasm_evicted );
2245 0 : }
2246 0 : ctx->reasm_evicted = evicted_head;
2247 0 : return;
2248 0 : }
2249 :
2250 : /* Try to dispatch some work before we try to ingest more FEC sets.
2251 : If FEC ingestion takes precedence, exec tiles can be left idle for
2252 : an extended period of time during catchup due to the burstiness of
2253 : reassembled FEC delivery. It's better to keep the exec tiles busy
2254 : with potentially suboptimal scheduling than to leave them idle
2255 : while a burst of FEC sets gets ingested. */
2256 0 : if( FD_LIKELY( replay( ctx, stem ) ) ) {
2257 0 : *charge_busy = 1;
2258 0 : *opt_poll_in = 0;
2259 0 : return;
2260 0 : }
2261 :
2262 : /* If the reassembler has a fec that is ready, we should process it
2263 : and pass it to the scheduler.
2264 :
2265 : We would also like to pace FEC ingestion such that we keep the exec
2266 : tiles busy. If there's a pending frag from one of the exec tiles,
2267 : we would like to know about that asap, because that could unblock
2268 : dispatching. So we ingest FEC sets only if we are sure that there
2269 : are no more exec tile notifications to process. This delays FEC
2270 : ingestion just enough so as to keep the exec tiles as busy as we
2271 : can, and prevents us from being stuck ingesting a backlog of FEC
2272 : sets, especially when there is a pending completion notification
2273 : about a single-transaction chokepoint in the replay dispatcher DAG.
2274 : Except that when we are leader or the reasm buffer is getting full,
2275 : we prioritize FEC processing. In the leader case, this is so we
2276 : can get to the leader FEC sets asap and freeze the leader bank on
2277 : time. In the reasm full case, this is so we don't prematurely
2278 : trigger eviction. */
2279 0 : int evict_banks = 0;
2280 0 : if( FD_LIKELY( (ctx->execrp_idle_cnt>=2UL*ctx->in_cnt||ctx->is_leader||fd_reasm_free( ctx->reasm )<=1UL) && can_process_fec( ctx, &evict_banks ) ) ) {
2281 0 : fd_reasm_fec_t * fec = fd_reasm_pop( ctx->reasm );
2282 0 : process_fec_set( ctx, stem, fec );
2283 0 : *charge_busy = 1;
2284 0 : *opt_poll_in = 0;
2285 0 : ctx->execrp_idle_cnt = 0UL;
2286 0 : return;
2287 0 : }
2288 :
2289 0 : if( FD_UNLIKELY( evict_banks ) ) {
2290 0 : FD_LOG_WARNING(( "banks are full and partially executed frontier banks are being evicted" ));
2291 0 : fd_banks_get_frontier( ctx->banks, ctx->frontier_indices, &ctx->frontier_cnt );
2292 0 : *charge_busy = 1;
2293 0 : *opt_poll_in = 0;
2294 0 : return;
2295 0 : }
2296 :
2297 0 : ctx->execrp_idle_cnt++;
2298 0 : }
2299 :
2300 : static int
2301 : before_frag( fd_replay_tile_t * ctx,
2302 : ulong in_idx,
2303 : ulong seq FD_PARAM_UNUSED,
2304 0 : ulong sig ) {
2305 :
2306 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_GOSSIP_OUT && sig!=FD_GOSSIP_UPDATE_TAG_WFS_DONE ) ) return 1;
2307 0 : return 0;
2308 0 : }
2309 :
/* process_exec_task_done handles a task-completion notification from
   an exec tile.  The high 32 bits of sig carry the task type and the
   low 32 bits the exec tile index; msg identifies the bank and carries
   the per-task payload.  Releases the dispatch reference on the bank,
   records the result with the scheduler, marks the bank dead on any
   non-committable exec / failed sigverify result, and publishes a
   transaction once both exec and sigverify have completed. */
static void
process_exec_task_done( fd_replay_tile_t * ctx,
                        fd_stem_context_t * stem,
                        fd_execrp_task_done_msg_t * msg,
                        ulong sig ) {

  ulong exec_tile_idx = sig&0xFFFFFFFFUL;

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, msg->bank_idx );
  FD_TEST( bank );
  bank->refcnt--; /* release the reference taken when this task was dispatched */

  switch( sig>>32 ) {
    case FD_EXECRP_TT_TXN_EXEC: {
      ulong txn_idx = msg->txn_exec->txn_idx;
      if( FD_UNLIKELY( !ctx->identity_vote_rooted ) ) {
        /* Query the txn signature against our recently generated vote
           txn signatures. If the query is successful, then we have
           seen our own vote transaction land and this should be marked
           in the bank. We go through this exercise until we've seen
           our vote rooted. */
        fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, txn_idx );

        fd_pubkey_t * identity_pubkey_out = NULL;
        if( fd_vote_tracker_query_sig( ctx->vote_tracker, fd_type_pun_const( txn_p->payload+TXN( txn_p )->signature_off ), &identity_pubkey_out ) && fd_pubkey_eq( identity_pubkey_out, ctx->identity_pubkey ) ) {
          bank->f.identity_vote_idx = ctx->identity_idx;
        }
      }
      if( FD_UNLIKELY( !msg->txn_exec->is_committable && bank->state!=FD_BANK_STATE_DEAD) ) {
        /* Every transaction in a valid block has to execute.
           Otherwise, we should mark the block as dead. */
        mark_bank_dead( ctx, stem, bank->idx );
        fd_sched_block_abandon( ctx->sched, bank->idx );
      }
      int res = fd_sched_task_done( ctx->sched, FD_SCHED_TT_TXN_EXEC, txn_idx, exec_tile_idx, NULL );
      FD_TEST( res==0 );
      fd_sched_txn_info_t * txn_info = fd_sched_get_txn_info( ctx->sched, txn_idx );
      txn_info->flags |= FD_SCHED_TXN_EXEC_DONE;
      if( FD_LIKELY( !(txn_info->flags&FD_SCHED_TXN_SIGVERIFY_DONE)||!txn_info->txn_err ) ) { /* Set execution status if sigverify hasn't happened yet or if sigverify was a success. */
        txn_info->txn_err = msg->txn_exec->txn_err;
        txn_info->flags |= fd_ulong_if( msg->txn_exec->is_committable, FD_SCHED_TXN_IS_COMMITTABLE, 0UL );
        txn_info->flags |= fd_ulong_if( msg->txn_exec->is_fees_only, FD_SCHED_TXN_IS_FEES_ONLY, 0UL );
      }
      if( FD_UNLIKELY( (txn_info->flags&FD_SCHED_TXN_REPLAY_DONE)==FD_SCHED_TXN_REPLAY_DONE ) ) { /* UNLIKELY because generally exec happens before sigverify. */
        publish_txn_executed( ctx, stem, txn_idx );
      }
      break;
    }
    case FD_EXECRP_TT_TXN_SIGVERIFY: {
      ulong txn_idx = msg->txn_sigverify->txn_idx;
      fd_sched_txn_info_t * txn_info = fd_sched_get_txn_info( ctx->sched, txn_idx );
      txn_info->flags |= FD_SCHED_TXN_SIGVERIFY_DONE;
      if( FD_UNLIKELY( msg->txn_sigverify->err ) ) {
        /* Sigverify failure overrides any earlier execution status. */
        txn_info->txn_err = FD_RUNTIME_TXN_ERR_SIGNATURE_FAILURE;
        txn_info->flags &= ~FD_SCHED_TXN_IS_COMMITTABLE;
        txn_info->flags &= ~FD_SCHED_TXN_IS_FEES_ONLY;
      }
      if( FD_UNLIKELY( msg->txn_sigverify->err && bank->state!=FD_BANK_STATE_DEAD ) ) {
        /* Every transaction in a valid block has to sigverify.
           Otherwise, we should mark the block as dead. Also freeze the
           bank if possible. */
        mark_bank_dead( ctx, stem, bank->idx );
        fd_sched_block_abandon( ctx->sched, bank->idx );
      }
      int res = fd_sched_task_done( ctx->sched, FD_SCHED_TT_TXN_SIGVERIFY, txn_idx, exec_tile_idx, NULL );
      FD_TEST( res==0 );
      if( FD_LIKELY( (txn_info->flags&FD_SCHED_TXN_REPLAY_DONE)==FD_SCHED_TXN_REPLAY_DONE ) ) {
        publish_txn_executed( ctx, stem, txn_idx );
      }
      break;
    }
    case FD_EXECRP_TT_POH_HASH: {
      /* A PoH hash verification result; a negative result kills the
         bank unless it is already dead. */
      int res = fd_sched_task_done( ctx->sched, FD_SCHED_TT_POH_HASH, ULONG_MAX, exec_tile_idx, msg->poh_hash );
      if( FD_UNLIKELY( res<0 && bank->state!=FD_BANK_STATE_DEAD ) ) {
        mark_bank_dead( ctx, stem, bank->idx );
      }
      break;
    }
    default: FD_LOG_CRIT(( "unexpected sig 0x%lx", sig ));
  }

  /* Reference counter just decreased, and an exec tile just got freed
     up. If there's a need to be more aggressively pruning, we could
     check here if more slots just became publishable and publish. Not
     publishing here shouldn't bloat the fork tree too much though. We
     mark minority forks dead as soon as we can, and execution dispatch
     stops on dead blocks. So shortly afterwards, dead blocks should be
     eligible for pruning as in-flight transactions retire from the
     execution pipeline. */

}
2401 :
/* process_tower_slot_done handles the tower tile's slot-done
   notification: releases tower's reference on the replayed bank,
   records the new reset target, recomputes the next leader slot,
   publishes a PoH reset downstream, and — when tower advanced its
   root — updates the consensus root and notifies the scheduler.
   Finally reports the distance from the reset bank to the consensus
   root as a gauge. */
static void
process_tower_slot_done( fd_replay_tile_t * ctx,
                         fd_stem_context_t * stem,
                         fd_tower_slot_done_t const * msg,
                         ulong seq ) {
  fd_bank_t * replay_bank = fd_banks_bank_query( ctx->banks, msg->replay_bank_idx );
  if( FD_UNLIKELY( !replay_bank ) ) FD_LOG_CRIT(( "invariant violation: bank not found for bank index %lu", msg->replay_bank_idx ));
  replay_bank->refcnt--;
  FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt decremented to %lu for tower", replay_bank->idx, msg->replay_slot, replay_bank->refcnt ));

  /* Record the reset target and recompute our next leader slot.  The
     next leader slot must be strictly after both the reset slot and
     any slot we have already been leader for. */
  ctx->reset_block_id = msg->reset_block_id;
  ctx->reset_slot = msg->reset_slot;
  ctx->reset_timestamp_nanos = fd_log_wallclock();
  ulong min_leader_slot = fd_ulong_max( msg->reset_slot+1UL, fd_ulong_if( ctx->highwater_leader_slot==ULONG_MAX, 0UL, ctx->highwater_leader_slot+1UL ) );
  ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, min_leader_slot, ctx->identity_pubkey );
  if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
    ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
  } else {
    ctx->next_leader_tickcount = LONG_MAX;
  }

  /* Resolve the reset block id to its bank. */
  fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->reset_block_id, NULL, ctx->block_id_arr );
  if( FD_UNLIKELY( !block_id_ele ) ) {
    FD_BASE58_ENCODE_32_BYTES( msg->reset_block_id.key, reset_block_id_b58 );
    FD_LOG_CRIT(( "invariant violation: block id ele doesn't exist for reset block id: %s, slot: %lu", reset_block_id_b58, msg->reset_slot ));
  }
  ulong reset_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, reset_bank_idx );
  if( FD_UNLIKELY( !bank ) ) {
    FD_LOG_CRIT(( "invariant violation: bank not found for bank index %lu", reset_bank_idx ));
  }

  if( FD_LIKELY( msg->root_slot!=ULONG_MAX ) ) FD_TEST( msg->root_slot<=msg->reset_slot );
  ctx->reset_bank = bank;

  /* Publish a PoH reset describing the bank to build on next, if a
     replay output link is configured. */
  if( FD_LIKELY( ctx->replay_out->idx!=ULONG_MAX ) ) {
    fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );

    reset->bank_idx = bank->idx;
    reset->timestamp = ctx->reset_timestamp_nanos;
    reset->completed_slot = ctx->reset_slot;
    reset->hashcnt_per_tick = bank->f.hashes_per_tick;
    reset->ticks_per_slot = bank->f.ticks_per_slot;
    reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);

    fd_memcpy( reset->completed_block_id, &block_id_ele->latest_mr, sizeof(fd_hash_t) );

    /* The most recent blockhash of the reset bank seeds PoH. */
    fd_blockhashes_t const * block_hash_queue = &bank->f.block_hash_queue;
    fd_hash_t const * last_hash = fd_blockhashes_peek_last_hash( block_hash_queue );
    FD_TEST( last_hash );
    fd_memcpy( reset->completed_blockhash, last_hash->uc, sizeof(fd_hash_t) );

    ulong ticks_per_slot = bank->f.ticks_per_slot;
    if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
      /* Low power producer, maximum of one microblock per tick in the slot */
      reset->max_microblocks_in_slot = ticks_per_slot;
    } else {
      /* See the long comment in after_credit for this limit */
      reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
    }
    reset->next_leader_slot = ctx->next_leader_slot;

    fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_RESET, ctx->replay_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
    ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_poh_reset_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
  }

  FD_LOG_INFO(( "tower_slot_done(reset_slot=%lu, next_leader_slot=%lu, vote_slot=%lu, replay_slot=%lu, root_slot=%lu, seqno=%lu)", msg->reset_slot, ctx->next_leader_slot, msg->vote_slot, msg->replay_slot, msg->root_slot, seq ));
  maybe_become_leader( ctx, stem );

  /* If tower advanced its root, adopt it as the new consensus root
     and notify downstream consumers and the scheduler. */
  if( FD_LIKELY( msg->root_slot!=ULONG_MAX ) ) {

    FD_TEST( msg->root_slot>=ctx->consensus_root_slot );
    fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->root_block_id, NULL, ctx->block_id_arr );
    FD_TEST( block_id_ele );
    ctx->consensus_root_slot = msg->root_slot;
    ctx->consensus_root = msg->root_block_id;
    ctx->consensus_root_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );

    publish_root_advanced( ctx, stem );

    fd_sched_root_notify( ctx->sched, ctx->consensus_root_bank_idx );
  }

  /* Walk the bank's ancestry to measure how far the reset bank is from
     the consensus root. */
  ulong distance = 0UL;
  fd_bank_t * parent = bank;
  while( parent ) {
    if( FD_UNLIKELY( parent->idx==ctx->consensus_root_bank_idx ) ) break;
    parent = fd_banks_get_parent( ctx->banks, parent );
    distance++;
  }

  FD_MGAUGE_SET( REPLAY, ROOT_DISTANCE, distance );
}
2496 :
/* process_fec_complete handles a FEC-set-complete notification from
   the shred tile and inserts the FEC into reasm.  sig distinguishes
   FEC sets belonging to our own leader block.  If the insert evicts a
   chain whose head had a bank, that bank is marked dead and abandoned
   in the scheduler. */
static void
process_fec_complete( fd_replay_tile_t * ctx,
                      fd_stem_context_t * stem,
                      ulong sig,
                      fd_fec_complete_t * complete_msg ) {
  fd_shred_t const * shred = &complete_msg->last_shred_hdr;

  fd_hash_t const * merkle_root = &complete_msg->merkle_root;
  fd_hash_t const * chained_merkle_root = &complete_msg->chained_merkle_root;
  int is_leader_fec = sig == SHRED_SIG_FEC_COMPLETE_LEADER;

  int data_complete = !!( shred->data.flags & FD_SHRED_DATA_FLAG_DATA_COMPLETE );
  int slot_complete = !!( shred->data.flags & FD_SHRED_DATA_FLAG_SLOT_COMPLETE );

  /* If this is the first FEC of a slot whose parent is the reasm
     anchor slot (slot0), chain it off the reasm root's merkle root. */
  if( FD_UNLIKELY( shred->slot - shred->data.parent_off == fd_reasm_slot0( ctx->reasm ) && shred->fec_set_idx == 0) ) {
    chained_merkle_root = &fd_reasm_root( ctx->reasm )->key;
  }

  /* Already inserted -> nothing to do. */
  if( FD_UNLIKELY( fd_reasm_query( ctx->reasm, merkle_root ) ) ) return;
  fd_reasm_fec_t * fec = fd_reasm_insert( ctx->reasm, merkle_root, chained_merkle_root, shred->slot, shred->fec_set_idx, shred->data.parent_off, (ushort)(shred->idx - shred->fec_set_idx + 1), data_complete, slot_complete, is_leader_fec, ctx->store, &ctx->reasm_evicted );

  if( FD_UNLIKELY( !fec ) ) {
    /* reasm failed to insert. We don't want to just put this back on
       the returnable_frag queue because it's unclear whether this FEC
       is truly something we want to process. Therefore our best option
       is to punt it and "go around." reasm_insert populates its last
       pool element with the data of the failed insert, so we make sure
       to publish the failed insert data to repair in after_credit. */
    return;
  }

  /* The insert evicted a chain that was attached to a bank: that fork
     can no longer be replayed, so kill and abandon the bank. */
  if( FD_UNLIKELY( ctx->reasm_evicted && ctx->reasm_evicted->bank_idx != ULONG_MAX ) ) {
    mark_bank_dead( ctx, stem, ctx->reasm_evicted->bank_idx );
    fd_sched_block_abandon( ctx->sched, ctx->reasm_evicted->bank_idx );
  }
}
2533 :
2534 : static void
2535 0 : process_resolv_slot_completed( fd_replay_tile_t * ctx, ulong bank_idx ) {
2536 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
2537 0 : FD_TEST( bank );
2538 0 : bank->refcnt--;
2539 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt decremented to %lu for resolv", bank->idx, bank->f.slot, bank->refcnt ));
2540 0 : }
2541 :
2542 : static void
2543 : process_vote_txn_sent( fd_replay_tile_t * ctx,
2544 0 : fd_txn_m_t * txnm ) {
2545 : /* The send tile has signed and sent a vote. Add this vote to the
2546 : vote tracker. We go through this exercise until the client has
2547 : seen a vote corresponding to the current identity rooted. */
2548 0 : if( FD_UNLIKELY( !ctx->identity_vote_rooted ) ) {
2549 0 : uchar * payload = (uchar *)txnm + sizeof(fd_txn_m_t);
2550 0 : uchar txn_mem[ FD_TXN_MAX_SZ ] __attribute__((aligned(alignof(fd_txn_t))));
2551 0 : fd_txn_t * txn = (fd_txn_t *)txn_mem;
2552 0 : if( FD_UNLIKELY( !fd_txn_parse( payload, txnm->payload_sz, txn_mem, NULL ) ) ) {
2553 0 : FD_LOG_CRIT(( "Could not parse txn from send tile" ));
2554 0 : }
2555 : /* The identity of the validator that the signed the vote will
2556 : always be the first signer in the vote transaction. */
2557 0 : fd_pubkey_t * vote_identity = fd_type_pun( payload+txn->acct_addr_off );
2558 0 : fd_vote_tracker_insert( ctx->vote_tracker, vote_identity, fd_type_pun_const( payload+txn->signature_off ) );
2559 0 : }
2560 0 : }
2561 :
2562 : static inline void
2563 0 : maybe_verify_shred_version( fd_replay_tile_t * ctx ) {
2564 0 : if( FD_LIKELY( ctx->expected_shred_version && ctx->ipecho_shred_version ) ) {
2565 0 : if( FD_UNLIKELY( ctx->expected_shred_version!=ctx->ipecho_shred_version ) ) {
2566 0 : FD_LOG_ERR(( "shred version mismatch: expected %u but got %u from ipecho", ctx->expected_shred_version, ctx->ipecho_shred_version ) );
2567 0 : }
2568 0 : }
2569 :
2570 0 : if( FD_LIKELY( ctx->has_genesis_hash && ctx->hard_forks_cnt!=ULONG_MAX && (ctx->expected_shred_version || ctx->ipecho_shred_version) ) ) {
2571 0 : ushort expected_shred_version = ctx->expected_shred_version ? ctx->expected_shred_version : ctx->ipecho_shred_version;
2572 :
2573 0 : ushort actual_shred_version = compute_shred_version( ctx->genesis_hash->uc, ctx->hard_forks, ctx->hard_forks_cnts, ctx->hard_forks_cnt );
2574 :
2575 0 : if( FD_UNLIKELY( expected_shred_version!=actual_shred_version ) ) {
2576 0 : FD_BASE58_ENCODE_32_BYTES( ctx->genesis_hash->uc, genesis_hash_b58 );
2577 0 : FD_LOG_ERR(( "Your genesis.bin file at `%s` combined with the hard_forks from the loaded snapshot have produced "
2578 0 : "a shred version of %hu but the entrypoint you connected to on boot reported a shred version of %hu. "
2579 0 : "This likely means that the genesis.bin file you have is for a different cluster than the one you "
2580 0 : "are trying to connect to, you can delete it and restart the node to download the correct genesis "
2581 0 : "file automatically.", ctx->genesis_path, actual_shred_version, expected_shred_version ));
2582 0 : }
2583 0 : }
2584 0 : }
2585 :
2586 : static inline void
2587 0 : maybe_verify_genesis_timestamp( fd_replay_tile_t * ctx ) {
2588 0 : if( FD_LIKELY( !ctx->has_expected_genesis_timestamp || !ctx->has_genesis_timestamp ) ) return;
2589 0 : if( FD_LIKELY( ctx->genesis_timestamp==ctx->expected_genesis_timestamp ) ) return;
2590 :
2591 0 : FD_LOG_ERR(( "Your genesis.bin file at `%s` has a genesis timestamp of %lu but the snapshot you loaded has a genesis "
2592 0 : "timestamp of %lu. This either means that the genesis.bin file you have is for a different cluster than "
2593 0 : "the one you are trying to connect to, or you have loaded a snapshot for the wrong cluster. In either "
2594 0 : "case, you can delete the problematic file and restart the node to download the correct one automatically.",
2595 0 : ctx->genesis_path, ctx->genesis_timestamp, ctx->expected_genesis_timestamp ));
2596 0 : }
2597 :
2598 : static void
2599 : process_tower_optimistic_confirmed( fd_replay_tile_t * ctx,
2600 : fd_stem_context_t * stem,
2601 0 : fd_tower_slot_confirmed_t const * msg ) {
2602 :
2603 0 : fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->block_id, NULL, ctx->block_id_arr );
2604 0 : if( FD_UNLIKELY( !block_id_ele ) ) {
2605 0 : FD_BASE58_ENCODE_32_BYTES( msg->block_id.key, block_id_b58 );
2606 0 : FD_LOG_WARNING(( "missing bank for confirmed block_id: %s level %d", block_id_b58, msg->level ));
2607 0 : return;
2608 0 : }
2609 :
2610 0 : ulong bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );
2611 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
2612 :
2613 :
2614 0 : if( FD_UNLIKELY( !bank ) ) {
2615 0 : FD_BASE58_ENCODE_32_BYTES( msg->block_id.key, block_id_cstr );
2616 0 : FD_LOG_WARNING(( "failed to query optimistically confirmed bank for block id %s", block_id_cstr ));
2617 0 : return;
2618 0 : }
2619 :
2620 0 : if( ctx->rpc_enabled ) {
2621 0 : bank->refcnt++;
2622 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for rpc", bank->idx, bank->f.slot, bank->refcnt ));
2623 0 : }
2624 :
2625 0 : fd_replay_oc_advanced_t * replay_msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
2626 0 : replay_msg->bank_idx = bank_idx;
2627 0 : replay_msg->slot = msg->slot;
2628 :
2629 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_OC_ADVANCED, ctx->replay_out->chunk, sizeof(fd_replay_oc_advanced_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
2630 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_oc_advanced_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
2631 0 : }
2632 :
/* Main stem frag handler: validates the incoming chunk, then dispatches
   on the input link kind.  Always returns 0 (frag consumed). */
static inline int
returnable_frag( fd_replay_tile_t * ctx,
                 ulong in_idx,
                 ulong seq,
                 ulong sig,
                 ulong chunk,
                 ulong sz,
                 ulong ctl,
                 ulong tsorig,
                 ulong tspub,
                 fd_stem_context_t * stem ) {
  (void)seq; /* seq is still used in the TOWER case below; the cast just
                silences unused warnings on paths that ignore it */
  (void)ctl;
  (void)tsorig;
  (void)tspub;

  /* Zero-sized frags carry no payload, so the chunk bounds check only
     applies when sz!=0. */
  if( FD_UNLIKELY( sz!=0UL && (chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) )
    FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));

  switch( ctx->in_kind[in_idx] ) {
    case IN_KIND_GENESIS: {
      /* Genesis metadata arrived: record the genesis hash/timestamp and
         either bootstrap from genesis or parse the raw genesis blob
         that trails the metadata header. */
      fd_genesis_meta_t const * meta = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
      ctx->has_genesis_hash = 1;
      ctx->has_genesis_timestamp = 1;
      ctx->genesis_timestamp = meta->creation_time_seconds;
      *ctx->genesis_hash = meta->genesis_hash;
      if( FD_LIKELY( meta->bootstrap ) ) {
        boot_genesis( ctx, stem, meta );
      } else {
        uchar const * genesis_blob = (uchar const *)( meta+1 );
        FD_TEST( fd_genesis_parse( ctx->genesis, genesis_blob, meta->blob_sz ) );
      }
      /* NOTE(review): has_genesis_timestamp/genesis_timestamp were
         already set from meta->creation_time_seconds above; this second
         assignment overwrites them from the parsed genesis.  Confirm
         boot_genesis populates ctx->genesis so the bootstrap path does
         not read a stale creation_time here. */
      ctx->has_genesis_timestamp = 1;
      ctx->genesis_timestamp = ctx->genesis->creation_time;

      maybe_verify_cluster_type( ctx );
      maybe_verify_shred_version( ctx );
      maybe_verify_genesis_timestamp( ctx );
      break;
    }
    case IN_KIND_IPECHO: {
      /* Shred version discovered from the entrypoint is carried in sig. */
      FD_TEST( sig && sig<=USHORT_MAX );
      ctx->ipecho_shred_version = (ushort)sig;
      maybe_verify_shred_version( ctx );
      break;
    }
    case IN_KIND_SNAP: {
      /* Snapshot manifest/progress message; may supply the expected
         shred version and genesis timestamp checked below. */
      on_snapshot_message( ctx, stem, in_idx, chunk, sig );
      maybe_verify_shred_version( ctx );
      maybe_verify_genesis_timestamp( ctx );
      break;
    }
    case IN_KIND_EXECRP: {
      /* Execution tile completed a task; reset the idle counter. */
      process_exec_task_done( ctx, stem, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), sig );
      ctx->execrp_idle_cnt = 0UL;
      break;
    }
    case IN_KIND_POH: {
      process_poh_message( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
      break;
    }
    case IN_KIND_RESOLV: {
      fd_resolv_slot_exchanged_t * exchanged_slot = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
      process_resolv_slot_completed( ctx, exchanged_slot->bank_idx );
      break;
    }
    case IN_KIND_TOWER: {
      /* Tower consensus messages: slot done, confirmation level change,
         or a slot the tower decided to ignore (treated as done with no
         vote/root advance). */
      if( FD_LIKELY( sig==FD_TOWER_SIG_SLOT_DONE ) ) {
        process_tower_slot_done( ctx, stem, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), seq );
      } else if( FD_LIKELY( sig==FD_TOWER_SIG_SLOT_CONFIRMED ) ) {
        fd_tower_slot_confirmed_t const * msg = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
        if( msg->level==FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC && !msg->fwd ) process_tower_optimistic_confirmed( ctx, stem, msg );
        if( msg->level==FD_TOWER_SLOT_CONFIRMED_DUPLICATE ) fd_reasm_confirm( ctx->reasm, &msg->block_id );
      } else if( FD_LIKELY( sig==FD_TOWER_SIG_SLOT_IGNORED ) ) {
        fd_tower_slot_ignored_t const * msg = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
        /* Synthesize a slot-done with sentinel ULONG_MAX vote/root
           slots so downstream treats it as "no vote, no new root". */
        fd_tower_slot_done_t ignored = {
          .replay_slot = msg->slot,
          .replay_bank_idx = msg->bank_idx,
          .vote_slot = ULONG_MAX,
          .reset_slot = ctx->reset_slot, /* Use most recent reset slot */
          .reset_block_id = ctx->reset_block_id,
          .root_slot = ULONG_MAX
        };
        process_tower_slot_done( ctx, stem, &ignored, seq );
      }
      break;
    }
    case IN_KIND_REPAIR: {
      if( FD_UNLIKELY( sig==SHRED_SIG_FEC_COMPLETE || sig==SHRED_SIG_FEC_COMPLETE_LEADER ) ) {
        /* If receive a FEC complete message. */
        process_fec_complete( ctx, stem, sig, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
      }
      break;
    }
    case IN_KIND_TXSEND: {
      process_vote_txn_sent( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
      break;
    }
    case IN_KIND_GOSSIP_OUT: {
      /* Only wait-for-supermajority completion is expected here. */
      FD_TEST( sig==FD_GOSSIP_UPDATE_TAG_WFS_DONE );
      ctx->wfs_complete = 1;
      FD_LOG_NOTICE(( "Done waiting for supermajority. More than 80 percent of cluster stake has joined." ));
      break;
    }
    case IN_KIND_RPC: {
      /* RPC releases its reference on the bank whose index is in sig
         (taken in process_tower_optimistic_confirmed). */
      fd_bank_t * bank = fd_banks_bank_query( ctx->banks, sig );
      FD_TEST( bank );
      bank->refcnt--;
      /* NOTE(review): in_kind is IN_KIND_RPC in this case, so the
         ternary below always selects "rpc"; the "gui" arm is dead. */
      FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt decremented to %lu for %s", bank->idx, bank->f.slot, bank->refcnt, ctx->in_kind[ in_idx ]==IN_KIND_RPC ? "rpc" : "gui" ));
      break;
    }
    default:
      FD_LOG_ERR(( "unhandled kind %d", ctx->in_kind[ in_idx ] ));
  }

  return 0;
}
2750 :
2751 : static inline fd_replay_out_link_t
2752 : out1( fd_topo_t const * topo,
2753 : fd_topo_tile_t const * tile,
2754 0 : char const * name ) {
2755 0 : ulong idx = ULONG_MAX;
2756 :
2757 0 : for( ulong i=0UL; i<tile->out_cnt; i++ ) {
2758 0 : fd_topo_link_t const * link = &topo->links[ tile->out_link_id[ i ] ];
2759 0 : if( !strcmp( link->name, name ) ) {
2760 0 : if( FD_UNLIKELY( idx!=ULONG_MAX ) ) FD_LOG_ERR(( "tile %s:%lu had multiple output links named %s but expected one", tile->name, tile->kind_id, name ));
2761 0 : idx = i;
2762 0 : }
2763 0 : }
2764 :
2765 0 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) return (fd_replay_out_link_t){ .idx = ULONG_MAX, .mem = NULL, .chunk0 = 0, .wmark = 0, .chunk = 0 };
2766 :
2767 0 : void * mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ idx ] ].dcache_obj_id ].wksp_id ].wksp;
2768 0 : ulong chunk0 = fd_dcache_compact_chunk0( mem, topo->links[ tile->out_link_id[ idx ] ].dcache );
2769 0 : ulong wmark = fd_dcache_compact_wmark ( mem, topo->links[ tile->out_link_id[ idx ] ].dcache, topo->links[ tile->out_link_id[ idx ] ].mtu );
2770 :
2771 0 : return (fd_replay_out_link_t){ .idx = idx, .mem = mem, .chunk0 = chunk0, .wmark = wmark, .chunk = chunk0 };
2772 0 : }
2773 :
/* Privileged boot phase: runs before sandboxing, so it may read key
   files from disk and draw entropy from the OS.  Loads the identity
   (and optionally bundle vote account) public keys and seeds the
   various RNG-derived values consumed later by unprivileged_init. */
static void
privileged_init( fd_topo_t * topo,
                 fd_topo_tile_t * tile ) {
  void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );

  FD_SCRATCH_ALLOC_INIT( l, scratch );
  fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );

  if( FD_UNLIKELY( !strcmp( tile->replay.identity_key_path, "" ) ) ) FD_LOG_ERR(( "identity_key_path not set" ));

  /* Only the public half of the identity key is needed by replay. */
  ctx->identity_pubkey[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->replay.identity_key_path, /* pubkey only: */ 1 ) );
  ctx->identity_idx = 0UL;

  /* Bundles require a vote account; silently disable if unset. */
  if( FD_UNLIKELY( !tile->replay.bundle.vote_account_path[0] ) ) {
    tile->replay.bundle.enabled = 0;
  }

  /* The vote account may be given either as a base58 pubkey literal or
     as a path to a keypair file; try the literal form first. */
  if( FD_UNLIKELY( tile->replay.bundle.enabled ) ) {
    if( FD_UNLIKELY( !fd_base58_decode_32( tile->replay.bundle.vote_account_path, ctx->bundle.vote_account.uc ) ) ) {
      const uchar * vote_key = fd_keyload_load( tile->replay.bundle.vote_account_path, /* pubkey only: */ 1 );
      fd_memcpy( ctx->bundle.vote_account.uc, vote_key, 32UL );
    }
  }

  /* Seed every RNG-derived value now, while /dev/urandom (or
     equivalent) is still reachable; the sandbox applied later would
     block these syscalls. */
  FD_TEST( fd_rng_secure( &ctx->rng_seed, sizeof(ctx->rng_seed) ) );

  if( FD_UNLIKELY( !fd_rng_secure( &ctx->reasm_seed, sizeof(ulong) ) ) ) {
    FD_LOG_CRIT(( "fd_rng_secure failed" ));
  }

  if( FD_UNLIKELY( !fd_rng_secure( &ctx->vote_tracker_seed, sizeof(ulong) ) ) ) {
    FD_LOG_CRIT(( "fd_rng_secure failed" ));
  }

  if( FD_UNLIKELY( !fd_rng_secure( &ctx->block_id_map_seed, sizeof(ulong) ) ) ) {
    FD_LOG_CRIT(( "fd_rng_secure failed" ));
  }

  if( FD_UNLIKELY( !fd_rng_secure( &ctx->initial_block_id, sizeof(fd_hash_t) ) ) ) {
    FD_LOG_CRIT(( "fd_rng_secure failed" ));
  }

  if( FD_UNLIKELY( !fd_rng_secure( &ctx->runtime_stack_seed, sizeof(ulong) ) ) ) {
    FD_LOG_CRIT(( "fd_rng_secure failed" ));
  }
}
2820 :
/* Unprivileged boot phase: carves the tile scratch region into the
   replay context plus all its sub-objects, joins shared topology
   objects (store, banks, accdb, txncache, ...), and wires up input and
   output links.  Relies on the seeds drawn in privileged_init.  The
   ordering below matters: scratch allocation must match
   scratch_footprint, and several joins consume seeds/objects set up
   earlier in the function. */
static void
unprivileged_init( fd_topo_t * topo,
                   fd_topo_tile_t * tile ) {
  void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );

  ulong chain_cnt = fd_block_id_map_chain_cnt_est( tile->replay.max_live_slots );

  /* Carve the scratch region.  The sequence of APPENDs must stay in
     sync with scratch_footprint (checked at the bottom). */
  FD_SCRATCH_ALLOC_INIT( l, scratch );
  fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
  void * runtime_stack_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_runtime_stack_align(), fd_runtime_stack_footprint( FD_RUNTIME_MAX_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_STAKE_ACCOUNTS ) );
  void * block_id_arr_mem = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_block_id_ele_t), sizeof(fd_block_id_ele_t) * tile->replay.max_live_slots );
  void * block_id_map_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_block_id_map_align(), fd_block_id_map_footprint( chain_cnt ) );
  void * _txncache = FD_SCRATCH_ALLOC_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->replay.max_live_slots ) );
  void * reasm_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_reasm_align(), fd_reasm_footprint( tile->replay.fec_max ) );
  void * sched_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.sched_depth, tile->replay.max_live_slots ) );
  void * vinyl_req_pool_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_vinyl_req_pool_align(), fd_vinyl_req_pool_footprint( 1UL, 1UL ) );
  void * vote_tracker_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_vote_tracker_align(), fd_vote_tracker_footprint() );
  void * _capture_ctx = FD_SCRATCH_ALLOC_APPEND( l, fd_capture_ctx_align(), fd_capture_ctx_footprint() );
  void * dump_proto_ctx_mem = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_dump_proto_ctx_t), sizeof(fd_dump_proto_ctx_t) );
# if FD_HAS_FLATCC
  void * block_dump_ctx = NULL;
  if( FD_UNLIKELY( tile->replay.dump_block_to_pb ) ) {
    block_dump_ctx = FD_SCRATCH_ALLOC_APPEND( l, fd_block_dump_context_align(), fd_block_dump_context_footprint() );
  }
# endif

  /* runtime_stack_seed was drawn in privileged_init. */
  ctx->runtime_stack = fd_runtime_stack_join( fd_runtime_stack_new( runtime_stack_mem, FD_RUNTIME_MAX_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_STAKE_ACCOUNTS, ctx->runtime_stack_seed ) );
  FD_TEST( ctx->runtime_stack );

  ctx->wksp = topo->workspaces[ topo->objs[ tile->tile_obj_id ].wksp_id ].wksp;

  /* Join shared topology objects published in the topology pod. */
  ulong store_obj_id = fd_pod_query_ulong( topo->props, "store", ULONG_MAX );
  FD_TEST( store_obj_id!=ULONG_MAX );
  ctx->store = fd_store_join( fd_topo_obj_laddr( topo, store_obj_id ) );
  FD_TEST( ctx->store );

  ulong banks_obj_id = fd_pod_query_ulong( topo->props, "banks", ULONG_MAX );
  FD_TEST( banks_obj_id!=ULONG_MAX );

  ctx->banks = fd_banks_join( fd_topo_obj_laddr( topo, banks_obj_id ) );
  FD_TEST( ctx->banks );

  FD_MGAUGE_SET( REPLAY, MAX_LIVE_BANKS, fd_banks_pool_max_cnt( ctx->banks ) );

  ctx->frontier_cnt = 0UL;

  /* Create the boot bank at slot 0; its index is the well-known boot
     bank index. */
  fd_bank_t * bank = fd_banks_init_bank( ctx->banks );
  FD_TEST( bank );
  bank->f.slot = 0UL;
  FD_TEST( bank->idx==FD_REPLAY_BOOT_BANK_IDX );

  /* ULONG_MAX slots mark "not yet known"; initial_block_id was drawn
     randomly in privileged_init. */
  ctx->consensus_root_slot = ULONG_MAX;
  ctx->consensus_root = ctx->initial_block_id;
  ctx->published_root_slot = ULONG_MAX;

  ctx->expected_shred_version = tile->replay.expected_shred_version;
  ctx->ipecho_shred_version = 0;
  fd_memcpy( ctx->genesis_path, tile->replay.genesis_path, sizeof(ctx->genesis_path) );
  ctx->has_genesis_hash = 0;
  ctx->has_genesis_timestamp = 0;
  ctx->has_expected_genesis_timestamp = 0;
  ctx->cluster_type = FD_CLUSTER_UNKNOWN;
  ctx->hard_forks_cnt = ULONG_MAX;

  /* Bundle crank generator setup (vote account loaded in
     privileged_init). */
  if( FD_UNLIKELY( tile->replay.bundle.enabled ) ) {
    ctx->bundle.enabled = 1;
    if( FD_UNLIKELY( !fd_bundle_crank_gen_init( ctx->bundle.gen,
                                                (fd_acct_addr_t const *)tile->replay.bundle.tip_distribution_program_addr,
                                                (fd_acct_addr_t const *)tile->replay.bundle.tip_payment_program_addr,
                                                (fd_acct_addr_t const *)ctx->bundle.vote_account.uc,
                                                (fd_acct_addr_t const *)ctx->bundle.vote_account.uc, "NAN", 0UL ) ) ) {
      FD_LOG_ERR(( "failed to initialize bundle crank gen" ));
    }
  } else {
    ctx->bundle.enabled = 0;
  }

  /* Feature set: baseline plus any configured one-off activations. */
  fd_features_t * features = &bank->f.features;
  fd_features_enable_cleaned_up( features );

  char const * one_off_features[ 16UL ];
  FD_TEST( tile->replay.enable_features_cnt<=sizeof(one_off_features)/sizeof(one_off_features[0]) );
  for( ulong i=0UL; i<tile->replay.enable_features_cnt; i++ ) one_off_features[ i ] = tile->replay.enable_features[i];
  fd_features_enable_one_offs( features, one_off_features, (uint)tile->replay.enable_features_cnt, 0UL );

  /* Account database: v1 (funk only) vs v2 (funk + vinyl) is selected
     by the presence of the vinyl_data object in the topology. */
  fd_topo_obj_t const * vinyl_data = fd_topo_find_tile_obj( topo, tile, "vinyl_data" );

  ulong progcache_obj_id; FD_TEST( (progcache_obj_id = fd_pod_query_ulong( topo->props, "progcache", ULONG_MAX ) )!=ULONG_MAX );
  FD_TEST( fd_progcache_shmem_join( ctx->progcache, fd_topo_obj_laddr( topo, progcache_obj_id ) ) );

  fd_wksp_t * progcache_wksp = fd_wksp_containing( ctx->progcache->shmem );
  FD_TEST( progcache_wksp );
  fd_wksp_mon_init( ctx->progcache_wksp_mon, progcache_wksp, FD_WKSP_MON_DEFAULT_RATE, fd_tickcount() );

  ulong funk_obj_id; FD_TEST( (funk_obj_id = fd_pod_query_ulong( topo->props, "funk", ULONG_MAX ) )!=ULONG_MAX );
  ulong funk_locks_obj_id; FD_TEST( (funk_locks_obj_id = fd_pod_query_ulong( topo->props, "funk_locks", ULONG_MAX ) )!=ULONG_MAX );
  ulong max_depth = tile->replay.max_live_slots + tile->replay.write_delay_slots;
  if( !vinyl_data ) {
    FD_TEST( fd_accdb_admin_v1_init( ctx->accdb_admin,
                                     fd_topo_obj_laddr( topo, funk_obj_id ),
                                     fd_topo_obj_laddr( topo, funk_locks_obj_id ) ) );
  } else {
    fd_topo_obj_t const * vinyl_rq = fd_topo_find_tile_obj( topo, tile, "vinyl_rq" );
    fd_topo_obj_t const * vinyl_req_pool = fd_topo_find_tile_obj( topo, tile, "vinyl_rpool" );
    FD_TEST( fd_accdb_admin_v2_init( ctx->accdb_admin,
                                     fd_topo_obj_laddr( topo, funk_obj_id ),
                                     fd_topo_obj_laddr( topo, funk_locks_obj_id ),
                                     fd_topo_obj_laddr( topo, vinyl_rq->id ),
                                     topo->workspaces[ vinyl_data->wksp_id ].wksp,
                                     fd_topo_obj_laddr( topo, vinyl_req_pool->id ),
                                     vinyl_rq->id,
                                     max_depth ) );
    fd_accdb_admin_v2_delay_set( ctx->accdb_admin, tile->replay.write_delay_slots );
  }
  fd_accdb_init_from_topo( ctx->accdb, topo, tile, max_depth );

  fd_wksp_t * funk_wksp = fd_wksp_containing( fd_topo_obj_laddr( topo, funk_obj_id ) );
  FD_TEST( funk_wksp );
  fd_wksp_mon_init( ctx->accdb_cache_wksp_mon, funk_wksp, FD_WKSP_MON_DEFAULT_RATE, fd_tickcount() );

  /* Transaction cache: local join over a shared shmem object. */
  void * _txncache_shmem = fd_topo_obj_laddr( topo, tile->replay.txncache_obj_id );
  fd_txncache_shmem_t * txncache_shmem = fd_txncache_shmem_join( _txncache_shmem );
  FD_TEST( txncache_shmem );
  ctx->txncache = fd_txncache_join( fd_txncache_new( _txncache, txncache_shmem ) );
  FD_TEST( ctx->txncache );

  /* Optional solcap capture (enabled by non-empty path). */
  ctx->capture_ctx = NULL;
  if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) ) ) {
    ctx->capture_ctx = fd_capture_ctx_join( fd_capture_ctx_new( _capture_ctx ) );
    ctx->capture_ctx->solcap_start_slot = tile->replay.capture_start_slot;
    ctx->capture_ctx->capture_solcap = 1;
  }

  /* Optional protobuf dumping (enabled by non-empty dir). */
  ctx->dump_proto_ctx = NULL;
  if( FD_UNLIKELY( strcmp( "", tile->replay.dump_proto_dir ) ) ) {
    ctx->dump_proto_ctx = dump_proto_ctx_mem;
    ctx->dump_proto_ctx->dump_proto_output_dir = tile->replay.dump_proto_dir;
    if( FD_LIKELY( tile->replay.dump_block_to_pb ) ) {
      ctx->dump_proto_ctx->dump_block_to_pb = !!tile->replay.dump_block_to_pb;
    }
  }

# if FD_HAS_FLATCC
  if( FD_UNLIKELY( tile->replay.dump_block_to_pb ) ) {
    ctx->block_dump_ctx = fd_block_dump_context_join( fd_block_dump_context_new( block_dump_ctx ) );
  } else {
    ctx->block_dump_ctx = NULL;
  }
# endif

  ctx->is_booted = 0;

  ctx->larger_max_cost_per_block = tile->replay.larger_max_cost_per_block;

  /* rng_seed / reasm_seed drawn in privileged_init. */
  FD_TEST( fd_rng_new( ctx->rng, ctx->rng_seed, 0UL ) );

  ctx->reasm = fd_reasm_join( fd_reasm_new( reasm_mem, tile->replay.fec_max, ctx->reasm_seed ) );
  FD_TEST( ctx->reasm );

  /* Scheduler sized by the number of execrp tiles in the topology. */
  ctx->sched = fd_sched_join( fd_sched_new( sched_mem, ctx->rng, tile->replay.sched_depth, tile->replay.max_live_slots, fd_topo_tile_name_cnt( topo, "execrp" ) ) );
  FD_TEST( ctx->sched );

  ctx->in_cnt = tile->in_cnt;
  ctx->execrp_idle_cnt = 0UL;

  FD_TEST( fd_vinyl_req_pool_new( vinyl_req_pool_mem, 1UL, 1UL ) );

  ctx->vote_tracker = fd_vote_tracker_join( fd_vote_tracker_new( vote_tracker_mem, ctx->vote_tracker_seed ) );
  FD_TEST( ctx->vote_tracker );

  ctx->identity_vote_rooted = 0;

  ctx->wait_for_vote_to_start_leader = tile->replay.wait_for_vote_to_start_leader;

  /* Wait-for-supermajority is enabled iff the configured bank hash is
     nonzero; when disabled it is considered already complete. */
  ctx->wfs_enabled = memcmp( tile->replay.wait_for_supermajority_with_bank_hash.uc, ((fd_pubkey_t){ 0 }).uc, sizeof(fd_pubkey_t) );
  ctx->expected_bank_hash = tile->replay.wait_for_supermajority_with_bank_hash;
  ctx->wfs_complete = !ctx->wfs_enabled;

  ctx->mleaders = fd_multi_epoch_leaders_join( fd_multi_epoch_leaders_new( ctx->mleaders_mem ) );
  FD_TEST( ctx->mleaders );

  /* Leader pipeline state; leader mode requires a pack tile. */
  ctx->is_leader = 0;
  ctx->supports_leader = fd_topo_find_tile( topo, "pack", 0UL )!=ULONG_MAX;
  ctx->reset_slot = 0UL;
  ctx->reset_bank = NULL;
  ctx->reset_block_id = ctx->initial_block_id;
  ctx->reset_timestamp_nanos = 0UL;
  ctx->next_leader_slot = ULONG_MAX;
  ctx->next_leader_tickcount = LONG_MAX;
  ctx->highwater_leader_slot = ULONG_MAX;
  ctx->slot_duration_nanos = 350L*1000L*1000L; /* TODO: Not fixed ... not always 350ms ... */
  ctx->slot_duration_ticks = (double)ctx->slot_duration_nanos*fd_tempo_tick_per_ns( NULL );
  ctx->leader_bank = NULL;

  /* Block id tracking array + map (seed drawn in privileged_init). */
  ctx->block_id_len = tile->replay.max_live_slots;
  ctx->block_id_arr = (fd_block_id_ele_t *)block_id_arr_mem;
  ctx->block_id_map = fd_block_id_map_join( fd_block_id_map_new( block_id_map_mem, chain_cnt, ctx->block_id_map_seed ) );
  FD_TEST( ctx->block_id_map );
  for( ulong i=0UL; i<tile->replay.max_live_slots; i++ ) ctx->block_id_arr[ i ].block_id_seen = 0;

  ctx->resolv_tile_cnt = fd_topo_tile_name_cnt( topo, "resolv" );

  ctx->keyswitch = fd_keyswitch_join( fd_topo_obj_laddr( topo, tile->id_keyswitch_obj_id ) );
  FD_TEST( ctx->keyswitch );
  ctx->halt_leader = 0;

  /* Classify each input link by name; dcache-backed links also get
     their chunk bounds recorded for the bounds check in
     returnable_frag. */
  FD_TEST( tile->in_cnt<=sizeof(ctx->in)/sizeof(ctx->in[0]) );
  for( ulong i=0UL; i<tile->in_cnt; i++ ) {
    fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
    fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];

    if( FD_LIKELY( link->dcache ) ) {
      ctx->in[ i ].mem = link_wksp->wksp;
      ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
      ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
      ctx->in[ i ].mtu = link->mtu;
    }

    if( !strcmp( link->name, "genesi_out" ) ) ctx->in_kind[ i ] = IN_KIND_GENESIS;
    else if( !strcmp( link->name, "ipecho_out" ) ) ctx->in_kind[ i ] = IN_KIND_IPECHO;
    else if( !strcmp( link->name, "snapin_manif" ) ) ctx->in_kind[ i ] = IN_KIND_SNAP;
    else if( !strcmp( link->name, "execrp_replay" ) ) ctx->in_kind[ i ] = IN_KIND_EXECRP;
    else if( !strcmp( link->name, "tower_out" ) ) ctx->in_kind[ i ] = IN_KIND_TOWER;
    else if( !strcmp( link->name, "poh_replay" ) ) ctx->in_kind[ i ] = IN_KIND_POH;
    else if( !strcmp( link->name, "resolv_replay" ) ) ctx->in_kind[ i ] = IN_KIND_RESOLV;
    else if( !strcmp( link->name, "repair_out" ) ) ctx->in_kind[ i ] = IN_KIND_REPAIR;
    else if( !strcmp( link->name, "txsend_out" ) ) ctx->in_kind[ i ] = IN_KIND_TXSEND;
    else if( !strcmp( link->name, "rpc_replay" ) ) ctx->in_kind[ i ] = IN_KIND_RPC;
    else if( !strcmp( link->name, "gossip_out" ) ) ctx->in_kind[ i ] = IN_KIND_GOSSIP_OUT;
    else FD_LOG_ERR(( "unexpected input link name %s", link->name ));
  }

  /* Mandatory out links. */
  *ctx->epoch_out = out1( topo, tile, "replay_epoch" ); FD_TEST( ctx->epoch_out->idx!=ULONG_MAX );
  *ctx->replay_out = out1( topo, tile, "replay_out" ); FD_TEST( ctx->replay_out->idx!=ULONG_MAX );
  *ctx->exec_out = out1( topo, tile, "replay_execrp" ); FD_TEST( ctx->exec_out->idx!=ULONG_MAX );

  ctx->rpc_enabled = fd_topo_find_tile( topo, "rpc", 0UL )!=ULONG_MAX;

  /* When solcap capture is on, wire up the cap_repl out link and find
     the solcap consumer's flow-control fseq. */
  if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) ) ) {
    ulong idx = fd_topo_find_tile_out_link( topo, tile, "cap_repl", 0UL );
    FD_TEST( idx!=ULONG_MAX );
    fd_topo_link_t * link = &topo->links[ tile->out_link_id[ idx ] ];


    fd_capture_link_buf_t * cap_repl_out = ctx->cap_repl_out;
    cap_repl_out->base.vt = &fd_capture_link_buf_vt;
    cap_repl_out->idx = idx;
    cap_repl_out->mem = topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ].wksp;
    cap_repl_out->chunk0 = fd_dcache_compact_chunk0( cap_repl_out->mem, link->dcache );
    cap_repl_out->wmark = fd_dcache_compact_wmark( cap_repl_out->mem, link->dcache, link->mtu );
    cap_repl_out->chunk = cap_repl_out->chunk0;
    cap_repl_out->mcache = link->mcache;
    cap_repl_out->depth = fd_mcache_depth( link->mcache );
    cap_repl_out->seq = 0UL;

    ctx->capture_ctx->capctx_type.buf = cap_repl_out;
    ctx->capture_ctx->capture_link = &cap_repl_out->base;
    ctx->capture_ctx->current_txn_idx = 0UL;


    ulong consumer_tile_idx = fd_topo_find_tile( topo, "solcap", 0UL );
    fd_topo_tile_t * consumer_tile = &topo->tiles[ consumer_tile_idx ];
    cap_repl_out->fseq = NULL;
    for( ulong j = 0UL; j < consumer_tile->in_cnt; j++ ) {
      if( FD_UNLIKELY( consumer_tile->in_link_id[ j ] == link->id ) ) {
        cap_repl_out->fseq = fd_fseq_join( fd_topo_obj_laddr( topo, consumer_tile->in_link_fseq_obj_id[ j ] ) );
        FD_TEST( cap_repl_out->fseq );
        break;
      }
    }
  }

  /* Metrics histograms. */
  fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) );

  fd_histf_join( fd_histf_new( ctx->metrics.store_query_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_QUERY_WAIT ),
                                                              FD_MHIST_SECONDS_MAX( REPLAY, STORE_QUERY_WAIT ) ) );
  fd_histf_join( fd_histf_new( ctx->metrics.store_query_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_QUERY_WORK ),
                                                              FD_MHIST_SECONDS_MAX( REPLAY, STORE_QUERY_WORK ) ) );

  fd_histf_join( fd_histf_new( ctx->metrics.root_slot_dur, FD_MHIST_SECONDS_MIN( REPLAY, ROOT_SLOT_DURATION_SECONDS ),
                                                           FD_MHIST_SECONDS_MAX( REPLAY, ROOT_SLOT_DURATION_SECONDS ) ) );
  fd_histf_join( fd_histf_new( ctx->metrics.root_account_dur, FD_MHIST_SECONDS_MIN( REPLAY, ROOT_ACCOUNT_DURATION_SECONDS ),
                                                              FD_MHIST_SECONDS_MAX( REPLAY, ROOT_ACCOUNT_DURATION_SECONDS ) ) );

  /* Ensure precompiles are available, crash fast otherwise */
  fd_precompiles();

  /* Verify scratch allocation stayed within the declared footprint. */
  ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
  if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
    FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
}
3112 :
/* Install the replay tile's generated seccomp-BPF policy into out,
   permitting writes to the private log file descriptor.  Returns the
   instruction count of the installed filter. */
static ulong
populate_allowed_seccomp( fd_topo_t const * topo FD_FN_UNUSED,
                          fd_topo_tile_t const * tile FD_FN_UNUSED,
                          ulong out_cnt,
                          struct sock_filter * out ) {

  populate_sock_filter_policy_fd_replay_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
  return sock_filter_policy_fd_replay_tile_instr_cnt;
}
3122 :
3123 : static ulong
3124 : populate_allowed_fds( fd_topo_t const * topo FD_FN_UNUSED,
3125 : fd_topo_tile_t const * tile FD_FN_UNUSED,
3126 : ulong out_fds_cnt,
3127 0 : int * out_fds ) {
3128 :
3129 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
3130 :
3131 0 : ulong out_cnt = 0UL;
3132 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
3133 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
3134 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
3135 0 : return out_cnt;
3136 0 : }
3137 :
/* Periodic housekeeping: drive the identity keyswitch state machine.
   A pending unhalt clears the halt flag and completes the switch; a
   pending switch halts leadership and, if we are not currently leader,
   performs the identity switch immediately (otherwise it is deferred
   until leadership ends). */
static inline void
during_housekeeping( fd_replay_tile_t * ctx ) {
  if( FD_UNLIKELY( fd_keyswitch_state_query( ctx->keyswitch )==FD_KEYSWITCH_STATE_UNHALT_PENDING ) ) {
    /* Unhalt should only ever be requested while halted. */
    FD_CRIT( ctx->halt_leader, "state machine corruption" );
    FD_LOG_DEBUG(( "keyswitch: unhalting leader" ));
    ctx->halt_leader = 0;
    fd_keyswitch_state( ctx->keyswitch, FD_KEYSWITCH_STATE_COMPLETED );
  }

  /* Re-query: the state may have been advanced by the branch above. */
  if( FD_UNLIKELY( fd_keyswitch_state_query( ctx->keyswitch )==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
    FD_LOG_DEBUG(( "keyswitch: halting leader" ));
    ctx->halt_leader = 1;
    if( !ctx->is_leader ) maybe_switch_identity( ctx );
  }
}
3153 :
#undef DEBUG_LOGGING

/* Stem template configuration for the replay tile. */

/* counting carefully, after_credit can generate at most 7 frags and
   returnable_frag boot_genesis can also generate at most 7 frags, so 14
   is a conservative bound. */
#define STEM_BURST (14UL)

/* fd_tempo_lazy_default( 16384 ) where 16384 is the minimum out-link
   depth (i.e. cr_max) but excludes replay_epoch, which is so infrequent
   credit availability is a non-issue. */
#define STEM_LAZY ((long)36865)

#define STEM_CALLBACK_CONTEXT_TYPE fd_replay_tile_t
#define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_replay_tile_t)

/* Callbacks implemented above in this file. */
#define STEM_CALLBACK_METRICS_WRITE metrics_write
#define STEM_CALLBACK_AFTER_CREDIT after_credit
#define STEM_CALLBACK_BEFORE_FRAG before_frag
#define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
#define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping

#include "../../disco/stem/fd_stem.c"
3176 :
/* Tile descriptor exported to the topology runner: wires the replay
   tile's init, sandboxing, and run entry points together. */
fd_topo_run_tile_t fd_tile_replay = {
  .name                     = "replay",
  .populate_allowed_seccomp = populate_allowed_seccomp,
  .populate_allowed_fds     = populate_allowed_fds,
  .scratch_align            = scratch_align,
  .scratch_footprint        = scratch_footprint,
  .privileged_init          = privileged_init,
  .unprivileged_init        = unprivileged_init,
  .run                      = stem_run,
};
|