Line data Source code
1 : #include "fd_replay_tile.h"
2 : #include "fd_sched.h"
3 : #include "fd_exec.h"
4 : #include "fd_vote_tracker.h"
5 : #include "generated/fd_replay_tile_seccomp.h"
6 :
7 : #include "../genesis/fd_genesi_tile.h"
8 : #include "../poh/fd_poh.h"
9 : #include "../poh/fd_poh_tile.h"
10 : #include "../tower/fd_tower_tile.h"
11 : #include "../resolv/fd_resolv_tile.h"
12 : #include "../restore/utils/fd_ssload.h"
13 :
14 : #include "../../disco/tiles.h"
15 : #include "../../disco/fd_txn_m.h"
16 : #include "../../disco/store/fd_store.h"
17 : #include "../../disco/pack/fd_pack.h"
18 : #include "../../discof/reasm/fd_reasm.h"
19 : #include "../../disco/keyguard/fd_keyload.h"
20 : #include "../../disco/genesis/fd_genesis_cluster.h"
21 : #include "../../util/pod/fd_pod.h"
22 : #include "../../flamenco/accdb/fd_accdb_admin.h"
23 : #include "../../flamenco/accdb/fd_accdb_impl_v1.h"
24 : #include "../../flamenco/rewards/fd_rewards.h"
25 : #include "../../flamenco/leaders/fd_multi_epoch_leaders.h"
26 : #include "../../flamenco/progcache/fd_progcache_admin.h"
27 : #include "../../disco/metrics/fd_metrics.h"
28 :
29 : #include "../../flamenco/runtime/fd_runtime.h"
30 : #include "../../flamenco/runtime/fd_runtime_stack.h"
31 : #include "../../flamenco/runtime/fd_genesis_parse.h"
32 : #include "../../flamenco/fd_flamenco_base.h"
33 : #include "../../flamenco/runtime/sysvar/fd_sysvar_epoch_schedule.h"
34 :
35 : #include "../../flamenco/runtime/tests/fd_dump_pb.h"
36 :
37 : #include <errno.h>
38 : #include <stdio.h>
39 :
40 : /* Replay concepts:
41 :
42 : - Blocks are aggregations of entries aka. microblocks which are
43 : groupings of txns and are constructed by the block producer (see
44 : fd_pack).
45 :
46 : - Entries are grouped into entry batches by the block producer (see
47 : fd_pack / fd_shredder).
48 :
49 : - Entry batches are divided into chunks known as shreds by the block
50 : producer (see fd_shredder).
51 :
52 : - Shreds are grouped into forward-error-correction sets (FEC sets) by
53 : the block producer (see fd_shredder).
54 :
55 : - Shreds are transmitted to the rest of the cluster via the Turbine
56 : protocol (see fd_shredder / fd_shred).
57 :
58 : - Once enough shreds within a FEC set are received to recover the
59 : entirety of the shred data encoded by that FEC set, the receiver
60 : can "complete" the FEC set (see fd_fec_resolver).
61 :
62 : - If shreds in the FEC set are missing such that it can't complete,
63 : the receiver can use the Repair protocol to request missing shreds
64 : in FEC set (see fd_repair).
65 :
66 : - The current Repair protocol does not support requesting coding
67 : shreds. As a result, some FEC sets might be actually complete
68 : (contain all data shreds). Repair currently hacks around this by
   forcing completion but the long-term solution is to add support for
   repairing coding shreds via Repair.
71 :
72 : - FEC sets are delivered in partial-order to the Replay tile by the
   Repair tile. Currently Replay only supports replaying entry batches
   so FEC sets need to be reassembled into an entry batch before they
   can be replayed. The new Dispatcher will change this by taking a FEC
   set as input instead. */
77 :
/* Tags classifying each inbound link by the producer tile; stored in
   fd_replay_tile_t::in_kind[] and switched on when processing frags. */
#define IN_KIND_SNAP    ( 0)
#define IN_KIND_GENESIS ( 1)
#define IN_KIND_IPECHO  ( 2)
#define IN_KIND_TOWER   ( 3)
#define IN_KIND_RESOLV  ( 4)
#define IN_KIND_POH     ( 5)
#define IN_KIND_EXEC    ( 6)
#define IN_KIND_SHRED   ( 7)
#define IN_KIND_VTXN    ( 8)
#define IN_KIND_GUI     ( 9)
#define IN_KIND_RPC     (10)

/* Compile-time switch for verbose replay debug logging. */
#define DEBUG_LOGGING 0

/* The first bank that the replay tile produces either for genesis
   or the snapshot boot will always be at bank index 0. */
#define FD_REPLAY_BOOT_BANK_IDX (0UL)
95 :
/* Descriptor for one inbound link joined by the replay tile.  Fields
   follow the usual fd_disco dcache addressing convention — assumed
   semantics below, confirm against the tile init code. */
struct fd_replay_in_link {
  fd_wksp_t * mem;    /* workspace backing the link's dcache */
  ulong       chunk0; /* presumably first valid chunk index — confirm */
  ulong       wmark;  /* presumably last valid chunk index (watermark) */
  ulong       mtu;    /* maximum frag payload size on this link */
};

typedef struct fd_replay_in_link fd_replay_in_link_t;
104 :
/* Descriptor for one outbound link published to by the replay tile. */
struct fd_replay_out_link {
  ulong       idx;    /* output link index passed to fd_stem_publish */
  fd_wksp_t * mem;    /* workspace backing the link's dcache */
  ulong       chunk0; /* presumably first valid chunk index — confirm */
  ulong       wmark;  /* presumably last valid chunk index (watermark) */
  ulong       chunk;  /* next chunk to write (advanced after publish) */
};

typedef struct fd_replay_out_link fd_replay_out_link_t;
114 :
/* fd_block_id_map is a simple map of block-ids to bank indices.  The
   map sits on top of an array of fd_block_id_ele_t.  This serves as a
   translation layer between block ids to bank indices: the element's
   position in the backing array is the bank index (see
   fd_block_id_ele_get_idx). */

struct fd_block_id_ele {
  fd_hash_t block_id; /* map key */
  ulong     slot;     /* = FD_SLOT_NULL if not initialized */
  ulong     next_;    /* internal: fd_map_chain link, do not touch */
};
typedef struct fd_block_id_ele fd_block_id_ele_t;

/* Instantiate a chained hash map keyed by block_id over the element
   array above (see util/tmpl/fd_map_chain.c for the generated API). */
#define MAP_NAME  fd_block_id_map
#define MAP_ELE_T fd_block_id_ele_t
#define MAP_KEY_T fd_hash_t
#define MAP_KEY   block_id
#define MAP_NEXT  next_
#define MAP_KEY_EQ(k0,k1)      (!memcmp((k0),(k1), sizeof(fd_hash_t)))
#define MAP_KEY_HASH(key,seed) (fd_hash((seed),(key),sizeof(fd_hash_t)))
#include "../../util/tmpl/fd_map_chain.c"
134 :
135 : static inline ulong
136 0 : fd_block_id_ele_get_idx( fd_block_id_ele_t * ele_arr, fd_block_id_ele_t * ele ) {
137 0 : return (ulong)(ele - ele_arr);
138 0 : }
139 :
/* fd_replay_tile_t is the replay tile's full in-memory state, laid out
   in the tile scratch region (see scratch_footprint). */
struct fd_replay_tile {
  fd_wksp_t * wksp;

  fd_accdb_admin_t accdb_admin[1];
  fd_accdb_user_t accdb[1];
  fd_progcache_admin_t progcache_admin[1];

  fd_txncache_t * txncache;
  fd_store_t * store;
  fd_banks_t * banks;

  /* This flag is 1 if we have seen a vote signature that our node has
     sent out get rooted at least one time.  The value is 0 otherwise.
     We can't become leader and pack blocks until this flag has been
     set.  This parallels the Agave 'has_new_vote_been_rooted'.

     TODO: Add a flag to the toml to make this optional. */
  int has_identity_vote_rooted;

  ulong reasm_seed;         /* hash seed for the FEC reassembler */
  fd_reasm_t * reasm;

  /* Replay state machine. */
  fd_sched_t * sched;
  ulong exec_cnt;           /* number of downstream exec tiles */
  fd_replay_out_link_t exec_out[ 1 ]; /* Sending work down to exec tiles */

  ulong vote_tracker_seed;
  fd_vote_tracker_t * vote_tracker;

  int has_genesis_hash;
  char genesis_path[ PATH_MAX ];
  uchar genesis_hash[ 32UL ];
  ulong cluster_type;

#define FD_REPLAY_HARD_FORKS_MAX (64UL)
  ulong hard_forks_cnt;
  ulong hard_forks[ FD_REPLAY_HARD_FORKS_MAX ];

  ushort expected_shred_version; /* from config; 0 presumably means unknown — confirm */
  ushort ipecho_shred_version;   /* shred version learned via ipecho */

  /* A note on publishing ...

     The watermarks are used to publish our fork-aware structures.  For
     example, store, banks, and txncache need to be published to release
     resources occupied by rooted or dead blocks.  In general,
     publishing has the effect of pruning forks in those structures,
     indicating that it is ok to release the memory being occupied by
     the blocks on said forks.  Tower is responsible for informing us of
     the latest block on the consensus rooted fork.  As soon as we can,
     we should move the published root as close as possible to the
     latest consensus root, publishing/pruning everything on the fork
     tree along the way.  That is, all the blocks that directly descend
     from the current published root (inclusive) to the new published
     root (exclusive) on the rooted fork, as well as all the minority
     forks that branch from said blocks.

     Ideally, we'd move the published root to the consensus root
     immediately upon receiving a new consensus root.  However, that's
     not always safe to do.  One thing we need to be careful about is
     making sure that there are no more users/consumers of
     soon-to-be-pruned blocks, lest a use-after-free occurs.  This can
     be done by using a reference counter for each block.  Any
     concurrent activity, such as transaction execution in the exec
     tiles, should retain a refcnt on the block for as
     long as it needs access to the shared fork-aware structures related
     to that block.  Eventually, refcnt on a given block will drop down
     to 0 as the block either finishes replaying or gets marked as dead,
     and any other tile that has retained a refcnt on the block releases
     it.  At that point, it becomes a candidate for pruning.  The key to
     safe publishing then becomes figuring out how far we could advance
     the published root, such that every minority fork branching off of
     blocks in between the current published root (inclusive) and the
     new published root (exclusive) is safe to be pruned.  This is a
     straightforward tree traversal, where if a block B on the rooted
     fork has refcnt 0, and all minority forks branching off of B also
     have refcnt 0, then B is safe to be pruned.  We advance the
     published root to the farthest consecutively prunable block on the
     rooted fork.  Note that reasm presents the replay tile with a clean
     view of the world where every block is chained off of a parent
     block.  So there are no orphaned/dangling tree nodes to worry
     about.  The world is a nice single tree as far as replay is
     concerned.

     In the following fork tree, every node is a block and the number in
     parentheses is the refcnt on the block.  The chain marked with
     double slashes is the rooted fork.  Suppose the published root is
     at block P, and consensus root is at block T.  We can't publish
     past block P because Q has refcnt 1.


                  P(0)
                 /    \\
              Q(1)     A(0)
             /        ||    \
          X(0)       B(0)    C(0)
         /           ||   \
       Y(0)          M(0)  R(0)
                  /  ||  \
              D(2)  T(0)  J(0)  L(0)
                     ||
                     ..
                     ..
                     ..
                     ||
        blocks we might be actively replaying


     When refcnt on Q drops to 0, we would be able to advance the
     published root to block M, because blocks P, A, and B, as well as
     all subtrees branching off of them, have refcnt 0, and therefore
     can be pruned.  Block M itself cannot be pruned yet because its
     child block D has refcnt 2.  After publishing/pruning, the fork
     tree would be:


                     M(0)
                  /  ||
              D(2)  T(0)
                     ||
                     ..
                     ..
                     ..
                     ||
        blocks we might be actively replaying


     As a result, the shared fork-aware structures can free resources
     for blocks P, A, B, and all subtrees branching off of them.

     For the reference counting part, the replay tile is the sole entity
     that can update the refcnt.  This ensures that all refcnt increment
     and decrement attempts are serialized at the replay tile, and that
     there are no racy resurrection of a soon-to-be-pruned block.  If a
     refcnt increment request arrives after a block has been pruned,
     replay simply rejects the request.

     A note on the implementation of the above ...

     Upon receiving a new consensus root, we descend down the rooted
     fork from the current published root to the new consensus root.  On
     each node/block of the rooted fork, we do a summation of the refcnt
     on the block and all the minority fork blocks branching from the
     block.  If the summation is 0, the block is safe for pruning.  We
     advance the published root to the far end of the consecutive run of
     0 refcnt sums originating from the current published root.  On our
     descent down the minority forks, we also mark any block that hasn't
     finished replaying as dead, so we don't waste time executing them.
     No more transactions shall be dispatched for execution from dead
     blocks.

     Blocks start out with a refcnt of 0.  Other tiles may send a
     request to the replay tile for a reference on a block.  The
     transaction dispatcher is another source of refcnt updates.  On
     every dispatch of a transaction for block B, we increment the
     refcnt for B.  And on every transaction finalization, we decrement
     the refcnt for B.  This means that whenever the refcnt on a block
     is 0, there is no more reference on that block from the execution
     pipeline.  While it might be tempting to simply increment the
     refcnt once when we start replaying a block, and decrement the
     refcnt once when we finish a block, this more fine-grained refcnt
     update strategy allows for aborting and potentially immediate
     pruning of blocks under interleaved block replay.  Upon receiving a
     new consensus root, we can simply look at the refcnt on minority
     fork blocks, and a refcnt of 0 would imply that the block is safe
     for pruning, even if we haven't finished replaying it.  Without the
     fine-grained refcnt, we would need to first stop dispatching from
     the aborted block, and then wait for a full drain of the execution
     pipeline to know for sure that there are no more in-flight
     transactions executing on the aborted block.  Note that this will
     allow the refcnt on any block to transiently drop down to 0.  We
     will not mistakenly prune an actively replaying block, aka a leaf
     node, that is chaining off of the rooted fork, because the
     consensus root is always an ancestor of the actively replaying tip.
     */
  fd_hash_t consensus_root;          /* The most recent block to have reached max lockout in the tower. */
  ulong     consensus_root_slot;     /* slot number of the above. */
  ulong     consensus_root_bank_idx; /* bank index of the above. */
  ulong     published_root_slot;     /* slot number of the published root. */
  ulong     published_root_bank_idx; /* bank index of the published root. */

  /* We need to maintain a tile-local mapping of block-ids to bank index
     and vice versa.  This translation layer is needed for conversion
     since tower operates on block-ids and downstream consumers of FEC
     sets operate on bank indices.  This mapping must happen both ways:
     1. tower sends us block ids and we must map them to bank indices.
     2. when a block is completed, we must map the bank index to a block
        id to send a slot complete message to tower. */
  ulong               block_id_len;
  fd_block_id_ele_t * block_id_arr;
  ulong               block_id_map_seed;
  fd_block_id_map_t * block_id_map;

  /* Capture-related configs */
  fd_capture_ctx_t *    capture_ctx;
  FILE *                capture_file;
  fd_capture_link_buf_t cap_repl_out[1];

  /* Whether the runtime has been booted either from snapshot loading
     or from genesis. */
  int is_booted;

  /* Buffer to store vote towers that need to be published to the Tower
     tile.
     NOTE(review): no buffer field follows this comment — it may be
     stale; confirm against the rest of the file. */

  fd_multi_epoch_leaders_t * mleaders;

  int larger_max_cost_per_block;

  fd_pubkey_t identity_pubkey[1]; /* TODO: Keyswitch */

  /* When we transition to becoming leader, we can only unbecome the
     leader if we have received a block id from the FEC reassembler, and
     a message from PoH that the leader slot has ended.  After both of
     these conditions are met, then we are free to unbecome the leader.
     */
  int         is_leader;
  int         recv_poh;
  int         recv_block_id;
  ulong       next_leader_slot;
  long        next_leader_tickcount;
  ulong       highwater_leader_slot;
  ulong       reset_slot;
  fd_bank_t * reset_bank;
  fd_hash_t   reset_block_id;
  long        reset_timestamp_nanos;
  double      slot_duration_nanos;
  double      slot_duration_ticks;
  fd_bank_t * leader_bank; /* ==NULL if not currently the leader */

  ulong resolv_tile_cnt;

  int                 in_kind[ 64 ]; /* IN_KIND_* per inbound link */
  fd_replay_in_link_t in[ 64 ];

  fd_replay_out_link_t replay_out[1];

  fd_replay_out_link_t stake_out[1];

  /* The gui tile needs to reliably own a reference to the most recent
     completed active bank.  Replay needs to know if the gui as a
     consumer is enabled so it can increment the bank's refcnt before
     publishing the bank_idx to the gui. */
  int gui_enabled;
  int rpc_enabled;

# if FD_HAS_FLATCC
  /* For dumping blocks to protobuf.  For backtest only. */
  fd_block_dump_ctx_t * block_dump_ctx;
# endif

  /* We need a few pieces of information to compute the right addresses
     for bundle crank information that we need to send to pack. */
  struct {
    int                   enabled;
    fd_pubkey_t           vote_account;
    fd_bundle_crank_gen_t gen[1];
  } bundle;

  /* Tile-local metrics, flushed to the metrics workspace by
     metrics_write. */
  struct {
    fd_histf_t store_read_wait[ 1 ];
    fd_histf_t store_read_work[ 1 ];
    fd_histf_t store_publish_wait[ 1 ];
    fd_histf_t store_publish_work[ 1 ];
    fd_histf_t store_link_wait[ 1 ];
    fd_histf_t store_link_work[ 1 ];

    ulong slots_total;
    ulong transactions_total;

    ulong reasm_latest_slot;
    ulong reasm_latest_fec_idx;

    ulong sched_full;
    ulong reasm_empty;
    ulong leader_bid_wait;
    ulong banks_full;
  } metrics;

  uchar __attribute__((aligned(FD_MULTI_EPOCH_LEADERS_ALIGN))) mleaders_mem[ FD_MULTI_EPOCH_LEADERS_FOOTPRINT ];

  fd_runtime_stack_t runtime_stack;
};

typedef struct fd_replay_tile fd_replay_tile_t;
426 :
/* scratch_align returns the required byte alignment of the replay
   tile's scratch region. */
FD_FN_CONST static inline ulong
scratch_align( void ) {
  return 128UL;
}
/* scratch_footprint returns the total scratch footprint (in bytes) of
   the replay tile for the given topology configuration.  The append
   order below presumably mirrors the allocation order in the tile's
   init path — confirm before reordering. */
FD_FN_PURE static inline ulong
scratch_footprint( fd_topo_tile_t const * tile ) {
  /* Size the block-id map's chain count from the max live slot count. */
  ulong chain_cnt = fd_block_id_map_chain_cnt_est( tile->replay.max_live_slots );

  ulong l = FD_LAYOUT_INIT;
  l = FD_LAYOUT_APPEND( l, alignof(fd_replay_tile_t),  sizeof(fd_replay_tile_t) );
  l = FD_LAYOUT_APPEND( l, alignof(fd_block_id_ele_t), sizeof(fd_block_id_ele_t) * tile->replay.max_live_slots );
  l = FD_LAYOUT_APPEND( l, fd_block_id_map_align(),    fd_block_id_map_footprint( chain_cnt ) );
  l = FD_LAYOUT_APPEND( l, fd_txncache_align(),        fd_txncache_footprint( tile->replay.max_live_slots ) );
  l = FD_LAYOUT_APPEND( l, fd_reasm_align(),           fd_reasm_footprint( 1 << 20 ) );
  l = FD_LAYOUT_APPEND( l, fd_sched_align(),           fd_sched_footprint( tile->replay.max_live_slots ) );
  l = FD_LAYOUT_APPEND( l, fd_vote_tracker_align(),    fd_vote_tracker_footprint() );
  l = FD_LAYOUT_APPEND( l, fd_capture_ctx_align(),     fd_capture_ctx_footprint() );

# if FD_HAS_FLATCC
  /* Block dump context is only allocated when protobuf dumping is
     requested (backtest). */
  if( FD_UNLIKELY( tile->replay.dump_block_to_pb ) ) {
    l = FD_LAYOUT_APPEND( l, fd_block_dump_context_align(), fd_block_dump_context_footprint() );
  }
# endif

  l = FD_LAYOUT_FINI( l, scratch_align() );

  return l;
}
455 :
456 : static inline void
457 0 : metrics_write( fd_replay_tile_t * ctx ) {
458 0 : FD_MHIST_COPY( REPLAY, STORE_LINK_WAIT, ctx->metrics.store_link_wait );
459 0 : FD_MHIST_COPY( REPLAY, STORE_LINK_WORK, ctx->metrics.store_link_work );
460 0 : FD_MHIST_COPY( REPLAY, STORE_READ_WAIT, ctx->metrics.store_read_wait );
461 0 : FD_MHIST_COPY( REPLAY, STORE_READ_WORK, ctx->metrics.store_read_work );
462 0 : FD_MHIST_COPY( REPLAY, STORE_PUBLISH_WAIT, ctx->metrics.store_publish_wait );
463 0 : FD_MHIST_COPY( REPLAY, STORE_PUBLISH_WORK, ctx->metrics.store_publish_work );
464 :
465 0 : FD_MGAUGE_SET( REPLAY, ROOT_SLOT, ctx->consensus_root_slot==ULONG_MAX ? 0UL : ctx->consensus_root_slot );
466 0 : ulong leader_slot = ctx->leader_bank ? fd_bank_slot_get( ctx->leader_bank ) : 0UL;
467 0 : FD_MGAUGE_SET( REPLAY, LEADER_SLOT, leader_slot );
468 :
469 0 : if( FD_LIKELY( ctx->leader_bank ) ) {
470 0 : FD_MGAUGE_SET( REPLAY, NEXT_LEADER_SLOT, leader_slot );
471 0 : FD_MGAUGE_SET( REPLAY, LEADER_SLOT, leader_slot );
472 0 : } else {
473 0 : FD_MGAUGE_SET( REPLAY, NEXT_LEADER_SLOT, ctx->next_leader_slot==ULONG_MAX ? 0UL : ctx->next_leader_slot );
474 0 : FD_MGAUGE_SET( REPLAY, LEADER_SLOT, 0UL );
475 0 : }
476 0 : FD_MGAUGE_SET( REPLAY, RESET_SLOT, ctx->reset_slot==ULONG_MAX ? 0UL : ctx->reset_slot );
477 :
478 0 : fd_bank_t * bank_pool = fd_banks_get_bank_pool( ctx->banks );
479 0 : ulong live_banks = fd_banks_pool_max( bank_pool ) - fd_banks_pool_free( bank_pool );
480 0 : FD_MGAUGE_SET( REPLAY, LIVE_BANKS, live_banks );
481 :
482 0 : ulong reasm_free = fd_reasm_free( ctx->reasm );
483 0 : FD_MGAUGE_SET( REPLAY, REASM_FREE, reasm_free );
484 :
485 0 : FD_MCNT_SET( REPLAY, SLOTS_TOTAL, ctx->metrics.slots_total );
486 0 : FD_MCNT_SET( REPLAY, TRANSACTIONS_TOTAL, ctx->metrics.transactions_total );
487 :
488 0 : FD_MGAUGE_SET( REPLAY, REASM_LATEST_SLOT, ctx->metrics.reasm_latest_slot );
489 0 : FD_MGAUGE_SET( REPLAY, REASM_LATEST_FEC_IDX, ctx->metrics.reasm_latest_fec_idx );
490 :
491 0 : FD_MCNT_SET( REPLAY, SCHED_FULL, ctx->metrics.sched_full );
492 0 : FD_MCNT_SET( REPLAY, REASM_EMPTY, ctx->metrics.reasm_empty );
493 0 : FD_MCNT_SET( REPLAY, LEADER_BID_WAIT, ctx->metrics.leader_bid_wait );
494 0 : FD_MCNT_SET( REPLAY, BANKS_FULL, ctx->metrics.banks_full );
495 :
496 0 : FD_MCNT_SET( REPLAY, PROGCACHE_ROOTED, ctx->progcache_admin->metrics.root_cnt );
497 0 : FD_MCNT_SET( REPLAY, PROGCACHE_GC_ROOT, ctx->progcache_admin->metrics.gc_root_cnt );
498 :
499 0 : FD_MCNT_SET( REPLAY, ACCDB_CREATED, ctx->accdb->base.created_cnt );
500 0 : FD_MCNT_SET( REPLAY, ACCDB_REVERTED, ctx->accdb_admin->metrics.revert_cnt );
501 0 : FD_MCNT_SET( REPLAY, ACCDB_ROOTED, ctx->accdb_admin->metrics.root_cnt );
502 0 : FD_MCNT_SET( REPLAY, ACCDB_GC_ROOT, ctx->accdb_admin->metrics.gc_root_cnt );
503 0 : }
504 :
/* generate_stake_weight_msg serializes the stake weights for the given
   epoch from epoch_stakes into stake_weight_msg_out, sorted by
   (stake, vote key).  Returns the message size in bytes.  The caller
   must provide a buffer large enough for one fd_vote_stake_weight_t
   per non-zero-stake vote account. */
static inline ulong
generate_stake_weight_msg( ulong                       epoch,
                           fd_epoch_schedule_t const * epoch_schedule,
                           fd_vote_states_t const *    epoch_stakes,
                           ulong *                     stake_weight_msg_out ) {
  fd_stake_weight_msg_t *  stake_weight_msg = (fd_stake_weight_msg_t *)fd_type_pun( stake_weight_msg_out );
  fd_vote_stake_weight_t * stake_weights    = stake_weight_msg->weights;

  stake_weight_msg->epoch             = epoch;
  stake_weight_msg->start_slot        = fd_epoch_slot0( epoch_schedule, epoch );
  stake_weight_msg->slot_cnt          = epoch_schedule->slots_per_epoch;
  stake_weight_msg->excluded_stake    = 0UL;
  stake_weight_msg->vote_keyed_lsched = 1UL;

  /* FIXME: SIMD-0180 - hack to (de)activate in testnet vs mainnet.
     This code can be removed once the feature is active.  warmup==1 is
     used here as a proxy for testnet-like epoch schedules. */
  if( (1==epoch_schedule->warmup && epoch<FD_SIMD0180_ACTIVE_EPOCH_TESTNET) ||
      (0==epoch_schedule->warmup && epoch<FD_SIMD0180_ACTIVE_EPOCH_MAINNET) ) {
    stake_weight_msg->vote_keyed_lsched = 0UL;
  }

  /* epoch_stakes from manifest are already filtered (stake>0), but not
     sorted.  The zero-stake skip below is defensive. */
  fd_vote_states_iter_t iter_[1];
  ulong idx = 0UL;
  for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, epoch_stakes ); !fd_vote_states_iter_done( iter ); fd_vote_states_iter_next( iter ) ) {
    fd_vote_state_ele_t * vote_state = fd_vote_states_iter_ele( iter );
    if( FD_UNLIKELY( !vote_state->stake ) ) continue;

    stake_weights[ idx ].stake = vote_state->stake;
    memcpy( stake_weights[ idx ].id_key.uc,   &vote_state->node_account, sizeof(fd_pubkey_t) );
    memcpy( stake_weights[ idx ].vote_key.uc, &vote_state->vote_account, sizeof(fd_pubkey_t) );
    idx++;
  }
  stake_weight_msg->staked_cnt = idx;
  sort_vote_weights_by_stake_vote_inplace( stake_weights, idx );

  return fd_stake_weight_msg_sz( idx );
}
543 :
/* publish_stake_weights serializes the stake weights for the epoch
   derived from bank's slot and publishes them on the stake_out link,
   then feeds the same message into the multi-epoch leader schedule.
   current_epoch selects which cached vote-state snapshot to use:
   non-zero uses vote_states_prev (and publishes for epoch+1), zero uses
   vote_states_prev_prev (and publishes for the current epoch). */
static void
publish_stake_weights( fd_replay_tile_t *  ctx,
                       fd_stem_context_t * stem,
                       fd_bank_t *         bank,
                       int                 current_epoch ) {
  fd_epoch_schedule_t const * schedule = fd_bank_epoch_schedule_query( bank );
  ulong epoch = fd_slot_to_epoch( schedule, fd_bank_slot_get( bank ), NULL );

  /* Locking query — must be paired with the matching end_locking_query
     below (same selector) before returning. */
  fd_vote_states_t const * vote_states_prev;
  if( FD_LIKELY( current_epoch ) ) vote_states_prev = fd_bank_vote_states_prev_locking_query( bank );
  else                             vote_states_prev = fd_bank_vote_states_prev_prev_locking_query( bank );

  ulong * stake_weights_msg = fd_chunk_to_laddr( ctx->stake_out->mem, ctx->stake_out->chunk );
  ulong stake_weights_sz  = generate_stake_weight_msg( epoch+fd_ulong_if( current_epoch, 1UL, 0UL), schedule, vote_states_prev, stake_weights_msg );
  ulong stake_weights_sig = 4UL; /* NOTE(review): magic sig value — presumably the discriminant stake_out consumers expect; confirm */
  fd_stem_publish( stem, ctx->stake_out->idx, stake_weights_sig, ctx->stake_out->chunk, stake_weights_sz, 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
  ctx->stake_out->chunk = fd_dcache_compact_next( ctx->stake_out->chunk, stake_weights_sz, ctx->stake_out->chunk0, ctx->stake_out->wmark );

  if( FD_LIKELY( current_epoch ) ) fd_bank_vote_states_prev_end_locking_query( bank );
  else                             fd_bank_vote_states_prev_prev_end_locking_query( bank );

  /* Update the local leader schedule from the same message. */
  fd_multi_epoch_leaders_stake_msg_init( ctx->mleaders, fd_type_pun_const( stake_weights_msg ) );
  fd_multi_epoch_leaders_stake_msg_fini( ctx->mleaders );
}
568 :
569 : /**********************************************************************/
570 : /* Transaction execution state machine helpers */
571 : /**********************************************************************/
572 :
/* replay_block_start begins replay of a new block: clones a child bank
   at bank_idx from the frozen parent at parent_bank_idx, attaches
   child txncache/accdb/progcache transactions, resets per-block
   accounting, and runs block-execute prepare (publishing stake weights
   if an epoch boundary is crossed).  Returns the prepared bank.
   FD_LOG_CRITs on any invariant violation. */
static fd_bank_t *
replay_block_start( fd_replay_tile_t *  ctx,
                    fd_stem_context_t * stem,
                    ulong               bank_idx,
                    ulong               parent_bank_idx,
                    ulong               slot ) {
  long before = fd_log_wallclock(); /* timestamp for preparation_begin_nanos */

  /* Switch to a new block that we don't have a bank for. */

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
  if( FD_UNLIKELY( !bank ) ) {
    FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", bank_idx ));
  }
  if( FD_UNLIKELY( bank->flags!=FD_BANK_FLAGS_INIT ) ) {
    FD_LOG_CRIT(( "invariant violation: bank is not in correct state for bank index %lu", bank_idx ));
  }

  bank->preparation_begin_nanos = before;

  /* The parent must exist and be frozen (fully replayed) before a
     child can chain off of it. */
  fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, parent_bank_idx );
  if( FD_UNLIKELY( !parent_bank ) ) {
    FD_LOG_CRIT(( "invariant violation: parent bank is NULL for bank index %lu", parent_bank_idx ));
  }
  if( FD_UNLIKELY( !(parent_bank->flags&FD_BANK_FLAGS_FROZEN) ) ) {
    FD_LOG_CRIT(( "invariant violation: parent bank is not frozen for bank index %lu", parent_bank_idx ));
  }
  ulong parent_slot = fd_bank_slot_get( parent_bank );

  /* Clone the bank from the parent.  We must special case the first
     slot that is executed as the snapshot does not provide a parent
     block id. */

  bank = fd_banks_clone_from_parent( ctx->banks, bank_idx, parent_bank_idx );
  if( FD_UNLIKELY( !bank ) ) {
    FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", bank_idx ));
  }
  fd_bank_slot_set( bank, slot );
  fd_bank_parent_slot_set( bank, parent_slot );
  bank->txncache_fork_id = fd_txncache_attach_child( ctx->txncache, parent_bank->txncache_fork_id );

  /* Create a new funk txn for the block.  The xid encodes (slot, bank
     index) so forks of the same slot get distinct transactions. */

  fd_funk_txn_xid_t xid        = { .ul = { slot, bank_idx } };
  fd_funk_txn_xid_t parent_xid = { .ul = { parent_slot, parent_bank_idx } };
  fd_accdb_attach_child( ctx->accdb_admin, &parent_xid, &xid );
  fd_progcache_txn_attach_child( ctx->progcache_admin, &parent_xid, &xid );

  /* Update any required runtime state and handle any potential epoch
     boundary change.  Per-block accumulators start from zero. */

  fd_bank_shred_cnt_set( bank, 0UL );
  fd_bank_execution_fees_set( bank, 0UL );
  fd_bank_priority_fees_set( bank, 0UL );
  fd_bank_tips_set( bank, 0UL );

  fd_bank_has_identity_vote_set( bank, 0 );

  /* Set the tick height.  At this point max_tick_height is still the
     parent's value (from the clone); it is recomputed for this slot
     just below. */
  fd_bank_tick_height_set( bank, fd_bank_max_tick_height_get( bank ) );

  /* Update block height. */
  fd_bank_block_height_set( bank, fd_bank_block_height_get( bank ) + 1UL );

  ulong * max_tick_height = fd_bank_max_tick_height_modify( bank );
  ulong   ticks_per_slot  = fd_bank_ticks_per_slot_get( bank );
  if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS != fd_runtime_compute_max_tick_height( ticks_per_slot, slot, max_tick_height ) ) ) {
    FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", slot, ticks_per_slot ));
  }

  /* Run the pre-execution runtime hooks; if this block crosses into a
     new epoch, publish the new epoch's stake weights. */
  int is_epoch_boundary = 0;
  fd_runtime_block_execute_prepare( ctx->banks, bank, ctx->accdb, &ctx->runtime_stack, ctx->capture_ctx, &is_epoch_boundary );
  if( FD_UNLIKELY( is_epoch_boundary ) ) publish_stake_weights( ctx, stem, bank, 1 );

  return bank;
}
649 :
650 : static void
651 0 : cost_tracker_snap( fd_bank_t * bank, fd_replay_slot_completed_t * slot_info ) {
652 0 : if( bank->cost_tracker_pool_idx!=fd_bank_cost_tracker_pool_idx_null( fd_bank_get_cost_tracker_pool( bank ) ) ) {
653 0 : fd_cost_tracker_t const * cost_tracker = fd_bank_cost_tracker_locking_query( bank );
654 0 : slot_info->cost_tracker.block_cost = cost_tracker->block_cost;
655 0 : slot_info->cost_tracker.vote_cost = cost_tracker->vote_cost;
656 0 : slot_info->cost_tracker.allocated_accounts_data_size = cost_tracker->allocated_accounts_data_size;
657 0 : slot_info->cost_tracker.block_cost_limit = cost_tracker->block_cost_limit;
658 0 : slot_info->cost_tracker.vote_cost_limit = cost_tracker->vote_cost_limit;
659 0 : slot_info->cost_tracker.account_cost_limit = cost_tracker->account_cost_limit;
660 0 : fd_bank_cost_tracker_end_locking_query( bank );
661 0 : } else {
662 0 : memset( &slot_info->cost_tracker, 0, sizeof(slot_info->cost_tracker) );
663 0 : }
664 0 : }
665 :
666 : static ulong
667 0 : get_identity_balance( fd_replay_tile_t * ctx, fd_funk_txn_xid_t xid ) {
668 0 : ulong identity_balance = ULONG_MAX;
669 0 : fd_txn_account_t identity_acc[1];
670 0 : fd_funk_t * funk = fd_accdb_user_v1_funk( ctx->accdb );
671 0 : int err = fd_txn_account_init_from_funk_readonly( identity_acc,
672 0 : ctx->identity_pubkey,
673 0 : funk,
674 0 : &xid );
675 0 : if( FD_LIKELY( !err && identity_acc->meta ) ) identity_balance = identity_acc->meta->lamports;
676 :
677 0 : return identity_balance;
678 0 : }
679 :
 : /* publish_slot_completed: emit a REPLAY_SIG_SLOT_COMPLETED frag on the
 :    replay output link for a bank whose replay (or leader production)
 :    just finished.  Fills in slot/epoch coordinates, block ids,
 :    bank/block hashes, inflation and rent parameters, and replay timing
 :    timestamps, then takes the refcnts that downstream consumers of the
 :    bank will hold.  is_initial is non-zero only for the boot bank
 :    (which has no parent, so a zero parent block id is sent); is_leader
 :    marks a slot this validator produced itself. */
680 : static void
681 : publish_slot_completed( fd_replay_tile_t * ctx,
682 : fd_stem_context_t * stem,
683 : fd_bank_t * bank,
684 : int is_initial,
685 0 : int is_leader ) {
686 :
687 0 : ulong slot = fd_bank_slot_get( bank );
688 :
689 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank->idx ];
690 :
691 : /* HACKY: hacky way of checking if we should send a null parent block
692 : id */
693 0 : fd_hash_t parent_block_id = {0};
694 0 : if( FD_UNLIKELY( !is_initial ) ) {
695 0 : parent_block_id = ctx->block_id_arr[ bank->parent_idx ].block_id;
696 0 : }
697 :
698 0 : fd_hash_t const * bank_hash = fd_bank_bank_hash_query( bank );
699 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last_hash( fd_bank_block_hash_queue_query( bank ) );
700 0 : FD_TEST( bank_hash );
701 0 : FD_TEST( block_hash );
702 :
 : /* Record the block's final blockhash with the txncache for this fork.
 :    Skipped for the boot bank, whose fork was finalized at boot. */
703 0 : if( FD_LIKELY( !is_initial ) ) fd_txncache_finalize_fork( ctx->txncache, bank->txncache_fork_id, 0UL, block_hash->uc );
704 :
705 0 : fd_epoch_schedule_t const * epoch_schedule = fd_bank_epoch_schedule_query( bank );
706 0 : ulong slot_idx;
707 0 : ulong epoch = fd_slot_to_epoch( epoch_schedule, slot, &slot_idx );
708 :
709 0 : ctx->metrics.slots_total++;
710 0 : ctx->metrics.transactions_total = fd_bank_txn_count_get( bank );
711 :
 : /* Build the outgoing message in place in the dcache. */
712 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
713 0 : slot_info->slot = slot;
714 0 : slot_info->root_slot = ctx->consensus_root_slot;
715 0 : slot_info->storage_slot = ctx->published_root_slot;
716 0 : slot_info->epoch = epoch;
717 0 : slot_info->slot_in_epoch = slot_idx;
718 0 : slot_info->block_height = fd_bank_block_height_get( bank );
719 0 : slot_info->parent_slot = fd_bank_parent_slot_get( bank );
720 0 : slot_info->block_id = block_id_ele->block_id;
721 0 : slot_info->parent_block_id = parent_block_id;
722 0 : slot_info->bank_hash = *bank_hash;
723 0 : slot_info->block_hash = *block_hash;
724 0 : slot_info->transaction_count = fd_bank_txn_count_get( bank );
725 :
726 0 : fd_inflation_t inflation = fd_bank_inflation_get( bank );
727 0 : slot_info->inflation.foundation = inflation.foundation;
728 0 : slot_info->inflation.foundation_term = inflation.foundation_term;
729 0 : slot_info->inflation.terminal = inflation.terminal;
730 0 : slot_info->inflation.initial = inflation.initial;
731 0 : slot_info->inflation.taper = inflation.taper;
732 :
733 0 : fd_rent_t rent = fd_bank_rent_get( bank );
734 0 : slot_info->rent.burn_percent = rent.burn_percent;
735 0 : slot_info->rent.lamports_per_uint8_year = rent.lamports_per_uint8_year;
736 0 : slot_info->rent.exemption_threshold = rent.exemption_threshold;
737 :
 : /* Replay timing breadcrumbs consumed by observability tooling. */
738 0 : slot_info->first_fec_set_received_nanos = bank->first_fec_set_received_nanos;
739 0 : slot_info->preparation_begin_nanos = bank->preparation_begin_nanos;
740 0 : slot_info->first_transaction_scheduled_nanos = bank->first_transaction_scheduled_nanos;
741 0 : slot_info->last_transaction_finished_nanos = bank->last_transaction_finished_nanos;
742 0 : slot_info->completion_time_nanos = fd_log_wallclock();
743 :
744 : /* refcnt should be incremented by 1 for each consumer that uses
745 : `bank_idx`. Each consumer should decrement the bank's refcnt once
746 : they are done using the bank. */
747 0 : bank->refcnt++; /* tower_tile */
748 0 : if( FD_LIKELY( ctx->gui_enabled ) ) bank->refcnt++; /* gui tile */
749 0 : slot_info->bank_idx = bank->idx;
750 :
 : /* The gui additionally holds a reference on the parent bank (if one
 :    still exists) for the duration of its processing. */
751 0 : slot_info->parent_bank_idx = ULONG_MAX;
752 0 : fd_bank_t * parent_bank = fd_banks_get_parent( ctx->banks, bank );
753 0 : if( FD_LIKELY( parent_bank && ctx->gui_enabled ) ) {
754 0 : parent_bank->refcnt++;
755 0 : slot_info->parent_bank_idx = parent_bank->idx;
756 0 : }
757 :
758 0 : slot_info->is_leader = is_leader;
759 :
760 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_SLOT_COMPLETED, ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
761 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
762 0 : }
763 :
 : /* publish_slot_dead: notify downstream consumers via
 :    REPLAY_SIG_SLOT_DEAD that the given bank's slot cannot be completed
 :    (e.g. its block failed to replay).  Only the slot number and block
 :    id are sent. */
764 : static void
765 : publish_slot_dead( fd_replay_tile_t * ctx,
766 : fd_stem_context_t * stem,
767 0 : fd_bank_t * bank ) {
768 0 : fd_replay_slot_dead_t * slot_dead = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
769 0 : slot_dead->slot = fd_bank_slot_get( bank );
770 0 : slot_dead->block_id = ctx->block_id_arr[ bank->idx ].block_id;
771 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_SLOT_DEAD, ctx->replay_out->chunk, sizeof(fd_replay_slot_dead_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
772 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_slot_dead_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
773 0 : }
774 :
 : /* replay_block_finalize: end-of-block processing for a non-leader
 :    (replayed) block.  Records the final PoH hash and shred count in
 :    the bank, runs block-execute finalize (hashing etc.), snapshots the
 :    cost tracker into the outgoing slot-completed message, freezes the
 :    bank, and publishes REPLAY_SIG_SLOT_COMPLETED.  Optionally dumps
 :    the block to protobuf when capture is configured. */
775 : static void
776 : replay_block_finalize( fd_replay_tile_t * ctx,
777 : fd_stem_context_t * stem,
778 0 : fd_bank_t * bank ) {
779 0 : bank->last_transaction_finished_nanos = fd_log_wallclock();
780 :
 : /* A frozen bank must not be finalized twice. */
781 0 : FD_TEST( !(bank->flags&FD_BANK_FLAGS_FROZEN) );
782 :
783 : /* Set poh hash in bank. */
784 0 : fd_hash_t * poh = fd_sched_get_poh( ctx->sched, bank->idx );
785 0 : fd_bank_poh_set( bank, *poh );
786 :
787 : /* Set shred count in bank. */
788 0 : fd_bank_shred_cnt_set( bank, fd_sched_get_shred_cnt( ctx->sched, bank->idx ) );
789 :
790 : /* Do hashing and other end-of-block processing. */
791 0 : fd_runtime_block_execute_finalize( bank, ctx->accdb, ctx->capture_ctx );
792 :
793 : /* Copy out cost tracker fields before freezing.  NOTE(review): this
 : writes into the replay_out dcache chunk that publish_slot_completed
 : later fills and publishes; the snapshot must happen before the
 : chunk is advanced. */
794 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
795 0 : cost_tracker_snap( bank, slot_info );
796 :
797 : /* fetch identity / vote balance updates infrequently (every 4096
 : slots); ULONG_MAX signals "not sampled this slot". */
798 0 : ulong slot = fd_bank_slot_get( bank );
799 0 : fd_funk_txn_xid_t xid = { .ul = { slot, bank->idx } };
800 0 : slot_info->identity_balance = FD_UNLIKELY( slot%4096==0UL ) ? get_identity_balance( ctx, xid ) : ULONG_MAX;
801 :
802 : /* Mark the bank as frozen. */
803 0 : fd_banks_mark_bank_frozen( ctx->banks, bank );
804 :
805 : /**********************************************************************/
806 : /* Bank hash comparison, and halt if there's a mismatch after replay */
807 : /**********************************************************************/
808 :
809 0 : fd_hash_t const * bank_hash = fd_bank_bank_hash_query( bank );
810 0 : FD_TEST( bank_hash );
811 :
812 : /* Must be last so we can measure completion time correctly, even
813 : though we could technically do this before the hash cmp and vote
814 : tower stuff. */
815 0 : publish_slot_completed( ctx, stem, bank, 0, 0 /* is_leader */ );
816 :
817 0 : # if FD_HAS_FLATCC
818 : /* If enabled, dump the block to a file and reset the dumping
819 : context state */
820 0 : if( FD_UNLIKELY( ctx->capture_ctx && ctx->capture_ctx->dump_block_to_pb ) ) {
821 0 : fd_funk_t * funk = fd_accdb_user_v1_funk( ctx->accdb );
822 0 : fd_dump_block_to_protobuf( ctx->block_dump_ctx, ctx->banks, bank, funk, ctx->capture_ctx );
823 0 : fd_block_dump_context_reset( ctx->block_dump_ctx );
824 0 : }
825 0 : # endif
826 0 : }
827 :
828 : /**********************************************************************/
829 : /* Leader bank management */
830 : /**********************************************************************/
831 :
 : /* prepare_leader_bank: create and initialize the bank for a slot this
 :    validator is about to produce.  Looks up the parent bank via the
 :    parent's block id, clones a child bank from it, attaches child
 :    txncache / funk / progcache transactions, resets per-block fee and
 :    tick state, runs block-execute prepare (publishing stake weights on
 :    an epoch boundary), and takes a refcnt that is held until
 :    fini_leader_bank.  Returns the new leader bank; any lookup failure
 :    is an invariant violation and crashes.  Requires that we are not
 :    already leader (ctx->leader_bank==NULL). */
832 : static fd_bank_t *
833 : prepare_leader_bank( fd_replay_tile_t * ctx,
834 : ulong slot,
835 : long now,
836 : fd_hash_t const * parent_block_id,
837 0 : fd_stem_context_t * stem ) {
838 0 : long before = fd_log_wallclock();
839 :
840 : /* Make sure that we are not already leader. */
841 0 : FD_TEST( ctx->leader_bank==NULL );
842 :
 : /* Resolve the parent bank index from the parent's block id. */
843 0 : fd_block_id_ele_t * parent_ele = fd_block_id_map_ele_query( ctx->block_id_map, parent_block_id, NULL, ctx->block_id_arr );
844 0 : if( FD_UNLIKELY( !parent_ele ) ) {
845 0 : FD_BASE58_ENCODE_32_BYTES( parent_block_id->key, parent_block_id_b58 );
846 0 : FD_LOG_CRIT(( "invariant violation: parent bank index not found for merkle root %s", parent_block_id_b58 ));
847 0 : }
848 0 : ulong parent_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, parent_ele );
849 :
850 0 : fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, parent_bank_idx );
851 0 : if( FD_UNLIKELY( !parent_bank ) ) {
852 0 : FD_LOG_CRIT(( "invariant violation: parent bank not found for bank index %lu", parent_bank_idx ));
853 0 : }
854 0 : ulong parent_slot = fd_bank_slot_get( parent_bank );
855 :
 : /* Allocate the child bank and clone runtime state from the parent. */
856 0 : ctx->leader_bank = fd_banks_new_bank( ctx->banks, parent_bank_idx, now );
857 0 : if( FD_UNLIKELY( !ctx->leader_bank ) ) {
858 0 : FD_LOG_CRIT(( "invariant violation: leader bank is NULL for slot %lu", slot ));
859 0 : }
860 :
861 0 : if( FD_UNLIKELY( !fd_banks_clone_from_parent( ctx->banks, ctx->leader_bank->idx, parent_bank_idx ) ) ) {
862 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for slot %lu", slot ));
863 0 : }
864 :
865 0 : ctx->leader_bank->preparation_begin_nanos = before;
866 :
867 0 : fd_bank_slot_set( ctx->leader_bank, slot );
868 0 : fd_bank_parent_slot_set( ctx->leader_bank, parent_slot );
869 0 : ctx->leader_bank->txncache_fork_id = fd_txncache_attach_child( ctx->txncache, parent_bank->txncache_fork_id );
870 : /* prepare the funk transaction for the leader bank */
871 0 : fd_funk_txn_xid_t xid = { .ul = { slot, ctx->leader_bank->idx } };
872 0 : fd_funk_txn_xid_t parent_xid = { .ul = { parent_slot, parent_bank_idx } };
873 0 : fd_accdb_attach_child( ctx->accdb_admin, &parent_xid, &xid );
874 0 : fd_progcache_txn_attach_child( ctx->progcache_admin, &parent_xid, &xid );
875 :
 : /* Zero per-block accumulators inherited from the parent clone. */
876 0 : fd_bank_execution_fees_set( ctx->leader_bank, 0UL );
877 0 : fd_bank_priority_fees_set( ctx->leader_bank, 0UL );
878 0 : fd_bank_shred_cnt_set( ctx->leader_bank, 0UL );
879 0 : fd_bank_tips_set( ctx->leader_bank, 0UL );
880 :
881 : /* Set the tick height. */
882 0 : fd_bank_tick_height_set( ctx->leader_bank, fd_bank_max_tick_height_get( ctx->leader_bank ) );
883 :
884 : /* Update block height. */
885 0 : fd_bank_block_height_set( ctx->leader_bank, fd_bank_block_height_get( ctx->leader_bank ) + 1UL );
886 :
887 0 : ulong * max_tick_height = fd_bank_max_tick_height_modify( ctx->leader_bank );
888 0 : ulong ticks_per_slot = fd_bank_ticks_per_slot_get( ctx->leader_bank );
889 0 : if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS != fd_runtime_compute_max_tick_height( ticks_per_slot, slot, max_tick_height ) ) ) {
890 0 : FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", slot, ticks_per_slot ));
891 0 : }
892 :
893 0 : int is_epoch_boundary = 0;
894 0 : fd_runtime_block_execute_prepare( ctx->banks, ctx->leader_bank, ctx->accdb, &ctx->runtime_stack, ctx->capture_ctx, &is_epoch_boundary );
895 0 : if( FD_UNLIKELY( is_epoch_boundary ) ) publish_stake_weights( ctx, stem, ctx->leader_bank, 1 );
896 :
897 : /* Now that a bank has been created for the leader slot, increment the
898 : reference count until we are done with the leader slot. */
899 0 : ctx->leader_bank->refcnt++;
900 :
901 0 : return ctx->leader_bank;
902 0 : }
903 :
 : /* fini_leader_bank: finish the current leader slot.  Requires that we
 :    are leader, hold a leader bank, and have already received both the
 :    block id (from shredding) and the final PoH hash.  Marks the block
 :    done with the scheduler, runs block-execute finalize, snapshots the
 :    cost tracker, freezes the bank, publishes SLOT_COMPLETED, releases
 :    the leader refcnt taken in prepare_leader_bank, and clears all
 :    leader state. */
904 : static void
905 : fini_leader_bank( fd_replay_tile_t * ctx,
906 0 : fd_stem_context_t * stem ) {
907 :
908 0 : FD_TEST( ctx->leader_bank!=NULL );
909 0 : FD_TEST( ctx->is_leader );
910 0 : FD_TEST( ctx->recv_block_id );
911 0 : FD_TEST( ctx->recv_poh );
912 :
913 0 : ctx->leader_bank->last_transaction_finished_nanos = fd_log_wallclock();
914 :
915 0 : ulong curr_slot = fd_bank_slot_get( ctx->leader_bank );
916 :
917 0 : fd_sched_block_add_done( ctx->sched, ctx->leader_bank->idx, ctx->leader_bank->parent_idx, curr_slot );
918 :
919 : /* Do hashing and other end-of-block processing */
 : /* NOTE(review): txn_map is only sanity-checked here, its value is not
 : otherwise used — possibly vestigial; confirm before removing. */
920 0 : fd_funk_t * funk = fd_accdb_user_v1_funk( ctx->accdb );
921 0 : fd_funk_txn_map_t * txn_map = fd_funk_txn_map( funk );
922 0 : if( FD_UNLIKELY( !txn_map->map ) ) {
923 0 : FD_LOG_ERR(( "Could not find valid funk transaction map" ));
924 0 : }
925 :
926 0 : fd_runtime_block_execute_finalize( ctx->leader_bank, ctx->accdb, ctx->capture_ctx );
927 :
 : /* Snapshot cost tracker and (every 4096 slots) the identity balance
 : into the outgoing slot-completed message before publishing. */
928 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
929 0 : cost_tracker_snap( ctx->leader_bank, slot_info );
930 0 : fd_funk_txn_xid_t xid = { .ul = { curr_slot, ctx->leader_bank->idx } };
931 0 : slot_info->identity_balance = FD_UNLIKELY( curr_slot%4096==0UL ) ? get_identity_balance( ctx, xid ) : ULONG_MAX;
932 :
933 0 : fd_banks_mark_bank_frozen( ctx->banks, ctx->leader_bank );
934 :
935 0 : fd_hash_t const * bank_hash = fd_bank_bank_hash_query( ctx->leader_bank );
936 0 : FD_TEST( bank_hash );
937 :
938 0 : publish_slot_completed( ctx, stem, ctx->leader_bank, 0, 1 /* is_leader */ );
939 :
940 : /* The reference on the bank is finally no longer needed. */
941 0 : ctx->leader_bank->refcnt--;
942 :
943 : /* We are no longer leader so we can clear the bank index we use for
944 : being the leader. */
945 0 : ctx->leader_bank = NULL;
946 0 : ctx->recv_block_id = 0;
947 0 : ctx->recv_poh = 0;
948 0 : ctx->is_leader = 0;
949 0 : }
950 :
 : /* publish_root_advanced: notify downstream (resolv) tiles via
 :    REPLAY_SIG_ROOT_ADVANCED that the consensus root moved.  Sends the
 :    CHILD of the consensus root (see FIXME below) and bumps its refcnt
 :    once per resolv tile; each resolv tile releases its reference when
 :    done.  No-op if the consensus root has no child yet. */
951 : static void
952 : publish_root_advanced( fd_replay_tile_t * ctx,
953 0 : fd_stem_context_t * stem ) {
954 :
955 : /* FIXME: for now we want to send the child of the consensus root to
956 : avoid data races with funk root advancing. This is a temporary
957 : hack because currently it is not safe to query against the xid for
958 : the root that is being advanced in funk. This doesn't eliminate
959 : the data race that exists in funk, but reduces how often it occurs.
960 :
961 : Case that causes a data race:
962 : replay: we are advancing the root from slot A->B
963 : resolv: we are resolving ALUTs against slot B */
964 :
965 0 : fd_bank_t * consensus_root_bank = fd_banks_bank_query( ctx->banks, ctx->consensus_root_bank_idx );
966 0 : if( FD_UNLIKELY( !consensus_root_bank ) ) {
967 0 : FD_LOG_CRIT(( "invariant violation: consensus root bank is NULL at bank index %lu", ctx->consensus_root_bank_idx ));
968 0 : }
969 :
970 0 : if( FD_UNLIKELY( consensus_root_bank->child_idx==ULONG_MAX ) ) {
971 0 : return;
972 0 : }
973 :
974 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, consensus_root_bank->child_idx );
975 0 : if( FD_UNLIKELY( !bank ) ) {
976 0 : FD_LOG_CRIT(( "invariant violation: consensus root bank child is NULL at bank index %lu", consensus_root_bank->child_idx ));
977 0 : }
978 :
979 : /* Increment the reference count on the consensus root bank to account
980 : for the number of exec tiles that are waiting on it. */
981 0 : bank->refcnt += ctx->resolv_tile_cnt;
982 :
983 0 : fd_replay_root_advanced_t * msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
984 0 : msg->bank_idx = bank->idx;
985 :
986 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_ROOT_ADVANCED, ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
987 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
988 0 : }
989 :
990 : /* init_funk performs pre-flight checks for the account database and
991 : program cache. Ensures that the account database was set up
992 : correctly by bootstrap components (e.g. genesis or snapshot loader).
993 : Mirrors the account database's fork tree down to the program cache. */
994 :
995 : static void
996 : init_funk( fd_replay_tile_t * ctx,
997 0 : ulong bank_slot ) {
998 : /* Ensure that the loaded bank root corresponds to the account
999 : database's root. */
1000 0 : fd_funk_t * funk = ctx->accdb_admin->funk;
1001 0 : if( FD_UNLIKELY( !funk->shmem ) ) {
1002 0 : FD_LOG_CRIT(( "failed to initialize account database: replay tile is not joined to database shared memory objects" ));
1003 0 : }
1004 0 : fd_funk_txn_xid_t const * accdb_pub = fd_funk_last_publish( funk );
1005 0 : if( FD_UNLIKELY( accdb_pub->ul[0]!=bank_slot ) ) {
1006 0 : FD_LOG_CRIT(( "failed to initialize account database: accdb is at slot %lu, but chain state is at slot %lu\n"
1007 0 : "This is a bug in startup components.",
1008 0 : accdb_pub->ul[0], bank_slot ));
1009 0 : }
1010 0 : if( FD_UNLIKELY( fd_funk_last_publish_is_frozen( funk ) ) ) {
1011 0 : FD_LOG_CRIT(( "failed to initialize account database: accdb fork graph is not clean.\n"
1012 0 : "The account database should only contain state for the root slot at this point,\n"
1013 0 : "but there are incomplete database transactions leftover.\n"
1014 0 : "This is a bug in startup components." ));
1015 0 : }
1016 :
1017 : /* The program cache tracks the account database's fork graph at all
1018 : times. Perform initial synchronization: pivot from funk 'root' (a
1019 : sentinel XID) to 'last publish' (the bootstrap root slot). */
1020 0 : if( FD_UNLIKELY( !ctx->progcache_admin->funk->shmem ) ) {
1021 0 : FD_LOG_CRIT(( "failed to initialize account database: replay tile is not joined to program cache" ));
1022 0 : }
1023 0 : fd_progcache_clear( ctx->progcache_admin );
1024 0 : fd_progcache_txn_attach_child( ctx->progcache_admin, fd_funk_root( ctx->progcache_admin->funk ), fd_funk_last_publish( ctx->accdb_admin->funk ) );
1025 0 : fd_progcache_txn_advance_root( ctx->progcache_admin, fd_funk_last_publish( ctx->accdb_admin->funk ) );
1026 0 : }
1027 :
 : /* init_after_snapshot: post-bootstrap initialization run once the
 :    snapshot (or genesis) state is fully loaded into the boot bank.
 :    Verifies funk/progcache state, refreshes root stake delegations
 :    from the account database (the manifest lacks the full set — see
 :    fd_stake_delegations.h), recalculates partitioned rewards, and for
 :    the genesis case (slot 0) additionally computes leaders, spins the
 :    genesis PoH hashes, and runs an initial block execute
 :    prepare/finalize cycle. */
1028 : static void
1029 0 : init_after_snapshot( fd_replay_tile_t * ctx ) {
1030 : /* Now that the snapshot has been loaded in, we have to refresh the
1031 : stake delegations since the manifest does not contain the full set
1032 : of data required for the stake delegations. See
1033 : fd_stake_delegations.h for why this is required. */
1034 :
1035 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX );
1036 0 : if( FD_UNLIKELY( !bank ) ) {
1037 0 : FD_LOG_CRIT(( "invariant violation: replay bank is NULL at bank index %lu", FD_REPLAY_BOOT_BANK_IDX ));
1038 0 : }
1039 :
1040 0 : fd_funk_t * funk = fd_accdb_user_v1_funk( ctx->accdb );
1041 0 : fd_funk_txn_xid_t xid = { .ul = { fd_bank_slot_get( bank ), bank->idx } };
1042 0 : init_funk( ctx, fd_bank_slot_get( bank ) );
1043 :
1044 0 : fd_stake_delegations_t * root_delegations = fd_banks_stake_delegations_root_query( ctx->banks );
1045 :
1046 0 : fd_stake_delegations_refresh( root_delegations, funk, &xid );
1047 :
1048 : /* After both snapshots have been loaded in, we can determine if we should
1049 : start distributing rewards. */
1050 :
1051 0 : fd_rewards_recalculate_partitioned_rewards( ctx->banks, bank, funk, &xid, &ctx->runtime_stack, ctx->capture_ctx );
1052 :
1053 0 : ulong snapshot_slot = fd_bank_slot_get( bank );
1054 0 : if( FD_UNLIKELY( !snapshot_slot ) ) {
1055 : /* Genesis-specific setup. */
1056 : /* FIXME: This branch does not set up a new block exec ctx
1057 : properly. Needs to do whatever prepare_new_block_execution
1058 : does, but just hacking that in breaks stuff. */
1059 0 : fd_runtime_update_leaders( bank, &ctx->runtime_stack );
1060 :
 : /* Spin PoH forward over the whole genesis slot's hash count. */
1061 0 : ulong hashcnt_per_slot = fd_bank_hashes_per_tick_get( bank ) * fd_bank_ticks_per_slot_get( bank );
1062 0 : fd_hash_t * poh = fd_bank_poh_modify( bank );
1063 0 : while( hashcnt_per_slot-- ) {
1064 0 : fd_sha256_hash( poh->hash, 32UL, poh->hash );
1065 0 : }
1066 :
1067 0 : int is_epoch_boundary = 0;
1068 0 : fd_runtime_block_execute_prepare( ctx->banks, bank, ctx->accdb, &ctx->runtime_stack, ctx->capture_ctx, &is_epoch_boundary );
1069 0 : FD_TEST( !is_epoch_boundary );
1070 0 : fd_runtime_block_execute_finalize( bank, ctx->accdb, ctx->capture_ctx );
1071 :
 : /* NOTE(review): dead store — snapshot_slot is already 0 in this
 : branch and is not read afterwards. */
1072 0 : snapshot_slot = 0UL;
1073 0 : }
1074 :
1075 0 : }
1076 :
 : /* maybe_become_leader: if the next leader slot has arrived (and a few
 :    safety conditions hold), switch into leader mode: prepare a leader
 :    bank off the reset bank, gather bundle tip-payment state if bundles
 :    are enabled, and publish REPLAY_SIG_BECAME_LEADER to the PoH/pack
 :    pipeline.  Returns 1 if we became leader, 0 otherwise.  May also
 :    decide to wait if a prior leader is still publishing (see inline
 :    comments). */
1077 : static inline int
1078 : maybe_become_leader( fd_replay_tile_t * ctx,
1079 0 : fd_stem_context_t * stem ) {
1080 0 : FD_TEST( ctx->is_booted );
 : /* Not eligible: no scheduled leader slot, already leader, identity
 : vote not yet rooted, or no replay output link. */
1081 0 : if( FD_LIKELY( ctx->next_leader_slot==ULONG_MAX || ctx->is_leader || !ctx->has_identity_vote_rooted || ctx->replay_out->idx==ULONG_MAX ) ) return 0;
1082 :
1083 0 : FD_TEST( ctx->next_leader_slot>ctx->reset_slot );
1084 0 : long now = fd_tickcount();
1085 0 : if( FD_LIKELY( now<ctx->next_leader_tickcount ) ) return 0;
1086 :
1087 : /* TODO:
1088 : if( FD_UNLIKELY( ctx->halted_switching_key ) ) return 0; */
1089 :
1090 : /* If a prior leader is still in the process of publishing their slot,
1091 : delay ours to let them finish ... unless they are so delayed that
1092 : we risk getting skipped by the leader following us. 1.2 seconds
1093 : is a reasonable default here, although any value between 0 and 1.6
1094 : seconds could be considered reasonable. This is arbitrary and
1095 : chosen due to intuition. */
1096 0 : if( FD_UNLIKELY( now<ctx->next_leader_tickcount+(long)(3.0*ctx->slot_duration_ticks) ) ) {
1097 0 : FD_TEST( ctx->reset_bank );
1098 :
1099 : /* TODO: Make the max_active_descendant calculation more efficient
1100 : by caching it in the bank structure and updating it as banks are
1101 : created and completed. */
 : /* Walk the reset bank's children (sibling-linked list) to find the
 : highest slot currently being replayed on any descendant fork. */
1102 0 : ulong max_active_descendant = 0UL;
1103 0 : ulong child_idx = ctx->reset_bank->child_idx;
1104 0 : while( child_idx!=ULONG_MAX ) {
1105 0 : fd_bank_t const * child_bank = fd_banks_bank_query( ctx->banks, child_idx );
1106 0 : max_active_descendant = fd_ulong_max( max_active_descendant, fd_bank_slot_get( child_bank ) );
1107 0 : child_idx = child_bank->sibling_idx;
1108 0 : }
1109 :
1110 : /* If the max_active_descendant is >= next_leader_slot, we waited
1111 : too long and a leader after us started publishing to try and skip
1112 : us. Just start our leader slot immediately, we might win ... */
1113 0 : if( FD_LIKELY( max_active_descendant>=ctx->reset_slot && max_active_descendant<ctx->next_leader_slot ) ) {
1114 : /* If one of the leaders between the reset slot and our leader
1115 : slot is in the process of publishing (they have a descendant
1116 : bank that is in progress of being replayed), then keep waiting.
1117 : We probably wouldn't get a leader slot out before they
1118 : finished.
1119 :
1120 : Unless... we are past the deadline to start our slot by more
1121 : than 1.2 seconds, in which case we should probably start it to
1122 : avoid getting skipped by the leader behind us. */
1123 0 : return 0;
1124 0 : }
1125 0 : }
1126 :
1127 0 : long now_nanos = fd_log_wallclock();
1128 :
1129 0 : ctx->is_leader = 1;
1130 0 : ctx->recv_poh = 0;
1131 0 : ctx->recv_block_id = 0;
1132 :
1133 0 : FD_TEST( ctx->highwater_leader_slot==ULONG_MAX || ctx->highwater_leader_slot<ctx->next_leader_slot );
1134 0 : ctx->highwater_leader_slot = ctx->next_leader_slot;
1135 :
1136 0 : FD_LOG_INFO(( "becoming leader for slot %lu, parent slot is %lu", ctx->next_leader_slot, ctx->reset_slot ));
1137 :
1138 : /* Acquires bank, sets up initial state, and refcnts it. */
1139 0 : fd_bank_t * bank = prepare_leader_bank( ctx, ctx->next_leader_slot, now_nanos, &ctx->reset_block_id, stem );
1140 0 : fd_funk_txn_xid_t xid = { .ul = { ctx->next_leader_slot, ctx->leader_bank->idx } };
1141 :
 : /* Bundle cranking state: read the tip payment config account (must
 : exist) and the tip receiver's owner (may not exist yet early in an
 : epoch, in which case it stays zeroed). */
1142 0 : fd_bundle_crank_tip_payment_config_t config[1] = { 0 };
1143 0 : fd_acct_addr_t tip_receiver_owner[1] = { 0 };
1144 :
1145 0 : if( FD_UNLIKELY( ctx->bundle.enabled ) ) {
1146 0 : fd_acct_addr_t tip_payment_config[1];
1147 0 : fd_acct_addr_t tip_receiver[1];
1148 0 : fd_bundle_crank_get_addresses( ctx->bundle.gen, fd_bank_epoch_get( bank ), tip_payment_config, tip_receiver );
1149 :
1150 0 : fd_funk_t * funk = fd_accdb_user_v1_funk( ctx->accdb );
1151 0 : fd_txn_account_t tip_config_acc[1];
1152 0 : int err = fd_txn_account_init_from_funk_readonly( tip_config_acc,
1153 0 : (fd_hash_t *)tip_payment_config->b,
1154 0 : funk,
1155 0 : &xid );
1156 0 : if( FD_UNLIKELY( err ) ) {
1157 0 : FD_LOG_CRIT(( "failed to initialize tip payment config account: err=%d", err ));
1158 0 : }
1159 0 : memcpy( config, fd_txn_account_get_data( tip_config_acc ), sizeof(fd_bundle_crank_tip_payment_config_t) );
1160 :
1161 : /* It is possible that the tip receiver account does not exist yet
1162 : if it is the first time in an epoch. */
1163 0 : fd_txn_account_t tip_receiver_acc[1];
1164 0 : err = fd_txn_account_init_from_funk_readonly( tip_receiver_acc,
1165 0 : (fd_hash_t *)tip_receiver->b,
1166 0 : funk,
1167 0 : &xid );
1168 0 : if( FD_LIKELY( !err ) ) {
1169 0 : memcpy( tip_receiver_owner, tip_receiver_acc->meta->owner, sizeof(fd_acct_addr_t) );
1170 0 : }
1171 0 : }
1172 :
1173 :
 : /* Build and publish the became-leader message for PoH/pack. */
1174 0 : fd_became_leader_t * msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
1175 0 : msg->slot = ctx->next_leader_slot;
1176 0 : msg->slot_start_ns = now_nanos;
1177 0 : msg->slot_end_ns = now_nanos+(long)ctx->slot_duration_nanos;
1178 0 : msg->bank = NULL;
1179 0 : msg->bank_idx = bank->idx;
1180 0 : msg->ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1181 0 : msg->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
1182 0 : msg->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)msg->ticks_per_slot);
1183 0 : msg->bundle->config[0] = config[0];
1184 0 : memcpy( msg->bundle->last_blockhash, (fd_hash_t *)fd_bank_poh_query( bank )->hash, 32UL );
1185 0 : memcpy( msg->bundle->tip_receiver_owner, tip_receiver_owner, 32UL );
1186 :
1187 :
1188 0 : if( FD_UNLIKELY( msg->hashcnt_per_tick==1UL ) ) {
1189 : /* Low power producer, maximum of one microblock per tick in the slot */
1190 0 : msg->max_microblocks_in_slot = msg->ticks_per_slot;
1191 0 : } else {
1192 : /* See the long comment in after_credit for this limit */
1193 0 : msg->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, msg->ticks_per_slot*(msg->hashcnt_per_tick-1UL) );
1194 0 : }
1195 :
1196 0 : msg->total_skipped_ticks = msg->ticks_per_slot*(ctx->next_leader_slot-ctx->reset_slot);
1197 0 : msg->epoch = fd_slot_to_epoch( fd_bank_epoch_schedule_query( bank ), ctx->next_leader_slot, NULL );
1198 :
1199 0 : fd_cost_tracker_t const * cost_tracker = fd_bank_cost_tracker_locking_query( bank );
1200 :
1201 0 : msg->limits.slot_max_cost = ctx->larger_max_cost_per_block ? LARGER_MAX_COST_PER_BLOCK : cost_tracker->block_cost_limit;
1202 0 : msg->limits.slot_max_vote_cost = cost_tracker->vote_cost_limit;
1203 0 : msg->limits.slot_max_write_cost_per_acct = cost_tracker->account_cost_limit;
1204 :
1205 0 : fd_bank_cost_tracker_end_locking_query( bank );
1206 :
1207 0 : if( FD_UNLIKELY( msg->ticks_per_slot+msg->total_skipped_ticks>USHORT_MAX ) ) {
1208 : /* There can be at most USHORT_MAX skipped ticks, because the
1209 : parent_offset field in the shred data is only 2 bytes wide. */
1210 0 : FD_LOG_ERR(( "too many skipped ticks %lu for slot %lu, chain must halt", msg->ticks_per_slot+msg->total_skipped_ticks, ctx->next_leader_slot ));
1211 0 : }
1212 :
1213 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_BECAME_LEADER, ctx->replay_out->chunk, sizeof(fd_became_leader_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1214 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_became_leader_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
1215 :
 : /* Consume the scheduled leader slot; a new one is computed on the
 : next reset. */
1216 0 : ctx->next_leader_slot = ULONG_MAX;
1217 0 : ctx->next_leader_tickcount = LONG_MAX;
1218 :
1219 0 : return 1;
1220 0 : }
1221 :
 : /* process_poh_message: handle the PoH tile's leader-slot-ended
 :    notification.  Records the final PoH blockhash for our leader bank
 :    and sets recv_poh; fini_leader_bank can run once both the PoH hash
 :    and the block id have arrived.  Must only be called while leader. */
1222 : static void
1223 : process_poh_message( fd_replay_tile_t * ctx,
1224 0 : fd_poh_leader_slot_ended_t const * slot_ended ) {
1225 :
1226 0 : FD_TEST( ctx->is_booted );
1227 0 : FD_TEST( ctx->is_leader );
1228 0 : FD_TEST( ctx->leader_bank!=NULL );
1229 :
1230 0 : FD_TEST( ctx->highwater_leader_slot>=slot_ended->slot );
1231 0 : FD_TEST( ctx->next_leader_slot>ctx->highwater_leader_slot );
1232 :
1233 : /* Update the poh hash in the bank. We will want to maintain a refcnt
1234 : on the bank until we have received the block id for the block after
1235 : it has been shredded. */
1236 :
1237 0 : memcpy( fd_bank_poh_modify( ctx->leader_bank ), slot_ended->blockhash, sizeof(fd_hash_t) );
1238 :
1239 0 : ctx->recv_poh = 1;
1240 0 : }
1241 :
 : /* publish_reset: emit a REPLAY_SIG_RESET message telling the PoH tile
 :    to reset onto the given bank (new fork head): its slot, blockhash,
 :    tick parameters, microblock limit and our next leader slot.  No-op
 :    if there is no replay output link.  Takes an extra refcnt for the
 :    RPC consumer when RPC is enabled. */
1242 : static void
1243 : publish_reset( fd_replay_tile_t * ctx,
1244 : fd_stem_context_t * stem,
1245 0 : fd_bank_t * bank ) {
1246 0 : if( FD_UNLIKELY( ctx->replay_out->idx==ULONG_MAX ) ) return;
1247 :
1248 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last_hash( fd_bank_block_hash_queue_query( bank ) );
1249 0 : FD_TEST( block_hash );
1250 :
1251 0 : fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
1252 :
1253 0 : reset->bank_idx = bank->idx;
1254 0 : reset->timestamp = fd_log_wallclock();
1255 0 : reset->completed_slot = fd_bank_slot_get( bank );
1256 0 : reset->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
1257 0 : reset->ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1258 0 : reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);
1259 0 : fd_memcpy( reset->completed_blockhash, block_hash->uc, sizeof(fd_hash_t) );
1260 :
1261 0 : ulong ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1262 0 : if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
1263 : /* Low power producer, maximum of one microblock per tick in the slot */
1264 0 : reset->max_microblocks_in_slot = ticks_per_slot;
1265 0 : } else {
1266 : /* See the long comment in after_credit for this limit */
1267 0 : reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
1268 0 : }
1269 0 : reset->next_leader_slot = ctx->next_leader_slot;
1270 :
 : /* RPC consumes the reset bank too; it releases this reference. */
1271 0 : if( FD_LIKELY( ctx->rpc_enabled ) ) bank->refcnt++;
1272 :
1273 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_RESET, ctx->replay_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1274 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_poh_reset_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
1275 0 : }
1276 :
 : /* boot_genesis: bootstrap the validator from a genesis message
 :    received on input link in_idx.  The frag payload is laid out as
 :    [lthash | genesis hash | fd_genesis_t].  Reads genesis into the
 :    boot bank inside a non-rooted funk transaction and roots it, wires
 :    up the txncache, publishes stake weights for epochs 0 and 1, runs
 :    post-snapshot init, seeds the store/reasm/block-id map with the
 :    synthetic initial block id for slot 0, initializes all root/reset
 :    tracking state, computes the next leader slot (possibly becoming
 :    leader immediately), and publishes the initial slot-completed,
 :    root-advanced and reset messages. */
1277 : static void
1278 : boot_genesis( fd_replay_tile_t * ctx,
1279 : fd_stem_context_t * stem,
1280 : ulong in_idx,
1281 0 : ulong chunk ) {
1282 : /* If we are bootstrapping, we can't wait to wait for our identity
1283 : vote to be rooted as this creates a circular dependency. */
1284 0 : ctx->has_identity_vote_rooted = 1;
1285 :
1286 0 : uchar const * lthash = (uchar*)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
1287 0 : uchar const * genesis_hash = (uchar*)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk )+sizeof(fd_lthash_value_t);
1288 :
1289 0 : fd_genesis_t const * genesis = fd_type_pun( (uchar*)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk )+sizeof(fd_hash_t)+sizeof(fd_lthash_value_t) );
1290 :
1291 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX );
1292 0 : FD_TEST( bank );
1293 0 : fd_funk_txn_xid_t xid = { .ul = { 0UL, FD_REPLAY_BOOT_BANK_IDX } };
1294 :
1295 : /* Do genesis-related processing in a non-rooted transaction */
1296 0 : fd_funk_txn_xid_t root_xid; fd_funk_txn_xid_set_root( &root_xid );
1297 0 : fd_funk_txn_xid_t target_xid = { .ul = { 0UL, 0UL } };
1298 0 : fd_accdb_attach_child( ctx->accdb_admin, &root_xid, &target_xid );
1299 0 : fd_runtime_read_genesis( ctx->banks, bank, ctx->accdb, &xid, NULL, fd_type_pun_const( genesis_hash ), fd_type_pun_const( lthash ), genesis, &ctx->runtime_stack );
1300 0 : fd_accdb_advance_root( ctx->accdb_admin, &target_xid );
1301 :
 : /* Attach the boot bank's txncache fork under the sentinel root and
 : finalize it with the genesis blockhash. */
1302 0 : static const fd_txncache_fork_id_t txncache_root = { .val = USHORT_MAX };
1303 0 : bank->txncache_fork_id = fd_txncache_attach_child( ctx->txncache, txncache_root );
1304 :
1305 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last_hash( fd_bank_block_hash_queue_query( bank ) );
1306 0 : fd_txncache_finalize_fork( ctx->txncache, bank->txncache_fork_id, 0UL, block_hash->uc );
1307 :
1308 0 : publish_stake_weights( ctx, stem, bank, 0 );
1309 0 : publish_stake_weights( ctx, stem, bank, 1 );
1310 :
1311 : /* We call this after fd_runtime_read_genesis, which sets up the
1312 : slot_bank needed in blockstore_init. */
1313 0 : init_after_snapshot( ctx );
1314 :
1315 : /* Initialize store for genesis case, similar to snapshot case */
1316 0 : fd_hash_t genesis_block_id = { .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
1317 0 : fd_store_exacq( ctx->store );
1318 0 : if( FD_UNLIKELY( fd_store_root( ctx->store ) ) ) {
1319 0 : FD_LOG_CRIT(( "invariant violation: store root is not 0 for genesis" ));
1320 0 : }
1321 0 : fd_store_insert( ctx->store, 0, &genesis_block_id );
1322 0 : ctx->store->slot0 = 0UL; /* Genesis slot */
1323 0 : fd_store_exrel( ctx->store );
1324 :
1325 0 : ctx->published_root_slot = 0UL;
1326 0 : fd_sched_block_add_done( ctx->sched, bank->idx, ULONG_MAX, 0UL );
1327 :
1328 0 : fd_bank_block_height_set( bank, 1UL );
1329 :
 : /* All root tracking starts at the synthetic genesis block. */
1330 0 : ctx->consensus_root = (fd_hash_t){ .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
1331 0 : ctx->consensus_root_slot = 0UL;
1332 0 : ctx->consensus_root_bank_idx = 0UL;
1333 0 : ctx->published_root_slot = 0UL;
1334 0 : ctx->published_root_bank_idx = 0UL;
1335 :
1336 0 : ctx->reset_slot = 0UL;
1337 0 : ctx->reset_bank = bank;
1338 0 : ctx->reset_timestamp_nanos = fd_log_wallclock();
1339 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, 1UL, ctx->identity_pubkey );
1340 0 : if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
1341 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
1342 0 : } else {
1343 0 : ctx->next_leader_tickcount = LONG_MAX;
1344 0 : }
1345 :
1346 0 : ctx->is_booted = 1;
1347 0 : maybe_become_leader( ctx, stem );
1348 :
 : /* Seed reassembly and the block id map with the initial block id. */
1349 0 : fd_hash_t initial_block_id = { .ul = { FD_RUNTIME_INITIAL_BLOCK_ID } };
1350 0 : fd_reasm_fec_t * fec = fd_reasm_insert( ctx->reasm, &initial_block_id, NULL, 0 /* genesis slot */, 0, 0, 0, 0, 1, 0 ); /* FIXME manifest block_id */
1351 0 : fec->bank_idx = 0UL;
1352 :
1353 :
1354 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ 0 ];
1355 0 : FD_TEST( block_id_ele );
1356 0 : block_id_ele->block_id = initial_block_id;
1357 0 : block_id_ele->slot = 0UL;
1358 :
1359 0 : FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );
1360 :
 : /* Pre-fill the identity balance in the outgoing chunk, then publish
 : the initial slot-completed / root-advanced / reset messages. */
1361 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
1362 0 : slot_info->identity_balance = get_identity_balance( ctx, xid );
1363 :
1364 0 : publish_slot_completed( ctx, stem, bank, 1, 0 /* is_leader */ );
1365 0 : publish_root_advanced( ctx, stem );
1366 0 : publish_reset( ctx, stem, bank );
1367 0 : }
1368 :
1369 : static inline void
1370 0 : maybe_verify_cluster_type( fd_replay_tile_t * ctx ) {
1371 0 : if( FD_UNLIKELY( !ctx->is_booted || !ctx->has_genesis_hash ) ) {
1372 0 : return;
1373 0 : }
1374 :
1375 0 : FD_BASE58_ENCODE_32_BYTES( ctx->genesis_hash, hash_cstr );
1376 0 : ulong cluster = fd_genesis_cluster_identify( hash_cstr );
1377 : /* Map pyth-related clusters to unkwown. */
1378 0 : switch( cluster ) {
1379 0 : case FD_CLUSTER_PYTHNET:
1380 0 : case FD_CLUSTER_PYTHTEST:
1381 0 : cluster = FD_CLUSTER_UNKNOWN;
1382 0 : }
1383 :
1384 0 : if( FD_UNLIKELY( cluster!=ctx->cluster_type ) ) {
1385 0 : FD_LOG_ERR(( "Your genesis.bin file at `%s` has a genesis hash of `%s` which means the cluster is %s "
1386 0 : "but the snapshot you loaded is for a different cluster %s. If you are trying to join the "
1387 0 : "%s cluster, you can delete the genesis.bin file and restart the node to download the correct "
1388 0 : "genesis file automatically.",
1389 0 : ctx->genesis_path,
1390 0 : hash_cstr,
1391 0 : fd_genesis_cluster_name( cluster ),
1392 0 : fd_genesis_cluster_name( ctx->cluster_type ),
1393 0 : fd_genesis_cluster_name( cluster ) ));
1394 0 : }
1395 0 : }
1396 :
/* on_snapshot_message consumes frags from the snapshot loading tile.
   Manifest messages (full or incremental) recover bank and stake state
   from the deserialized manifest.  The FD_SSMSG_DONE message signals
   that snapshot loading finished: replay then finishes booting by
   seeding the store, scheduler, reasm and block id map with the
   snapshot slot, publishing stake weights and the initial
   slot-completed / root-advanced messages, and arming the leader
   schedule. */
static void
on_snapshot_message( fd_replay_tile_t * ctx,
                     fd_stem_context_t * stem,
                     ulong in_idx,
                     ulong chunk,
                     ulong sig ) {
  ulong msg = fd_ssmsg_sig_message( sig );
  if( FD_LIKELY( msg==FD_SSMSG_DONE ) ) {
    /* An end of message notification indicates the snapshot is loaded.
       Replay is able to start executing from this point onwards. */
    /* TODO: replay should finish booting. Could make replay a
       state machine and set the state here accordingly. */
    ctx->is_booted = 1;

    fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX );
    if( FD_UNLIKELY( !bank ) ) {
      FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", FD_REPLAY_BOOT_BANK_IDX ));
    }

    ulong snapshot_slot = fd_bank_slot_get( bank );
    /* FIXME: This is a hack because the block id of the snapshot slot
       is not provided in the snapshot. A possible solution is to get
       the block id of the snapshot slot from repair. */
    fd_hash_t manifest_block_id = { .ul = { FD_RUNTIME_INITIAL_BLOCK_ID } };

    /* Seed the store: the snapshot slot becomes the store's first
       element.  The store must still be empty (root 0) at this point. */
    fd_store_exacq( ctx->store );
    FD_TEST( !fd_store_root( ctx->store ) );
    fd_store_insert( ctx->store, 0, &manifest_block_id );
    ctx->store->slot0 = snapshot_slot; /* FIXME manifest_block_id */
    fd_store_exrel( ctx->store );

    /* Typically, when we cross an epoch boundary during normal
       operation, we publish the stake weights for the new epoch. But
       since we are starting from a snapshot, we need to publish two
       epochs worth of stake weights: the previous epoch (which is
       needed for voting on the current epoch), and the current epoch
       (which is needed for voting on the next epoch). */
    publish_stake_weights( ctx, stem, bank, 0 );
    publish_stake_weights( ctx, stem, bank, 1 );

    /* Both the consensus root and the published root start out at the
       snapshot slot / boot bank (bank index 0). */
    ctx->consensus_root = manifest_block_id;
    ctx->consensus_root_slot = snapshot_slot;
    ctx->consensus_root_bank_idx = 0UL;
    ctx->published_root_slot = ctx->consensus_root_slot;
    ctx->published_root_bank_idx = 0UL;

    /* Arm the leader schedule relative to the snapshot slot.  The
       tickcount estimate assumes slot_duration_ticks per slot. */
    ctx->reset_slot = snapshot_slot;
    ctx->reset_bank = bank;
    ctx->reset_timestamp_nanos = fd_log_wallclock();
    ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, 1UL, ctx->identity_pubkey );
    if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
      ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
    } else {
      ctx->next_leader_tickcount = LONG_MAX;
    }

    /* Tell the scheduler the boot bank is already fully replayed. */
    fd_sched_block_add_done( ctx->sched, bank->idx, ULONG_MAX, snapshot_slot );
    FD_TEST( bank->idx==0UL );

    fd_funk_txn_xid_t xid = { .ul = { snapshot_slot, FD_REPLAY_BOOT_BANK_IDX } };

    fd_funk_t * funk = fd_accdb_user_v1_funk( ctx->accdb );
    fd_features_restore( bank, funk, &xid );

    fd_runtime_update_leaders( bank, &ctx->runtime_stack );

    /* Register the boot bank's block id so later lookups by block id
       (e.g. tower resets) can resolve it. */
    fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ 0 ];
    FD_TEST( block_id_ele );
    block_id_ele->block_id = manifest_block_id;
    block_id_ele->slot = snapshot_slot;
    FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );

    /* We call this after fd_runtime_read_genesis, which sets up the
       slot_bank needed in blockstore_init. */
    init_after_snapshot( ctx );

    fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
    slot_info->identity_balance = get_identity_balance( ctx, xid );

    publish_slot_completed( ctx, stem, bank, 1, 0 /* is_leader */ );
    publish_root_advanced( ctx, stem );

    /* Seed reasm with the snapshot slot's FEC so children can chain
       off it. */
    fd_reasm_fec_t * fec = fd_reasm_insert( ctx->reasm, &manifest_block_id, NULL, snapshot_slot, 0, 0, 0, 0, 1, 0 ); /* FIXME manifest block_id */
    fec->bank_idx = 0UL;

    ctx->cluster_type = fd_bank_cluster_type_get( bank );

    maybe_verify_cluster_type( ctx );

    return;
  }

  switch( msg ) {
    case FD_SSMSG_MANIFEST_FULL:
    case FD_SSMSG_MANIFEST_INCREMENTAL: {
      /* We may either receive a full snapshot manifest or an
         incremental snapshot manifest. Note that this external message
         id is only used temporarily because replay cannot yet receive
         the firedancer-internal snapshot manifest message. */
      if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark ) )
        FD_LOG_ERR(( "chunk %lu from in %d corrupt, not in range [%lu,%lu]", chunk, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));

      fd_ssload_recover( fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ),
                         ctx->banks,
                         fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX ),
                         ctx->runtime_stack.stakes.vote_credits );

      /* Copy the manifest's hard fork list into tile-local storage. */
      fd_snapshot_manifest_t const * manifest = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
      ctx->hard_forks_cnt = manifest->hard_forks_len;
      for( ulong i=0UL; i<manifest->hard_forks_len; i++ ) ctx->hard_forks[ i ] = manifest->hard_forks[ i ];
      break;
    }
    default: {
      FD_LOG_ERR(( "Received unknown snapshot message with msg %lu", msg ));
      return;
    }
  }

  return;
}
1517 :
1518 : static void
1519 : dispatch_task( fd_replay_tile_t * ctx,
1520 : fd_stem_context_t * stem,
1521 0 : fd_sched_task_t * task ) {
1522 :
1523 0 : switch( task->task_type ) {
1524 0 : case FD_SCHED_TT_TXN_EXEC: {
1525 0 : fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, task->txn_exec->txn_idx );
1526 :
1527 : /* FIXME: this should be done during txn parsing so that we don't
1528 : have to loop over all accounts a second time. */
1529 : /* Insert or reverify invoked programs for this epoch, if needed. */
1530 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->txn_exec->bank_idx );
1531 :
1532 0 : # if FD_HAS_FLATCC
1533 : /* Add the transaction to the block dumper if necessary. This
1534 : logic doesn't need to be fork-aware since it's only meant to
1535 : be used in backtest. */
1536 0 : if( FD_UNLIKELY( ctx->capture_ctx && ctx->capture_ctx->dump_block_to_pb ) ) {
1537 0 : fd_dump_block_to_protobuf_collect_tx( ctx->block_dump_ctx, txn_p );
1538 0 : }
1539 0 : # endif
1540 :
1541 0 : bank->refcnt++;
1542 :
1543 0 : if( FD_UNLIKELY( !bank->first_transaction_scheduled_nanos ) ) bank->first_transaction_scheduled_nanos = fd_log_wallclock();
1544 :
1545 0 : fd_replay_out_link_t * exec_out = ctx->exec_out;
1546 0 : fd_exec_txn_exec_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
1547 0 : memcpy( &exec_msg->txn, txn_p, sizeof(fd_txn_p_t) );
1548 0 : exec_msg->bank_idx = task->txn_exec->bank_idx;
1549 0 : exec_msg->txn_idx = task->txn_exec->txn_idx;
1550 0 : if( FD_UNLIKELY( ctx->capture_ctx ) ) {
1551 0 : exec_msg->capture_txn_idx = ctx->capture_ctx->current_txn_idx++;
1552 0 : }
1553 0 : fd_stem_publish( stem, exec_out->idx, (FD_EXEC_TT_TXN_EXEC<<32) | task->txn_exec->exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1554 0 : exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
1555 0 : break;
1556 0 : }
1557 0 : case FD_SCHED_TT_TXN_SIGVERIFY: {
1558 0 : fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, task->txn_sigverify->txn_idx );
1559 :
1560 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->txn_sigverify->bank_idx );
1561 0 : bank->refcnt++;
1562 :
1563 0 : fd_replay_out_link_t * exec_out = ctx->exec_out;
1564 0 : fd_exec_txn_sigverify_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
1565 0 : memcpy( &exec_msg->txn, txn_p, sizeof(fd_txn_p_t) );
1566 0 : exec_msg->bank_idx = task->txn_sigverify->bank_idx;
1567 0 : exec_msg->txn_idx = task->txn_sigverify->txn_idx;
1568 0 : fd_stem_publish( stem, exec_out->idx, (FD_EXEC_TT_TXN_SIGVERIFY<<32) | task->txn_sigverify->exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, 0UL );
1569 0 : exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
1570 0 : break;
1571 0 : };
1572 0 : default: {
1573 0 : FD_LOG_CRIT(( "unexpected task type %lu", task->task_type ));
1574 0 : }
1575 0 : }
1576 0 : }
1577 :
1578 : /* Returns 1 if charge_busy. */
1579 : static int
1580 : replay( fd_replay_tile_t * ctx,
1581 0 : fd_stem_context_t * stem ) {
1582 :
1583 0 : if( FD_UNLIKELY( !ctx->is_booted ) ) return 0;
1584 :
1585 0 : int charge_busy = 0;
1586 0 : fd_sched_task_t task[ 1 ];
1587 0 : if( FD_UNLIKELY( !fd_sched_task_next_ready( ctx->sched, task ) ) ) {
1588 0 : return charge_busy; /* Nothing to execute or do. */
1589 0 : }
1590 :
1591 0 : charge_busy = 1;
1592 :
1593 0 : switch( task->task_type ) {
1594 0 : case FD_SCHED_TT_BLOCK_START: {
1595 0 : replay_block_start( ctx, stem, task->block_start->bank_idx, task->block_start->parent_bank_idx, task->block_start->slot );
1596 0 : fd_sched_task_done( ctx->sched, FD_SCHED_TT_BLOCK_START, ULONG_MAX, ULONG_MAX );
1597 0 : break;
1598 0 : }
1599 0 : case FD_SCHED_TT_BLOCK_END: {
1600 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->block_end->bank_idx );
1601 0 : if( FD_LIKELY( !(bank->flags&FD_BANK_FLAGS_DEAD) ) ) replay_block_finalize( ctx, stem, bank );
1602 0 : fd_sched_task_done( ctx->sched, FD_SCHED_TT_BLOCK_END, ULONG_MAX, ULONG_MAX );
1603 0 : break;
1604 0 : }
1605 0 : case FD_SCHED_TT_TXN_EXEC:
1606 0 : case FD_SCHED_TT_TXN_SIGVERIFY: {
1607 : /* Likely/common case: we have a transaction we actually need to
1608 : execute. */
1609 0 : dispatch_task( ctx, stem, task );
1610 0 : break;
1611 0 : }
1612 0 : default: {
1613 0 : FD_LOG_CRIT(( "unexpected task type %lu", task->task_type ));
1614 0 : }
1615 0 : }
1616 :
1617 0 : return charge_busy;
1618 0 : }
1619 :
1620 : static int
1621 0 : can_process_fec( fd_replay_tile_t * ctx ) {
1622 0 : fd_reasm_fec_t * fec;
1623 0 : if( FD_UNLIKELY( !fd_sched_can_ingest( ctx->sched, 1UL ) ) ) {
1624 0 : ctx->metrics.sched_full++;
1625 0 : return 0;
1626 0 : }
1627 :
1628 0 : if( FD_UNLIKELY( (fec = fd_reasm_peek( ctx->reasm ))==NULL ) ) {
1629 0 : ctx->metrics.reasm_empty++;
1630 0 : return 0;
1631 0 : }
1632 :
1633 0 : ctx->metrics.reasm_latest_slot = fec->slot;
1634 0 : ctx->metrics.reasm_latest_fec_idx = fec->fec_set_idx;
1635 :
1636 0 : if( FD_UNLIKELY( ctx->is_leader && fec->fec_set_idx==0U && fd_reasm_parent( ctx->reasm, fec )->bank_idx==ctx->leader_bank->idx ) ) {
1637 : /* There's a race that's exceedingly rare, where we receive the
1638 : FEC set for the slot right after our leader rotation before we
1639 : freeze the bank for the last slot in our leader rotation.
1640 : Leader slot freezing happens only after if we've received the
1641 : final PoH hash from the poh tile as well as the final FEC set
1642 : for the leader slot. So the race happens when FEC sets are
1643 : delivered and processed sooner than the PoH hash, aka when the
1644 : poh=>shred=>replay path for the block id somehow beats the
1645 : poh=>replay path for the poh hash. To mitigate this race,
1646 : we must block on ingesting the FEC set for the ensuing slot
1647 : before the leader bank freezes, because that would violate
1648 : ordering invariants in banks and sched. */
1649 0 : FD_TEST( ctx->recv_block_id );
1650 0 : FD_TEST( !ctx->recv_poh );
1651 0 : ctx->metrics.leader_bid_wait++;
1652 0 : return 0;
1653 0 : }
1654 :
1655 : /* If fec_set_idx is 0, we need a new bank for a new slot. Banks must
1656 : not be full in this case. */
1657 0 : if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) && fec->fec_set_idx==0 ) ) {
1658 0 : ctx->metrics.banks_full++;
1659 0 : return 0;
1660 0 : }
1661 :
1662 : /* Otherwise, banks may not be full, so we can always create a new
1663 : bank if needed. Or, if banks are full, the current fec set's
1664 : ancestor (idx 0) already created a bank for this slot.*/
1665 0 : return 1;
1666 0 : }
1667 :
/* process_fec_set consumes one FEC set taken off reasm: links it into
   the store, assigns it a bank index and parent bank index, records
   the block id once the FEC completes a slot, then reads the FEC's
   payload from the store and hands it to the scheduler.  Leader FECs
   stop after the block id bookkeeping — the leader pipeline already
   executed those transactions. */
static void
process_fec_set( fd_replay_tile_t * ctx,
                 fd_stem_context_t * stem,
                 fd_reasm_fec_t * reasm_fec ) {
  long now = fd_log_wallclock();

  /* Linking only requires a shared lock because the fields that are
     modified are only read on publish which uses exclusive lock. */

  long shacq_start, shacq_end, shrel_end;

  FD_STORE_SHARED_LOCK( ctx->store, shacq_start, shacq_end, shrel_end ) {
    if( FD_UNLIKELY( !fd_store_link( ctx->store, &reasm_fec->key, &reasm_fec->cmr ) ) ) {
      FD_BASE58_ENCODE_32_BYTES( reasm_fec->key.key, key_b58 );
      FD_BASE58_ENCODE_32_BYTES( reasm_fec->cmr.key, cmr_b58 );
      FD_LOG_WARNING(( "failed to link %s %s. slot %lu fec_set_idx %u", key_b58, cmr_b58, reasm_fec->slot, reasm_fec->fec_set_idx ));
    }
  } FD_STORE_SHARED_LOCK_END;
  fd_histf_sample( ctx->metrics.store_link_wait, (ulong)fd_long_max( shacq_end - shacq_start, 0L ) );
  fd_histf_sample( ctx->metrics.store_link_work, (ulong)fd_long_max( shrel_end - shacq_end, 0L ) );

  /* Update the reasm_fec with the correct bank index and parent bank
     index. If the FEC belongs to a leader, we have already allocated
     a bank index for the FEC and it just needs to be propagated to the
     reasm_fec. */

  reasm_fec->parent_bank_idx = fd_reasm_parent( ctx->reasm, reasm_fec )->bank_idx;

  if( FD_UNLIKELY( reasm_fec->leader ) ) {
    /* If we are the leader we just need to copy in the bank index that
       the leader slot is using. */
    FD_TEST( ctx->leader_bank!=NULL );
    reasm_fec->bank_idx = ctx->leader_bank->idx;
  } else if( FD_UNLIKELY( reasm_fec->fec_set_idx==0U ) ) {
    /* If we are seeing a FEC with fec set idx 0, this means that we are
       starting a new slot, and we need a new bank index. */
    reasm_fec->bank_idx = fd_banks_new_bank( ctx->banks, reasm_fec->parent_bank_idx, now )->idx;
  } else {
    /* We are continuing to execute through a slot that we already have
       a bank index for. */
    reasm_fec->bank_idx = reasm_fec->parent_bank_idx;
  }

  if( FD_UNLIKELY( reasm_fec->slot_complete ) ) {
    /* Once the block id for a block is known it must be added to the
       leader block mapping. */
    fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
    FD_TEST( block_id_ele );

    /* If an entry already exists for this bank index in the block id
       map, we can safely remove it and replace it with the new entry.
       This is safe because we know that the old entry for this fork
       index has already been pruned away. */
    if( FD_LIKELY( block_id_ele->slot!=FD_SLOT_NULL && fd_block_id_map_ele_query( ctx->block_id_map, &block_id_ele->block_id, NULL, ctx->block_id_arr ) ) ) {
      FD_TEST( fd_block_id_map_ele_remove( ctx->block_id_map, &block_id_ele->block_id, NULL, ctx->block_id_arr ) );
    }

    block_id_ele->block_id = reasm_fec->key;
    block_id_ele->slot = reasm_fec->slot;

    FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );

    if( FD_UNLIKELY( reasm_fec->leader ) ) {
      /* Final FEC of our own leader slot: the block id is now known,
         which fini_leader_bank waits on (see after_credit). */
      ctx->recv_block_id = 1;
    }
  }

  if( FD_UNLIKELY( reasm_fec->leader ) ) {
    return;
  }

  /* Forks form a partial ordering over FEC sets. The Repair tile
     delivers FEC sets in-order per fork, but FEC set ordering across
     forks is arbitrary */
  fd_sched_fec_t sched_fec[ 1 ];

# if DEBUG_LOGGING
  FD_BASE58_ENCODE_32_BYTES( reasm_fec->key.key, key_b58 );
  FD_BASE58_ENCODE_32_BYTES( reasm_fec->cmr.key, cmr_b58 );
  FD_LOG_INFO(( "replay processing FEC set for slot %lu fec_set_idx %u, mr %s cmr %s", reasm_fec->slot, reasm_fec->fec_set_idx, key_b58, cmr_b58 ));
# endif

  /* Read FEC set from the store. This should happen before we try to
     ingest the FEC set. This allows us to filter out frags that were
     in-flight when we published away minority forks that the frags land
     on. These frags would have no bank to execute against, because
     their corresponding banks, or parent banks, have also been pruned
     during publishing. A query against store will rightfully tell us
     that the underlying data is not found, implying that this is for a
     minority fork that we can safely ignore. */
  FD_STORE_SHARED_LOCK( ctx->store, shacq_start, shacq_end, shrel_end ) {
    fd_store_fec_t * store_fec = fd_store_query( ctx->store, &reasm_fec->key );
    if( FD_UNLIKELY( !store_fec ) ) {
      /* The only case in which a FEC is not found in the store after
         repair has notified is if the FEC was on a minority fork that
         has already been published away. In this case we abandon the
         entire slice because it is no longer relevant. */
      FD_BASE58_ENCODE_32_BYTES( reasm_fec->key.key, key_b58 );
      FD_LOG_WARNING(( "store fec for slot: %lu is on minority fork already pruned by publish. abandoning slice. root: %lu. pruned merkle: %s", reasm_fec->slot, ctx->consensus_root_slot, key_b58 ));
      return;
    }
    FD_TEST( store_fec );
    sched_fec->fec = store_fec;
    sched_fec->shred_cnt = reasm_fec->data_cnt;
  } FD_STORE_SHARED_LOCK_END;

  fd_histf_sample( ctx->metrics.store_read_wait, (ulong)fd_long_max( shacq_end - shacq_start, 0UL ) );
  fd_histf_sample( ctx->metrics.store_read_work, (ulong)fd_long_max( shrel_end - shacq_end, 0UL ) );

  /* Fill out the rest of the scheduler's FEC descriptor from the reasm
     metadata and the current address-lookup-table resolution context. */
  sched_fec->is_last_in_batch = !!reasm_fec->data_complete;
  sched_fec->is_last_in_block = !!reasm_fec->slot_complete;
  sched_fec->bank_idx = reasm_fec->bank_idx;
  sched_fec->parent_bank_idx = reasm_fec->parent_bank_idx;
  sched_fec->slot = reasm_fec->slot;
  sched_fec->parent_slot = reasm_fec->slot - reasm_fec->parent_off;
  sched_fec->is_first_in_block = reasm_fec->fec_set_idx==0U;
  fd_funk_txn_xid_copy( sched_fec->alut_ctx->xid, fd_funk_last_publish( ctx->accdb_admin->funk ) );
  sched_fec->alut_ctx->accdb[0] = ctx->accdb[0];
  sched_fec->alut_ctx->els = ctx->published_root_slot;

  /* If the scheduler rejects the FEC set, the block can never complete:
     mark it dead and tell downstream consumers. */
  if( FD_UNLIKELY( !fd_sched_fec_ingest( ctx->sched, sched_fec ) ) ) {
    fd_bank_t * bank = fd_banks_bank_query( ctx->banks, sched_fec->bank_idx );
    publish_slot_dead( ctx, stem, bank );
    fd_banks_mark_bank_dead( ctx->banks, bank );
  }
}
1794 :
1795 : static void
1796 : funk_publish( fd_replay_tile_t * ctx,
1797 : ulong slot,
1798 0 : ulong bank_idx ) {
1799 0 : fd_funk_txn_xid_t xid = { .ul[0] = slot, .ul[1] = bank_idx };
1800 0 : FD_LOG_DEBUG(( "publishing slot=%lu", slot ));
1801 :
1802 : /* This is the standard case. Publish all transactions up to and
1803 : including the watermark. This will publish any in-prep ancestors
1804 : of root_txn as well. */
1805 0 : fd_accdb_advance_root( ctx->accdb_admin, &xid );
1806 0 : fd_progcache_txn_advance_root( ctx->progcache_admin, &xid );
1807 0 : }
1808 :
/* advance_published_root tries to move the published root (the oldest
   state replay still retains) toward the consensus root decided by
   tower.  Returns 1 if the root advanced, 0 if no advance is currently
   possible (e.g. banks between the published and consensus roots are
   still pinned).  On success, prunes store, funk, progcache, txncache,
   sched, banks and reasm up to the newly advanceable root. */
static int
advance_published_root( fd_replay_tile_t * ctx ) {

  /* Resolve the consensus root block id to its bank index. */
  fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &ctx->consensus_root, NULL, ctx->block_id_arr );
  if( FD_UNLIKELY( !block_id_ele ) ) {
    FD_BASE58_ENCODE_32_BYTES( ctx->consensus_root.key, consensus_root_b58 );
    FD_LOG_CRIT(( "invariant violation: block id ele not found for consensus root %s", consensus_root_b58 ));
  }
  ulong target_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );

  fd_sched_root_notify( ctx->sched, target_bank_idx );

  /* If the identity vote has been seen on a bank that should be rooted,
     then we are now ready to produce blocks. */
  if( FD_UNLIKELY( !ctx->has_identity_vote_rooted ) ) {
    fd_bank_t * root_bank = fd_banks_bank_query( ctx->banks, target_bank_idx );
    if( FD_UNLIKELY( !root_bank ) ) FD_LOG_CRIT(( "invariant violation: root bank not found for bank index %lu", target_bank_idx ));
    if( FD_LIKELY( fd_bank_has_identity_vote_get( root_bank ) ) ) ctx->has_identity_vote_rooted = 1;
  }

  /* Ask banks how far toward the target we can actually advance right
     now; bail out if no advance is possible yet. */
  ulong advanceable_root_idx = ULONG_MAX;
  if( FD_UNLIKELY( !fd_banks_advance_root_prepare( ctx->banks, target_bank_idx, &advanceable_root_idx ) ) ) return 0;

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, advanceable_root_idx );
  FD_TEST( bank );

  fd_block_id_ele_t * advanceable_root_ele = &ctx->block_id_arr[ advanceable_root_idx ];
  if( FD_UNLIKELY( !advanceable_root_ele ) ) {
    FD_LOG_CRIT(( "invariant violation: advanceable root ele not found for bank index %lu", advanceable_root_idx ));
  }

  /* Prune the store up to the new root under the exclusive lock. */
  long exacq_start, exacq_end, exrel_end;
  FD_STORE_EXCLUSIVE_LOCK( ctx->store, exacq_start, exacq_end, exrel_end ) {
    fd_store_publish( ctx->store, &advanceable_root_ele->block_id );
  } FD_STORE_EXCLUSIVE_LOCK_END;

  fd_histf_sample( ctx->metrics.store_publish_wait, (ulong)fd_long_max( exacq_end-exacq_start, 0UL ) );
  fd_histf_sample( ctx->metrics.store_publish_work, (ulong)fd_long_max( exrel_end-exacq_end, 0UL ) );

  /* Advance the remaining subsystems to the new root. */
  ulong advanceable_root_slot = fd_bank_slot_get( bank );
  funk_publish( ctx, advanceable_root_slot, bank->idx );

  fd_txncache_advance_root( ctx->txncache, bank->txncache_fork_id );
  fd_sched_advance_root( ctx->sched, advanceable_root_idx );
  fd_banks_advance_root( ctx->banks, advanceable_root_idx );
  fd_reasm_publish( ctx->reasm, &advanceable_root_ele->block_id );

  ctx->published_root_slot = advanceable_root_slot;
  ctx->published_root_bank_idx = advanceable_root_idx;

  return 1;
}
1861 :
/* after_credit is replay's idle-time work loop, run by the stem once
   per iteration when output credits are available.  Work is attempted
   in strict priority order, returning (with charge_busy set and
   polling suppressed) as soon as one action fires: become leader,
   finish a completed leader slot, advance the published root, ingest
   the next FEC set from reasm, and finally dispatch scheduler tasks. */
static void
after_credit( fd_replay_tile_t * ctx,
              fd_stem_context_t * stem,
              int *              opt_poll_in,
              int *              charge_busy ) {
  if( FD_UNLIKELY( !ctx->is_booted ) ) return;

  if( FD_UNLIKELY( maybe_become_leader( ctx, stem ) ) ) {
    *charge_busy = 1;
    *opt_poll_in = 0;
    return;
  }

  /* If we are leader, we can only unbecome the leader iff we have
     received the poh hash from the poh tile and block id from reasm. */
  if( FD_UNLIKELY( ctx->is_leader && ctx->recv_block_id && ctx->recv_poh ) ) {
    fini_leader_bank( ctx, stem );
    *charge_busy = 1;
    *opt_poll_in = 0;
    return;
  }

  /* If the published_root is not caught up to the consensus root, then
     we should try to advance the published root. */
  if( FD_UNLIKELY( ctx->consensus_root_bank_idx!=ctx->published_root_bank_idx && advance_published_root( ctx ) ) ) {
    *charge_busy = 1;
    *opt_poll_in = 0;
    return;
  }

  /* If the reassembler has a fec that is ready, we should process it
     and pass it to the scheduler. */

  /* FIXME: The reasm logic needs to get reworked to support
     equivocation more robustly. */
  if( FD_LIKELY( can_process_fec( ctx ) ) ) {
    fd_reasm_fec_t * fec = fd_reasm_peek( ctx->reasm );

    /* If fec->eqvoc is set that means that equivocation mid-block was
       detected in fd_reasm_t. We need to replay up to and including
       the equivocating FEC on a new bank. */

    if( FD_UNLIKELY( fec->eqvoc ) ) {
      FD_LOG_WARNING(( "Block equivocation detected at slot %lu", fec->slot ));

      /* We need to figure out which and how many FECs we need to
         (re)insert into the scheduler. We work backwards from the
         equivocating FEC, querying for chained merkle roots until we
         reach the first FEC in the slot.
         TODO: replace the magic number with a constant for the max
         number of fecs possible in a slot with fix-32. */
      /* NOTE(review): neither the 1024 capacity nor the result of
         fd_reasm_query is checked before the next iteration
         dereferences fecs[ fec_cnt-1UL ] — presumably reasm
         guarantees the chain terminates at fec_set_idx 0 within
         bounds; confirm. */
      fd_reasm_fec_t * fecs[ 1024 ] = { [0] = fec };
      ulong fec_cnt = 1UL;
      while( fecs[ fec_cnt-1UL ]->fec_set_idx!=0UL ) {
        fec = fd_reasm_query( ctx->reasm, &fecs[ fec_cnt-1UL ]->cmr );
        fecs[ fec_cnt++ ] = fec;
      }

      /* If we don't have enough space in the scheduler to ingest all of
         FECs, we can't proceed yet. */
      if( FD_UNLIKELY( !fd_sched_can_ingest( ctx->sched, fec_cnt ) ) ) return;

      /* Now that we have validated that sched can ingest all of the
         required FECs, it is finally safe to remove the equivocating
         fec from the reasm deque. */
      fd_reasm_out( ctx->reasm );

      /* Now we can process all of the FECs. */
      for( ulong i=fec_cnt; i>0UL; i-- ) {
        process_fec_set( ctx, stem, fecs[i-1UL] );
      }
    } else {
      /* Standard case. */
      fec = fd_reasm_out( ctx->reasm );
      process_fec_set( ctx, stem, fec );
    }

    *charge_busy = 1;
    *opt_poll_in = 0;
    return;
  }

  /* Nothing higher priority to do: drive the execution pipeline. */
  *charge_busy = replay( ctx, stem );
  *opt_poll_in = !*charge_busy;
}
1947 :
1948 : static int
1949 : before_frag( fd_replay_tile_t * ctx,
1950 : ulong in_idx,
1951 : ulong seq FD_PARAM_UNUSED,
1952 0 : ulong sig FD_PARAM_UNUSED ) {
1953 :
1954 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_SHRED ) ) {
1955 : /* If reasm is full, we can not insert any more FEC sets. We must
1956 : not consume any frags from shred_out until reasm can process more
1957 : FEC sets. */
1958 :
1959 0 : if( FD_UNLIKELY( !fd_reasm_free( ctx->reasm ) ) ) {
1960 0 : return -1;
1961 0 : }
1962 0 : }
1963 :
1964 0 : return 0;
1965 0 : }
1966 :
/* process_exec_task_done handles a completion message from an exec
   tile for a previously dispatched task.  The sig's upper 32 bits
   carry the task type and the lower 32 bits the exec tile index.
   Releases the bank reference taken in dispatch_task, marks the block
   dead on any txn failure, and retires the task in the scheduler. */
static void
process_exec_task_done( fd_replay_tile_t * ctx,
                        fd_stem_context_t * stem,
                        fd_exec_task_done_msg_t * msg,
                        ulong sig ) {

  ulong exec_tile_idx = sig&0xFFFFFFFFUL;

  /* Drop the reference taken when the task was dispatched. */
  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, msg->bank_idx );
  bank->refcnt--;

  switch( sig>>32 ) {
    case FD_EXEC_TT_TXN_EXEC: {
      if( FD_UNLIKELY( !ctx->has_identity_vote_rooted ) ) {
        /* Query the txn signature against our recently generated vote
           txn signatures. If the query is successful, then we have
           seen our own vote transaction land and this should be marked
           in the bank. We go through this exercise until we've seen
           our vote rooted. */
        fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, msg->txn_exec->txn_idx );
        if( fd_vote_tracker_query_sig( ctx->vote_tracker, fd_type_pun_const( txn_p->payload+TXN( txn_p )->signature_off ) ) ) {
          *fd_bank_has_identity_vote_modify( bank ) += 1;
        }
      }
      if( FD_UNLIKELY( msg->txn_exec->err && !(bank->flags&FD_BANK_FLAGS_DEAD) ) ) {
        /* Every transaction in a valid block has to execute.
           Otherwise, we should mark the block as dead. Also freeze the
           bank if possible. */
        publish_slot_dead( ctx, stem, bank );
        fd_banks_mark_bank_dead( ctx->banks, bank );
        fd_sched_block_abandon( ctx->sched, bank->idx );
      }
      /* A dead bank can only freeze once no in-flight tasks reference
         it. */
      if( FD_UNLIKELY( (bank->flags&FD_BANK_FLAGS_DEAD) && bank->refcnt==0UL ) ) {
        fd_banks_mark_bank_frozen( ctx->banks, bank );
      }
      fd_sched_task_done( ctx->sched, FD_SCHED_TT_TXN_EXEC, msg->txn_exec->txn_idx, exec_tile_idx );
      break;
    }
    case FD_EXEC_TT_TXN_SIGVERIFY: {
      if( FD_UNLIKELY( msg->txn_sigverify->err && !(bank->flags&FD_BANK_FLAGS_DEAD) ) ) {
        /* Every transaction in a valid block has to sigverify.
           Otherwise, we should mark the block as dead. Also freeze the
           bank if possible. */
        publish_slot_dead( ctx, stem, bank );
        fd_banks_mark_bank_dead( ctx->banks, bank );
        fd_sched_block_abandon( ctx->sched, bank->idx );
      }
      if( FD_UNLIKELY( (bank->flags&FD_BANK_FLAGS_DEAD) && bank->refcnt==0UL ) ) {
        fd_banks_mark_bank_frozen( ctx->banks, bank );
      }
      fd_sched_task_done( ctx->sched, FD_SCHED_TT_TXN_SIGVERIFY, msg->txn_sigverify->txn_idx, exec_tile_idx );
      break;
    }
    default: FD_LOG_CRIT(( "unexpected sig 0x%lx", sig ));
  }

  /* Reference counter just decreased, and an exec tile just got freed
     up. If there's a need to be more aggressively pruning, we could
     check here if more slots just became publishable and publish. Not
     publishing here shouldn't bloat the fork tree too much though. We
     mark minority forks dead as soon as we can, and execution dispatch
     stops on dead blocks. So shortly afterwards, dead blocks should be
     eligible for pruning as in-flight transactions retire from the
     execution pipeline. */

}
2033 :
/* process_tower_slot_done handles an FD_TOWER_SIG_SLOT_DONE message
   from the tower tile.  It releases the tower's reference on the bank
   that was just voted on, records the new reset (fork head) block,
   recomputes our next leader slot, notifies PoH of the reset (if the
   replay_out link exists), potentially becomes leader, advances the
   consensus root if the tower published one, and updates the
   root-distance gauge. */
static void
process_tower_slot_done( fd_replay_tile_t * ctx,
                         fd_stem_context_t * stem,
                         fd_tower_slot_done_t const * msg ) {
  /* Drop the reference the tower held on the replayed bank. */
  fd_bank_t * replay_bank = fd_banks_bank_query( ctx->banks, msg->replay_bank_idx );
  if( FD_UNLIKELY( !replay_bank ) ) FD_LOG_CRIT(( "invariant violation: bank not found for bank index %lu", msg->replay_bank_idx ));
  replay_bank->refcnt--;

  /* Record the reset point and figure out the next slot we lead.  The
     min leader slot excludes slots at or below the reset slot and any
     slot we already started a block for (highwater_leader_slot). */
  ctx->reset_block_id = msg->reset_block_id;
  ctx->reset_slot = msg->reset_slot;
  ctx->reset_timestamp_nanos = fd_log_wallclock();
  ulong min_leader_slot = fd_ulong_max( msg->reset_slot+1UL, fd_ulong_if( ctx->highwater_leader_slot==ULONG_MAX, 0UL, ctx->highwater_leader_slot+1UL ) );
  ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, min_leader_slot, ctx->identity_pubkey );
  if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
    /* Estimate the tick count at which our leader slot begins, from
       the slot distance times the configured slot duration. */
    ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
  } else {
    ctx->next_leader_tickcount = LONG_MAX;
  }

  /* Map the reset block id back to its bank. */
  fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->reset_block_id, NULL, ctx->block_id_arr );
  if( FD_UNLIKELY( !block_id_ele ) ) {
    FD_BASE58_ENCODE_32_BYTES( msg->reset_block_id.key, reset_block_id_b58 );
    FD_LOG_CRIT(( "invariant violation: block id ele doesn't exist for reset block id: %s, slot: %lu", reset_block_id_b58, msg->reset_slot ));
  }
  ulong reset_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, reset_bank_idx );
  if( FD_UNLIKELY( !bank ) ) {
    FD_LOG_CRIT(( "invariant violation: bank not found for bank index %lu", reset_bank_idx ));
  }

  /* A published root can never be ahead of the reset slot. */
  if( FD_LIKELY( msg->root_slot!=ULONG_MAX ) ) FD_TEST( msg->root_slot<=msg->reset_slot );
  ctx->reset_bank = bank;

  /* Tell PoH (via replay_out) to reset onto the new fork head. */
  if( FD_LIKELY( ctx->replay_out->idx!=ULONG_MAX ) ) {
    fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );

    reset->bank_idx = bank->idx;
    reset->timestamp = ctx->reset_timestamp_nanos;
    reset->completed_slot = ctx->reset_slot;
    reset->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
    reset->ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
    reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);

    fd_memcpy( reset->completed_block_id, &block_id_ele->block_id, sizeof(fd_hash_t) );

    /* The most recent entry in the block hash queue is the blockhash
       of the completed (reset) block. */
    fd_blockhashes_t const * block_hash_queue = fd_bank_block_hash_queue_query( bank );
    fd_hash_t const * last_hash = fd_blockhashes_peek_last_hash( block_hash_queue );
    FD_TEST( last_hash );
    fd_memcpy( reset->completed_blockhash, last_hash->uc, sizeof(fd_hash_t) );

    ulong ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
    if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
      /* Low power producer, maximum of one microblock per tick in the slot */
      reset->max_microblocks_in_slot = ticks_per_slot;
    } else {
      /* See the long comment in after_credit for this limit */
      reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
    }
    reset->next_leader_slot = ctx->next_leader_slot;

    /* NOTE(review): an extra reference is taken on the reset bank when
       RPC is enabled — presumably released by the RPC path; confirm
       against the rpc_replay handler. */
    if( FD_LIKELY( ctx->rpc_enabled ) ) bank->refcnt++;

    fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_RESET, ctx->replay_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
    ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_poh_reset_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
  }

  FD_LOG_INFO(( "tower_slot_done(reset_slot=%lu, next_leader_slot=%lu, vote_slot=%lu)", msg->reset_slot, ctx->next_leader_slot, msg->vote_slot ));
  maybe_become_leader( ctx, stem );

  /* If the tower advanced the root, record it and broadcast.  The root
     must be monotonically non-decreasing. */
  if( FD_LIKELY( msg->root_slot!=ULONG_MAX ) ) {

    FD_TEST( msg->root_slot>=ctx->consensus_root_slot );
    fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->root_block_id, NULL, ctx->block_id_arr );
    FD_TEST( block_id_ele );

    ctx->consensus_root_slot = msg->root_slot;
    ctx->consensus_root = msg->root_block_id;
    ctx->consensus_root_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );

    publish_root_advanced( ctx, stem );
  }

  /* Walk the ancestry from the reset bank back to the consensus root
     to measure how far the fork head is from the root. */
  ulong distance = 0UL;
  fd_bank_t * parent = bank;
  while( parent ) {
    if( FD_UNLIKELY( parent->idx==ctx->consensus_root_bank_idx ) ) break;
    parent = fd_banks_get_parent( ctx->banks, parent );
    distance++;
  }

  FD_MGAUGE_SET( REPLAY, ROOT_DISTANCE, distance );
}
2127 :
/* process_fec_complete handles a "FEC set complete" message from the
   shred tile.  The message layout (per the assembly below) is the
   last data shred header followed by the FEC set's merkle root, the
   chained merkle root, and an int flag marking leader FEC sets.
   The FEC set is inserted into the reassembler (reasm). */
static void
process_fec_complete( fd_replay_tile_t * ctx,
                      uchar const * shred_buf ) {
  fd_shred_t const * shred = (fd_shred_t const *)fd_type_pun_const( shred_buf );

  /* Unpack the trailing fields appended after the shred header. */
  fd_hash_t const * merkle_root = (fd_hash_t const *)fd_type_pun_const( shred_buf + FD_SHRED_DATA_HEADER_SZ );
  fd_hash_t const * chained_merkle_root = (fd_hash_t const *)fd_type_pun_const( shred_buf + FD_SHRED_DATA_HEADER_SZ + sizeof(fd_hash_t) );
  int is_leader_fec = *(int const *) fd_type_pun_const( shred_buf + FD_SHRED_DATA_HEADER_SZ + sizeof(fd_hash_t) + sizeof(fd_hash_t) );

  int data_complete = !!( shred->data.flags & FD_SHRED_DATA_FLAG_DATA_COMPLETE );
  int slot_complete = !!( shred->data.flags & FD_SHRED_DATA_FLAG_SLOT_COMPLETE );

  /* The merkle root must not already be known to the reassembler. */
  FD_TEST( !fd_reasm_query( ctx->reasm, merkle_root ) );
  /* If this is the first FEC set of a slot whose parent is the reasm
     anchor slot, chain off the reasm root instead of the advertised
     chained merkle root (the advertised one predates our anchor). */
  if( FD_UNLIKELY( shred->slot - shred->data.parent_off == fd_reasm_slot0( ctx->reasm ) && shred->fec_set_idx == 0) ) {
    chained_merkle_root = &fd_reasm_root( ctx->reasm )->key;
  }

  /* There must be room; backpressure should prevent overflow here. */
  FD_TEST( fd_reasm_free( ctx->reasm ) );

  FD_TEST( fd_reasm_insert( ctx->reasm, merkle_root, chained_merkle_root, shred->slot, shred->fec_set_idx, shred->data.parent_off, (ushort)(shred->idx - shred->fec_set_idx + 1), data_complete, slot_complete, is_leader_fec ) );
}
2149 :
2150 : static void
2151 0 : process_resolv_slot_completed( fd_replay_tile_t * ctx, ulong bank_idx ) {
2152 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
2153 0 : FD_TEST( bank );
2154 :
2155 0 : bank->refcnt--;
2156 0 : }
2157 :
/* process_vote_txn_sent records a vote transaction that the send tile
   has signed and sent.  Until one of our own votes has been observed
   rooted (has_identity_vote_rooted), each sent vote's signature is
   inserted into the vote tracker so we can later recognize it. */
static void
process_vote_txn_sent( fd_replay_tile_t * ctx,
                       fd_txn_m_t * txnm ) {
  /* The send tile has signed and sent a vote.  Add this vote to the
     vote tracker.  We go through this exercise until we've seen our
     vote rooted. */
  if( FD_UNLIKELY( !ctx->has_identity_vote_rooted ) ) {
    /* The serialized payload immediately follows the fd_txn_m_t header. */
    uchar * payload = ((uchar *)txnm) + sizeof(fd_txn_m_t);
    uchar txn_mem[ FD_TXN_MAX_SZ ] __attribute__((aligned(alignof(fd_txn_t))));
    fd_txn_t * txn = (fd_txn_t *)txn_mem;
    if( FD_UNLIKELY( !fd_txn_parse( payload, txnm->payload_sz, txn_mem, NULL ) ) ) {
      FD_LOG_CRIT(( "Could not parse txn from send tile" ));
    }
    /* Track the transaction by its first signature. */
    fd_vote_tracker_insert( ctx->vote_tracker, fd_type_pun_const( payload+txn->signature_off ) );
  }
}
2174 :
/* maybe_verify_shred_version cross-checks the shred version from all
   available sources once enough information has arrived:

   1. If both a configured expected shred version and one learned from
      ipecho are present, they must agree.
   2. If the genesis hash and the hard fork list are known, the shred
      version is recomputed from them and must match whichever source
      we have (configured value takes precedence over ipecho).

   The recomputation hashes the genesis hash together with each
   distinct hard fork slot (in ascending order) and its multiplicity,
   then folds the 32-byte result into 16 bits.  NOTE(review): this
   presumably mirrors Agave's shred version derivation — confirm
   against the upstream implementation. */
static inline void
maybe_verify_shred_version( fd_replay_tile_t * ctx ) {
  if( FD_LIKELY( ctx->expected_shred_version && ctx->ipecho_shred_version ) ) {
    if( FD_UNLIKELY( ctx->expected_shred_version!=ctx->ipecho_shred_version ) ) {
      FD_LOG_ERR(( "shred version mismatch: expected %u but got %u from ipecho", ctx->expected_shred_version, ctx->ipecho_shred_version ) );
    }
  }

  /* hard_forks_cnt==ULONG_MAX means the hard fork list is not yet
     known (see unprivileged_init). */
  if( FD_LIKELY( ctx->has_genesis_hash && ctx->hard_forks_cnt!=ULONG_MAX && (ctx->expected_shred_version || ctx->ipecho_shred_version) ) ) {
    ushort expected_shred_version = ctx->expected_shred_version ? ctx->expected_shred_version : ctx->ipecho_shred_version;

    /* View the running 32-byte hash both as bytes (for hashing) and
       as 16 u16 words (for the final XOR fold). */
    union {
      uchar c[ 32 ];
      ushort s[ 16 ];
    } running_hash;
    fd_memcpy( running_hash.c, ctx->genesis_hash, sizeof(fd_hash_t) );

    /* Selection-style sweep over the (unsorted) hard fork slots:
       repeatedly find the smallest not-yet-processed slot value, count
       its duplicates, and fold (hash, slot, count) into the running
       hash.  O(n^2) but the hard fork list is short in practice. */
    ulong processed = 0UL;
    ulong min_value = 0UL;
    while( processed<ctx->hard_forks_cnt ) {
      ulong min_index = ULONG_MAX;
      for( ulong i=0UL; i<ctx->hard_forks_cnt; i++ ) {
        if( ctx->hard_forks[ i ]>=min_value && (min_index==ULONG_MAX || ctx->hard_forks[ i ]<ctx->hard_forks[ min_index ] ) ) {
          min_index = i;
        }
      }

      FD_TEST( min_index!=ULONG_MAX );
      min_value = ctx->hard_forks[ min_index ];
      ulong min_count = 0UL;
      for( ulong i=0UL; i<ctx->hard_forks_cnt; i++ ) {
        if( ctx->hard_forks[ i ]==min_value ) min_count++;
      }

      /* data = running_hash (32) || slot (8, LE) || count (8, LE) */
      uchar data[ 48UL ];
      fd_memcpy( data, running_hash.c, sizeof(fd_hash_t) );
      fd_memcpy( data+32UL, &min_value, sizeof(ulong) );
      fd_memcpy( data+40UL, &min_count, sizeof(ulong) );

      FD_TEST( fd_sha256_hash( data, 48UL, running_hash.c ) );
      processed += min_count;
      min_value += 1UL;
    }

    /* Fold the 256-bit hash down to 16 bits, byte-swap, then add one
       saturating at USHORT_MAX so that a shred version is never 0. */
    ushort xor = 0;
    for( ulong i=0UL; i<16UL; i++ ) xor ^= running_hash.s[ i ];

    xor = fd_ushort_bswap( xor );
    xor = fd_ushort_if( xor<USHORT_MAX, (ushort)(xor + 1), USHORT_MAX );

    if( FD_UNLIKELY( expected_shred_version!=xor ) ) {
      FD_BASE58_ENCODE_32_BYTES( ctx->genesis_hash, genesis_hash_b58 );
      FD_LOG_ERR(( "shred version mismatch: expected %u but got %u from genesis hash %s and hard forks", expected_shred_version, xor, genesis_hash_b58 ));
    }
  }
}
2231 :
/* returnable_frag is the stem callback invoked for each incoming
   fragment.  It validates the fragment's chunk/size against the input
   link's dcache bounds and dispatches on the kind of the producing
   link.  Returns 0 (fragment consumed). */
static inline int
returnable_frag( fd_replay_tile_t * ctx,
                 ulong in_idx,
                 ulong seq,
                 ulong sig,
                 ulong chunk,
                 ulong sz,
                 ulong ctl,
                 ulong tsorig,
                 ulong tspub,
                 fd_stem_context_t * stem ) {
  (void)seq;
  (void)ctl;
  (void)tsorig;
  (void)tspub;

  /* Zero-size frags carry no payload, so their chunk is not checked. */
  if( FD_UNLIKELY( sz!=0UL && (chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) )
    FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));

  switch( ctx->in_kind[in_idx] ) {
    case IN_KIND_GENESIS: {
      /* Genesis tile: either a full bootstrap-complete message (with
         an lthash prefix before the genesis hash) or just the genesis
         hash.  Either way, record the hash and re-run the deferred
         verification checks. */
      uchar const * src = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
      ctx->has_genesis_hash = 1;
      if( FD_LIKELY( sig==GENESI_SIG_BOOTSTRAP_COMPLETED ) ) {
        boot_genesis( ctx, stem, in_idx, chunk );
        fd_memcpy( ctx->genesis_hash, src+sizeof(fd_lthash_value_t), sizeof(fd_hash_t) );
      } else {
        fd_memcpy( ctx->genesis_hash, src, sizeof(fd_hash_t) );
      }

      maybe_verify_cluster_type( ctx );
      maybe_verify_shred_version( ctx );
      break;
    }
    case IN_KIND_IPECHO: {
      /* ipecho reports the cluster shred version in the frag sig. */
      FD_TEST( sig && sig<=USHORT_MAX );
      ctx->ipecho_shred_version = (ushort)sig;
      maybe_verify_shred_version( ctx );
      break;
    }
    case IN_KIND_SNAP:
      on_snapshot_message( ctx, stem, in_idx, chunk, sig );
      maybe_verify_shred_version( ctx );
      break;
    case IN_KIND_EXEC: {
      /* Exec tile reports a completed task (execute/sigverify/...). */
      process_exec_task_done( ctx, stem, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), sig );
      break;
    }
    case IN_KIND_POH: {
      process_poh_message( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
      break;
    }
    case IN_KIND_RESOLV: {
      /* Resolv tile done with a slot: release its bank reference. */
      fd_resolv_slot_exchanged_t * exchanged_slot = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
      process_resolv_slot_completed( ctx, exchanged_slot->bank_idx );
      break;
    }
    case IN_KIND_TOWER: {
      if ( FD_LIKELY( sig==FD_TOWER_SIG_SLOT_DONE ) ) process_tower_slot_done( ctx, stem, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
      else if( FD_LIKELY( sig==FD_TOWER_SIG_SLOT_CONFIRMED ) ) {
        fd_tower_slot_confirmed_t const * msg = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );

        /* Implement replay plugin API here */

        switch( msg->kind ) {
          case FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC: break;
          case FD_TOWER_SLOT_CONFIRMED_ROOTED: break;
        }
      };
      break;
    }
    case IN_KIND_SHRED: {
      /* TODO: This message/sz should be defined. */
      if( sz==FD_SHRED_DATA_HEADER_SZ + sizeof(fd_hash_t) + sizeof(fd_hash_t) + sizeof(int) ) {
        /* If receive a FEC complete message. */
        process_fec_complete( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
      }
      break;
    }
    case IN_KIND_VTXN: {
      process_vote_txn_sent( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
      break;
    }
    case IN_KIND_RPC:
    case IN_KIND_GUI: {
      /* RPC/GUI tiles return a bank reference; the frag sig carries
         the bank index being released. */
      fd_bank_t * bank = fd_banks_bank_query( ctx->banks, sig );
      FD_TEST( bank );
      bank->refcnt--;
      break;
    }
    default:
      FD_LOG_ERR(( "unhandled kind %d", ctx->in_kind[ in_idx ] ));
  }

  return 0;
}
2328 :
2329 : static inline fd_replay_out_link_t
2330 : out1( fd_topo_t const * topo,
2331 : fd_topo_tile_t const * tile,
2332 0 : char const * name ) {
2333 0 : ulong idx = ULONG_MAX;
2334 :
2335 0 : for( ulong i=0UL; i<tile->out_cnt; i++ ) {
2336 0 : fd_topo_link_t const * link = &topo->links[ tile->out_link_id[ i ] ];
2337 0 : if( !strcmp( link->name, name ) ) {
2338 0 : if( FD_UNLIKELY( idx!=ULONG_MAX ) ) FD_LOG_ERR(( "tile %s:%lu had multiple output links named %s but expected one", tile->name, tile->kind_id, name ));
2339 0 : idx = i;
2340 0 : }
2341 0 : }
2342 :
2343 0 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) return (fd_replay_out_link_t){ .idx = ULONG_MAX, .mem = NULL, .chunk0 = 0, .wmark = 0, .chunk = 0 };
2344 :
2345 0 : void * mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ idx ] ].dcache_obj_id ].wksp_id ].wksp;
2346 0 : ulong chunk0 = fd_dcache_compact_chunk0( mem, topo->links[ tile->out_link_id[ idx ] ].dcache );
2347 0 : ulong wmark = fd_dcache_compact_wmark ( mem, topo->links[ tile->out_link_id[ idx ] ].dcache, topo->links[ tile->out_link_id[ idx ] ].mtu );
2348 :
2349 0 : return (fd_replay_out_link_t){ .idx = idx, .mem = mem, .chunk0 = chunk0, .wmark = wmark, .chunk = chunk0 };
2350 0 : }
2351 :
/* privileged_init runs before privileges are dropped.  It loads the
   identity public key (and optionally the bundle vote account) from
   disk and draws the hash-map/reassembler seeds from the system
   CSPRNG — all operations that may require filesystem or entropy
   access unavailable after sandboxing. */
static void
privileged_init( fd_topo_t * topo,
                 fd_topo_tile_t * tile ) {
  void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );

  FD_SCRATCH_ALLOC_INIT( l, scratch );
  fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );

  if( FD_UNLIKELY( !strcmp( tile->replay.identity_key_path, "" ) ) ) FD_LOG_ERR(( "identity_key_path not set" ));

  /* Only the public key is loaded; the private key stays on disk. */
  ctx->identity_pubkey[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->replay.identity_key_path, /* pubkey only: */ 1 ) );

  if( FD_UNLIKELY( !tile->replay.bundle.vote_account_path[0] ) ) {
    tile->replay.bundle.enabled = 0;
  }

  if( FD_UNLIKELY( tile->replay.bundle.enabled ) ) {
    /* vote_account_path may be either a base58-encoded pubkey literal
       or a path to a key file; try decoding first, fall back to
       loading from disk. */
    if( FD_UNLIKELY( !fd_base58_decode_32( tile->replay.bundle.vote_account_path, ctx->bundle.vote_account.uc ) ) ) {
      const uchar * vote_key = fd_keyload_load( tile->replay.bundle.vote_account_path, /* pubkey only: */ 1 );
      fd_memcpy( ctx->bundle.vote_account.uc, vote_key, 32UL );
    }
  }

  /* Seed the various hash structures from secure randomness so their
     layouts are not attacker-predictable. */
  if( FD_UNLIKELY( !fd_rng_secure( &ctx->reasm_seed, sizeof(ulong) ) ) ) {
    FD_LOG_CRIT(( "fd_rng_secure failed" ));
  }

  if( FD_UNLIKELY( !fd_rng_secure( &ctx->vote_tracker_seed, sizeof(ulong) ) ) ) {
    FD_LOG_CRIT(( "fd_rng_secure failed" ));
  }

  if( FD_UNLIKELY( !fd_rng_secure( &ctx->block_id_map_seed, sizeof(ulong) ) ) ) {
    FD_LOG_CRIT(( "fd_rng_secure failed" ));
  }
}
2387 :
2388 : static void
2389 : unprivileged_init( fd_topo_t * topo,
2390 0 : fd_topo_tile_t * tile ) {
2391 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
2392 :
2393 0 : ulong chain_cnt = fd_block_id_map_chain_cnt_est( tile->replay.max_live_slots );
2394 :
2395 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
2396 0 : fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
2397 0 : void * block_id_arr_mem = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_block_id_ele_t), sizeof(fd_block_id_ele_t) * tile->replay.max_live_slots );
2398 0 : void * block_id_map_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_block_id_map_align(), fd_block_id_map_footprint( chain_cnt ) );
2399 0 : void * _txncache = FD_SCRATCH_ALLOC_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->replay.max_live_slots ) );
2400 0 : void * reasm_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_reasm_align(), fd_reasm_footprint( 1 << 20 ) );
2401 0 : void * sched_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.max_live_slots ) );
2402 0 : void * vote_tracker_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_vote_tracker_align(), fd_vote_tracker_footprint() );
2403 0 : void * _capture_ctx = FD_SCRATCH_ALLOC_APPEND( l, fd_capture_ctx_align(), fd_capture_ctx_footprint() );
2404 0 : # if FD_HAS_FLATCC
2405 0 : void * block_dump_ctx = NULL;
2406 0 : if( FD_UNLIKELY( tile->replay.dump_block_to_pb ) ) {
2407 0 : block_dump_ctx = FD_SCRATCH_ALLOC_APPEND( l, fd_block_dump_context_align(), fd_block_dump_context_footprint() );
2408 0 : }
2409 0 : # endif
2410 :
2411 0 : ulong store_obj_id = fd_pod_query_ulong( topo->props, "store", ULONG_MAX );
2412 0 : FD_TEST( store_obj_id!=ULONG_MAX );
2413 0 : ctx->store = fd_store_join( fd_topo_obj_laddr( topo, store_obj_id ) );
2414 0 : FD_TEST( ctx->store );
2415 :
2416 0 : ulong banks_obj_id = fd_pod_query_ulong( topo->props, "banks", ULONG_MAX );
2417 0 : FD_TEST( banks_obj_id!=ULONG_MAX );
2418 0 : ctx->banks = fd_banks_join( fd_topo_obj_laddr( topo, banks_obj_id ) );
2419 0 : FD_TEST( ctx->banks );
2420 :
2421 0 : fd_bank_t * bank_pool = fd_banks_get_bank_pool( ctx->banks );
2422 0 : FD_MGAUGE_SET( REPLAY, MAX_LIVE_BANKS, fd_banks_pool_max( bank_pool ) );
2423 :
2424 0 : fd_bank_t * bank = fd_banks_init_bank( ctx->banks );
2425 0 : fd_bank_slot_set( bank, 0UL );
2426 0 : FD_TEST( bank );
2427 0 : FD_TEST( bank->idx==FD_REPLAY_BOOT_BANK_IDX );
2428 :
2429 0 : ctx->consensus_root_slot = ULONG_MAX;
2430 0 : ctx->consensus_root = (fd_hash_t){ .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
2431 0 : ctx->published_root_slot = ULONG_MAX;
2432 :
2433 0 : ctx->expected_shred_version = tile->replay.expected_shred_version;
2434 0 : ctx->ipecho_shred_version = 0;
2435 0 : fd_memcpy( ctx->genesis_path, tile->replay.genesis_path, sizeof(ctx->genesis_path) );
2436 0 : ctx->has_genesis_hash = 0;
2437 0 : ctx->cluster_type = FD_CLUSTER_UNKNOWN;
2438 0 : ctx->hard_forks_cnt = ULONG_MAX;
2439 :
2440 0 : if( FD_UNLIKELY( tile->replay.bundle.enabled ) ) {
2441 0 : ctx->bundle.enabled = 1;
2442 0 : if( FD_UNLIKELY( !fd_bundle_crank_gen_init( ctx->bundle.gen,
2443 0 : (fd_acct_addr_t const *)tile->replay.bundle.tip_distribution_program_addr,
2444 0 : (fd_acct_addr_t const *)tile->replay.bundle.tip_payment_program_addr,
2445 0 : (fd_acct_addr_t const *)ctx->bundle.vote_account.uc,
2446 0 : (fd_acct_addr_t const *)ctx->bundle.vote_account.uc, "NAN", 0UL ) ) ) {
2447 0 : FD_LOG_ERR(( "failed to initialize bundle crank gen" ));
2448 0 : }
2449 0 : } else {
2450 0 : ctx->bundle.enabled = 0;
2451 0 : }
2452 :
2453 0 : fd_features_t * features = fd_bank_features_modify( bank );
2454 0 : fd_features_enable_cleaned_up( features, &FD_RUNTIME_CLUSTER_VERSION );
2455 :
2456 0 : char const * one_off_features[ 16UL ];
2457 0 : FD_TEST( tile->replay.enable_features_cnt<=sizeof(one_off_features)/sizeof(one_off_features[0]) );
2458 0 : for( ulong i=0UL; i<tile->replay.enable_features_cnt; i++ ) one_off_features[ i ] = tile->replay.enable_features[i];
2459 0 : fd_features_enable_one_offs( features, one_off_features, (uint)tile->replay.enable_features_cnt, 0UL );
2460 :
2461 0 : FD_TEST( fd_accdb_admin_join ( ctx->accdb_admin, fd_topo_obj_laddr( topo, tile->replay.funk_obj_id ) ) );
2462 0 : FD_TEST( fd_accdb_user_v1_init ( ctx->accdb, fd_topo_obj_laddr( topo, tile->replay.funk_obj_id ) ) );
2463 0 : FD_TEST( fd_progcache_admin_join( ctx->progcache_admin, fd_topo_obj_laddr( topo, tile->replay.progcache_obj_id ) ) );
2464 :
2465 0 : void * _txncache_shmem = fd_topo_obj_laddr( topo, tile->replay.txncache_obj_id );
2466 0 : fd_txncache_shmem_t * txncache_shmem = fd_txncache_shmem_join( _txncache_shmem );
2467 0 : FD_TEST( txncache_shmem );
2468 0 : ctx->txncache = fd_txncache_join( fd_txncache_new( _txncache, txncache_shmem ) );
2469 0 : FD_TEST( ctx->txncache );
2470 :
2471 0 : ctx->capture_ctx = NULL;
2472 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) || strcmp( "", tile->replay.dump_proto_dir ) ) ) {
2473 0 : ctx->capture_ctx = fd_capture_ctx_join( fd_capture_ctx_new( _capture_ctx ) );
2474 0 : ctx->capture_ctx->solcap_start_slot = tile->replay.capture_start_slot;
2475 0 : }
2476 :
2477 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.dump_proto_dir ) ) ) {
2478 0 : ctx->capture_ctx->dump_proto_output_dir = tile->replay.dump_proto_dir;
2479 0 : if( FD_LIKELY( tile->replay.dump_block_to_pb ) ) ctx->capture_ctx->dump_block_to_pb = tile->replay.dump_block_to_pb;
2480 0 : }
2481 :
2482 0 : # if FD_HAS_FLATCC
2483 0 : if( FD_UNLIKELY( tile->replay.dump_block_to_pb ) ) {
2484 0 : ctx->block_dump_ctx = fd_block_dump_context_join( fd_block_dump_context_new( block_dump_ctx ) );
2485 0 : } else {
2486 0 : ctx->block_dump_ctx = NULL;
2487 0 : }
2488 0 : # endif
2489 :
2490 0 : ctx->exec_cnt = fd_topo_tile_name_cnt( topo, "exec" );
2491 :
2492 0 : ctx->is_booted = 0;
2493 :
2494 0 : ctx->larger_max_cost_per_block = tile->replay.larger_max_cost_per_block;
2495 :
2496 0 : ctx->reasm = fd_reasm_join( fd_reasm_new( reasm_mem, 1 << 20, ctx->reasm_seed ) );
2497 0 : FD_TEST( ctx->reasm );
2498 :
2499 0 : ctx->sched = fd_sched_join( fd_sched_new( sched_mem, tile->replay.max_live_slots, ctx->exec_cnt ), tile->replay.max_live_slots );
2500 0 : FD_TEST( ctx->sched );
2501 :
2502 0 : ctx->vote_tracker = fd_vote_tracker_join( fd_vote_tracker_new( vote_tracker_mem, ctx->vote_tracker_seed ) );
2503 0 : FD_TEST( ctx->vote_tracker );
2504 :
2505 0 : ctx->has_identity_vote_rooted = 0;
2506 :
2507 0 : ctx->mleaders = fd_multi_epoch_leaders_join( fd_multi_epoch_leaders_new( ctx->mleaders_mem ) );
2508 0 : FD_TEST( ctx->mleaders );
2509 :
2510 0 : ctx->is_leader = 0;
2511 0 : ctx->reset_slot = 0UL;
2512 0 : ctx->reset_bank = NULL;
2513 0 : ctx->reset_block_id = (fd_hash_t){ .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
2514 0 : ctx->reset_timestamp_nanos = 0UL;
2515 0 : ctx->next_leader_slot = ULONG_MAX;
2516 0 : ctx->next_leader_tickcount = LONG_MAX;
2517 0 : ctx->highwater_leader_slot = ULONG_MAX;
2518 0 : ctx->slot_duration_nanos = 350L*1000L*1000L; /* TODO: Not fixed ... not always 350ms ... */
2519 0 : ctx->slot_duration_ticks = (double)ctx->slot_duration_nanos*fd_tempo_tick_per_ns( NULL );
2520 0 : ctx->leader_bank = NULL;
2521 :
2522 0 : ctx->block_id_len = tile->replay.max_live_slots;
2523 0 : ctx->block_id_arr = (fd_block_id_ele_t *)block_id_arr_mem;
2524 0 : ctx->block_id_map = fd_block_id_map_join( fd_block_id_map_new( block_id_map_mem, chain_cnt, ctx->block_id_map_seed ) );
2525 0 : FD_TEST( ctx->block_id_map );
2526 :
2527 0 : for( ulong i=0UL; i<tile->replay.max_live_slots; i++ ) {
2528 0 : ctx->block_id_arr[ i ].slot = FD_SLOT_NULL;
2529 0 : }
2530 :
2531 0 : ctx->resolv_tile_cnt = fd_topo_tile_name_cnt( topo, "resolv" );
2532 :
2533 0 : FD_TEST( tile->in_cnt<=sizeof(ctx->in)/sizeof(ctx->in[0]) );
2534 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
2535 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
2536 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
2537 :
2538 0 : if( FD_LIKELY( link->dcache ) ) {
2539 0 : ctx->in[ i ].mem = link_wksp->wksp;
2540 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
2541 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
2542 0 : ctx->in[ i ].mtu = link->mtu;
2543 0 : }
2544 :
2545 0 : if( !strcmp( link->name, "genesi_out" ) ) ctx->in_kind[ i ] = IN_KIND_GENESIS;
2546 0 : else if( !strcmp( link->name, "ipecho_out" ) ) ctx->in_kind[ i ] = IN_KIND_IPECHO;
2547 0 : else if( !strcmp( link->name, "snapin_manif" ) ) ctx->in_kind[ i ] = IN_KIND_SNAP;
2548 0 : else if( !strcmp( link->name, "exec_replay" ) ) ctx->in_kind[ i ] = IN_KIND_EXEC;
2549 0 : else if( !strcmp( link->name, "tower_out" ) ) ctx->in_kind[ i ] = IN_KIND_TOWER;
2550 0 : else if( !strcmp( link->name, "poh_replay" ) ) ctx->in_kind[ i ] = IN_KIND_POH;
2551 0 : else if( !strcmp( link->name, "resolv_repla" ) ) ctx->in_kind[ i ] = IN_KIND_RESOLV;
2552 0 : else if( !strcmp( link->name, "shred_out" ) ) ctx->in_kind[ i ] = IN_KIND_SHRED;
2553 0 : else if( !strcmp( link->name, "send_out" ) ) ctx->in_kind[ i ] = IN_KIND_VTXN;
2554 0 : else if( !strcmp( link->name, "gui_replay" ) ) ctx->in_kind[ i ] = IN_KIND_GUI;
2555 0 : else if( !strcmp( link->name, "rpc_replay" ) ) ctx->in_kind[ i ] = IN_KIND_RPC;
2556 0 : else FD_LOG_ERR(( "unexpected input link name %s", link->name ));
2557 0 : }
2558 :
2559 0 : *ctx->stake_out = out1( topo, tile, "replay_stake" ); FD_TEST( ctx->stake_out->idx!=ULONG_MAX );
2560 0 : *ctx->replay_out = out1( topo, tile, "replay_out" ); FD_TEST( ctx->replay_out->idx!=ULONG_MAX );
2561 :
2562 0 : ulong idx = fd_topo_find_tile_out_link( topo, tile, "replay_exec", 0UL );
2563 0 : FD_TEST( idx!=ULONG_MAX );
2564 0 : fd_topo_link_t * link = &topo->links[ tile->out_link_id[ idx ] ];
2565 :
2566 0 : fd_replay_out_link_t * exec_out = ctx->exec_out;
2567 0 : exec_out->idx = idx;
2568 0 : exec_out->mem = topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ].wksp;
2569 0 : exec_out->chunk0 = fd_dcache_compact_chunk0( exec_out->mem, link->dcache );
2570 0 : exec_out->wmark = fd_dcache_compact_wmark( exec_out->mem, link->dcache, link->mtu );
2571 0 : exec_out->chunk = exec_out->chunk0;
2572 :
2573 0 : ctx->gui_enabled = fd_topo_find_tile( topo, "gui", 0UL )!=ULONG_MAX;
2574 0 : ctx->rpc_enabled = fd_topo_find_tile( topo, "rpc", 0UL )!=ULONG_MAX;
2575 :
2576 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) ) ) {
2577 0 : idx = fd_topo_find_tile_out_link( topo, tile, "cap_repl", 0UL );
2578 0 : FD_TEST( idx!=ULONG_MAX );
2579 0 : link = &topo->links[ tile->out_link_id[ idx ] ];
2580 :
2581 :
2582 0 : fd_capture_link_buf_t * cap_repl_out = ctx->cap_repl_out;
2583 0 : cap_repl_out->base.vt = &fd_capture_link_buf_vt;
2584 0 : cap_repl_out->idx = idx;
2585 0 : cap_repl_out->mem = topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ].wksp;
2586 0 : cap_repl_out->chunk0 = fd_dcache_compact_chunk0( cap_repl_out->mem, link->dcache );
2587 0 : cap_repl_out->wmark = fd_dcache_compact_wmark( cap_repl_out->mem, link->dcache, link->mtu );
2588 0 : cap_repl_out->chunk = cap_repl_out->chunk0;
2589 0 : cap_repl_out->mcache = link->mcache;
2590 0 : cap_repl_out->depth = fd_mcache_depth( link->mcache );
2591 0 : cap_repl_out->seq = 0UL;
2592 :
2593 0 : ctx->capture_ctx->capctx_type.buf = cap_repl_out;
2594 0 : ctx->capture_ctx->capture_link = &cap_repl_out->base;
2595 0 : ctx->capture_ctx->current_txn_idx = 0UL;
2596 :
2597 :
2598 0 : ulong consumer_tile_idx = fd_topo_find_tile( topo, "solcap", 0UL );
2599 0 : fd_topo_tile_t * consumer_tile = &topo->tiles[ consumer_tile_idx ];
2600 0 : cap_repl_out->fseq = NULL;
2601 0 : for( ulong j = 0UL; j < consumer_tile->in_cnt; j++ ) {
2602 0 : if( FD_UNLIKELY( consumer_tile->in_link_id[ j ] == link->id ) ) {
2603 0 : cap_repl_out->fseq = fd_fseq_join( fd_topo_obj_laddr( topo, consumer_tile->in_link_fseq_obj_id[ j ] ) );
2604 0 : FD_TEST( cap_repl_out->fseq );
2605 0 : break;
2606 0 : }
2607 0 : }
2608 0 : }
2609 :
2610 0 : fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) );
2611 :
2612 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_link_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_LINK_WAIT ),
2613 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_LINK_WAIT ) ) );
2614 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_link_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_LINK_WORK ),
2615 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_LINK_WORK ) ) );
2616 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_read_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_READ_WAIT ),
2617 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_READ_WAIT ) ) );
2618 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_read_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_READ_WORK ),
2619 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_READ_WORK ) ) );
2620 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_publish_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_PUBLISH_WAIT ),
2621 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_PUBLISH_WAIT ) ) );
2622 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_publish_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_PUBLISH_WORK ),
2623 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_PUBLISH_WORK ) ) );
2624 :
2625 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
2626 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
2627 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
2628 0 : }
2629 :
2630 : static ulong
2631 : populate_allowed_seccomp( fd_topo_t const * topo FD_FN_UNUSED,
2632 : fd_topo_tile_t const * tile FD_FN_UNUSED,
2633 : ulong out_cnt,
2634 0 : struct sock_filter * out ) {
2635 :
2636 0 : populate_sock_filter_policy_fd_replay_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
2637 0 : return sock_filter_policy_fd_replay_tile_instr_cnt;
2638 0 : }
2639 :
2640 : static ulong
2641 : populate_allowed_fds( fd_topo_t const * topo FD_FN_UNUSED,
2642 : fd_topo_tile_t const * tile FD_FN_UNUSED,
2643 : ulong out_fds_cnt,
2644 0 : int * out_fds ) {
2645 :
2646 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
2647 :
2648 0 : ulong out_cnt = 0UL;
2649 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
2650 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
2651 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
2652 0 : return out_cnt;
2653 0 : }
2654 :
2655 : #undef DEBUG_LOGGING
2656 :
2657 : /* counting carefully, after_credit can generate at most 7 frags and
2658 : returnable_frag boot_genesis can also generate at most 7 frags, so 14
2659 : is a conservative bound. */
2660 0 : #define STEM_BURST (14UL)
2661 :
2662 : /* TODO: calculate this properly/fix stem to work with larger numbers of links */
/* Chosen empirically (currently 10e3) as anything larger slowed down replay
   times. Need to calculate this properly. */
2665 0 : #define STEM_LAZY ((long)10e3)
2666 :
2667 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_replay_tile_t
2668 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_replay_tile_t)
2669 :
2670 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
2671 0 : #define STEM_CALLBACK_AFTER_CREDIT after_credit
2672 0 : #define STEM_CALLBACK_BEFORE_FRAG before_frag
2673 0 : #define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
2674 :
2675 : #include "../../disco/stem/fd_stem.c"
2676 :
/* Tile descriptor exported to the topology runtime for the replay
   tile.  stem_run is provided by the fd_stem.c template included
   above, driven by the STEM_CALLBACK_* callbacks defined in this
   file. */
fd_topo_run_tile_t fd_tile_replay = {
  .name = "replay",
  .populate_allowed_seccomp = populate_allowed_seccomp,
  .populate_allowed_fds = populate_allowed_fds,
  .scratch_align = scratch_align,
  .scratch_footprint = scratch_footprint,
  .privileged_init = privileged_init,
  .unprivileged_init = unprivileged_init,
  .run = stem_run,
};
|