#include "fd_replay_tile.h"
#include "fd_sched.h"
#include "fd_exec.h"
#include "fd_vote_tracker.h"
#include "generated/fd_replay_tile_seccomp.h"

#include "../poh/fd_poh.h"
#include "../poh/fd_poh_tile.h"
#include "../tower/fd_tower_tile.h"
#include "../resolv/fd_resolv_tile.h"
#include "../restore/utils/fd_ssload.h"

#include "../../disco/tiles.h"
#include "../../disco/fd_txn_m.h"
#include "../../disco/store/fd_store.h"
#include "../../discof/reasm/fd_reasm.h"
#include "../../disco/keyguard/fd_keyload.h"
#include "../../util/pod/fd_pod.h"
#include "../../flamenco/rewards/fd_rewards.h"
#include "../../flamenco/leaders/fd_multi_epoch_leaders.h"
#include "../../disco/metrics/fd_metrics.h"

#include "../../flamenco/runtime/fd_runtime.h"
#include "../../flamenco/fd_flamenco_base.h"
#include "../../flamenco/runtime/sysvar/fd_sysvar_epoch_schedule.h"

#include <errno.h>

/* Replay concepts:

   - Blocks are aggregations of entries, aka microblocks, which are
     groupings of txns and are constructed by the block producer (see
     fd_pack).

   - Entries are grouped into entry batches by the block producer (see
     fd_pack / fd_shredder).

   - Entry batches are divided into chunks known as shreds by the block
     producer (see fd_shredder).

   - Shreds are grouped into forward-error-correction sets (FEC sets)
     by the block producer (see fd_shredder).

   - Shreds are transmitted to the rest of the cluster via the Turbine
     protocol (see fd_shredder / fd_shred).

   - Once enough shreds within a FEC set are received to recover the
     entirety of the shred data encoded by that FEC set, the receiver
     can "complete" the FEC set (see fd_fec_resolver).

   - If shreds in the FEC set are missing such that it can't complete,
     the receiver can use the Repair protocol to request the missing
     shreds in the FEC set (see fd_repair).

   - The current Repair protocol does not support requesting coding
     shreds.  As a result, some FEC sets might actually be complete
     (contain all data shreds).  Repair currently hacks around this by
     forcing completion, but the long-term solution is to add support
     for repairing coding shreds via Repair.

   - FEC sets are delivered in partial order to the Replay tile by the
     Repair tile.  Currently Replay only supports replaying entry
     batches, so FEC sets need to be reassembled into an entry batch
     before they can be replayed.  The new Dispatcher will change this
     by taking a FEC set as input instead. */
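
/* To summarize the containment hierarchy described above:

     block
       +- entry batch            (one or more per block)
          +- entry (microblock)  (one or more per batch)
             +- txn              (zero or more per entry)

   and on the wire: an entry batch is serialized and divided into data
   shreds, which are grouped (together with coding shreds) into FEC
   sets for Turbine transmission. */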

#define IN_KIND_SNAP    (0)
#define IN_KIND_GENESIS (1)
#define IN_KIND_TOWER   (2)
#define IN_KIND_RESOLV  (3)
#define IN_KIND_POH     (4)
#define IN_KIND_EXEC    (5)
#define IN_KIND_SHRED   (6)
#define IN_KIND_VTXN    (7)

#define DEBUG_LOGGING 0

/* The first bank that the replay tile produces, either for genesis or
   for the snapshot boot, will always be at bank index 0. */
#define FD_REPLAY_BOOT_BANK_IDX (0UL)

struct fd_replay_in_link {
  fd_wksp_t * mem;
  ulong       chunk0;
  ulong       wmark;
  ulong       mtu;
};

typedef struct fd_replay_in_link fd_replay_in_link_t;

struct fd_replay_out_link {
  ulong       idx;
  fd_wksp_t * mem;
  ulong       chunk0;
  ulong       wmark;
  ulong       chunk;
};

typedef struct fd_replay_out_link fd_replay_out_link_t;

/* fd_block_id_map is a simple map of block ids to bank indices.  The
   map sits on top of an array of fd_block_id_ele_t and serves as a
   translation layer between block ids and bank indices. */

struct fd_block_id_ele {
  fd_hash_t block_id;
  ulong     slot; /* = FD_SLOT_NULL if not initialized */
  ulong     next_;
};
typedef struct fd_block_id_ele fd_block_id_ele_t;

#define MAP_NAME               fd_block_id_map
#define MAP_ELE_T              fd_block_id_ele_t
#define MAP_KEY_T              fd_hash_t
#define MAP_KEY                block_id
#define MAP_NEXT               next_
#define MAP_KEY_EQ(k0,k1)      (!memcmp((k0),(k1), sizeof(fd_hash_t)))
#define MAP_KEY_HASH(key,seed) (fd_hash((seed),(key),sizeof(fd_hash_t)))
#include "../../util/tmpl/fd_map_chain.c"

static inline ulong
fd_block_id_ele_get_idx( fd_block_id_ele_t * ele_arr, fd_block_id_ele_t * ele ) {
  return (ulong)(ele - ele_arr);
}
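
/* Usage sketch (illustrative only; this mirrors how the map is used
   later in this file): translating a block id into a bank index, and
   back.

     fd_block_id_ele_t * ele = fd_block_id_map_ele_query( map, &block_id, NULL, arr );
     ulong bank_idx = ele ? fd_block_id_ele_get_idx( arr, ele ) : ULONG_MAX;

     // the reverse direction is just an array lookup:
     fd_hash_t id = arr[ bank_idx ].block_id;
*/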

FD_STATIC_ASSERT( FD_PACK_MAX_BANK_TILES<=64UL, exec_bitset );

struct fd_replay_tile {
  fd_wksp_t * wksp;

  /* tx_metadata_storage, when set, enables the transaction log
     collector. */
  int tx_metadata_storage;

  fd_funk_t funk[1];

  fd_txncache_t * txncache;
  fd_store_t *    store;
  fd_banks_t *    banks;

  /* This flag is 1 if we have seen a vote signature that our node has
     sent out get rooted at least one time, and 0 otherwise.  We can't
     become leader and pack blocks until this flag has been set.  This
     parallels the Agave 'has_new_vote_been_rooted'.

     TODO: Add a flag to the toml to make this optional. */
  int has_identity_vote_rooted;

  fd_reasm_t * reasm;

  /* Replay state machine. */
  fd_sched_t *         sched;
  uint                 enable_bank_hash_cmp:1;
  fd_bank_hash_cmp_t * bank_hash_cmp;
  ulong                exec_cnt;
  ulong                exec_ready_bitset; /* Bit i set if exec tile i is idle */
  fd_replay_out_link_t exec_out[ 1 ];     /* Sending work down to exec tiles */

  fd_vote_tracker_t * vote_tracker;

  /* A note on publishing ...

     The watermarks are used to publish our fork-aware structures.  For
     example, store, banks, and txncache need to be published to
     release resources occupied by rooted or dead blocks.  In general,
     publishing has the effect of pruning forks in those structures,
     indicating that it is ok to release the memory being occupied by
     the blocks on said forks.  Tower is responsible for informing us
     of the latest block on the consensus rooted fork.  As soon as we
     can, we should move the published root as close as possible to the
     latest consensus root, publishing/pruning everything on the fork
     tree along the way.  That is, all the blocks that directly descend
     from the current published root (inclusive) to the new published
     root (exclusive) on the rooted fork, as well as all the minority
     forks that branch from said blocks.

     Ideally, we'd move the published root to the consensus root
     immediately upon receiving a new consensus root.  However, that's
     not always safe to do.  One thing we need to be careful about is
     making sure that there are no more users/consumers of
     soon-to-be-pruned blocks, lest a use-after-free occurs.  This can
     be done by using a reference counter for each block.  Any
     concurrent activity, such as transaction execution in the exec
     tiles, should retain a refcnt on the block for as long as it needs
     access to the shared fork-aware structures related to that block.
     Eventually, the refcnt on a given block will drop down to 0 as the
     block either finishes replaying or gets marked as dead, and any
     other tile that has retained a refcnt on the block releases it.
     At that point, it becomes a candidate for pruning.  The key to
     safe publishing then becomes figuring out how far we could advance
     the published root, such that every minority fork branching off of
     blocks in between the current published root (inclusive) and the
     new published root (exclusive) is safe to be pruned.  This is a
     straightforward tree traversal, where if a block B on the rooted
     fork has refcnt 0, and all minority forks branching off of B also
     have refcnt 0, then B is safe to be pruned.  We advance the
     published root to the farthest consecutively prunable block on the
     rooted fork.  Note that reasm presents the replay tile with a
     clean view of the world where every block is chained off of a
     parent block.  So there are no orphaned/dangling tree nodes to
     worry about.  The world is a nice single tree as far as replay is
     concerned.

     In the following fork tree, every node is a block and the number
     in parentheses is the refcnt on the block.  The chain marked with
     double slashes is the rooted fork.  Suppose the published root is
     at block P, and the consensus root is at block T.  We can't
     publish past block P because Q has refcnt 1.


                              P(0)
                             /    \\
                         Q(1)      A(0)
                         /        ||   \
                      X(0)      B(0)    C(0)
                      /          ||        \
                   Y(0)         M(0)        R(0)
                               /   ||      /    \
                           D(2)   T(0)  J(0)    L(0)
                                   ||
                                   ..
                                   ..
                                   ..
                                   ||
                    blocks we might be actively replaying


     When the refcnt on Q drops to 0, we would be able to advance the
     published root to block M, because blocks P, A, and B, as well as
     all subtrees branching off of them, have refcnt 0, and therefore
     can be pruned.  Block M itself cannot be pruned yet because its
     child block D has refcnt 2.  After publishing/pruning, the fork
     tree would be:


                             M(0)
                            /   ||
                        D(2)   T(0)
                                ||
                                ..
                                ..
                                ..
                                ||
                 blocks we might be actively replaying


     As a result, the shared fork-aware structures can free resources
     for blocks P, A, B, and all subtrees branching off of them.

     For the reference counting part, the replay tile is the sole
     entity that can update the refcnt.  This ensures that all refcnt
     increment and decrement attempts are serialized at the replay
     tile, and that there are no racy resurrections of a
     soon-to-be-pruned block.  If a refcnt increment request arrives
     after a block has been pruned, replay simply rejects the request.

     A note on the implementation of the above ...

     Upon receiving a new consensus root, we descend down the rooted
     fork from the current published root to the new consensus root.
     On each node/block of the rooted fork, we do a summation of the
     refcnt on the block and all the minority fork blocks branching
     from the block.  If the summation is 0, the block is safe for
     pruning.  We advance the published root to the far end of the
     consecutive run of 0 refcnt sums originating from the current
     published root.  On our descent down the minority forks, we also
     mark any block that hasn't finished replaying as dead, so we don't
     waste time executing them.  No more transactions shall be
     dispatched for execution from dead blocks.

     Blocks start out with a refcnt of 0.  Other tiles may send a
     request to the replay tile for a reference on a block.  The
     transaction dispatcher is another source of refcnt updates.  On
     every dispatch of a transaction for block B, we increment the
     refcnt for B.  And on every transaction finalization, we decrement
     the refcnt for B.  This means that whenever the refcnt on a block
     is 0, there is no more reference on that block from the execution
     pipeline.  While it might be tempting to simply increment the
     refcnt once when we start replaying a block, and decrement the
     refcnt once when we finish a block, this more fine-grained refcnt
     update strategy allows for aborting, and potentially immediately
     pruning, blocks under interleaved block replay.  Upon receiving a
     new consensus root, we can simply look at the refcnt on minority
     fork blocks, and a refcnt of 0 would imply that the block is safe
     for pruning, even if we haven't finished replaying it.  Without
     the fine-grained refcnt, we would need to first stop dispatching
     from the aborted block, and then wait for a full drain of the
     execution pipeline to know for sure that there are no more
     in-flight transactions executing on the aborted block.  Note that
     this will allow the refcnt on any block to transiently drop down
     to 0.  We will not mistakenly prune an actively replaying block,
     aka a leaf node, that is chaining off of the rooted fork, because
     the consensus root is always an ancestor of the actively replaying
     tip. */
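
  /* A minimal sketch (pseudocode only, not part of the build) of the
     published-root advancement described above.  rooted_child() and
     subtree_refcnt_sum() are hypothetical helpers, not APIs that exist
     in this file:

       ulong new_root = published_root;
       for( ulong b=published_root; b!=consensus_root; b=rooted_child( b ) ) {
         // refcnt of b summed with all minority-fork subtrees off of b
         if( subtree_refcnt_sum( b ) ) break;  // not prunable; stop here
         new_root = rooted_child( b );         // b and its minority forks are prunable
       }
       // prune [published_root, new_root) plus the minority forks
       // branching off of those blocks, then publish new_root
  */
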
  fd_hash_t consensus_root;          /* The most recent block to have reached max lockout in the tower. */
  ulong     consensus_root_slot;     /* slot number of the above. */
  ulong     consensus_root_bank_idx; /* bank index of the above. */
  ulong     published_root_slot;     /* slot number of the published root. */
  ulong     published_root_bank_idx; /* bank index of the published root. */

  /* We need to maintain a tile-local mapping of block ids to bank
     indices and vice versa.  This translation layer is needed for
     conversion since tower operates on block ids and downstream
     consumers of FEC sets operate on bank indices.  This mapping must
     happen both ways:
     1. tower sends us block ids and we must map them to bank indices.
     2. when a block is completed, we must map the bank index to a
        block id to send a slot complete message to tower. */
  ulong               block_id_len;
  fd_block_id_ele_t * block_id_arr;
  fd_block_id_map_t * block_id_map;

  /* Capture-related configs */
  fd_capture_ctx_t * capture_ctx;
  FILE *             capture_file;

  /* Whether the runtime has been booted, either from snapshot loading
     or from genesis. */
  int is_booted;

  /* Stack allocator for slot boundary allocations.
     TODO: Should be replaced by tile-level allocations. */
  fd_spad_t * runtime_spad;

  /* Buffer to store vote towers that need to be published to the Tower
     tile. */
  ulong             vote_tower_out_idx; /* index of vote tower to publish next */
  ulong             vote_tower_out_len; /* number of vote towers in the buffer */
  fd_replay_tower_t vote_tower_out[ FD_REPLAY_TOWER_VOTE_ACC_MAX ];

  fd_multi_epoch_leaders_t * mleaders;

  fd_pubkey_t identity_pubkey[1]; /* TODO: Keyswitch */

  /* When we transition to becoming leader, we can only unbecome the
     leader if we have received a block id from the FEC reassembler,
     and a message from PoH that the leader slot has ended.  After both
     of these conditions are met, we are free to unbecome the
     leader. */
  int         is_leader;
  int         recv_poh;
  int         recv_block_id;
  ulong       next_leader_slot;
  long        next_leader_tickcount;
  ulong       highwater_leader_slot;
  ulong       reset_slot;
  fd_hash_t   reset_block_id;
  long        reset_timestamp_nanos;
  double      slot_duration_nanos;
  double      slot_duration_ticks;
  ulong       max_active_descendant;
  fd_bank_t * leader_bank; /* ==NULL if not currently the leader */

  ulong resolv_tile_cnt;

  int                 in_kind[ 64 ];
  fd_replay_in_link_t in[ 64 ];

  fd_replay_out_link_t replay_out[1];

  fd_replay_out_link_t stake_out[1];

  struct {
    fd_histf_t store_read_wait[ 1 ];
    fd_histf_t store_read_work[ 1 ];
    fd_histf_t store_publish_wait[ 1 ];
    fd_histf_t store_publish_work[ 1 ];
    fd_histf_t store_link_wait[ 1 ];
    fd_histf_t store_link_work[ 1 ];

    ulong slots_total;
    ulong transactions_total;
  } metrics;

  uchar __attribute__((aligned(FD_MULTI_EPOCH_LEADERS_ALIGN))) mleaders_mem[ FD_MULTI_EPOCH_LEADERS_FOOTPRINT ];
};

typedef struct fd_replay_tile fd_replay_tile_t;

FD_FN_CONST static inline ulong
scratch_align( void ) {
  return 128UL;
}

FD_FN_PURE static inline ulong
scratch_footprint( fd_topo_tile_t const * tile ) {
  ulong chain_cnt = fd_block_id_map_chain_cnt_est( tile->replay.max_live_slots );

  ulong l = FD_LAYOUT_INIT;
  l = FD_LAYOUT_APPEND( l, alignof(fd_replay_tile_t),  sizeof(fd_replay_tile_t) );
  l = FD_LAYOUT_APPEND( l, alignof(fd_block_id_ele_t), sizeof(fd_block_id_ele_t) * tile->replay.max_live_slots );
  l = FD_LAYOUT_APPEND( l, fd_block_id_map_align(),    fd_block_id_map_footprint( chain_cnt ) );
  l = FD_LAYOUT_APPEND( l, fd_txncache_align(),        fd_txncache_footprint( tile->replay.max_live_slots ) );
  l = FD_LAYOUT_APPEND( l, fd_reasm_align(),           fd_reasm_footprint( 1 << 20 ) );
  l = FD_LAYOUT_APPEND( l, fd_sched_align(),           fd_sched_footprint( tile->replay.max_live_slots ) );
  l = FD_LAYOUT_APPEND( l, fd_vote_tracker_align(),    fd_vote_tracker_footprint() );
  l = FD_LAYOUT_APPEND( l, fd_capture_ctx_align(),     fd_capture_ctx_footprint() );
  l = FD_LAYOUT_APPEND( l, fd_spad_align(),            fd_spad_footprint( tile->replay.heap_size_gib<<30 ) );
  l = FD_LAYOUT_FINI ( l, scratch_align() );
  return l;
}

static inline void
metrics_write( fd_replay_tile_t * ctx ) {
  FD_MHIST_COPY( REPLAY, STORE_LINK_WAIT,    ctx->metrics.store_link_wait );
  FD_MHIST_COPY( REPLAY, STORE_LINK_WORK,    ctx->metrics.store_link_work );
  FD_MHIST_COPY( REPLAY, STORE_READ_WAIT,    ctx->metrics.store_read_wait );
  FD_MHIST_COPY( REPLAY, STORE_READ_WORK,    ctx->metrics.store_read_work );
  FD_MHIST_COPY( REPLAY, STORE_PUBLISH_WAIT, ctx->metrics.store_publish_wait );
  FD_MHIST_COPY( REPLAY, STORE_PUBLISH_WORK, ctx->metrics.store_publish_work );

  FD_MGAUGE_SET( REPLAY, ROOT_SLOT, ctx->consensus_root_slot==ULONG_MAX ? 0UL : ctx->consensus_root_slot );
  ulong leader_slot = ctx->leader_bank ? fd_bank_slot_get( ctx->leader_bank ) : 0UL;
  FD_MGAUGE_SET( REPLAY, LEADER_SLOT, leader_slot );

  if( FD_LIKELY( ctx->leader_bank ) ) {
    FD_MGAUGE_SET( REPLAY, NEXT_LEADER_SLOT, leader_slot );
    FD_MGAUGE_SET( REPLAY, LEADER_SLOT,      leader_slot );
  } else {
    FD_MGAUGE_SET( REPLAY, NEXT_LEADER_SLOT, ctx->next_leader_slot==ULONG_MAX ? 0UL : ctx->next_leader_slot );
    FD_MGAUGE_SET( REPLAY, LEADER_SLOT,      0UL );
  }
  FD_MGAUGE_SET( REPLAY, RESET_SLOT, ctx->reset_slot==ULONG_MAX ? 0UL : ctx->reset_slot );

  fd_bank_t * bank_pool  = fd_banks_get_bank_pool( ctx->banks );
  ulong       live_banks = fd_banks_pool_max( bank_pool ) - fd_banks_pool_free( bank_pool );
  FD_MGAUGE_SET( REPLAY, LIVE_BANKS, live_banks );

  FD_MCNT_SET( REPLAY, SLOTS_TOTAL,        ctx->metrics.slots_total );
  FD_MCNT_SET( REPLAY, TRANSACTIONS_TOTAL, ctx->metrics.transactions_total );
}

static inline ulong
generate_stake_weight_msg( ulong                       epoch,
                           fd_epoch_schedule_t const * epoch_schedule,
                           fd_vote_states_t const *    epoch_stakes,
                           ulong *                     stake_weight_msg_out ) {
  fd_stake_weight_msg_t *  stake_weight_msg = (fd_stake_weight_msg_t *)fd_type_pun( stake_weight_msg_out );
  fd_vote_stake_weight_t * stake_weights    = stake_weight_msg->weights;

  stake_weight_msg->epoch             = epoch;
  stake_weight_msg->start_slot        = fd_epoch_slot0( epoch_schedule, epoch );
  stake_weight_msg->slot_cnt          = epoch_schedule->slots_per_epoch;
  stake_weight_msg->excluded_stake    = 0UL;
  stake_weight_msg->vote_keyed_lsched = 1UL;

  /* FIXME: SIMD-0180 - hack to (de)activate in testnet vs mainnet.
     This code can be removed once the feature is active. */
  if( (1==epoch_schedule->warmup && epoch<FD_SIMD0180_ACTIVE_EPOCH_TESTNET) ||
      (0==epoch_schedule->warmup && epoch<FD_SIMD0180_ACTIVE_EPOCH_MAINNET) ) {
    stake_weight_msg->vote_keyed_lsched = 0UL;
  }

  /* epoch_stakes from manifest are already filtered (stake>0), but not sorted */
  fd_vote_states_iter_t iter_[1];
  ulong idx = 0UL;
  for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, epoch_stakes ); !fd_vote_states_iter_done( iter ); fd_vote_states_iter_next( iter ) ) {
    fd_vote_state_ele_t * vote_state = fd_vote_states_iter_ele( iter );
    if( FD_UNLIKELY( !vote_state->stake ) ) continue;

    stake_weights[ idx ].stake = vote_state->stake;
    memcpy( stake_weights[ idx ].id_key.uc,   &vote_state->node_account, sizeof(fd_pubkey_t) );
    memcpy( stake_weights[ idx ].vote_key.uc, &vote_state->vote_account, sizeof(fd_pubkey_t) );
    idx++;
  }
  stake_weight_msg->staked_cnt = idx;
  sort_vote_weights_by_stake_vote_inplace( stake_weights, idx );

  return fd_stake_weight_msg_sz( idx );
}
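
/* For reference: the raw ulong view of this message used by the caller
   (see the FD_LOG_NOTICE in publish_stake_weights below) assumes the
   fd_stake_weight_msg_t header is laid out as consecutive ulongs, i.e.
   (inferred from the indices used in that log line):

     msg[ 0 ] = epoch        msg[ 2 ] = start_slot
     msg[ 1 ] = staked_cnt   msg[ 3 ] = slot_cnt  */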

static void
publish_stake_weights( fd_replay_tile_t *  ctx,
                       fd_stem_context_t * stem,
                       fd_bank_t *         bank,
                       int                 current_epoch ) {
  fd_epoch_schedule_t const * schedule = fd_bank_epoch_schedule_query( bank );
  ulong epoch = fd_slot_to_epoch( schedule, fd_bank_slot_get( bank ), NULL );

  fd_vote_states_t const * vote_states_prev;
  if( FD_LIKELY( current_epoch ) ) vote_states_prev = fd_bank_vote_states_prev_locking_query( bank );
  else                             vote_states_prev = fd_bank_vote_states_prev_prev_locking_query( bank );

  ulong * stake_weights_msg = fd_chunk_to_laddr( ctx->stake_out->mem, ctx->stake_out->chunk );
  ulong   stake_weights_sz  = generate_stake_weight_msg( epoch+fd_ulong_if( current_epoch, 1UL, 0UL ), schedule, vote_states_prev, stake_weights_msg );
  ulong   stake_weights_sig = 4UL;
  fd_stem_publish( stem, ctx->stake_out->idx, stake_weights_sig, ctx->stake_out->chunk, stake_weights_sz, 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
  ctx->stake_out->chunk = fd_dcache_compact_next( ctx->stake_out->chunk, stake_weights_sz, ctx->stake_out->chunk0, ctx->stake_out->wmark );

  FD_LOG_NOTICE(( "sending stake weights for epoch %lu (slot %lu - %lu) with %lu stakes", stake_weights_msg[ 0 ], stake_weights_msg[ 2 ], stake_weights_msg[ 2 ]+stake_weights_msg[ 3 ], stake_weights_msg[ 1 ] ));

  if( FD_LIKELY( current_epoch ) ) fd_bank_vote_states_prev_end_locking_query( bank );
  else                             fd_bank_vote_states_prev_prev_end_locking_query( bank );

  fd_multi_epoch_leaders_stake_msg_init( ctx->mleaders, fd_type_pun_const( stake_weights_msg ) );
  fd_multi_epoch_leaders_stake_msg_fini( ctx->mleaders );
}

/**********************************************************************/
/* Vote tower publishing helpers                                      */
/**********************************************************************/

/* fd_replay_out_vote_tower_from_funk queries Funk for the state of the
   vote account with the given pubkey, and copies the state into the
   given fd_replay_tower_t structure.  The account data is simply
   copied as-is.

   Parameters:
   - funk:           The funk database instance to query vote account data from
   - xid:            The funk transaction xid for consistent reads
   - pubkey:         The public key of the vote account to retrieve
   - stake:          The stake amount associated with this vote account
   - vote_tower_out: Output structure to populate with vote state information

   Failure modes:
   - Vote account data is too large (returns -1)
   - Vote account is not found in Funk (returns -1) */
static int
fd_replay_out_vote_tower_from_funk( fd_funk_t const *         funk,
                                    fd_funk_txn_xid_t const * xid,
                                    fd_pubkey_t const *       pubkey,
                                    ulong                     stake,
                                    fd_replay_tower_t *       vote_tower_out ) {

  fd_memset( vote_tower_out, 0, sizeof(fd_replay_tower_t) );
  vote_tower_out->key   = *pubkey;
  vote_tower_out->stake = stake;

  /* Speculatively copy out the raw vote account state from Funk */
  for(;;) {
    fd_memset( vote_tower_out->acc, 0, sizeof(vote_tower_out->acc) );

    fd_funk_rec_query_t query;
    fd_funk_rec_key_t funk_key = fd_funk_acc_key( pubkey );
    fd_funk_rec_t const * rec = fd_funk_rec_query_try_global( funk, xid, &funk_key, NULL, &query );
    if( FD_UNLIKELY( !rec ) ) {
      FD_LOG_WARNING(( "vote account not found. address: %s", FD_BASE58_ENC_32_ALLOCA( pubkey->uc ) ));
      return -1;
    }

    uchar const *             raw      = fd_funk_val_const( rec, fd_funk_wksp( funk ) );
    fd_account_meta_t const * metadata = fd_type_pun_const( raw );

    ulong data_sz = metadata->dlen;
    if( FD_UNLIKELY( data_sz > sizeof(vote_tower_out->acc) ) ) {
      FD_LOG_WARNING(( "vote account %s has too large data. dlen %lu > %lu",
                       FD_BASE58_ENC_32_ALLOCA( pubkey->uc ),
                       data_sz,
                       sizeof(vote_tower_out->acc) ));
      return -1;
    }

    fd_memcpy( vote_tower_out->acc, raw + sizeof(fd_account_meta_t), data_sz );
    vote_tower_out->acc_sz = data_sz;

    if( FD_LIKELY( fd_funk_rec_query_test( &query ) == FD_FUNK_SUCCESS ) ) {
      break;
    }
  }

  return 0;
}
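
/* The loop above is an optimistic-concurrency read: the record is
   copied without taking a lock, and fd_funk_rec_query_test() reports
   whether the record was concurrently modified, in which case the copy
   is retried.  The pattern in isolation (illustrative sketch):

     for(;;) {
       fd_funk_rec_query_t q;
       fd_funk_rec_t const * rec = fd_funk_rec_query_try_global( funk, xid, &key, NULL, &q );
       // ... copy whatever is needed out of rec ...
       if( fd_funk_rec_query_test( &q )==FD_FUNK_SUCCESS ) break; // copy was consistent
     }
*/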

/* This function buffers all the vote account towers that Tower needs
   at the end of this slot into the ctx->vote_tower_out buffer.  These
   will then be published in after_credit.

   This function should be called at the end of a slot, before any
   epoch boundary processing. */
static void
buffer_vote_towers( fd_replay_tile_t *        ctx,
                    fd_funk_txn_xid_t const * xid,
                    fd_bank_t *               bank ) {
  ctx->vote_tower_out_idx = 0UL;
  ctx->vote_tower_out_len = 0UL;

  fd_vote_states_t const * vote_states = fd_bank_vote_states_prev_locking_query( bank );
  fd_vote_states_iter_t iter_[1];
  for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, vote_states );
       !fd_vote_states_iter_done( iter );
       fd_vote_states_iter_next( iter ) ) {
    fd_vote_state_ele_t const * vote_state = fd_vote_states_iter_ele( iter );
    if( FD_UNLIKELY( vote_state->stake == 0 ) ) continue; /* skip unstaked vote accounts */
    fd_pubkey_t const * vote_account_pubkey = &vote_state->vote_account;
    if( FD_UNLIKELY( ctx->vote_tower_out_len >= (FD_REPLAY_TOWER_VOTE_ACC_MAX-1UL) ) ) FD_LOG_ERR(( "vote_tower_out_len too large" ));
    if( FD_UNLIKELY( fd_replay_out_vote_tower_from_funk( ctx->funk,
                                                         xid,
                                                         vote_account_pubkey,
                                                         vote_state->stake,
                                                         &ctx->vote_tower_out[ctx->vote_tower_out_len++] ) ) ) {
      FD_LOG_DEBUG(( "failed to get vote state for vote account %s", FD_BASE58_ENC_32_ALLOCA( vote_account_pubkey->uc ) ));
    }
  }
  fd_bank_vote_states_prev_end_locking_query( bank );
}

/* This function publishes the next vote tower in the
   ctx->vote_tower_out buffer to the tower tile.

   This function should be called in after_credit, after all the vote
   towers for the end of a slot have been buffered in
   ctx->vote_tower_out. */

static void
publish_next_vote_tower( fd_replay_tile_t *  ctx,
                         fd_stem_context_t * stem ) {
  int som = ctx->vote_tower_out_idx==0;
  int eom = ctx->vote_tower_out_idx==( ctx->vote_tower_out_len - 1 );

  fd_replay_tower_t * vote_state = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
  *vote_state = ctx->vote_tower_out[ ctx->vote_tower_out_idx ];
  fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_VOTE_STATE, ctx->replay_out->chunk, sizeof(fd_replay_tower_t), fd_frag_meta_ctl( 0UL, som, eom, 0 ), 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
  ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_tower_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );

  ctx->vote_tower_out_idx++;
}

/**********************************************************************/
/* Transaction execution state machine helpers                        */
/**********************************************************************/

static fd_bank_t *
replay_block_start( fd_replay_tile_t *  ctx,
                    fd_stem_context_t * stem,
                    ulong               bank_idx,
                    ulong               parent_bank_idx,
                    ulong               slot ) {
  long before = fd_log_wallclock();

  /* Switch to a new block that we don't have a bank for. */

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
  if( FD_UNLIKELY( !bank ) ) {
    FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", bank_idx ));
  }
  if( FD_UNLIKELY( bank->flags!=FD_BANK_FLAGS_INIT ) ) {
    FD_LOG_CRIT(( "invariant violation: bank is not in correct state for bank index %lu", bank_idx ));
  }

  bank->preparation_begin_nanos = before;

  fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, parent_bank_idx );
  if( FD_UNLIKELY( !parent_bank ) ) {
    FD_LOG_CRIT(( "invariant violation: parent bank is NULL for bank index %lu", parent_bank_idx ));
  }
  if( FD_UNLIKELY( !(parent_bank->flags&FD_BANK_FLAGS_FROZEN) ) ) {
    FD_LOG_CRIT(( "invariant violation: parent bank is not frozen for bank index %lu", parent_bank_idx ));
  }
  ulong parent_slot = fd_bank_slot_get( parent_bank );

  /* Clone the bank from the parent.  We must special case the first
     slot that is executed as the snapshot does not provide a parent
     block id. */

  bank = fd_banks_clone_from_parent( ctx->banks, bank_idx, parent_bank_idx );
  if( FD_UNLIKELY( !bank ) ) {
    FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", bank_idx ));
  }
  fd_bank_slot_set( bank, slot );
  fd_bank_parent_slot_set( bank, parent_slot );
  bank->txncache_fork_id = fd_txncache_attach_child( ctx->txncache, parent_bank->txncache_fork_id );

  /* Create a new funk txn for the block. */

  fd_funk_txn_xid_t xid        = { .ul = { slot, slot } };
  fd_funk_txn_xid_t parent_xid = { .ul = { parent_slot, parent_slot } };
  fd_funk_txn_prepare( ctx->funk, &parent_xid, &xid );

  /* Update any required runtime state and handle any potential epoch
     boundary change. */

  if( ctx->capture_ctx ) {
    fd_solcap_writer_set_slot( ctx->capture_ctx->capture, slot );
  }

  fd_bank_shred_cnt_set( bank, 0UL );
  fd_bank_execution_fees_set( bank, 0UL );
  fd_bank_priority_fees_set( bank, 0UL );

  fd_bank_has_identity_vote_set( bank, 0 );

  /* Set the tick height. */
  fd_bank_tick_height_set( bank, fd_bank_max_tick_height_get( bank ) );

  /* Update block height. */
  fd_bank_block_height_set( bank, fd_bank_block_height_get( bank ) + 1UL );

  ulong * max_tick_height = fd_bank_max_tick_height_modify( bank );
  ulong   ticks_per_slot  = fd_bank_ticks_per_slot_get( bank );
  if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS != fd_runtime_compute_max_tick_height( ticks_per_slot, slot, max_tick_height ) ) ) {
    FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", slot, ticks_per_slot ));
  }
  bank->flags |= fd_ulong_if( ctx->tx_metadata_storage, FD_BANK_FLAGS_EXEC_RECORDING, 0UL );

  int is_epoch_boundary = 0;
  fd_runtime_block_pre_execute_process_new_epoch(
      ctx->banks,
      bank,
      ctx->funk,
      &xid,
      ctx->capture_ctx,
      ctx->runtime_spad,
      &is_epoch_boundary );
  if( FD_UNLIKELY( is_epoch_boundary ) ) publish_stake_weights( ctx, stem, bank, 1 );

  int res = fd_runtime_block_execute_prepare( bank, ctx->funk, &xid, ctx->capture_ctx, ctx->runtime_spad );
  if( FD_UNLIKELY( res!=FD_RUNTIME_EXECUTE_SUCCESS ) ) {
    FD_LOG_CRIT(( "block prep execute failed" ));
  }

  return bank;
}
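
/* Note the funk transaction id convention used above and repeated
   throughout this tile: the xid for a block is just its slot number in
   both words, e.g.:

     fd_funk_txn_xid_t xid = { .ul = { slot, slot } };

   so a block's funk txn can be recomputed from its slot alone. */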

static void
publish_slot_completed( fd_replay_tile_t *  ctx,
                        fd_stem_context_t * stem,
                        fd_bank_t *         bank,
                        int                 is_initial ) {

  ulong slot = fd_bank_slot_get( bank );

  fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank->idx ];

  /* HACK: this is a hacky way of deciding whether we should send a
     null parent block id. */
  fd_hash_t parent_block_id = {0};
  if( FD_UNLIKELY( !is_initial ) ) {
    parent_block_id = ctx->block_id_arr[ bank->parent_idx ].block_id;
  }

  fd_hash_t const * bank_hash  = fd_bank_bank_hash_query( bank );
  fd_hash_t const * block_hash = fd_blockhashes_peek_last( fd_bank_block_hash_queue_query( bank ) );
  FD_TEST( bank_hash );
  FD_TEST( block_hash );

  if( FD_LIKELY( !is_initial ) ) fd_txncache_finalize_fork( ctx->txncache, bank->txncache_fork_id, 0UL, block_hash->uc );

  fd_epoch_schedule_t const * epoch_schedule = fd_bank_epoch_schedule_query( bank );
  ulong slot_idx;
  ulong epoch = fd_slot_to_epoch( epoch_schedule, slot, &slot_idx );

  ctx->metrics.slots_total++;
  ctx->metrics.transactions_total = fd_bank_txn_count_get( bank );

  fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
  slot_info->slot            = slot;
  slot_info->root_slot       = ctx->consensus_root_slot;
  slot_info->epoch           = epoch;
  slot_info->slot_in_epoch   = slot_idx;
  slot_info->block_height    = fd_bank_block_height_get( bank );
  slot_info->parent_slot     = fd_bank_parent_slot_get( bank );
  slot_info->block_id        = block_id_ele->block_id;
  slot_info->parent_block_id = parent_block_id;
  slot_info->bank_hash       = *bank_hash;
  slot_info->block_hash      = *block_hash;

  slot_info->transaction_count        = fd_bank_txn_count_get( bank );
  slot_info->nonvote_txn_count        = fd_bank_nonvote_txn_count_get( bank );
  slot_info->failed_txn_count         = fd_bank_failed_txn_count_get( bank );
  slot_info->nonvote_failed_txn_count = fd_bank_nonvote_failed_txn_count_get( bank );
  slot_info->total_compute_units_used = fd_bank_total_compute_units_used_get( bank );
  slot_info->execution_fees           = fd_bank_execution_fees_get( bank );
  slot_info->priority_fees            = fd_bank_priority_fees_get( bank );
  slot_info->tips                     = 0UL; /* todo ... tip accounts balance delta */
  slot_info->shred_count              = fd_bank_shred_cnt_get( bank );

  fd_cost_tracker_t const * cost_tracker = fd_bank_cost_tracker_locking_query( bank );
  slot_info->max_compute_units = !!cost_tracker ? cost_tracker->block_cost_limit : ULONG_MAX;
  fd_bank_cost_tracker_end_locking_query( bank );

  slot_info->first_fec_set_received_nanos      = bank->first_fec_set_received_nanos;
  slot_info->preparation_begin_nanos           = bank->preparation_begin_nanos;
  slot_info->first_transaction_scheduled_nanos = bank->first_transaction_scheduled_nanos;
  slot_info->last_transaction_finished_nanos   = bank->last_transaction_finished_nanos;
  slot_info->completion_time_nanos             = fd_log_wallclock();

  fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_SLOT_COMPLETED, ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
  ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
}

static void
replay_block_finalize( fd_replay_tile_t *  ctx,
                       fd_stem_context_t * stem,
                       fd_bank_t *         bank ) {

  bank->last_transaction_finished_nanos = fd_log_wallclock();

  if( FD_UNLIKELY( ctx->capture_ctx ) ) fd_solcap_writer_flush( ctx->capture_ctx->capture );

  FD_TEST( !(bank->flags&FD_BANK_FLAGS_FROZEN) );

  ulong slot = fd_bank_slot_get( bank );
  fd_funk_txn_xid_t xid = { .ul = { slot, slot } };

  /* Set poh hash in bank. */
  fd_hash_t * poh = fd_sched_get_poh( ctx->sched, bank->idx );
  fd_bank_poh_set( bank, *poh );

  /* Set shred count in bank. */
  fd_bank_shred_cnt_set( bank, fd_sched_get_shred_cnt( ctx->sched, bank->idx ) );

  /* Do hashing and other end-of-block processing. */
  fd_runtime_block_execute_finalize( bank, ctx->funk, &xid, ctx->capture_ctx, 1 );

  /* Mark the bank as frozen. */
  fd_banks_mark_bank_frozen( ctx->banks, bank );

  /* Copy the vote tower of all the vote accounts into the buffer,
     which will be published in after_credit. */
  buffer_vote_towers( ctx, &xid, bank );

  /**********************************************************************/
  /* Bank hash comparison, and halt if there's a mismatch after replay  */
  /**********************************************************************/

  fd_hash_t const * bank_hash = fd_bank_bank_hash_query( bank );
  FD_TEST( bank_hash );

  fd_bank_hash_cmp_t * bank_hash_cmp = ctx->bank_hash_cmp;
  fd_bank_hash_cmp_lock( bank_hash_cmp );
  fd_bank_hash_cmp_insert( bank_hash_cmp, fd_bank_slot_get( bank ), bank_hash, 1, 0 );

  /* Try to move the bank hash comparison watermark forward */
  for( ulong cmp_slot = bank_hash_cmp->watermark + 1; cmp_slot < fd_bank_slot_get( bank ); cmp_slot++ ) {
    if( FD_UNLIKELY( !ctx->enable_bank_hash_cmp ) ) {
      bank_hash_cmp->watermark = cmp_slot;
      break;
    }
    int rc = fd_bank_hash_cmp_check( bank_hash_cmp, cmp_slot );
    switch( rc ) {
      case -1:
        /* Mismatch */
        FD_LOG_WARNING(( "Bank hash mismatch on slot: %lu. Halting.", cmp_slot ));
        break;
      case 0:
        /* Not ready */
        break;
      case 1:
        /* Match */
        bank_hash_cmp->watermark = cmp_slot;
        break;
      default:;
    }
  }

  fd_bank_hash_cmp_unlock( bank_hash_cmp );

  /* Must be last so we can measure completion time correctly, even
     though we could technically do this before the hash cmp and vote
     tower stuff. */
  publish_slot_completed( ctx, stem, bank, 0 );
}

/**********************************************************************/
/* Leader bank management                                             */
/**********************************************************************/

static fd_bank_t *
prepare_leader_bank( fd_replay_tile_t *  ctx,
                     ulong               slot,
                     long                now,
                     fd_hash_t const *   parent_block_id,
                     fd_stem_context_t * stem ) {
  long before = fd_log_wallclock();

  /* Make sure that we are not already leader. */
  FD_TEST( ctx->leader_bank==NULL );

  fd_block_id_ele_t * parent_ele = fd_block_id_map_ele_query( ctx->block_id_map, parent_block_id, NULL, ctx->block_id_arr );
  if( FD_UNLIKELY( !parent_ele ) ) {
    FD_LOG_CRIT(( "invariant violation: parent bank index not found for merkle root %s", FD_BASE58_ENC_32_ALLOCA( parent_block_id->uc ) ));
  }
  ulong parent_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, parent_ele );

  fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, parent_bank_idx );
  if( FD_UNLIKELY( !parent_bank ) ) {
    FD_LOG_CRIT(( "invariant violation: parent bank not found for bank index %lu", parent_bank_idx ));
  }
  ulong parent_slot = fd_bank_slot_get( parent_bank );

  ctx->leader_bank = fd_banks_new_bank( ctx->banks, parent_bank_idx, now );
  if( FD_UNLIKELY( !ctx->leader_bank ) ) {
    FD_LOG_CRIT(( "invariant violation: leader bank is NULL for slot %lu", slot ));
  }

  if( FD_UNLIKELY( !fd_banks_clone_from_parent( ctx->banks, ctx->leader_bank->idx, parent_bank_idx ) ) ) {
    FD_LOG_CRIT(( "invariant violation: bank is NULL for slot %lu", slot ));
  }

  ctx->leader_bank->preparation_begin_nanos = before;

  fd_bank_slot_set( ctx->leader_bank, slot );
  fd_bank_parent_slot_set( ctx->leader_bank, parent_slot );
  ctx->leader_bank->txncache_fork_id = fd_txncache_attach_child( ctx->txncache, parent_bank->txncache_fork_id );

  /* Prepare the funk transaction for the leader bank. */
  fd_funk_txn_xid_t xid        = { .ul = { slot, slot } };
  fd_funk_txn_xid_t parent_xid = { .ul = { parent_slot, parent_slot } };
  fd_funk_txn_prepare( ctx->funk, &parent_xid, &xid );

  fd_bank_execution_fees_set( ctx->leader_bank, 0UL );
  fd_bank_priority_fees_set( ctx->leader_bank, 0UL );
  fd_bank_shred_cnt_set( ctx->leader_bank, 0UL );

  /* Set the tick height. */
  fd_bank_tick_height_set( ctx->leader_bank, fd_bank_max_tick_height_get( ctx->leader_bank ) );

  /* Update block height. */
  fd_bank_block_height_set( ctx->leader_bank, fd_bank_block_height_get( ctx->leader_bank ) + 1UL );

  ulong * max_tick_height = fd_bank_max_tick_height_modify( ctx->leader_bank );
  ulong   ticks_per_slot  = fd_bank_ticks_per_slot_get( ctx->leader_bank );
  if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS != fd_runtime_compute_max_tick_height( ticks_per_slot, slot, max_tick_height ) ) ) {
    FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", slot, ticks_per_slot ));
  }

  ctx->leader_bank->flags |= fd_ulong_if( ctx->tx_metadata_storage, FD_BANK_FLAGS_EXEC_RECORDING, 0UL );

  int is_epoch_boundary = 0;
  fd_runtime_block_pre_execute_process_new_epoch(
      ctx->banks,
      ctx->leader_bank,
      ctx->funk,
      &xid,
      ctx->capture_ctx,
      ctx->runtime_spad,
      &is_epoch_boundary );
  if( FD_UNLIKELY( is_epoch_boundary ) ) publish_stake_weights( ctx, stem, ctx->leader_bank, 1 );

  int res = fd_runtime_block_execute_prepare( ctx->leader_bank, ctx->funk, &xid, ctx->capture_ctx, ctx->runtime_spad );
  if( FD_UNLIKELY( res!=FD_RUNTIME_EXECUTE_SUCCESS ) ) {
    FD_LOG_CRIT(( "block prep execute failed" ));
  }

  /* Now that a bank has been created for the leader slot, increment
     the reference count until we are done with the leader slot. */
  ctx->leader_bank->refcnt++;

  return ctx->leader_bank;
}

static void
fini_leader_bank( fd_replay_tile_t *  ctx,
                  fd_stem_context_t * stem ) {

  FD_TEST( ctx->leader_bank!=NULL );
  FD_TEST( ctx->is_leader );
  FD_TEST( ctx->recv_block_id );
  FD_TEST( ctx->recv_poh );

  ctx->leader_bank->last_transaction_finished_nanos = fd_log_wallclock();

  fd_banks_mark_bank_frozen( ctx->banks, ctx->leader_bank );

  fd_sched_block_add_done( ctx->sched, ctx->leader_bank->idx, ctx->leader_bank->parent_idx );

  ulong curr_slot = fd_bank_slot_get( ctx->leader_bank );

  /* Do hashing and other end-of-block processing */
  fd_funk_txn_map_t * txn_map = fd_funk_txn_map( ctx->funk );
  if( FD_UNLIKELY( !txn_map->map ) ) {
    FD_LOG_ERR(( "Could not find valid funk transaction map" ));
  }
  fd_funk_txn_xid_t xid = { .ul = { curr_slot, curr_slot } };

  fd_runtime_block_execute_finalize( ctx->leader_bank, ctx->funk, &xid, ctx->capture_ctx, 0 );

  publish_slot_completed( ctx, stem, ctx->leader_bank, 0 );

  /* Copy the vote tower of all the vote accounts into the buffer,
     which will be published in after_credit. */
  buffer_vote_towers( ctx, &xid, ctx->leader_bank );

  /* The reference on the bank is finally no longer needed. */
  ctx->leader_bank->refcnt--;

  /* We are no longer leader, so we can clear the bank we use for being
     the leader. */
  ctx->leader_bank   = NULL;
  ctx->recv_block_id = 0;
  ctx->recv_poh      = 0;
  ctx->is_leader     = 0;
}

static void
publish_root_advanced( fd_replay_tile_t *  ctx,
                       fd_stem_context_t * stem ) {

  /* FIXME: for now we want to send the child of the consensus root to
     avoid data races with funk root advancing.  This is a temporary
     hack because currently it is not safe to query against the xid for
     the root that is being advanced in funk.  This doesn't eliminate
     the data race that exists in funk, but reduces how often it
     occurs.

     Case that causes a data race:
     replay: we are advancing the root from slot A->B
     resolv: we are resolving ALUTs against slot B */

  fd_bank_t * consensus_root_bank = fd_banks_bank_query( ctx->banks, ctx->consensus_root_bank_idx );
  if( FD_UNLIKELY( !consensus_root_bank ) ) {
    FD_LOG_CRIT(( "invariant violation: consensus root bank is NULL at bank index %lu", ctx->consensus_root_bank_idx ));
  }

  if( FD_UNLIKELY( consensus_root_bank->child_idx==ULONG_MAX ) ) {
    return;
  }

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, consensus_root_bank->child_idx );
  if( FD_UNLIKELY( !bank ) ) {
    FD_LOG_CRIT(( "invariant violation: consensus root bank child is NULL at bank index %lu", consensus_root_bank->child_idx ));
  }

  /* Increment the reference count on the consensus root bank to
     account for the number of exec tiles that are waiting on it. */
  bank->refcnt += ctx->resolv_tile_cnt;

  fd_replay_root_advanced_t * msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
  msg->bank_idx = bank->idx;

  fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_ROOT_ADVANCED, ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
  ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
}

static void
init_after_snapshot( fd_replay_tile_t * ctx ) {
  /* Now that the snapshot has been loaded in, we have to refresh the
     stake delegations since the manifest does not contain the full set
     of data required for the stake delegations.  See
     fd_stake_delegations.h for why this is required. */

  fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX );
  if( FD_UNLIKELY( !bank ) ) {
    FD_LOG_CRIT(( "invariant violation: replay bank is NULL at bank index %lu", FD_REPLAY_BOOT_BANK_IDX ));
  }

  fd_stake_delegations_t * root_delegations = fd_banks_stake_delegations_root_query( ctx->banks );

  fd_funk_txn_xid_t xid = { .ul = { fd_bank_slot_get( bank ), fd_bank_slot_get( bank ) } };

  fd_stake_delegations_refresh( root_delegations, ctx->funk, &xid );

  /* After both snapshots have been loaded in, we can determine if we
     should start distributing rewards. */

  fd_rewards_recalculate_partitioned_rewards( ctx->banks, bank, ctx->funk, &xid, ctx->capture_ctx, ctx->runtime_spad );

  ulong snapshot_slot = fd_bank_slot_get( bank );
  if( FD_UNLIKELY( !snapshot_slot ) ) {
    /* Genesis-specific setup. */
    /* FIXME: This branch does not set up a new block exec ctx
       properly.  Needs to do whatever prepare_new_block_execution
       does, but just hacking that in breaks stuff. */
    fd_runtime_update_leaders( bank, ctx->runtime_spad );

    ulong hashcnt_per_slot = fd_bank_hashes_per_tick_get( bank ) * fd_bank_ticks_per_slot_get( bank );
    fd_hash_t * poh = fd_bank_poh_modify( bank );
    while( hashcnt_per_slot-- ) {
      fd_sha256_hash( poh->hash, 32UL, poh->hash );
    }

    FD_TEST( fd_runtime_block_execute_prepare( bank, ctx->funk, &xid, ctx->capture_ctx, ctx->runtime_spad ) == 0 );
    fd_runtime_block_execute_finalize( bank, ctx->funk, &xid, ctx->capture_ctx, 1 );

    snapshot_slot = 0UL;

    /* Now setup exec tiles for execution */
    ctx->exec_ready_bitset = fd_ulong_mask_lsb( (int)ctx->exec_cnt );
  }

  /* Initialize consensus structures post-snapshot */

  fd_vote_states_t const * vote_states = fd_bank_vote_states_locking_query( bank );

  fd_bank_hash_cmp_t * bank_hash_cmp = ctx->bank_hash_cmp;

  fd_vote_states_iter_t iter_[1];
  for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, vote_states ); !fd_vote_states_iter_done( iter ); fd_vote_states_iter_next( iter ) ) {
    fd_vote_state_ele_t const * vote_state = fd_vote_states_iter_ele( iter );
    bank_hash_cmp->total_stake += vote_state->stake;
  }
  bank_hash_cmp->watermark = snapshot_slot;

  fd_bank_vote_states_end_locking_query( bank );

  /* Now that the snapshot(s) are done loading, we can mark all of the
     exec tiles as ready. */
  ctx->exec_ready_bitset = fd_ulong_mask_lsb( (int)ctx->exec_cnt );

  if( FD_UNLIKELY( ctx->capture_ctx ) ) fd_solcap_writer_flush( ctx->capture_ctx->capture );
}

static inline int
maybe_become_leader( fd_replay_tile_t *  ctx,
                     fd_stem_context_t * stem ) {
  FD_TEST( ctx->is_booted );
  if( FD_LIKELY( ctx->next_leader_slot==ULONG_MAX || ctx->is_leader || !ctx->has_identity_vote_rooted || ctx->replay_out->idx==ULONG_MAX ) ) return 0;

  FD_TEST( ctx->next_leader_slot>ctx->reset_slot );
  long now = fd_tickcount();
  if( FD_LIKELY( now<ctx->next_leader_tickcount ) ) return 0;

  /* TODO:
     if( FD_UNLIKELY( ctx->halted_switching_key ) ) return 0; */

  /* If a prior leader is still in the process of publishing their
     slot, delay ours to let them finish ... unless they are so delayed
     that we risk getting skipped by the leader following us.  1.2
     seconds is a reasonable default here, although any value between 0
     and 1.6 seconds could be justified.  This is arbitrary and chosen
     due to intuition. */
  if( FD_UNLIKELY( now<ctx->next_leader_tickcount+(long)(3.0*ctx->slot_duration_ticks) ) ) {
    /* If the max_active_descendant is >= next_leader_slot, we waited
       too long and a leader after us started publishing to try and
       skip us.  Just start our leader slot immediately, we might win
       ... */
    if( FD_LIKELY( ctx->max_active_descendant>=ctx->reset_slot && ctx->max_active_descendant<ctx->next_leader_slot ) ) {
      /* If one of the leaders between the reset slot and our leader
         slot is in the process of publishing (they have a descendant
         bank that is in progress of being replayed), then keep
         waiting.  We probably wouldn't get a leader slot out before
         they finished.

         Unless... we are past the deadline to start our slot by more
         than 1.2 seconds, in which case we should probably start it to
         avoid getting skipped by the leader behind us. */
      return 0;
    }
  }

  ctx->is_leader     = 1;
  ctx->recv_poh      = 0;
  ctx->recv_block_id = 0;

  FD_TEST( ctx->highwater_leader_slot==ULONG_MAX || ctx->highwater_leader_slot<ctx->next_leader_slot );
  ctx->highwater_leader_slot = ctx->next_leader_slot;

  FD_LOG_INFO(( "becoming leader for slot %lu, parent slot is %lu", ctx->next_leader_slot, ctx->reset_slot ));

  /* Acquires bank, sets up initial state, and refcnts it. */
  fd_bank_t * bank = prepare_leader_bank( ctx, ctx->next_leader_slot, now, &ctx->reset_block_id, stem );

  fd_became_leader_t * msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
  msg->slot             = ctx->next_leader_slot;
  msg->slot_start_ns    = now;
  msg->slot_end_ns      = now+(long)ctx->slot_duration_nanos;
  msg->bank             = NULL;
  msg->bank_idx         = bank->idx;
  msg->ticks_per_slot   = fd_bank_ticks_per_slot_get( bank );
  msg->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
  msg->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)msg->ticks_per_slot);

  if( FD_UNLIKELY( msg->hashcnt_per_tick==1UL ) ) {
    /* Low power producer, maximum of one microblock per tick in the slot */
    msg->max_microblocks_in_slot = msg->ticks_per_slot;
  } else {
    /* See the long comment in after_credit for this limit */
    msg->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, msg->ticks_per_slot*(msg->hashcnt_per_tick-1UL) );
  }

  msg->total_skipped_ticks = msg->ticks_per_slot*(ctx->next_leader_slot-ctx->reset_slot);
  msg->epoch               = fd_slot_to_epoch( fd_bank_epoch_schedule_query( bank ), ctx->next_leader_slot, NULL );
  fd_memset( msg->bundle, 0, sizeof(msg->bundle) );

  fd_cost_tracker_t const * cost_tracker = fd_bank_cost_tracker_locking_query( bank );

  msg->limits.slot_max_cost                = cost_tracker->block_cost_limit;
  msg->limits.slot_max_vote_cost           = cost_tracker->vote_cost_limit;
  msg->limits.slot_max_write_cost_per_acct = cost_tracker->account_cost_limit;

  fd_bank_cost_tracker_end_locking_query( bank );

  if( FD_UNLIKELY( msg->ticks_per_slot+msg->total_skipped_ticks>USHORT_MAX ) ) {
    /* There can be at most USHORT_MAX skipped ticks, because the
       parent_offset field in the shred data is only 2 bytes wide. */
    FD_LOG_ERR(( "too many skipped ticks %lu for slot %lu, chain must halt", msg->ticks_per_slot+msg->total_skipped_ticks, ctx->next_leader_slot ));
  }

  fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_BECAME_LEADER, ctx->replay_out->chunk, sizeof(fd_became_leader_t), 0UL, 0UL, 0UL );
  ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_became_leader_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );

  ctx->next_leader_slot      = ULONG_MAX;
  ctx->next_leader_tickcount = LONG_MAX;

  return 1;
}
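
/* Note on the 3.0 multiplier above: three slots' worth of ticks at the
   default 400 ms slot duration is the ~1.2 s grace period referenced
   in the comment inside maybe_become_leader. */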

static void
process_poh_message( fd_replay_tile_t *                 ctx,
                     fd_poh_leader_slot_ended_t const * slot_ended ) {

  FD_TEST( ctx->is_booted );
  FD_TEST( ctx->is_leader );
  FD_TEST( ctx->leader_bank!=NULL );

  FD_TEST( ctx->highwater_leader_slot>=slot_ended->slot );
  FD_TEST( ctx->next_leader_slot>ctx->highwater_leader_slot );

  /* Update the poh hash in the bank.  We will want to maintain a
     refcnt on the bank until we have received the block id for the
     block after it has been shredded. */

  memcpy( fd_bank_poh_modify( ctx->leader_bank ), slot_ended->blockhash, sizeof(fd_hash_t) );

  ctx->recv_poh = 1;
}
1202 :
1203 : static void
1204 : publish_reset( fd_replay_tile_t * ctx,
1205 : fd_stem_context_t * stem,
1206 0 : fd_bank_t const * bank ) {
1207 0 : if( FD_UNLIKELY( ctx->replay_out->idx==ULONG_MAX ) ) return;
1208 :
1209 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last( fd_bank_block_hash_queue_query( bank ) );
1210 0 : FD_TEST( block_hash );
1211 :
1212 0 : fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
1213 :
1214 0 : reset->timestamp = fd_log_wallclock();
1215 0 : reset->completed_slot = fd_bank_slot_get( bank );
1216 0 : reset->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
1217 0 : reset->ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1218 0 : reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);
1219 0 : fd_memcpy( reset->completed_blockhash, block_hash->uc, sizeof(fd_hash_t) );
1220 :
1221 0 : ulong ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1222 0 : if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
1223 : /* Low power producer, maximum of one microblock per tick in the slot */
1224 0 : reset->max_microblocks_in_slot = ticks_per_slot;
1225 0 : } else {
1226 : /* See the long comment in after_credit for this limit */
1227 0 : reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
1228 0 : }
1229 0 : reset->next_leader_slot = ctx->next_leader_slot;
1230 :
1231 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_RESET, ctx->replay_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1232 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_poh_reset_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
1233 0 : }
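: /* Illustrative arithmetic for max_microblocks_in_slot above (values
:    hypothetical, for illustration only): with ticks_per_slot==64 and a
:    mainnet-like hashcnt_per_tick of 12500, the product term is
:    64*(12500-1UL)==799936, far above the cap, so
:    MAX_MICROBLOCKS_PER_SLOT binds. With hashcnt_per_tick==1 (low power
:    producer) there is no room to hash microblocks between ticks, so
:    the producer is limited to one microblock per tick, i.e. 64 per
:    slot. */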
1234 :
1235 : static void
1236 : boot_genesis( fd_replay_tile_t * ctx,
1237 : fd_stem_context_t * stem,
1238 : ulong in_idx,
1239 0 : ulong chunk ) {
1240 :
1241 : /* If we are bootstrapping, we can't wait for our identity vote to
1242 : be rooted, as this creates a circular dependency. */
1243 0 : ctx->has_identity_vote_rooted = 1;
1244 :
1245 0 : uchar const * lthash = (uchar*)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
1246 0 : uchar const * genesis_hash = (uchar*)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk )+sizeof(fd_lthash_value_t);
1247 :
1248 : // TODO: Do not pass the fd_types type between tiles, it has offsets
1249 : // that are unsafe and can't be validated as being in-bounds. Need to
1250 : // pass an actual owned genesis type.
1251 0 : fd_genesis_solana_global_t const * genesis = fd_type_pun( (uchar*)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk )+sizeof(fd_hash_t)+sizeof(fd_lthash_value_t) );
1252 :
1253 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX );
1254 0 : if( FD_UNLIKELY( !bank ) ) {
1255 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", FD_REPLAY_BOOT_BANK_IDX ));
1256 0 : }
1257 0 : fd_funk_txn_xid_t xid = { .ul = { 0UL, 0UL } };
1258 :
1259 0 : fd_runtime_read_genesis( ctx->banks, bank, ctx->funk, &xid, NULL, fd_type_pun_const( genesis_hash ), fd_type_pun_const( lthash ), genesis, ctx->runtime_spad );
1260 :
1261 0 : static const fd_txncache_fork_id_t txncache_root = { .val = USHORT_MAX };
1262 0 : bank->txncache_fork_id = fd_txncache_attach_child( ctx->txncache, txncache_root );
1263 :
1264 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last( fd_bank_block_hash_queue_query( bank ) );
1265 0 : fd_txncache_finalize_fork( ctx->txncache, bank->txncache_fork_id, 0UL, block_hash->uc );
1266 :
1267 0 : publish_stake_weights( ctx, stem, bank, 0 );
1268 0 : publish_stake_weights( ctx, stem, bank, 1 );
1269 :
1270 : /* We call this after fd_runtime_read_genesis, which sets up the
1271 : slot_bank needed in blockstore_init. */
1272 0 : init_after_snapshot( ctx );
1273 :
1274 : /* Initialize store for genesis case, similar to snapshot case */
1275 0 : fd_hash_t genesis_block_id = { .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
1276 0 : fd_store_exacq( ctx->store );
1277 0 : if( FD_UNLIKELY( fd_store_root( ctx->store ) ) ) {
1278 0 : FD_LOG_CRIT(( "invariant violation: store root is not 0 for genesis" ));
1279 0 : }
1280 0 : fd_store_insert( ctx->store, 0, &genesis_block_id );
1281 0 : ctx->store->slot0 = 0UL; /* Genesis slot */
1282 0 : fd_store_exrel( ctx->store );
1283 :
1285 0 : fd_sched_block_add_done( ctx->sched, bank->idx, ULONG_MAX );
1286 :
1287 0 : fd_bank_block_height_set( bank, 1UL );
1288 :
1289 0 : ctx->consensus_root = (fd_hash_t){ .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
1290 0 : ctx->consensus_root_slot = 0UL;
1291 0 : ctx->consensus_root_bank_idx = 0UL;
1292 0 : ctx->published_root_slot = 0UL;
1293 0 : ctx->published_root_bank_idx = 0UL;
1294 :
1295 0 : ctx->reset_slot = 0UL;
1296 0 : ctx->reset_timestamp_nanos = fd_log_wallclock();
1297 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, 1UL, ctx->identity_pubkey );
1298 0 : if( FD_LIKELY( ctx->next_leader_slot ) ) {
1299 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
1300 0 : } else {
1301 0 : ctx->next_leader_tickcount = LONG_MAX;
1302 0 : }
1303 :
1304 0 : ctx->is_booted = 1;
1305 0 : maybe_become_leader( ctx, stem );
1306 :
1307 0 : fd_hash_t initial_block_id = { .ul = { FD_RUNTIME_INITIAL_BLOCK_ID } };
1308 0 : fd_reasm_fec_t * fec = fd_reasm_insert( ctx->reasm, &initial_block_id, NULL, 0 /* genesis slot */, 0, 0, 0, 0, 1, 0 ); /* FIXME manifest block_id */
1309 0 : fec->bank_idx = 0UL;
1310 :
1311 :
1312 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ 0 ];
1313 0 : FD_TEST( block_id_ele );
1314 0 : block_id_ele->block_id = initial_block_id;
1315 0 : block_id_ele->slot = 0UL;
1316 :
1317 0 : FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );
1318 :
1319 0 : publish_slot_completed( ctx, stem, bank, 1 );
1320 0 : publish_root_advanced( ctx, stem );
1321 0 : publish_reset( ctx, stem, bank );
1322 :
1323 0 : }
1324 :
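: /* Snapshot boot path. Like boot_genesis above, this initializes bank
:    index 0, the store root, two epochs worth of stake weights, and the
:    consensus/published roots, except that the starting slot comes from
:    the snapshot manifest rather than slot 0. */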
1325 : static void
1326 : on_snapshot_message( fd_replay_tile_t * ctx,
1327 : fd_stem_context_t * stem,
1328 : ulong in_idx,
1329 : ulong chunk,
1330 0 : ulong sig ) {
1331 0 : ulong msg = fd_ssmsg_sig_message( sig );
1332 0 : if( FD_LIKELY( msg==FD_SSMSG_DONE ) ) {
1333 : /* An end-of-message notification indicates the snapshot is loaded.
1334 : Replay is able to start executing from this point onwards. */
1335 : /* TODO: replay should finish booting. Could make replay a
1336 : state machine and set the state here accordingly. */
1337 0 : ctx->is_booted = 1;
1338 :
1339 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX );
1340 0 : if( FD_UNLIKELY( !bank ) ) {
1341 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", FD_REPLAY_BOOT_BANK_IDX ));
1342 0 : }
1343 :
1344 0 : ulong snapshot_slot = fd_bank_slot_get( bank );
1345 : /* FIXME: This is a hack because the block id of the snapshot slot
1346 : is not provided in the snapshot. A possible solution is to get
1347 : the block id of the snapshot slot from repair. */
1348 0 : fd_hash_t manifest_block_id = { .ul = { FD_RUNTIME_INITIAL_BLOCK_ID } };
1349 :
1350 0 : fd_store_exacq( ctx->store );
1351 0 : FD_TEST( !fd_store_root( ctx->store ) );
1352 0 : fd_store_insert( ctx->store, 0, &manifest_block_id );
1353 0 : ctx->store->slot0 = snapshot_slot; /* FIXME manifest_block_id */
1354 0 : fd_store_exrel( ctx->store );
1355 :
1356 : /* Typically, when we cross an epoch boundary during normal
1357 : operation, we publish the stake weights for the new epoch. But
1358 : since we are starting from a snapshot, we need to publish two
1359 : epochs worth of stake weights: the previous epoch (which is
1360 : needed for voting on the current epoch), and the current epoch
1361 : (which is needed for voting on the next epoch). */
1362 0 : publish_stake_weights( ctx, stem, bank, 0 );
1363 0 : publish_stake_weights( ctx, stem, bank, 1 );
1364 :
1365 0 : ctx->consensus_root = manifest_block_id;
1366 0 : ctx->consensus_root_slot = snapshot_slot;
1367 0 : ctx->consensus_root_bank_idx = 0UL;
1368 0 : ctx->published_root_slot = ctx->consensus_root_slot;
1369 0 : ctx->published_root_bank_idx = 0UL;
1370 :
1371 0 : ctx->reset_slot = snapshot_slot;
1372 0 : ctx->reset_timestamp_nanos = fd_log_wallclock();
1373 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, 1UL, ctx->identity_pubkey );
1374 0 : if( FD_LIKELY( ctx->next_leader_slot ) ) {
1375 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
1376 0 : } else {
1377 0 : ctx->next_leader_tickcount = LONG_MAX;
1378 0 : }
1379 :
1380 0 : fd_sched_block_add_done( ctx->sched, bank->idx, ULONG_MAX );
1381 0 : FD_TEST( bank->idx==0UL );
1382 :
1383 :
1384 0 : fd_funk_txn_xid_t xid = { .ul = { snapshot_slot, snapshot_slot } };
1385 :
1386 0 : fd_features_restore( bank, ctx->funk, &xid );
1387 :
1388 0 : fd_runtime_update_leaders( bank, ctx->runtime_spad );
1389 :
1390 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ 0 ];
1391 0 : FD_TEST( block_id_ele );
1392 0 : block_id_ele->block_id = manifest_block_id;
1393 0 : block_id_ele->slot = snapshot_slot;
1394 0 : FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );
1395 :
1396 : /* We call this only after the snapshot manifest has been fully
1397 : loaded (the snapshot analogue of fd_runtime_read_genesis), which
1398 : sets up the slot_bank needed in blockstore_init. */
1398 0 : init_after_snapshot( ctx );
1399 :
1400 0 : publish_slot_completed( ctx, stem, bank, 1 );
1401 0 : publish_root_advanced( ctx, stem );
1402 :
1403 0 : fd_reasm_fec_t * fec = fd_reasm_insert( ctx->reasm, &manifest_block_id, NULL, snapshot_slot, 0, 0, 0, 0, 1, 0 ); /* FIXME manifest block_id */
1404 0 : fec->bank_idx = 0UL;
1405 0 : return;
1406 0 : }
1407 :
1408 0 : switch( msg ) {
1409 0 : case FD_SSMSG_MANIFEST_FULL:
1410 0 : case FD_SSMSG_MANIFEST_INCREMENTAL: {
1411 : /* We may either receive a full snapshot manifest or an
1412 : incremental snapshot manifest. Note that this external message
1413 : id is only used temporarily because replay cannot yet receive
1414 : the firedancer-internal snapshot manifest message. */
1415 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark ) )
1416 0 : FD_LOG_ERR(( "chunk %lu from in %d corrupt, not in range [%lu,%lu]", chunk, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
1417 :
1418 0 : fd_ssload_recover( fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), ctx->banks, fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_IDX ) );
1419 0 : break;
1420 0 : }
1421 0 : default: {
1422 0 : FD_LOG_ERR(( "Received unknown snapshot message with msg %lu", msg ));
1423 0 : return;
1424 0 : }
1425 0 : }
1426 :
1427 0 : return;
1428 0 : }
1429 :
1430 : static void
1431 : dispatch_task( fd_replay_tile_t * ctx,
1432 : fd_stem_context_t * stem,
1433 0 : fd_sched_task_t * task ) {
1434 : /* Find an exec tile and mark it busy. */
1435 0 : int exec_idx = fd_ulong_find_lsb( ctx->exec_ready_bitset );
1436 0 : ctx->exec_ready_bitset = fd_ulong_pop_lsb( ctx->exec_ready_bitset );
1437 :
1438 0 : switch( task->task_type ) {
1439 0 : case FD_SCHED_TT_TXN_EXEC: {
1440 0 : fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, task->txn_exec->txn_idx );
1441 :
1442 : /* FIXME: this should be done during txn parsing so that we don't
1443 : have to loop over all accounts a second time. */
1444 : /* Insert or reverify invoked programs for this epoch, if needed. */
1445 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->txn_exec->bank_idx );
1446 0 : fd_funk_txn_xid_t xid = { .ul = { task->txn_exec->slot, task->txn_exec->slot } };
1447 0 : fd_runtime_update_program_cache( bank, ctx->funk, &xid, txn_p, ctx->runtime_spad );
1448 :
1449 0 : bank->refcnt++;
1450 :
1451 0 : if( FD_UNLIKELY( !bank->first_transaction_scheduled_nanos ) ) bank->first_transaction_scheduled_nanos = fd_log_wallclock();
1452 :
1453 0 : fd_replay_out_link_t * exec_out = ctx->exec_out;
1454 0 : fd_exec_txn_exec_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
1455 0 : memcpy( &exec_msg->txn, txn_p, sizeof(fd_txn_p_t) );
1456 0 : exec_msg->bank_idx = task->txn_exec->bank_idx;
1457 0 : exec_msg->txn_idx = task->txn_exec->txn_idx;
1458 0 : fd_stem_publish( stem, exec_out->idx, (FD_EXEC_TT_TXN_EXEC<<32) | (ulong)exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, 0UL );
1459 0 : exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
1460 0 : break;
1461 0 : }
1462 0 : case FD_SCHED_TT_TXN_SIGVERIFY: {
1463 0 : fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, task->txn_sigverify->txn_idx );
1464 :
1465 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->txn_sigverify->bank_idx );
1466 0 : bank->refcnt++;
1467 :
1468 0 : fd_replay_out_link_t * exec_out = ctx->exec_out;
1469 0 : fd_exec_txn_sigverify_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
1470 0 : memcpy( &exec_msg->txn, txn_p, sizeof(fd_txn_p_t) );
1471 0 : exec_msg->bank_idx = task->txn_sigverify->bank_idx;
1472 0 : exec_msg->txn_idx = task->txn_sigverify->txn_idx;
1473 0 : fd_stem_publish( stem, exec_out->idx, (FD_EXEC_TT_TXN_SIGVERIFY<<32) | (ulong)exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, 0UL );
1474 0 : exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
1475 0 : break;
1476 0 : }
1477 0 : default: {
1478 0 : FD_LOG_CRIT(( "unexpected task type %lu", task->task_type ));
1479 0 : }
1480 0 : }
1481 0 : }
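: /* A rough sketch of the exec-tile bookkeeping used above (and undone
:    in process_exec_task_done): bit i of exec_ready_bitset set means
:    exec tile i is idle.
:
:      int exec_idx = fd_ulong_find_lsb( bitset );          // lowest idle tile
:      bitset       = fd_ulong_pop_lsb ( bitset );          // claim it
:      ... dispatch and wait for completion ...
:      bitset       = fd_ulong_set_bit( bitset, exec_idx ); // release
:
:    The published sig carries the task type in the high 32 bits and the
:    chosen exec tile index in the low 32 bits. */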
1482 :
1483 : /* Returns 1 if charge_busy. */
1484 : static int
1485 : replay( fd_replay_tile_t * ctx,
1486 0 : fd_stem_context_t * stem ) {
1487 :
1488 0 : if( FD_UNLIKELY( !ctx->is_booted ) ) return 0;
1489 :
1490 0 : int charge_busy = 0;
1491 0 : if( FD_LIKELY( ctx->exec_ready_bitset ) ) {
1492 0 : fd_sched_task_t task[ 1 ];
1493 0 : if( FD_UNLIKELY( !fd_sched_task_next_ready( ctx->sched, task, (ulong)fd_ulong_popcnt( ctx->exec_ready_bitset ) ) ) ) {
1494 0 : return charge_busy; /* Nothing to execute or do. */
1495 0 : }
1496 :
1497 0 : charge_busy = 1;
1498 :
1499 0 : switch( task->task_type ) {
1500 0 : case FD_SCHED_TT_BLOCK_START: {
1501 0 : replay_block_start( ctx, stem, task->block_start->bank_idx, task->block_start->parent_bank_idx, task->block_start->slot );
1502 0 : fd_sched_task_done( ctx->sched, FD_SCHED_TT_BLOCK_START, ULONG_MAX );
1503 0 : break;
1504 0 : }
1505 0 : case FD_SCHED_TT_BLOCK_END: {
1506 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->block_end->bank_idx );
1507 0 : if( FD_LIKELY( !(bank->flags&FD_BANK_FLAGS_DEAD) ) ) replay_block_finalize( ctx, stem, bank );
1508 0 : fd_sched_task_done( ctx->sched, FD_SCHED_TT_BLOCK_END, ULONG_MAX );
1509 0 : break;
1510 0 : }
1511 0 : case FD_SCHED_TT_TXN_EXEC:
1512 0 : case FD_SCHED_TT_TXN_SIGVERIFY: {
1513 : /* Likely/common case: we have a transaction we actually need to
1514 : execute. */
1515 0 : dispatch_task( ctx, stem, task );
1516 0 : break;
1517 0 : }
1518 0 : default: {
1519 0 : FD_LOG_CRIT(( "unexpected task type %lu", task->task_type ));
1520 0 : }
1521 0 : }
1522 0 : }
1523 :
1524 0 : return charge_busy;
1525 0 : }
1526 :
1527 : static void
1528 : process_fec_set( fd_replay_tile_t * ctx,
1529 0 : fd_reasm_fec_t * reasm_fec ) {
1530 0 : long now = fd_log_wallclock();
1531 :
1532 0 : if( FD_UNLIKELY( reasm_fec->eqvoc ) ) {
1533 0 : FD_LOG_ERR(( "Firedancer currently does not support mid-block equivocation and this was detected on slot %lu.", reasm_fec->slot ));
1534 0 : }
1535 :
1536 : /* Linking only requires a shared lock because the fields that are
1537 : modified are only read on publish, which takes the exclusive lock. */
1538 :
1539 0 : long shacq_start, shacq_end, shrel_end;
1540 :
1541 0 : FD_STORE_SHARED_LOCK( ctx->store, shacq_start, shacq_end, shrel_end ) {
1542 0 : if( FD_UNLIKELY( !fd_store_link( ctx->store, &reasm_fec->key, &reasm_fec->cmr ) ) ) FD_LOG_WARNING(( "failed to link %s %s. slot %lu fec_set_idx %u", FD_BASE58_ENC_32_ALLOCA( &reasm_fec->key ), FD_BASE58_ENC_32_ALLOCA( &reasm_fec->cmr ), reasm_fec->slot, reasm_fec->fec_set_idx ));
1543 0 : } FD_STORE_SHARED_LOCK_END;
1544 0 : fd_histf_sample( ctx->metrics.store_link_wait, (ulong)fd_long_max( shacq_end - shacq_start, 0L ) );
1545 0 : fd_histf_sample( ctx->metrics.store_link_work, (ulong)fd_long_max( shrel_end - shacq_end, 0L ) );
1546 :
1547 : /* Update the reasm_fec with the correct bank index and parent bank
1548 : index. If the FEC belongs to a leader, we have already allocated
1549 : a bank index for the FEC and it just needs to be propagated to the
1550 : reasm_fec. */
1551 :
1552 0 : reasm_fec->parent_bank_idx = fd_reasm_parent( ctx->reasm, reasm_fec )->bank_idx;
1553 :
1554 0 : if( FD_UNLIKELY( reasm_fec->leader ) ) {
1555 : /* If we are the leader we just need to copy in the bank index that
1556 : the leader slot is using. */
1557 0 : FD_TEST( ctx->leader_bank!=NULL );
1558 0 : reasm_fec->bank_idx = ctx->leader_bank->idx;
1559 0 : } else if( FD_UNLIKELY( reasm_fec->fec_set_idx==0U ) ) {
1560 : /* If we are seeing a FEC with fec set idx 0, this means that we are
1561 : starting a new slot, and we need a new bank index. */
1562 0 : reasm_fec->bank_idx = fd_banks_new_bank( ctx->banks, reasm_fec->parent_bank_idx, now )->idx;
1563 0 : } else {
1564 : /* We are continuing to execute through a slot that we already have
1565 : a bank index for. */
1566 0 : reasm_fec->bank_idx = reasm_fec->parent_bank_idx;
1567 0 : }
1568 :
1569 0 : if( FD_UNLIKELY( reasm_fec->slot_complete ) ) {
1570 : /* Once the block id for a block is known, it must be added to the
1571 : block id mapping. */
1572 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
1573 0 : FD_TEST( block_id_ele );
1574 :
1575 : /* If an entry already exists for this bank index in the block id
1576 : map, we can safely remove it and replace it with the new entry.
1577 : This is safe because we know that the old entry for this fork
1578 : index has already been pruned away. */
1579 0 : if( FD_LIKELY( block_id_ele->slot!=FD_SLOT_NULL && fd_block_id_map_ele_query( ctx->block_id_map, &block_id_ele->block_id, NULL, ctx->block_id_arr ) ) ) {
1580 0 : FD_TEST( fd_block_id_map_ele_remove( ctx->block_id_map, &block_id_ele->block_id, NULL, ctx->block_id_arr ) );
1581 0 : }
1582 :
1583 0 : block_id_ele->block_id = reasm_fec->key;
1584 0 : block_id_ele->slot = reasm_fec->slot;
1585 :
1586 0 : FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );
1587 :
1588 0 : if( FD_UNLIKELY( reasm_fec->leader ) ) {
1589 0 : ctx->recv_block_id = 1;
1590 0 : }
1591 0 : }
1592 :
1593 0 : if( FD_UNLIKELY( reasm_fec->leader ) ) {
1594 0 : return;
1595 0 : }
1596 :
1597 : /* Forks form a partial ordering over FEC sets. The Repair tile
1598 : delivers FEC sets in-order per fork, but FEC set ordering across
1599 : forks is arbitrary. */
1600 0 : fd_sched_fec_t sched_fec[ 1 ];
1601 :
1602 : # if DEBUG_LOGGING
1603 : FD_LOG_INFO(( "replay processing FEC set for slot %lu fec_set_idx %u, mr %s cmr %s", reasm_fec->slot, reasm_fec->fec_set_idx, FD_BASE58_ENC_32_ALLOCA( &reasm_fec->key ), FD_BASE58_ENC_32_ALLOCA( &reasm_fec->cmr ) ));
1604 : # endif
1605 :
1606 : /* Read FEC set from the store. This should happen before we try to
1607 : ingest the FEC set. This allows us to filter out frags that were
1608 : in-flight when we published away minority forks that the frags land
1609 : on. These frags would have no bank to execute against, because
1610 : their corresponding banks, or parent banks, have also been pruned
1611 : during publishing. A query against store will rightfully tell us
1612 : that the underlying data is not found, implying that this is for a
1613 : minority fork that we can safely ignore. */
1614 0 : FD_STORE_SHARED_LOCK( ctx->store, shacq_start, shacq_end, shrel_end ) {
1615 0 : fd_store_fec_t * store_fec = fd_store_query( ctx->store, &reasm_fec->key );
1616 0 : if( FD_UNLIKELY( !store_fec ) ) {
1617 : /* The only case in which a FEC is not found in the store after
1618 : repair has notified is if the FEC was on a minority fork that
1619 : has already been published away. In this case we abandon the
1620 : entire slice because it is no longer relevant. */
1621 0 : FD_LOG_WARNING(( "store fec for slot: %lu is on minority fork already pruned by publish. abandoning slice. root: %lu. pruned merkle: %s", reasm_fec->slot, ctx->consensus_root_slot, FD_BASE58_ENC_32_ALLOCA( &reasm_fec->key ) ));
1622 0 : return;
1623 0 : }
1624 0 : FD_TEST( store_fec );
1625 0 : sched_fec->fec = store_fec;
1626 0 : sched_fec->shred_cnt = reasm_fec->data_cnt;
1627 0 : } FD_STORE_SHARED_LOCK_END;
1628 :
1629 0 : fd_histf_sample( ctx->metrics.store_read_wait, (ulong)fd_long_max( shacq_end - shacq_start, 0L ) );
1630 0 : fd_histf_sample( ctx->metrics.store_read_work, (ulong)fd_long_max( shrel_end - shacq_end, 0L ) );
1631 :
1632 0 : sched_fec->is_last_in_batch = !!reasm_fec->data_complete;
1633 0 : sched_fec->is_last_in_block = !!reasm_fec->slot_complete;
1634 0 : sched_fec->bank_idx = reasm_fec->bank_idx;
1635 0 : sched_fec->parent_bank_idx = reasm_fec->parent_bank_idx;
1636 0 : sched_fec->slot = reasm_fec->slot;
1637 0 : sched_fec->parent_slot = reasm_fec->slot - reasm_fec->parent_off;
1638 0 : sched_fec->is_first_in_block = reasm_fec->fec_set_idx==0U;
1639 0 : fd_funk_txn_xid_copy( sched_fec->alut_ctx->xid, fd_funk_last_publish( ctx->funk ) );
1640 0 : sched_fec->alut_ctx->funk = ctx->funk;
1641 0 : sched_fec->alut_ctx->els = ctx->published_root_slot;
1642 0 : sched_fec->alut_ctx->runtime_spad = ctx->runtime_spad;
1643 :
1644 0 : if( FD_UNLIKELY( !fd_sched_fec_ingest( ctx->sched, sched_fec ) ) ) {
1645 0 : fd_banks_mark_bank_dead( ctx->banks, fd_banks_bank_query( ctx->banks, sched_fec->bank_idx ) );
1646 0 : }
1647 0 : }
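: /* Bank index assignment above, summarized:
:      leader FEC        -> reuse ctx->leader_bank->idx
:      fec_set_idx==0    -> new bank forked off parent_bank_idx
:      otherwise         -> inherit parent_bank_idx (same slot) */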
1648 :
1649 : static void
1650 : funk_publish( fd_replay_tile_t * ctx,
1651 0 : ulong slot ) {
1652 0 : fd_funk_txn_xid_t xid = { .ul[0] = slot, .ul[1] = slot };
1653 0 : FD_LOG_DEBUG(( "publishing slot=%lu", slot ));
1654 :
1655 : /* This is the standard case. Publish all transactions up to and
1656 : including the watermark. This will publish any in-prep ancestors
1657 : of the root transaction as well. */
1658 0 : if( FD_UNLIKELY( !fd_funk_txn_publish( ctx->funk, &xid ) ) ) FD_LOG_CRIT(( "failed to funk publish slot %lu", slot ));
1659 0 : }
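: /* Note: throughout this tile a funk transaction xid is the slot number
:    duplicated into both words, i.e. { .ul = { slot, slot } }, with
:    { 0, 0 } reserved for the genesis boot bank. */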
1660 :
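: /* The consensus root (decided by tower) can run ahead of the published
:    root (what funk/store/banks/sched have actually pruned up to). The
:    published root is advanced lazily toward the consensus root, one
:    advanceable step at a time, as fd_banks_advance_root_prepare
:    permits. */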
1661 : static int
1662 0 : advance_published_root( fd_replay_tile_t * ctx ) {
1663 :
1664 0 : fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &ctx->consensus_root, NULL, ctx->block_id_arr );
1665 0 : if( FD_UNLIKELY( !block_id_ele ) ) {
1666 0 : FD_LOG_CRIT(( "invariant violation: block id ele not found for consensus root %s", FD_BASE58_ENC_32_ALLOCA( &ctx->consensus_root ) ));
1667 0 : }
1668 0 : ulong target_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );
1669 :
1670 0 : fd_sched_root_notify( ctx->sched, target_bank_idx );
1671 :
1672 : /* If the identity vote has been seen on a bank that should be rooted,
1673 : then we are now ready to produce blocks. */
1674 0 : if( FD_UNLIKELY( !ctx->has_identity_vote_rooted ) ) {
1675 0 : fd_bank_t * root_bank = fd_banks_bank_query( ctx->banks, target_bank_idx );
1676 0 : if( FD_UNLIKELY( !root_bank ) ) FD_LOG_CRIT(( "invariant violation: root bank not found for bank index %lu", target_bank_idx ));
1677 0 : if( FD_LIKELY( fd_bank_has_identity_vote_get( root_bank ) ) ) ctx->has_identity_vote_rooted = 1;
1678 0 : }
1679 :
1680 0 : ulong advanceable_root_idx = ULONG_MAX;
1681 0 : if( FD_UNLIKELY( !fd_banks_advance_root_prepare( ctx->banks, target_bank_idx, &advanceable_root_idx ) ) ) return 0;
1682 :
1683 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, advanceable_root_idx );
1684 0 : FD_TEST( bank );
1685 :
1686 0 : fd_block_id_ele_t * advanceable_root_ele = &ctx->block_id_arr[ advanceable_root_idx ];
1687 0 : if( FD_UNLIKELY( !advanceable_root_ele ) ) {
1688 0 : FD_LOG_CRIT(( "invariant violation: advanceable root ele not found for bank index %lu", advanceable_root_idx ));
1689 0 : }
1690 :
1691 0 : long exacq_start, exacq_end, exrel_end;
1692 0 : FD_STORE_EXCLUSIVE_LOCK( ctx->store, exacq_start, exacq_end, exrel_end ) {
1693 0 : fd_store_publish( ctx->store, &advanceable_root_ele->block_id );
1694 0 : } FD_STORE_EXCLUSIVE_LOCK_END;
1695 :
1696 0 : fd_histf_sample( ctx->metrics.store_publish_wait, (ulong)fd_long_max( exacq_end-exacq_start, 0L ) );
1697 0 : fd_histf_sample( ctx->metrics.store_publish_work, (ulong)fd_long_max( exrel_end-exacq_end, 0L ) );
1698 :
1699 0 : ulong advanceable_root_slot = fd_bank_slot_get( bank );
1700 0 : funk_publish( ctx, advanceable_root_slot );
1701 :
1702 0 : fd_txncache_advance_root( ctx->txncache, bank->txncache_fork_id );
1703 0 : fd_sched_advance_root( ctx->sched, advanceable_root_idx );
1704 0 : fd_banks_advance_root( ctx->banks, advanceable_root_idx );
1705 0 : fd_reasm_publish( ctx->reasm, &advanceable_root_ele->block_id );
1706 :
1707 0 : ctx->published_root_slot = advanceable_root_slot;
1708 0 : ctx->published_root_bank_idx = advanceable_root_idx;
1709 :
1710 0 : return 1;
1711 0 : }
1712 :
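: /* after_credit drains work in priority order: (1) flush pending vote
:    tower frags, (2) maybe become leader, (3) feed a ready FEC set from
:    reasm into the scheduler, (4) finish an ended leader slot, (5)
:    advance the published root toward the consensus root, and only then
:    (6) dispatch execution via replay. Each early branch clears
:    opt_poll_in so the step completes before new frags are polled. */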
1713 : static void
1714 : after_credit( fd_replay_tile_t * ctx,
1715 : fd_stem_context_t * stem,
1716 : int * opt_poll_in,
1717 0 : int * charge_busy ) {
1718 0 : if( FD_UNLIKELY( !ctx->is_booted ) ) return;
1719 :
1720 : /* Send any outstanding vote states to tower. TODO: Not sure why this
1721 : is here? Should happen when the slot completes instead? */
1722 0 : if( FD_UNLIKELY( ctx->vote_tower_out_idx<ctx->vote_tower_out_len ) ) {
1723 0 : *charge_busy = 1;
1724 0 : publish_next_vote_tower( ctx, stem );
1725 : /* Don't continue polling for fragments but instead skip to the next
1726 : iteration of the stem loop.
1727 :
1728 : This is necessary so that all the vote states for the end of a
1729 : particular slot are sent in one atomic block, and are not
1730 : interleaved with vote states at the end of other slots. */
1731 0 : *opt_poll_in = 0;
1732 0 : return;
1733 0 : }
1734 :
1735 0 : if( FD_UNLIKELY( maybe_become_leader( ctx, stem ) ) ) {
1736 0 : *charge_busy = 1;
1737 0 : *opt_poll_in = 0;
1738 0 : return;
1739 0 : }
1740 :
1741 : /* If the reassembler has a FEC set that is ready, we should process
1742 : it and pass it to the scheduler. */
1743 :
1744 0 : fd_reasm_fec_t * fec;
1745 0 : if( FD_LIKELY( fd_sched_can_ingest( ctx->sched ) && !fd_banks_is_full( ctx->banks ) && (fec = fd_reasm_out( ctx->reasm )) ) ) {
1746 : /* Note the guards above: if sched is full or there are no free banks,
1747 : we must not ingest any more FEC sets into the scheduler. */
1748 0 : process_fec_set( ctx, fec );
1749 0 : *charge_busy = 1;
1750 0 : *opt_poll_in = 0;
1751 0 : return;
1752 0 : }
1753 :
1754 : /* If we are leader, we can only stop being leader once we have
1755 : received both the poh hash from the poh tile and the block id from
1756 : reasm. */
1756 0 : if( FD_UNLIKELY( ctx->is_leader && ctx->recv_block_id && ctx->recv_poh ) ) {
1757 0 : fini_leader_bank( ctx, stem );
1758 0 : *charge_busy = 1;
1759 0 : *opt_poll_in = 0;
1760 0 : return;
1761 0 : }
1762 :
1763 : /* If the published_root is not caught up to the consensus root, then
1764 : we should try to advance the published root. */
1765 0 : if( FD_UNLIKELY( ctx->consensus_root_bank_idx!=ctx->published_root_bank_idx && advance_published_root( ctx ) ) ) {
1766 0 : *charge_busy = 1;
1767 0 : *opt_poll_in = 0;
1768 0 : return;
1769 0 : }
1770 :
1771 0 : *charge_busy = replay( ctx, stem );
1772 0 : }
1773 :
1774 : static int
1775 : before_frag( fd_replay_tile_t * ctx,
1776 : ulong in_idx,
1777 : ulong seq FD_PARAM_UNUSED,
1778 0 : ulong sig FD_PARAM_UNUSED ) {
1779 :
1780 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_SHRED ) ) {
1781 : /* If reasm is full, we cannot insert any more FEC sets. We must
1782 : not consume any frags from shred_out until reasm can process more
1783 : FEC sets. */
1784 :
1785 0 : if( FD_UNLIKELY( !fd_reasm_free( ctx->reasm ) ) ) {
1786 0 : return -1;
1787 0 : }
1788 0 : }
1789 :
1790 0 : return 0;
1791 0 : }
1792 :
1793 : static void
1794 : process_solcap_account_update( fd_replay_tile_t * ctx,
1795 0 : fd_capture_ctx_account_update_msg_t const * msg ) {
1796 :
1797 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, msg->bank_idx );
1798 0 : if( FD_UNLIKELY( !bank ) ) {
1799 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", msg->bank_idx ));
1800 0 : }
1801 :
1802 0 : if( FD_UNLIKELY( !ctx->capture_ctx || !ctx->capture_ctx->capture ) ) return;
1803 0 : if( FD_UNLIKELY( fd_bank_slot_get( bank )<ctx->capture_ctx->solcap_start_slot ) ) return;
1804 :
1805 0 : uchar const * account_data = (uchar const *)fd_type_pun_const( msg )+sizeof(fd_capture_ctx_account_update_msg_t);
1806 0 : fd_solcap_write_account( ctx->capture_ctx->capture, &msg->pubkey, &msg->info, account_data, msg->data_sz );
1807 0 : }
1808 :
1809 : static void
1810 : process_exec_task_done( fd_replay_tile_t * ctx,
1811 : fd_exec_task_done_msg_t * msg,
1812 0 : ulong sig ) {
1813 0 : if( FD_UNLIKELY( sig==0UL ) ) {
1814 : // FIXME remove this branch with new solcap
1815 0 : process_solcap_account_update( ctx, fd_type_pun( msg ) );
1816 0 : return;
1817 0 : }
1818 :
1819 0 : ulong exec_tile_idx = sig&0xFFFFFFFFUL;
1820 0 : FD_TEST( !fd_ulong_extract_bit( ctx->exec_ready_bitset, (int)exec_tile_idx ) );
1821 0 : ctx->exec_ready_bitset = fd_ulong_set_bit( ctx->exec_ready_bitset, (int)exec_tile_idx );
1822 :
1823 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, msg->bank_idx );
1824 0 : bank->refcnt--;
1825 :
1826 0 : switch( sig>>32 ) {
1827 0 : case FD_EXEC_TT_TXN_EXEC: {
1828 0 : if( FD_UNLIKELY( !ctx->has_identity_vote_rooted ) ) {
1829 : /* Query the txn signature against our recently generated vote
1830 : txn signatures. If the query is successful, then we have
1831 : seen our own vote transaction land and this should be marked
1832 : in the bank. We go through this exercise until we've seen
1833 : our vote rooted. */
1834 0 : fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, msg->txn_exec->txn_idx );
1835 0 : if( fd_vote_tracker_query_sig( ctx->vote_tracker, fd_type_pun_const( txn_p->payload+TXN( txn_p )->signature_off ) ) ) {
1836 0 : *fd_bank_has_identity_vote_modify( bank ) += 1;
1837 0 : }
1838 0 : }
1839 0 : fd_sched_task_done( ctx->sched, FD_SCHED_TT_TXN_EXEC, msg->txn_exec->txn_idx );
1840 0 : if( FD_UNLIKELY( msg->txn_exec->err && !(bank->flags&FD_BANK_FLAGS_DEAD) ) ) {
1841 : /* Every transaction in a valid block has to execute.
1842 : Otherwise, we should mark the block as dead. Also freeze the
1843 : bank if possible. */
1844 0 : fd_banks_mark_bank_dead( ctx->banks, bank );
1845 0 : fd_sched_block_abandon( ctx->sched, bank->idx );
1846 0 : }
1847 0 : if( FD_UNLIKELY( (bank->flags&FD_BANK_FLAGS_DEAD) && bank->refcnt==0UL ) ) {
1848 0 : fd_banks_mark_bank_frozen( ctx->banks, bank );
1849 0 : }
1850 0 : break;
1851 0 : }
1852 0 : case FD_EXEC_TT_TXN_SIGVERIFY: {
1853 0 : fd_sched_task_done( ctx->sched, FD_SCHED_TT_TXN_SIGVERIFY, msg->txn_sigverify->txn_idx );
1854 0 : if( FD_UNLIKELY( msg->txn_sigverify->err && !(bank->flags&FD_BANK_FLAGS_DEAD) ) ) {
1855 : /* Every transaction in a valid block has to sigverify.
1856 : Otherwise, we should mark the block as dead. Also freeze the
1857 : bank if possible. */
1858 0 : fd_banks_mark_bank_dead( ctx->banks, bank );
1859 0 : fd_sched_block_abandon( ctx->sched, bank->idx );
1860 0 : }
1861 0 : if( FD_UNLIKELY( (bank->flags&FD_BANK_FLAGS_DEAD) && bank->refcnt==0UL ) ) {
1862 0 : fd_banks_mark_bank_frozen( ctx->banks, bank );
1863 0 : }
1864 0 : break;
1865 0 : }
1866 0 : default: FD_LOG_CRIT(( "unexpected sig 0x%lx", sig ));
1867 0 : }
1868 :
1869 : /* Reference counter just decreased, and an exec tile just got freed
1870 : up. If there's a need to be more aggressively pruning, we could
1871 : check here if more slots just became publishable and publish. Not
1872 : publishing here shouldn't bloat the fork tree too much though. We
1873 : mark minority forks dead as soon as we can, and execution dispatch
1874 : stops on dead blocks. So shortly afterwards, dead blocks should be
1875 : eligible for pruning as in-flight transactions retire from the
1876 : execution pipeline. */
1877 :
1878 0 : }
1879 :
1880 : static void
1881 : process_tower_update( fd_replay_tile_t * ctx,
1882 : fd_stem_context_t * stem,
1883 0 : fd_tower_slot_done_t const * msg ) {
1884 :
1885 0 : ctx->reset_block_id = msg->reset_block_id;
1886 0 : ctx->reset_slot = msg->reset_slot;
1887 0 : ctx->reset_timestamp_nanos = fd_log_wallclock();
1888 0 : ulong min_leader_slot = fd_ulong_max( msg->reset_slot+1UL, fd_ulong_if( ctx->highwater_leader_slot==ULONG_MAX, 0UL, ctx->highwater_leader_slot+1UL ) );
1889 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, min_leader_slot, ctx->identity_pubkey );
1890 0 : if( FD_LIKELY( ctx->next_leader_slot ) ) {
1891 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
1892 0 : } else {
1893 0 : ctx->next_leader_tickcount = LONG_MAX;
1894 0 : }
1895 :
1896 0 : fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->reset_block_id, NULL, ctx->block_id_arr );
1897 0 : if( FD_UNLIKELY( !block_id_ele ) ) {
1898 0 : FD_LOG_CRIT(( "invariant violation: block id ele doesn't exist for reset block id: %s, slot: %lu", FD_BASE58_ENC_32_ALLOCA( &msg->reset_block_id ), msg->reset_slot ));
1899 0 : }
1900 0 : ulong reset_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );
1901 :
1902 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, reset_bank_idx );
1903 0 : if( FD_UNLIKELY( !bank ) ) {
1904 0 : FD_LOG_CRIT(( "invariant violation: bank not found for bank index %lu", reset_bank_idx ));
1905 0 : }
1906 :
1907 0 : if( FD_LIKELY( ctx->replay_out->idx!=ULONG_MAX ) ) {
1908 0 : fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
1909 :
1910 0 : reset->timestamp = ctx->reset_timestamp_nanos;
1911 0 : reset->completed_slot = ctx->reset_slot;
1912 0 : reset->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
1913 0 : reset->ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1914 0 : reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);
1915 :
1916 0 : fd_memcpy( reset->completed_block_id, &block_id_ele->block_id, sizeof(fd_hash_t) );
1917 :
1918 0 : fd_blockhashes_t const * block_hash_queue = fd_bank_block_hash_queue_query( bank );
1919 0 : fd_hash_t const * last_hash = fd_blockhashes_peek_last( block_hash_queue );
1920 0 : FD_TEST( last_hash );
1921 0 : fd_memcpy( reset->completed_blockhash, last_hash->uc, sizeof(fd_hash_t) );
1922 :
1923 0 : ulong ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1924 0 : if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
1925 : /* Low power producer, maximum of one microblock per tick in the slot */
1926 0 : reset->max_microblocks_in_slot = ticks_per_slot;
1927 0 : } else {
1928 : /* See the long comment in after_credit for this limit */
1929 0 : reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
1930 0 : }
1931 0 : reset->next_leader_slot = ctx->next_leader_slot;
1932 :
1933 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_RESET, ctx->replay_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1934 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_poh_reset_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
1935 0 : }
1936 :
1937 0 : FD_LOG_INFO(( "tower_update(reset_slot=%lu, next_leader_slot=%lu, vote_slot=%lu, new_root=%d, root_slot=%lu, root_block_id=%s)", msg->reset_slot, ctx->next_leader_slot, msg->vote_slot, msg->new_root, msg->root_slot, FD_BASE58_ENC_32_ALLOCA( &msg->root_block_id ) ));
1938 0 : maybe_become_leader( ctx, stem );
1939 :
1940 0 : if( FD_LIKELY( msg->new_root ) ) {
1941 :
1942 0 : FD_TEST( msg->root_slot>=ctx->consensus_root_slot );
1943 0 : fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->root_block_id, NULL, ctx->block_id_arr );
1944 0 : FD_TEST( block_id_ele );
1945 :
1946 0 : ctx->consensus_root_slot = msg->root_slot;
1947 0 : ctx->consensus_root = msg->root_block_id;
1948 0 : ctx->consensus_root_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );
1949 :
1950 0 : publish_root_advanced( ctx, stem );
1951 0 : }
1952 :
1953 0 : ulong distance = 0UL;
1954 0 : fd_bank_t * parent = bank;
1955 0 : while( parent ) {
1956 0 : if( FD_UNLIKELY( parent->idx==ctx->consensus_root_bank_idx ) ) break;
1957 0 : parent = fd_banks_get_parent( ctx->banks, parent );
1958 0 : distance++;
1959 0 : }
1960 :
1961 0 : FD_MGAUGE_SET( REPLAY, ROOT_DISTANCE, distance );
1962 0 : }
1963 :
1964 : static void
1965 : process_fec_complete( fd_replay_tile_t * ctx,
1966 0 : uchar const * shred_buf ) {
1967 0 : fd_shred_t const * shred = (fd_shred_t const *)fd_type_pun_const( shred_buf );
1968 :
1969 0 : fd_hash_t const * merkle_root = (fd_hash_t const *)fd_type_pun_const( shred_buf + FD_SHRED_DATA_HEADER_SZ );
1970 0 : fd_hash_t const * chained_merkle_root = (fd_hash_t const *)fd_type_pun_const( shred_buf + FD_SHRED_DATA_HEADER_SZ + sizeof(fd_hash_t) );
1971 0 : int is_leader_fec = *(int const *) fd_type_pun_const( shred_buf + FD_SHRED_DATA_HEADER_SZ + sizeof(fd_hash_t) + sizeof(fd_hash_t) );
1972 :
1973 0 : int data_complete = !!( shred->data.flags & FD_SHRED_DATA_FLAG_DATA_COMPLETE );
1974 0 : int slot_complete = !!( shred->data.flags & FD_SHRED_DATA_FLAG_SLOT_COMPLETE );
1975 :
1976 0 : FD_TEST( !fd_reasm_query( ctx->reasm, merkle_root ) );
1977 0 : if( FD_UNLIKELY( shred->slot - shred->data.parent_off == fd_reasm_slot0( ctx->reasm ) && shred->fec_set_idx == 0 ) ) {
1978 0 : chained_merkle_root = &fd_reasm_root( ctx->reasm )->key;
1979 0 : }
1980 0 : FD_TEST( fd_reasm_insert( ctx->reasm, merkle_root, chained_merkle_root, shred->slot, shred->fec_set_idx, shred->data.parent_off, (ushort)(shred->idx - shred->fec_set_idx + 1), data_complete, slot_complete, is_leader_fec ) );
1981 0 : }
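: /* Layout of the FEC-complete frag decoded above (this matches the sz
:    check in returnable_frag):
:      bytes [0, FD_SHRED_DATA_HEADER_SZ)  data shred header
:      next 32 bytes                       merkle root
:      next 32 bytes                       chained merkle root
:      next sizeof(int) bytes              is_leader_fec flag */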
1982 :
1983 : static void
1984 0 : process_resolv_slot_completed( fd_replay_tile_t * ctx, ulong bank_idx ) {
1985 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
1986 0 : FD_TEST( bank );
1987 :
1988 0 : bank->refcnt--;
1989 0 : }
1990 :
1991 : static void
1992 : process_vote_txn_sent( fd_replay_tile_t * ctx,
1993 0 : fd_txn_m_t * txnm ) {
1994 : /* The send tile has signed and sent a vote. Add this vote to the
1995 : vote tracker. We go through this exercise until we've seen our
1996 : vote rooted. */
1997 0 : if( FD_UNLIKELY( !ctx->has_identity_vote_rooted ) ) {
1998 0 : uchar * payload = ((uchar *)txnm) + sizeof(fd_txn_m_t);
1999 0 : uchar txn_mem[ FD_TXN_MAX_SZ ] __attribute__((aligned(alignof(fd_txn_t))));
2000 0 : fd_txn_t * txn = (fd_txn_t *)txn_mem;
2001 0 : if( FD_UNLIKELY( !fd_txn_parse( payload, txnm->payload_sz, txn_mem, NULL ) ) ) {
2002 0 : FD_LOG_CRIT(( "Could not parse txn from send tile" ));
2003 0 : }
2004 0 : fd_vote_tracker_insert( ctx->vote_tracker, fd_type_pun_const( payload+txn->signature_off ) );
2005 0 : }
2006 0 : }
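: /* Vote-tracking lifecycle: votes signed by the send tile are recorded
:    here, matched against executed txn signatures in
:    process_exec_task_done (marking has_identity_vote on the bank), and
:    once such a bank is rooted in advance_published_root,
:    has_identity_vote_rooted is set and this bookkeeping stops. */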
2007 :
2008 : static inline int
2009 : returnable_frag( fd_replay_tile_t * ctx,
2010 : ulong in_idx,
2011 : ulong seq,
2012 : ulong sig,
2013 : ulong chunk,
2014 : ulong sz,
2015 : ulong ctl,
2016 : ulong tsorig,
2017 : ulong tspub,
2018 0 : fd_stem_context_t * stem ) {
2019 0 : (void)seq;
2020 0 : (void)ctl;
2021 0 : (void)tsorig;
2022 0 : (void)tspub;
2023 :
2024 0 : if( FD_UNLIKELY( sz!=0UL && (chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) )
2025 0 : FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
2026 :
2027 0 : switch( ctx->in_kind[in_idx] ) {
2028 0 : case IN_KIND_GENESIS:
2029 0 : boot_genesis( ctx, stem, in_idx, chunk );
2030 0 : break;
2031 0 : case IN_KIND_SNAP:
2032 0 : on_snapshot_message( ctx, stem, in_idx, chunk, sig );
2033 0 : break;
2034 0 : case IN_KIND_EXEC: {
2035 0 : process_exec_task_done( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), sig );
2036 0 : break;
2037 0 : }
2038 0 : case IN_KIND_POH: {
2039 0 : process_poh_message( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
2040 0 : break;
2041 0 : }
2042 0 : case IN_KIND_RESOLV: {
2043 0 : fd_resolv_slot_exchanged_t * exchanged_slot = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
2044 0 : process_resolv_slot_completed( ctx, exchanged_slot->bank_idx );
2045 0 : break;
2046 0 : }
2047 0 : case IN_KIND_TOWER: {
2048 0 : process_tower_update( ctx, stem, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
2049 0 : break;
2050 0 : }
2051 0 : case IN_KIND_SHRED: {
2052 : /* TODO: This message layout and size should be properly defined. */
2053 0 : if( sz==FD_SHRED_DATA_HEADER_SZ + sizeof(fd_hash_t) + sizeof(fd_hash_t) + sizeof(int) ) {
2054 : /* We received a FEC complete message. */
2055 0 : process_fec_complete( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
2056 0 : }
2057 0 : break;
2058 0 : }
2059 0 : case IN_KIND_VTXN: {
2060 0 : process_vote_txn_sent( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
2061 0 : break;
2062 0 : }
2063 0 : default:
2064 0 : FD_LOG_ERR(( "unhandled kind %d", ctx->in_kind[ in_idx ] ));
2065 0 : }
2066 :
2067 0 : return 0;
2068 0 : }
2069 :
2070 : static inline fd_replay_out_link_t
2071 : out1( fd_topo_t const * topo,
2072 : fd_topo_tile_t const * tile,
2073 0 : char const * name ) {
2074 0 : ulong idx = ULONG_MAX;
2075 :
2076 0 : for( ulong i=0UL; i<tile->out_cnt; i++ ) {
2077 0 : fd_topo_link_t const * link = &topo->links[ tile->out_link_id[ i ] ];
2078 0 : if( !strcmp( link->name, name ) ) {
2079 0 : if( FD_UNLIKELY( idx!=ULONG_MAX ) ) FD_LOG_ERR(( "tile %s:%lu had multiple output links named %s but expected one", tile->name, tile->kind_id, name ));
2080 0 : idx = i;
2081 0 : }
2082 0 : }
2083 :
2084 0 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) return (fd_replay_out_link_t){ .idx = ULONG_MAX, .mem = NULL, .chunk0 = 0, .wmark = 0, .chunk = 0 };
2085 :
2086 0 : void * mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ idx ] ].dcache_obj_id ].wksp_id ].wksp;
2087 0 : ulong chunk0 = fd_dcache_compact_chunk0( mem, topo->links[ tile->out_link_id[ idx ] ].dcache );
2088 0 : ulong wmark = fd_dcache_compact_wmark ( mem, topo->links[ tile->out_link_id[ idx ] ].dcache, topo->links[ tile->out_link_id[ idx ] ].mtu );
2089 :
2090 0 : return (fd_replay_out_link_t){ .idx = idx, .mem = mem, .chunk0 = chunk0, .wmark = wmark, .chunk = chunk0 };
2091 0 : }
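: /* out1 returns idx==ULONG_MAX when the named link is absent, letting a
:    caller treat an output as optional. publish_reset and
:    process_tower_update guard on this defensively, although
:    unprivileged_init requires replay_out to be wired. */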
2092 :
2093 : static void
2094 : privileged_init( fd_topo_t * topo,
2095 0 : fd_topo_tile_t * tile ) {
2096 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
2097 :
2098 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
2099 0 : fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
2100 :
2101 0 : if( FD_UNLIKELY( !strcmp( tile->replay.identity_key_path, "" ) ) ) FD_LOG_ERR(( "identity_key_path not set" ));
2102 :
2103 0 : ctx->identity_pubkey[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->replay.identity_key_path, /* pubkey only: */ 1 ) );
2104 0 : }
2105 :
2106 : static void
2107 : unprivileged_init( fd_topo_t * topo,
2108 0 : fd_topo_tile_t * tile ) {
2109 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
2110 :
2111 0 : ulong chain_cnt = fd_block_id_map_chain_cnt_est( tile->replay.max_live_slots );
2112 :
2113 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
2114 0 : fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
2115 0 : void * block_id_arr_mem = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_block_id_ele_t), sizeof(fd_block_id_ele_t) * tile->replay.max_live_slots );
2116 0 : void * block_id_map_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_block_id_map_align(), fd_block_id_map_footprint( chain_cnt ) );
2117 0 : void * _txncache = FD_SCRATCH_ALLOC_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->replay.max_live_slots ) );
2118 0 : void * reasm_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_reasm_align(), fd_reasm_footprint( 1 << 20 ) );
2119 0 : void * sched_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.max_live_slots ) );
2120 0 : void * vote_tracker_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_vote_tracker_align(), fd_vote_tracker_footprint() );
2121 0 : void * _capture_ctx = FD_SCRATCH_ALLOC_APPEND( l, fd_capture_ctx_align(), fd_capture_ctx_footprint() );
2122 0 : void * spad_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_spad_align(), fd_spad_footprint( tile->replay.heap_size_gib<<30 ) );
2123 :
2124 0 : ulong store_obj_id = fd_pod_query_ulong( topo->props, "store", ULONG_MAX );
2125 0 : FD_TEST( store_obj_id!=ULONG_MAX );
2126 0 : ctx->store = fd_store_join( fd_topo_obj_laddr( topo, store_obj_id ) );
2127 0 : FD_TEST( ctx->store );
2128 :
2129 0 : ctx->vote_tower_out_idx = 0UL;
2130 0 : ctx->vote_tower_out_len = 0UL;
2131 :
2132 0 : ulong banks_obj_id = fd_pod_query_ulong( topo->props, "banks", ULONG_MAX );
2133 0 : FD_TEST( banks_obj_id!=ULONG_MAX );
2134 0 : ctx->banks = fd_banks_join( fd_topo_obj_laddr( topo, banks_obj_id ) );
2135 0 : FD_TEST( ctx->banks );
2136 :
2137 0 : fd_bank_t * bank_pool = fd_banks_get_bank_pool( ctx->banks );
2138 0 : FD_MGAUGE_SET( REPLAY, MAX_LIVE_BANKS, fd_banks_pool_max( bank_pool ) );
2139 :
2140 0 : fd_bank_t * bank = fd_banks_init_bank( ctx->banks );
2141 0 : FD_TEST( bank );
2142 0 : fd_bank_slot_set( bank, 0UL );
2143 0 : FD_TEST( bank->idx==FD_REPLAY_BOOT_BANK_IDX );
2144 :
2145 0 : ctx->consensus_root_slot = ULONG_MAX;
2146 0 : ctx->consensus_root = (fd_hash_t){ .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
2147 0 : ctx->published_root_slot = ULONG_MAX;
2148 :
2149 : /* Set some initial values for the bank: hardcoded features and the
2150 : cluster version. */
2151 0 : fd_cluster_version_t * cluster_version = fd_bank_cluster_version_modify( bank );
2152 0 : if( FD_UNLIKELY( sscanf( tile->replay.cluster_version, "%u.%u.%u", &cluster_version->major, &cluster_version->minor, &cluster_version->patch )!=3 ) ) {
2153 0 : FD_LOG_ERR(( "failed to decode cluster version, configured as \"%s\"", tile->replay.cluster_version ));
2154 0 : }
2155 :
2156 0 : fd_features_t * features = fd_bank_features_modify( bank );
2157 0 : fd_features_enable_cleaned_up( features, cluster_version );
2158 :
2159 0 : char const * one_off_features[ 16UL ];
2160 0 : FD_TEST( tile->replay.enable_features_cnt<=sizeof(one_off_features)/sizeof(one_off_features[0]) );
2161 0 : for( ulong i=0UL; i<tile->replay.enable_features_cnt; i++ ) one_off_features[ i ] = tile->replay.enable_features[i];
2162 0 : fd_features_enable_one_offs( features, one_off_features, (uint)tile->replay.enable_features_cnt, 0UL );
2163 :
2164 0 : FD_TEST( fd_funk_join( ctx->funk, fd_topo_obj_laddr( topo, tile->replay.funk_obj_id ) ) );
2165 :
2166 0 : void * _txncache_shmem = fd_topo_obj_laddr( topo, tile->replay.txncache_obj_id );
2167 0 : fd_txncache_shmem_t * txncache_shmem = fd_txncache_shmem_join( _txncache_shmem );
2168 0 : FD_TEST( txncache_shmem );
2169 0 : ctx->txncache = fd_txncache_join( fd_txncache_new( _txncache, txncache_shmem ) );
2170 0 : FD_TEST( ctx->txncache );
2171 :
2172 0 : ctx->tx_metadata_storage = tile->replay.tx_metadata_storage;
2173 :
2174 0 : ctx->capture_ctx = NULL;
2175 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) || strcmp( "", tile->replay.dump_proto_dir ) ) ) {
2176 0 : ctx->capture_ctx = fd_capture_ctx_join( fd_capture_ctx_new( _capture_ctx ) );
2177 0 : }
2178 :
2179 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) ) ) {
2180 0 : ctx->capture_ctx->checkpt_freq = ULONG_MAX;
2181 0 : ctx->capture_file = fopen( tile->replay.solcap_capture, "w+" );
2182 0 : if( FD_UNLIKELY( !ctx->capture_file ) ) FD_LOG_ERR(( "fopen(%s) failed (%d-%s)", tile->replay.solcap_capture, errno, fd_io_strerror( errno ) ));
2183 :
2184 0 : ctx->capture_ctx->capture_txns = 0;
2185 0 : ctx->capture_ctx->solcap_start_slot = tile->replay.capture_start_slot;
2186 0 : fd_solcap_writer_init( ctx->capture_ctx->capture, ctx->capture_file );
2187 0 : }
2188 :
2189 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.dump_proto_dir ) ) ) {
2190 0 : ctx->capture_ctx->dump_proto_output_dir = tile->replay.dump_proto_dir;
2191 0 : if( FD_LIKELY( tile->replay.dump_block_to_pb ) ) ctx->capture_ctx->dump_block_to_pb = tile->replay.dump_block_to_pb;
2192 0 : }
2193 :
2194 0 : ctx->exec_cnt = fd_topo_tile_name_cnt( topo, "exec" );
2195 :
2196 0 : if( FD_UNLIKELY( ctx->exec_cnt>FD_PACK_MAX_BANK_TILES ) ) FD_LOG_ERR(( "replay tile has too many exec tiles %lu", ctx->exec_cnt ));
2197 :
2198 0 : ctx->exec_ready_bitset = 0UL;
2199 0 : ctx->is_booted = 0;
2200 :
2201 0 : ctx->reasm = fd_reasm_join( fd_reasm_new( reasm_mem, 1 << 20, 0 ) );
2202 0 : FD_TEST( ctx->reasm );
2203 :
2204 0 : ctx->sched = fd_sched_join( fd_sched_new( sched_mem, tile->replay.max_live_slots ), tile->replay.max_live_slots );
2205 0 : FD_TEST( ctx->sched );
2206 :
2207 0 : ctx->enable_bank_hash_cmp = !!tile->replay.enable_bank_hash_cmp;
2208 :
2209 0 : ulong bank_hash_cmp_obj_id = fd_pod_query_ulong( topo->props, "bh_cmp", ULONG_MAX );
2210 0 : FD_TEST( bank_hash_cmp_obj_id!=ULONG_MAX );
2211 0 : ctx->bank_hash_cmp = fd_bank_hash_cmp_join( fd_bank_hash_cmp_new( fd_topo_obj_laddr( topo, bank_hash_cmp_obj_id ) ) );
2212 0 : FD_TEST( ctx->bank_hash_cmp );
2213 :
2214 0 : ctx->vote_tracker = fd_vote_tracker_join( fd_vote_tracker_new( vote_tracker_mem, 0UL ) );
2215 0 : FD_TEST( ctx->vote_tracker );
2216 :
2217 : /* Now attach to the runtime spad which is part of the tile memory.
2218 : FIXME: Replace runtime spad with a non-stack allocator. */
2219 0 : ctx->runtime_spad = fd_spad_join( fd_spad_new( spad_mem, fd_spad_footprint( tile->replay.heap_size_gib<<30UL ) ) );
2220 0 : FD_TEST( ctx->runtime_spad );
2221 :
2222 0 : ctx->has_identity_vote_rooted = 0;
2223 :
2224 0 : ctx->mleaders = fd_multi_epoch_leaders_join( fd_multi_epoch_leaders_new( ctx->mleaders_mem ) );
2225 0 : FD_TEST( ctx->mleaders );
2226 :
2227 0 : ctx->is_leader = 0;
2228 0 : ctx->reset_slot = 0UL;
2229 0 : ctx->reset_block_id = (fd_hash_t){ .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
2230 0 : ctx->reset_timestamp_nanos = 0UL;
2231 0 : ctx->next_leader_slot = ULONG_MAX;
2232 0 : ctx->next_leader_tickcount = LONG_MAX;
2233 0 : ctx->highwater_leader_slot = ULONG_MAX;
2234 0 : ctx->slot_duration_nanos = 400L*1000L*1000L; /* TODO: Not fixed ... not always 400ms ... */
2235 0 : ctx->slot_duration_ticks = (double)ctx->slot_duration_nanos*fd_tempo_tick_per_ns( NULL );
2236 0 : ctx->max_active_descendant = 0UL; /* TODO: Update this properly ... */
2237 0 : ctx->leader_bank = NULL;
2238 :
2239 : /* TODO: We need a real seed here. */
2240 0 : ctx->block_id_len = tile->replay.max_live_slots;
2241 0 : ctx->block_id_arr = (fd_block_id_ele_t *)block_id_arr_mem;
2242 0 : ctx->block_id_map = fd_block_id_map_join( fd_block_id_map_new( block_id_map_mem, chain_cnt, 999UL ) );
2243 0 : FD_TEST( ctx->block_id_map );
2244 :
2245 0 : for( ulong i=0UL; i<tile->replay.max_live_slots; i++ ) {
2246 0 : ctx->block_id_arr[ i ].slot = FD_SLOT_NULL;
2247 0 : }
2248 :
2249 0 : ctx->resolv_tile_cnt = fd_topo_tile_name_cnt( topo, "resolv" );
2250 :
2251 0 : FD_TEST( tile->in_cnt<=sizeof(ctx->in)/sizeof(ctx->in[0]) );
2252 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
2253 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
2254 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
2255 :
2256 0 : if( FD_LIKELY( link->dcache ) ) {
2257 0 : ctx->in[ i ].mem = link_wksp->wksp;
2258 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
2259 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
2260 0 : ctx->in[ i ].mtu = link->mtu;
2261 0 : }
2262 :
2263 0 : if( !strcmp( link->name, "genesi_out" ) ) ctx->in_kind[ i ] = IN_KIND_GENESIS;
2264 0 : else if( !strcmp( link->name, "snap_out" ) ) ctx->in_kind[ i ] = IN_KIND_SNAP;
2265 0 : else if( !strcmp( link->name, "exec_replay" ) ) ctx->in_kind[ i ] = IN_KIND_EXEC;
2266 0 : else if( !strcmp( link->name, "tower_out" ) ) ctx->in_kind[ i ] = IN_KIND_TOWER;
2267 0 : else if( !strcmp( link->name, "poh_replay" ) ) ctx->in_kind[ i ] = IN_KIND_POH;
2268 0 : else if( !strcmp( link->name, "resolv_repla" ) ) ctx->in_kind[ i ] = IN_KIND_RESOLV;
2269 0 : else if( !strcmp( link->name, "shred_out" ) ) ctx->in_kind[ i ] = IN_KIND_SHRED;
2270 0 : else if( !strcmp( link->name, "send_txns" ) ) ctx->in_kind[ i ] = IN_KIND_VTXN;
2271 0 : else FD_LOG_ERR(( "unexpected input link name %s", link->name ));
2272 0 : }
2273 :
2274 0 : *ctx->stake_out = out1( topo, tile, "replay_stake" ); FD_TEST( ctx->stake_out->idx!=ULONG_MAX );
2275 0 : *ctx->replay_out = out1( topo, tile, "replay_out" ); FD_TEST( ctx->replay_out->idx!=ULONG_MAX );
2276 :
2277 0 : ulong idx = fd_topo_find_tile_out_link( topo, tile, "replay_exec", 0UL );
2278 0 : FD_TEST( idx!=ULONG_MAX );
2279 0 : fd_topo_link_t * link = &topo->links[ tile->out_link_id[ idx ] ];
2280 :
2281 0 : fd_replay_out_link_t * exec_out = ctx->exec_out;
2282 0 : exec_out->idx = idx;
2283 0 : exec_out->mem = topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ].wksp;
2284 0 : exec_out->chunk0 = fd_dcache_compact_chunk0( exec_out->mem, link->dcache );
2285 0 : exec_out->wmark = fd_dcache_compact_wmark( exec_out->mem, link->dcache, link->mtu );
2286 0 : exec_out->chunk = exec_out->chunk0;
2287 :
2288 0 : fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) );
2289 :
2290 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_link_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_LINK_WAIT ),
2291 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_LINK_WAIT ) ) );
2292 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_link_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_LINK_WORK ),
2293 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_LINK_WORK ) ) );
2294 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_read_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_READ_WAIT ),
2295 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_READ_WAIT ) ) );
2296 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_read_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_READ_WORK ),
2297 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_READ_WORK ) ) );
2298 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_publish_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_PUBLISH_WAIT ),
2299 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_PUBLISH_WAIT ) ) );
2300 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_publish_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_PUBLISH_WORK ),
2301 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_PUBLISH_WORK ) ) );
2302 :
2303 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
2304 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
2305 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
2306 0 : }
2307 :
2308 : static ulong
2309 : populate_allowed_seccomp( fd_topo_t const * topo FD_FN_UNUSED,
2310 : fd_topo_tile_t const * tile FD_FN_UNUSED,
2311 : ulong out_cnt,
2312 0 : struct sock_filter * out ) {
2313 :
2314 0 : populate_sock_filter_policy_fd_replay_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
2315 0 : return sock_filter_policy_fd_replay_tile_instr_cnt;
2316 0 : }
2317 :
2318 : static ulong
2319 : populate_allowed_fds( fd_topo_t const * topo FD_FN_UNUSED,
2320 : fd_topo_tile_t const * tile FD_FN_UNUSED,
2321 : ulong out_fds_cnt,
2322 0 : int * out_fds ) {
2323 :
2324 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
2325 :
2326 0 : ulong out_cnt = 0UL;
2327 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
2328 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
2329 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
2330 0 : return out_cnt;
2331 0 : }
2332 :
2333 : #undef DEBUG_LOGGING
2334 :
2335 : /* counting carefully, after_credit can generate at most 7 frags and
2336 : returnable_frag boot_genesis can also generate at most 7 frags, so 14
2337 : is a conservative bound. */
2338 0 : #define STEM_BURST (14UL)
2339 :
2340 : /* TODO: calculate this properly/fix stem to work with larger numbers of links */
2341 : /* 1000 chosen empirically as anything larger slowed down replay times. Need to calculate
2342 : this properly. */
2343 0 : #define STEM_LAZY ((long)10e3)
2344 :
2345 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_replay_tile_t
2346 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_replay_tile_t)
2347 :
2348 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
2349 0 : #define STEM_CALLBACK_AFTER_CREDIT after_credit
2350 0 : #define STEM_CALLBACK_BEFORE_FRAG before_frag
2351 0 : #define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
2352 :
2353 : #include "../../disco/stem/fd_stem.c"
2354 :
2355 : fd_topo_run_tile_t fd_tile_replay = {
2356 : .name = "replay",
2357 : .populate_allowed_seccomp = populate_allowed_seccomp,
2358 : .populate_allowed_fds = populate_allowed_fds,
2359 : .scratch_align = scratch_align,
2360 : .scratch_footprint = scratch_footprint,
2361 : .privileged_init = privileged_init,
2362 : .unprivileged_init = unprivileged_init,
2363 : .run = stem_run,
2364 : };
|