Line data Source code
1 : #include "fd_sched.h"
2 : #include "fd_exec.h"
3 : #include "fd_replay_tile.h"
4 : #include "generated/fd_replay_tile_seccomp.h"
5 :
6 : #include "../poh/fd_poh.h"
7 : #include "../poh/fd_poh_tile.h"
8 : #include "../tower/fd_tower_tile.h"
9 : #include "../resolv/fd_resolv_tile.h"
10 : #include "../restore/utils/fd_ssload.h"
11 :
12 : #include "../../disco/tiles.h"
13 : #include "../../disco/store/fd_store.h"
14 : #include "../../discof/replay/fd_eslot_mgr.h"
15 : #include "../../discof/reasm/fd_reasm.h"
16 : #include "../../discof/replay/fd_exec.h"
17 : #include "../../disco/keyguard/fd_keyload.h"
18 : #include "../../util/pod/fd_pod.h"
19 : #include "../../flamenco/rewards/fd_rewards.h"
20 : #include "../../flamenco/leaders/fd_multi_epoch_leaders.h"
21 : #include "../../disco/metrics/fd_metrics.h"
22 : #include "../../choreo/fd_choreo_base.h"
23 :
24 : #include <errno.h>
25 :
26 : /* Replay concepts:
27 :
28 : - Blocks are aggregations of entries (aka microblocks), which are
29 : groupings of txns and are constructed by the block producer (see
30 : fd_pack).
31 :
32 : - Entries are grouped into entry batches by the block producer (see
33 : fd_pack / fd_shredder).
34 :
35 : - Entry batches are divided into chunks known as shreds by the block
36 : producer (see fd_shredder).
37 :
38 : - Shreds are grouped into forward-error-correction sets (FEC sets) by
39 : the block producer (see fd_shredder).
40 :
41 : - Shreds are transmitted to the rest of the cluster via the Turbine
42 : protocol (see fd_shredder / fd_shred).
43 :
44 : - Once enough shreds within a FEC set are received to recover the
45 : entirety of the shred data encoded by that FEC set, the receiver
46 : can "complete" the FEC set (see fd_fec_resolver).
47 :
48 : - If shreds in the FEC set are missing such that it can't complete,
49 : the receiver can use the Repair protocol to request missing shreds
50 : in the FEC set (see fd_repair).
51 :
52 : - The current Repair protocol does not support requesting coding
53 : shreds. As a result, some FEC sets might actually be complete
54 : (contain all data shreds). Repair currently hacks around this by
55 : forcing completion, but the long-term solution is to add support
56 : for repairing coding shreds via Repair.
57 :
58 : - FEC sets are delivered in partial order to the Replay tile by the
59 : Repair tile. Currently Replay only supports replaying entry batches,
60 : so FEC sets need to be reassembled into an entry batch before they
61 : can be replayed. The new Dispatcher will change this by taking a FEC
62 : set as input instead. */
63 :
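/* To recap the framing described above (an informal summary in comment
   form, not a type definition):

     producer side:  txns -> entries (microblocks) -> entry batches -> block
                     entry batch -> data shreds -> FEC sets (data + coding shreds)
     receive side:   FEC sets arrive via Turbine and/or Repair, are completed
                     by fd_fec_resolver, and are delivered by the Repair tile
                     to this tile, which currently reassembles them into entry
                     batches before replaying them. */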
64 0 : #define IN_KIND_SNAP (0)
65 0 : #define IN_KIND_GENESIS (1)
66 0 : #define IN_KIND_REPAIR (2)
67 0 : #define IN_KIND_TOWER (3)
68 0 : #define IN_KIND_RESOLV (4)
69 0 : #define IN_KIND_POH (5)
70 0 : #define IN_KIND_WRITER (6)
71 0 : #define IN_KIND_CAPTURE (7)
72 :
73 :
74 : struct fd_replay_in_link {
75 : fd_wksp_t * mem;
76 : ulong chunk0;
77 : ulong wmark;
78 : ulong mtu;
79 : };
80 :
81 : typedef struct fd_replay_in_link fd_replay_in_link_t;
82 :
83 : struct fd_replay_out_link {
84 : ulong idx;
85 : fd_wksp_t * mem;
86 : ulong chunk0;
87 : ulong wmark;
88 : ulong chunk;
89 : };
90 :
91 : typedef struct fd_replay_out_link fd_replay_out_link_t;
92 :
93 : struct block_id_eslot {
94 : fd_eslot_t eslot; /* immutable */
95 : fd_hash_t block_id; /* mutable via rekey */
96 : fd_eslot_t parent_eslot; /* immutable */
97 : int is_leader; /* immutable */
98 : ulong next;
99 : };
100 : typedef struct block_id_eslot block_id_eslot_t;
101 :
102 : #define POOL_NAME eslot_pool
103 : #define POOL_T block_id_eslot_t
104 : #include "../../util/tmpl/fd_pool.c"
105 :
106 : #define MAP_NAME eslot_map
107 : #define MAP_ELE_T block_id_eslot_t
108 : #define MAP_KEY_T fd_hash_t
109 : #define MAP_KEY block_id
110 : #define MAP_KEY_EQ(k0,k1) (!memcmp((k0),(k1), sizeof(fd_hash_t)))
111 : #define MAP_KEY_HASH(key,seed) (fd_ulong_hash(key->ul[3]^seed))
112 : #include "../../util/tmpl/fd_map_chain.c"
113 :
114 : struct eslot_block_id {
115 : ulong slot;
116 : ulong eqvoc_bitset; /* bit i set if block_id[i] is valid */
117 : fd_hash_t block_id[ FD_ESLOT_EQVOC_PER_SLOT_CNT_MAX ];
118 : };
119 : typedef struct eslot_block_id eslot_block_id_t;
120 : /* FIXME: consistent limit with everything else */
121 : #define ESLOT_CNT_MAX (1024UL)
122 :
123 : FD_STATIC_ASSERT( FD_PACK_MAX_BANK_TILES<=64UL, exec_bitset );
124 :
125 : struct fd_replay_tile {
126 : fd_wksp_t * wksp;
127 :
128 : /* Inputs to plugin/gui */
129 : fd_replay_out_link_t plugin_out[1];
130 : fd_replay_out_link_t votes_plugin_out[1];
131 : long last_plugin_push_time;
132 :
133 : /* tx_metadata_storage, if set, enables the transaction log collector */
134 : int tx_metadata_storage;
135 :
136 : int bootstrap;
137 : char genesis_path[ PATH_MAX ];
138 :
139 : /* Funk */
140 : fd_funk_t funk[1];
141 :
142 : /* Store */
143 : fd_store_t * store;
144 :
145 : /* Banks */
146 : fd_banks_t * banks;
147 :
148 : /* slot_ctx holds a local view of the bank, accounts database, and
149 : the capture ctx for the slot that is currently being replayed.
150 : This does NOT correspond to the execution state for any active
151 : leader. */
152 : fd_exec_slot_ctx_t * slot_ctx;
153 :
154 : /* This flag is 1 if we have seen a vote signature that our node has
155 : sent out get rooted at least once. The value is 0 otherwise.
156 : We can't become leader and pack blocks until this flag has been
157 : set. This parallels the Agave 'has_new_vote_been_rooted'.
158 : TODO: Document this flag in more depth. */
159 : int has_identity_vote_rooted;
160 :
161 : /* Replay state machine. */
162 : fd_sched_t * sched;
163 : uint block_draining:1;
164 : uint enable_bank_hash_cmp:1;
165 : fd_bank_hash_cmp_t * bank_hash_cmp;
166 : ulong exec_cnt;
167 : ulong exec_ready_bitset; /* Bit i set if exec tile i is idle */
168 : ulong exec_txn_id[ FD_PACK_MAX_BANK_TILES ]; /* In-flight txn id */
169 : fd_replay_out_link_t exec_out[ FD_PACK_MAX_BANK_TILES ]; /* Sending work down to exec tiles */
170 :
171 : /* Tracks equivocation and translates between block ids and equivocatable
172 : slot numbers. These structures handle continuous re-keying from
173 : the incoming stream of FEC set merkle roots, and translate the full
174 : 32-byte merkle hash into a
175 :
176 : (slot number, prime counter)
177 :
178 : tuple encoded in a single ulong. The tuple is called an
179 : equivocatable slot, or eslot. A slot K starts off as (K,0). If an
180 : equivocation on slot K is observed, that would be slot K', or
181 : (K,1). And then K'' (K,2), K''' (K,3), and so on and so forth.
182 : Downstream components work with this tuple and are shielded from
183 : having to be incessantly re-keyed. This strategy also reduces the
184 : cache footprint of downstream components. For instance, with a
185 : single ulong rather than a 32-byte hash, the header for a bank will
186 : fit in a cache line. An eslot also has the benefit of being known
187 : and unique upfront at the beginning of a leader slot. */
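   /* A minimal illustration of the eslot convention (illustrative
      only; the exact encoding lives in the eslot manager, and only
      accessors already used elsewhere in this file are assumed):

        fd_eslot_t e0 = fd_eslot( 12345UL, 0UL );   (slot 12345, first version seen)
        fd_eslot_t e1 = fd_eslot( 12345UL, 1UL );   (12345', an observed equivocation)

      Here fd_eslot_slot( e1 )==12345UL and fd_eslot_prime( e1 )==1, so
      a bank or scheduler entry can be keyed by a single ulong-sized
      value instead of the 32-byte merkle root it corresponds to. */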
188 : fd_eslot_mgr_t * eslot_mgr;
189 :
190 : /* A note on publishing ...
191 :
192 : The watermarks are used to publish our fork-aware structures. For
193 : example, store, banks, and txncache need to be published to release
194 : resources occupied by rooted or dead blocks. In general,
195 : publishing has the effect of pruning forks in those structures,
196 : indicating that it is ok to release the memory being occupied by
197 : the blocks on said forks. Tower is responsible for informing us of
198 : the latest block on the consensus rooted fork. As soon as we can,
199 : we should move the published root as close as possible to the
200 : latest consensus root, publishing/pruning everything on the fork
201 : tree along the way. That is, all the blocks that directly descend
202 : from the current published root (inclusive) to the new published
203 : root (exclusive) on the rooted fork, as well as all the minority
204 : forks that branch from said blocks.
205 :
206 : Ideally, we'd move the published root to the consensus root
207 : immediately upon receiving a new consensus root. However, that's
208 : not always safe to do. One thing we need to be careful about is
209 : making sure that there are no more users/consumers of
210 : soon-to-be-pruned blocks, lest a use-after-free occurs. This can
211 : be done by using a reference counter for each block. Any
212 : concurrent activity, such as transaction execution through the
213 : exec-writer pipeline, should retain a refcnt on the block for as
214 : long as it needs access to the shared fork-aware structures related
215 : to that block. Eventually, refcnt on a given block will drop down
216 : to 0 as the block either finishes replaying or gets marked as dead,
217 : and any other tile that has retained a refcnt on the block releases
218 : it. At that point, it becomes a candidate for pruning. The key to
219 : safe publishing then becomes figuring out how far we could advance
220 : the published root, such that every minority fork branching off of
221 : blocks in between the current published root (inclusive) and the
222 : new published root (exclusive) is safe to be pruned. This is a
223 : straightforward tree traversal, where if a block B on the rooted
224 : fork has refcnt 0, and all minority forks branching off of B also
225 : have refcnt 0, then B is safe to be pruned. We advance the
226 : published root to the farthest consecutively prunable block on the
227 : rooted fork. Note that reasm presents the replay tile with a clean
228 : view of the world where every block is chained off of a parent
229 : block. So there are no orphaned/dangling tree nodes to worry
230 : about. The world is a nice single tree as far as replay is
231 : concerned.
232 :
233 : In the following fork tree, every node is a block and the number in
234 : parentheses is the refcnt on the block. The chain marked with
235 : double slashes is the rooted fork. Suppose the published root is
236 : at block P, and consensus root is at block T. We can't publish
237 : past block P because Q has refcnt 1.
238 :
239 :
240 : P(0)
241 : / \\
242 : Q(1) A(0)
243 : / || \
244 : X(0) B(0) C(0)
245 : / || \
246 : Y(0) M(0) R(0)
247 : / || / \
248 : D(2) T(0) J(0) L(0)
249 : ||
250 : ..
251 : ..
252 : ..
253 : ||
254 : blocks we might be actively replaying
255 :
256 :
257 : When refcnt on Q drops to 0, we would be able to advance the
258 : published root to block M, because blocks P, A, and B, as well as
259 : all subtrees branching off of them, have refcnt 0, and therefore
260 : can be pruned. Block M itself cannot be pruned yet because its
261 : child block D has refcnt 2. After publishing/pruning, the fork
262 : tree would be:
263 :
264 :
265 : M(0)
266 : / ||
267 : D(2) T(0)
268 : ||
269 : ..
270 : ..
271 : ..
272 : ||
273 : blocks we might be actively replaying
274 :
275 :
276 : As a result, the shared fork-aware structures can free resources
277 : for blocks P, A, B, and all subtrees branching off of them.
278 :
279 : For the reference counting part, the replay tile is the sole entity
280 : that can update the refcnt. This ensures that all refcnt increment
281 : and decrement attempts are serialized at the replay tile, and that
282 : there is no racy resurrection of a soon-to-be-pruned block. If a
283 : refcnt increment request arrives after a block has been pruned,
284 : replay simply rejects the request.
285 :
286 : A note on the implementation of the above ...
287 :
288 : Upon receiving a new consensus root, we descend down the rooted
289 : fork from the current published root to the new consensus root. On
290 : each node/block of the rooted fork, we do a summation of the refcnt
291 : on the block and all the minority fork blocks branching from the
292 : block. If the summation is 0, the block is safe for pruning. We
293 : advance the published root to the far end of the consecutive run of
294 : 0 refcnt sums originating from the current published root. On our
295 : descent down the minority forks, we also mark any block that hasn't
296 : finished replaying as dead, so we don't waste time executing it.
297 : No more transactions shall be dispatched for execution from dead
298 : blocks.
299 :
300 : Blocks start out with a refcnt of 0. Other tiles may send a
301 : request to the replay tile for a reference on a block. The
302 : transaction dispatcher is another source of refcnt updates. On
303 : every dispatch of a transaction for block B, we increment the
304 : refcnt for B. And on every transaction finalization, we decrement
305 : the refcnt for B. This means that whenever the refcnt on a block
306 : is 0, there is no more reference on that block from the execution
307 : pipeline. While it might be tempting to simply increment the
308 : refcnt once when we start replaying a block, and decrement the
309 : refcnt once when we finish a block, this more fine-grained refcnt
310 : update strategy allows for aborting and potentially immediate
311 : pruning of blocks under interleaved block replay. Upon receiving a
312 : new consensus root, we can simply look at the refcnt on minority
313 : fork blocks, and a refcnt of 0 would imply that the block is safe
314 : for pruning, even if we haven't finished replaying it. Without the
315 : fine-grained refcnt, we would need to first stop dispatching from
316 : the aborted block, and then wait for a full drain of the execution
317 : pipeline to know for sure that there are no more in-flight
318 : transactions executing on the aborted block. Note that this will
319 : allow the refcnt on any block to transiently drop down to 0. We
320 : will not mistakenly prune an actively replaying block, aka a leaf
321 : node, that is chaining off of the rooted fork, because the
322 : consensus root is always an ancestor of the actively replaying tip.
323 : */
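  /* A compact sketch of the publishing rule described above.  Purely
     illustrative pseudocode: node_t, rooted_child() and
     subtree_refcnt_sum() are hypothetical helpers, not actual fd_banks
     APIs.

       node_t * cand = published_root;
       while( cand!=consensus_root ) {
         ulong sum = cand->refcnt;
         for( node_t * c=cand->child; c; c=c->sibling )
           if( c!=rooted_child( cand ) ) sum += subtree_refcnt_sum( c );   (minority forks off cand)
         if( sum ) break;                                                  (this region is still referenced)
         cand = rooted_child( cand );                                      (cand is prunable; keep walking)
       }

     Everything from the old published root (inclusive) up to cand
     (exclusive), plus the minority forks branching off of those blocks,
     can then be pruned, and cand becomes the new published root.  (Per
     the note above, the real implementation also marks unreplayed
     minority fork blocks as dead during this walk.) */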
324 : fd_hash_t consensus_root; /* The most recent block to have reached max lockout in the tower. */
325 : ulong consensus_root_slot; /* slot number of the above. */
326 : ulong published_root_slot; /* slot number of the published root. */
327 :
328 : /* Capture-related configs */
329 : fd_capture_ctx_t * capture_ctx;
330 : FILE * capture_file;
331 :
332 : /* Whether the runtime has been booted either from snapshot loading
333 : or from genesis. */
334 : int is_booted;
335 :
336 : /* Stack allocator for slot boundary allocations.
337 : TODO: Should be replaced by tile-level allocations. */
338 : fd_spad_t * runtime_spad;
339 :
340 : /* Buffer to store vote towers that need to be published to the Tower
341 : tile. */
342 : ulong vote_tower_out_idx; /* index of vote tower to publish next */
343 : ulong vote_tower_out_len; /* number of vote towers in the buffer */
344 : fd_replay_tower_t vote_tower_out[FD_REPLAY_TOWER_VOTE_ACC_MAX];
345 :
346 : fd_multi_epoch_leaders_t * mleaders;
347 :
348 : fd_pubkey_t identity_pubkey[1]; /* TODO: Keyswitch */
349 :
350 : int is_leader;
351 : ulong next_leader_slot;
352 : ulong highwater_leader_slot;
353 : ulong reset_slot;
354 : fd_hash_t reset_block_id;
355 : long reset_timestamp_nanos;
356 : double slot_duration_nanos;
357 : ulong max_active_descendant;
358 :
359 : ulong resolv_tile_cnt;
360 :
361 : int in_kind[ 64 ];
362 : fd_replay_in_link_t in[ 64 ];
363 :
364 : fd_replay_out_link_t replay_out[1];
365 :
366 : fd_replay_out_link_t stake_out[1];
367 : fd_replay_out_link_t shredcap_out[1];
368 : fd_replay_out_link_t pack_out[1];
369 :
370 : struct {
371 : fd_histf_t store_read_wait[ 1 ];
372 : fd_histf_t store_read_work[ 1 ];
373 : fd_histf_t store_publish_wait[ 1 ];
374 : fd_histf_t store_publish_work[ 1 ];
375 : } metrics;
376 :
377 : uchar __attribute__((aligned(FD_MULTI_EPOCH_LEADERS_ALIGN))) mleaders_mem[ FD_MULTI_EPOCH_LEADERS_FOOTPRINT ];
378 : };
379 :
380 : typedef struct fd_replay_tile fd_replay_tile_t;
381 :
382 : FD_FN_CONST static inline ulong
383 0 : scratch_align( void ) {
384 0 : return 128UL;
385 0 : }
386 :
387 : FD_FN_PURE static inline ulong
388 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
389 0 : ulong l = FD_LAYOUT_INIT;
390 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
391 0 : l = FD_LAYOUT_APPEND( l, fd_sched_align(), fd_sched_footprint() );
392 0 : l = FD_LAYOUT_APPEND( l, fd_eslot_mgr_align(), fd_eslot_mgr_footprint( FD_BLOCK_MAX ) );
393 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_exec_slot_ctx_t), sizeof(fd_exec_slot_ctx_t) );
394 0 : l = FD_LAYOUT_APPEND( l, FD_CAPTURE_CTX_ALIGN, FD_CAPTURE_CTX_FOOTPRINT );
395 0 : l = FD_LAYOUT_APPEND( l, fd_spad_align(), fd_spad_footprint( tile->replay.heap_size_gib<<30 ) );
396 0 : l = FD_LAYOUT_FINI ( l, scratch_align() );
397 0 : return l;
398 0 : }
399 :
400 : static inline void
401 0 : metrics_write( fd_replay_tile_t * ctx ) {
402 0 : FD_MHIST_COPY( REPLAY, STORE_READ_WAIT, ctx->metrics.store_read_wait );
403 0 : FD_MHIST_COPY( REPLAY, STORE_READ_WORK, ctx->metrics.store_read_work );
404 0 : FD_MHIST_COPY( REPLAY, STORE_PUBLISH_WAIT, ctx->metrics.store_publish_wait );
405 0 : FD_MHIST_COPY( REPLAY, STORE_PUBLISH_WORK, ctx->metrics.store_publish_work );
406 0 : }
407 :
408 : static void
409 : publish_stake_weights( fd_replay_tile_t * ctx,
410 : fd_stem_context_t * stem,
411 : fd_exec_slot_ctx_t * slot_ctx,
412 0 : int current_epoch ) {
413 0 : fd_epoch_schedule_t const * schedule = fd_bank_epoch_schedule_query( slot_ctx->bank );
414 0 : ulong epoch = fd_slot_to_epoch( schedule, fd_bank_slot_get( slot_ctx->bank ), NULL );
415 :
416 0 : fd_vote_states_t const * vote_states_prev;
417 0 : if( FD_LIKELY( current_epoch ) ) vote_states_prev = fd_bank_vote_states_prev_locking_query( slot_ctx->bank );
418 0 : else vote_states_prev = fd_bank_vote_states_prev_prev_locking_query( ctx->slot_ctx->bank );
419 :
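  /* Descriptive note (layout inferred from the usage in this function,
     not from generate_stake_weight_msg() itself): the log line below
     reads the message header as stake_weights_msg[0]=epoch,
     [1]=number of stake weights, [2]=start slot of the epoch,
     [3]=slot count of the epoch. */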
420 0 : ulong * stake_weights_msg = fd_chunk_to_laddr( ctx->stake_out->mem, ctx->stake_out->chunk );
421 0 : ulong stake_weights_sz = generate_stake_weight_msg( epoch+fd_ulong_if( current_epoch, 1UL, 0UL), schedule, vote_states_prev, stake_weights_msg );
422 0 : ulong stake_weights_sig = 4UL;
423 0 : fd_stem_publish( stem, ctx->stake_out->idx, stake_weights_sig, ctx->stake_out->chunk, stake_weights_sz, 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
424 0 : ctx->stake_out->chunk = fd_dcache_compact_next( ctx->stake_out->chunk, stake_weights_sz, ctx->stake_out->chunk0, ctx->stake_out->wmark );
425 :
426 0 : FD_LOG_NOTICE(( "sending stake weights for epoch %lu (slot %lu - %lu) with %lu stakes", stake_weights_msg[ 0 ], stake_weights_msg[ 2 ], stake_weights_msg[ 2 ]+stake_weights_msg[ 3 ], stake_weights_msg[ 1 ] ));
427 :
428 0 : if( FD_LIKELY( current_epoch ) ) fd_bank_vote_states_prev_end_locking_query( slot_ctx->bank );
429 0 : else fd_bank_vote_states_prev_prev_end_locking_query( ctx->slot_ctx->bank );
430 :
431 0 : fd_multi_epoch_leaders_stake_msg_init( ctx->mleaders, fd_type_pun_const( stake_weights_msg ) );
432 0 : fd_multi_epoch_leaders_stake_msg_fini( ctx->mleaders );
433 0 : }
434 :
435 : /**********************************************************************/
436 : /* Vote tower publishing helpers */
437 : /**********************************************************************/
438 :
439 : /* fd_replay_out_vote_tower_from_funk queries Funk for the state of the vote
440 : account with the given pubkey, and copies the state into the given
441 : fd_replay_tower_t structure. The account data is simply copied as-is.
442 :
443 : Parameters:
444 : - funk: The funk database instance to query vote account data from
445 : - funk_txn: The funk transaction context for consistent reads
446 : - pubkey: The public key of the vote account to retrieve
447 : - stake: The stake amount associated with this vote account
448 : - vote_tower_out: Output structure to populate with vote state information
449 :
450 : Failure modes:
451 : - Vote account data is too large (returns -1)
452 : - Vote account is not found in Funk (returns -1)
453 : - Account metadata has wrong magic (returns -1) */
454 : static int
455 : fd_replay_out_vote_tower_from_funk(
456 : fd_funk_t const * funk,
457 : fd_funk_txn_t const * funk_txn,
458 : fd_pubkey_t const * pubkey,
459 : ulong stake,
460 0 : fd_replay_tower_t * vote_tower_out ) {
461 :
462 0 : fd_memset( vote_tower_out, 0, sizeof(fd_replay_tower_t) );
463 0 : vote_tower_out->key = *pubkey;
464 0 : vote_tower_out->stake = stake;
465 :
466 : /* Speculatively copy out the raw vote account state from Funk */
467 0 : for(;;) {
468 0 : fd_memset( vote_tower_out->acc, 0, sizeof(vote_tower_out->acc) );
469 :
470 0 : fd_funk_rec_query_t query;
471 0 : fd_funk_rec_key_t funk_key = fd_funk_acc_key( pubkey );
472 0 : fd_funk_rec_t const * rec = fd_funk_rec_query_try_global( funk, funk_txn, &funk_key, NULL, &query );
473 0 : if( FD_UNLIKELY( !rec ) ) {
474 0 : FD_LOG_WARNING(( "vote account not found. address: %s",
475 0 : FD_BASE58_ENC_32_ALLOCA( pubkey->uc ) ));
476 0 : return -1;
477 0 : }
478 :
479 0 : uchar const * raw = fd_funk_val_const( rec, fd_funk_wksp(funk) );
480 0 : fd_account_meta_t const * metadata = fd_type_pun_const( raw );
481 :
482 0 : ulong data_sz = metadata->dlen;
483 0 : if( FD_UNLIKELY( data_sz > sizeof(vote_tower_out->acc) ) ) {
484 0 : FD_LOG_WARNING(( "vote account %s has too large data. dlen %lu > %lu",
485 0 : FD_BASE58_ENC_32_ALLOCA( pubkey->uc ),
486 0 : data_sz,
487 0 : sizeof(vote_tower_out->acc) ));
488 0 : return -1;
489 0 : }
490 :
491 0 : fd_memcpy( vote_tower_out->acc, raw + sizeof(fd_account_meta_t), data_sz );
492 0 : vote_tower_out->acc_sz = (ushort)data_sz;
493 :
494 0 : if( FD_LIKELY( fd_funk_rec_query_test( &query ) == FD_FUNK_SUCCESS ) ) {
495 0 : break;
496 0 : }
497 0 : }
498 :
499 0 : return 0;
500 0 : }
501 :
502 : /* This function buffers all the vote account towers that Tower needs at
503 : the end of this slot into the ctx->vote_tower_out buffer. These will
504 : then be published in after_credit.
505 :
506 : This function should be called at the end of a slot, before any epoch
507 : boundary processing. */
508 : static void
509 : buffer_vote_towers( fd_replay_tile_t * ctx,
510 : fd_funk_txn_t * funk_txn,
511 0 : fd_bank_t * bank ) {
512 0 : ctx->vote_tower_out_idx = 0UL;
513 0 : ctx->vote_tower_out_len = 0UL;
514 :
515 0 : fd_vote_states_t const * vote_states = fd_bank_vote_states_prev_locking_query( bank );
516 0 : fd_vote_states_iter_t iter_[1];
517 0 : for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, vote_states );
518 0 : !fd_vote_states_iter_done( iter );
519 0 : fd_vote_states_iter_next( iter ) ) {
520 0 : fd_vote_state_ele_t const * vote_state = fd_vote_states_iter_ele( iter );
521 0 : if( FD_UNLIKELY( vote_state->stake == 0 ) ) continue; /* skip unstaked vote accounts */
522 0 : fd_pubkey_t const * vote_account_pubkey = &vote_state->vote_account;
523 0 : if( FD_UNLIKELY( ctx->vote_tower_out_len >= (FD_REPLAY_TOWER_VOTE_ACC_MAX-1UL) ) ) FD_LOG_ERR(( "vote_tower_out_len too large" ));
524 0 : if( FD_UNLIKELY( fd_replay_out_vote_tower_from_funk( ctx->funk,
525 0 : funk_txn,
526 0 : vote_account_pubkey,
527 0 : vote_state->stake,
528 0 : &ctx->vote_tower_out[ctx->vote_tower_out_len++] ) ) ) {
529 0 : FD_LOG_DEBUG(( "failed to get vote state for vote account %s", FD_BASE58_ENC_32_ALLOCA( vote_account_pubkey->uc ) ));
530 0 : }
531 0 : }
532 0 : fd_bank_vote_states_prev_end_locking_query( bank );
533 0 : }
534 :
535 : /* This function publishes the next vote tower in the
536 : ctx->vote_tower_out buffer to the tower tile.
537 :
538 : This function should be called in after_credit, after all the vote
539 : towers for the end of a slot have been buffered in
540 : ctx->vote_tower_out. */
541 :
542 : static void
543 : publish_next_vote_tower( fd_replay_tile_t * ctx,
544 0 : fd_stem_context_t * stem ) {
545 0 : int som = ctx->vote_tower_out_idx==0;
546 0 : int eom = ctx->vote_tower_out_idx==( ctx->vote_tower_out_len - 1 );
547 :
548 0 : fd_replay_tower_t * vote_state = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
549 0 : *vote_state = ctx->vote_tower_out[ ctx->vote_tower_out_idx ];
550 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_VOTE_STATE, ctx->replay_out->chunk, sizeof(fd_replay_tower_t), fd_frag_meta_ctl( 0UL, som, eom, 0 ), 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
551 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_tower_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
552 :
553 0 : ctx->vote_tower_out_idx++;
554 0 : }
555 :
556 : /**********************************************************************/
557 : /* Transaction execution state machine helpers */
558 : /**********************************************************************/
559 :
560 : static fd_bank_t *
561 : replay_block_start( fd_replay_tile_t * ctx,
562 : fd_stem_context_t * stem,
563 : fd_eslot_t eslot,
564 0 : fd_eslot_t parent_eslot ) {
565 :
566 : /* Switch to a new block that we don't have a bank for. */
567 0 : FD_LOG_INFO(( "Creating new bank (slot: %lu, prime: %u; parent slot: %lu, prime %u) ", fd_eslot_slot( eslot ), fd_eslot_prime( eslot ), fd_eslot_slot( parent_eslot ), fd_eslot_prime( parent_eslot ) ));
568 :
569 0 : fd_bank_t * bank = fd_banks_get_bank( ctx->banks, eslot );
570 0 : if( FD_UNLIKELY( !!bank ) ) {
571 0 : FD_LOG_CRIT(( "invariant violation: block with (slot: %lu, prime: %u) already exists", fd_eslot_slot( eslot ), fd_eslot_prime( eslot ) ));
572 0 : }
573 :
574 : /* Clone the bank from the parent. We must special case the first
575 : slot that is executed as the snapshot does not provide a parent
576 : block id. */
577 :
578 0 : bank = fd_banks_clone_from_parent( ctx->banks, eslot, parent_eslot );
579 0 : if( FD_UNLIKELY( !bank ) ) {
580 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for (slot %lu, prime %u)", fd_eslot_slot( eslot ), fd_eslot_prime( eslot ) ));
581 0 : }
582 :
583 : /* Create a new funk txn for the block. */
584 :
585 0 : fd_funk_txn_start_write( ctx->funk );
586 :
587 0 : fd_funk_txn_xid_t xid = { .ul = { fd_eslot_slot( eslot ), fd_eslot_slot( eslot ) } };
588 0 : fd_funk_txn_xid_t parent_xid = { .ul = { fd_eslot_slot( parent_eslot ), fd_eslot_slot( parent_eslot ) } };
589 :
590 0 : fd_funk_txn_map_t * txn_map = fd_funk_txn_map( ctx->funk );
591 0 : if( FD_UNLIKELY( !txn_map ) ) {
592 0 : FD_LOG_CRIT(( "invariant violation: funk_txn_map is NULL" ));
593 0 : }
594 :
595 0 : fd_funk_txn_t * parent_txn = fd_funk_txn_query( &parent_xid, txn_map );
596 :
597 0 : fd_funk_txn_t * funk_txn = fd_funk_txn_prepare( ctx->funk, parent_txn, &xid, 1 );
598 0 : if( FD_UNLIKELY( !funk_txn ) ) {
599 0 : FD_LOG_CRIT(( "invariant violation: can't prepare funk_txn for (slot %lu, prime %u)", fd_eslot_slot( eslot ), fd_eslot_prime( eslot ) ));
600 0 : }
601 :
602 0 : fd_funk_txn_end_write( ctx->funk );
603 :
604 : /* Update any required runtime state and handle any potential epoch
605 : boundary change. */
606 :
607 0 : if( ctx->capture_ctx ) {
608 0 : fd_solcap_writer_set_slot( ctx->capture_ctx->capture, fd_eslot_slot( eslot ) );
609 0 : }
610 :
611 0 : fd_bank_shred_cnt_set( bank, 0UL );
612 0 : fd_bank_execution_fees_set( bank, 0UL );
613 0 : fd_bank_priority_fees_set( bank, 0UL );
614 :
615 0 : fd_bank_has_identity_vote_set( bank, 0 );
616 :
617 : /* Set the tick height. */
618 0 : fd_bank_tick_height_set( bank, fd_bank_max_tick_height_get( bank ) );
619 :
620 : /* Update block height. */
621 0 : fd_bank_block_height_set( bank, fd_bank_block_height_get( bank ) + 1UL );
622 :
623 0 : ulong * max_tick_height = fd_bank_max_tick_height_modify( bank );
624 0 : ulong ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
625 0 : if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS != fd_runtime_compute_max_tick_height(ticks_per_slot, fd_eslot_slot( eslot ), max_tick_height ) ) ) {
626 0 : FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", fd_eslot_slot( eslot ), ticks_per_slot ));
627 0 : }
628 0 : bank->flags |= fd_ulong_if( ctx->tx_metadata_storage, FD_BANK_FLAGS_EXEC_RECORDING, 0UL );
629 :
630 0 : ctx->slot_ctx->funk_txn = funk_txn;
631 0 : ctx->slot_ctx->bank = bank;
632 :
633 0 : int is_epoch_boundary = 0;
634 0 : fd_runtime_block_pre_execute_process_new_epoch(
635 0 : ctx->slot_ctx,
636 0 : ctx->capture_ctx,
637 0 : ctx->runtime_spad,
638 0 : &is_epoch_boundary );
639 0 : if( FD_UNLIKELY( is_epoch_boundary ) ) publish_stake_weights( ctx, stem, ctx->slot_ctx, 1 );
640 :
641 0 : int res = fd_runtime_block_execute_prepare( ctx->slot_ctx, ctx->runtime_spad );
642 0 : if( FD_UNLIKELY( res!=FD_RUNTIME_EXECUTE_SUCCESS ) ) {
643 0 : FD_LOG_CRIT(( "block prep execute failed" ));
644 0 : }
645 :
646 0 : return bank;
647 0 : }
648 :
649 : /* By the time this function returns, replay context will have been set
650 : up for execution of the target block. This will create a new bank if
651 : needed. */
652 : static void
653 : replay_ctx_switch( fd_replay_tile_t * ctx,
654 0 : fd_eslot_t to_eslot ) {
655 :
656 0 : ctx->slot_ctx->bank = fd_banks_get_bank( ctx->banks, to_eslot );
657 0 : if( FD_UNLIKELY( !ctx->slot_ctx->bank ) ) {
658 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for slot (%lu, %u)", fd_eslot_slot( to_eslot ), fd_eslot_prime( to_eslot ) ));
659 0 : }
660 :
661 0 : ulong slot = fd_bank_slot_get( ctx->slot_ctx->bank );
662 :
663 0 : fd_funk_txn_map_t * txn_map = fd_funk_txn_map( ctx->funk );
664 0 : fd_funk_txn_xid_t xid = { .ul = { slot, slot } };
665 0 : fd_funk_txn_start_read( ctx->funk );
666 0 : ctx->slot_ctx->funk_txn = fd_funk_txn_query( &xid, txn_map );
667 0 : fd_funk_txn_end_read( ctx->funk );
668 0 : if( FD_UNLIKELY( !ctx->slot_ctx->funk_txn ) ) {
669 0 : FD_LOG_CRIT(( "invariant violation: funk_txn is NULL for slot %lu", slot ));
670 0 : }
671 0 : }
672 :
673 : static void
674 : publish_slot_completed( fd_replay_tile_t * ctx,
675 : fd_stem_context_t * stem,
676 : fd_bank_t * bank,
677 0 : int is_initial ) {
678 0 : ulong slot = fd_bank_slot_get( bank );
679 :
680 0 : fd_eslot_ele_t * ele = fd_eslot_mgr_ele_query_eslot( ctx->eslot_mgr, fd_bank_eslot_get( bank ) );
681 0 : if( FD_UNLIKELY( !ele ) ) {
682 0 : FD_LOG_CRIT(( "invariant violation: eslot entry not found: (slot %lu, prime %u)", fd_eslot_slot( fd_bank_eslot_get( bank ) ), fd_eslot_prime( fd_bank_eslot_get( bank ) ) ));
683 0 : }
684 :
685 0 : fd_hash_t parent_block_id = {0};
686 0 : if( FD_LIKELY( !is_initial ) ) {
687 0 : fd_eslot_ele_t * parent_ele = fd_eslot_mgr_ele_query_eslot( ctx->eslot_mgr, fd_bank_parent_eslot_get( bank ) );
688 0 : if( FD_UNLIKELY( !parent_ele ) ) {
689 0 : FD_LOG_CRIT(( "invariant violation: eslot entry not found: (slot %lu, prime %u)", fd_eslot_slot( fd_bank_parent_eslot_get( bank ) ), fd_eslot_prime( fd_bank_parent_eslot_get( bank ) ) ));
690 0 : }
691 0 : parent_block_id = parent_ele->merkle_root;
692 0 : }
693 :
694 0 : fd_hash_t const * bank_hash = fd_bank_bank_hash_query( bank );
695 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last( fd_bank_block_hash_queue_query( bank ) );
696 0 : FD_TEST( bank_hash );
697 0 : FD_TEST( block_hash );
698 :
699 0 : fd_epoch_schedule_t const * epoch_schedule = fd_bank_epoch_schedule_query( bank );
700 0 : ulong slot_idx;
701 0 : ulong epoch = fd_slot_to_epoch( epoch_schedule, slot, &slot_idx );
702 :
703 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
704 0 : slot_info->slot = slot;
705 0 : slot_info->root_slot = ctx->consensus_root_slot;
706 0 : slot_info->epoch = epoch;
707 0 : slot_info->slot_in_epoch = slot_idx;
708 0 : slot_info->block_height = fd_bank_block_height_get( bank );
709 0 : slot_info->parent_slot = fd_bank_parent_slot_get( bank );
710 0 : slot_info->completion_time_nanos = fd_log_wallclock();
711 0 : slot_info->block_id = ele->merkle_root;
712 0 : slot_info->parent_block_id = parent_block_id;
713 0 : slot_info->bank_hash = *bank_hash;
714 0 : slot_info->block_hash = *block_hash;
715 0 : slot_info->transaction_count = fd_bank_txn_count_get( ctx->slot_ctx->bank );
716 0 : slot_info->shred_count = fd_bank_shred_cnt_get( ctx->slot_ctx->bank );
717 :
718 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_SLOT_COMPLETED, ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
719 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
720 0 : }
721 :
722 : static void
723 : replay_block_finalize( fd_replay_tile_t * ctx,
724 0 : fd_stem_context_t * stem ) {
725 0 : if( FD_UNLIKELY( ctx->capture_ctx ) ) fd_solcap_writer_flush( ctx->capture_ctx->capture );
726 :
727 0 : fd_bank_t * bank = ctx->slot_ctx->bank;
728 0 : FD_TEST( !(bank->flags&FD_BANK_FLAGS_FROZEN) );
729 :
730 0 : fd_eslot_t eslot = fd_bank_eslot_get( bank );
731 :
732 : /* We know at this point that we must have an entry in the eslot mgr
733 : for both the current bank's eslot and the parent eslot. */
734 0 : fd_eslot_ele_t * ele = fd_eslot_mgr_ele_query_eslot( ctx->eslot_mgr, eslot );
735 0 : if( FD_UNLIKELY( !ele ) ) {
736 0 : FD_LOG_CRIT(( "invariant violation: eslot entry not found: (slot %lu, prime %u)", fd_eslot_slot( eslot ), fd_eslot_prime( eslot ) ));
737 0 : }
738 0 : fd_eslot_ele_t * parent_ele = fd_eslot_mgr_ele_query_eslot( ctx->eslot_mgr, fd_bank_parent_eslot_get( bank ) );
739 0 : if( FD_UNLIKELY( !parent_ele ) ) {
740 0 : FD_LOG_CRIT(( "invariant violation: eslot entry not found: (slot %lu, prime %u)", fd_eslot_slot( fd_bank_parent_eslot_get( bank ) ), fd_eslot_prime( fd_bank_parent_eslot_get( bank ) ) ));
741 0 : }
742 :
743 0 : fd_hash_t const * block_id = &ele->merkle_root;
744 0 : fd_hash_t const * parent_block_id = &parent_ele->merkle_root;
745 0 : fd_hash_t const * bank_hash = fd_bank_bank_hash_query( bank );
746 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last( fd_bank_block_hash_queue_query( bank ) );
747 0 : FD_TEST( block_id );
748 0 : FD_TEST( parent_block_id );
749 0 : FD_TEST( bank_hash );
750 0 : FD_TEST( block_hash );
751 :
752 : /* Set poh hash in bank. */
753 0 : fd_hash_t * poh = fd_sched_get_poh( ctx->sched, &eslot );
754 0 : fd_bank_poh_set( bank, *poh );
755 :
756 : /* Set shred count in bank. */
757 0 : fd_bank_shred_cnt_set( bank, fd_sched_get_shred_cnt( ctx->sched, &eslot ) );
758 :
759 : /* Do hashing and other end-of-block processing. */
760 0 : fd_runtime_block_execute_finalize( ctx->slot_ctx );
761 0 : bank->flags |= FD_BANK_FLAGS_FROZEN;
762 :
763 0 : publish_slot_completed( ctx, stem, bank, 0 );
764 :
765 : /* Copy the vote tower of all the vote accounts into the buffer,
766 : which will be published in after_credit. */
767 0 : buffer_vote_towers( ctx, ctx->slot_ctx->funk_txn, ctx->slot_ctx->bank );
768 :
769 : /* TODO: Don't think we want to reset pack/poh here? Should probably
770 : be deleted. */
771 0 : if( FD_LIKELY( ctx->pack_out->idx!=ULONG_MAX ) ) {
772 0 : fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->pack_out->mem, ctx->pack_out->chunk );
773 :
774 0 : reset->completed_slot = fd_eslot_slot( eslot );
775 0 : reset->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
776 0 : reset->ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
777 0 : reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);
778 0 : fd_memcpy( reset->completed_blockhash, block_hash->uc, sizeof(fd_hash_t) );
779 0 : fd_memcpy( reset->completed_block_id, block_id->uc, sizeof(fd_hash_t) );
780 :
781 0 : ulong ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
782 0 : if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
783 : /* Low power producer, maximum of one microblock per tick in the slot */
784 0 : reset->max_microblocks_in_slot = ticks_per_slot;
785 0 : } else {
786 : /* See the long comment in after_credit for this limit */
787 0 : reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
788 0 : }
789 0 : reset->next_leader_slot = ctx->next_leader_slot;
790 :
791 0 : ulong sig = fd_disco_poh_sig( ctx->next_leader_slot, POH_PKT_TYPE_FEAT_ACT_SLOT /* Rubbish .. but threads the needle correctly for now */, 0UL );
792 0 : fd_stem_publish( stem, ctx->pack_out->idx, sig, ctx->pack_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
793 0 : ctx->pack_out->chunk = fd_dcache_compact_next( ctx->pack_out->chunk, sizeof(fd_poh_reset_t), ctx->pack_out->chunk0, ctx->pack_out->wmark );
794 0 : }
795 :
796 : /**********************************************************************/
797 : /* Bank hash comparison, and halt if there's a mismatch after replay */
798 : /**********************************************************************/
799 :
800 0 : fd_bank_hash_cmp_t * bank_hash_cmp = ctx->bank_hash_cmp;
801 0 : fd_bank_hash_cmp_lock( bank_hash_cmp );
802 0 : fd_bank_hash_cmp_insert( bank_hash_cmp, fd_eslot_slot( eslot ), bank_hash, 1, 0 );
803 :
804 0 : if( FD_UNLIKELY( ctx->shredcap_out->idx!=ULONG_MAX ) ) {
805 : /* TODO: We need some way to define common headers. */
806 0 : uchar * chunk_laddr = fd_chunk_to_laddr( ctx->shredcap_out->mem, ctx->shredcap_out->chunk );
807 0 : fd_hash_t const * bank_hash = fd_bank_bank_hash_query( bank );
808 0 : ulong slot = fd_bank_slot_get( bank );
809 0 : memcpy( chunk_laddr, bank_hash, sizeof(fd_hash_t) );
810 0 : memcpy( chunk_laddr+sizeof(fd_hash_t), &slot, sizeof(ulong) );
811 0 : fd_stem_publish( stem, ctx->shredcap_out->idx, 0UL, ctx->shredcap_out->chunk, sizeof(fd_hash_t) + sizeof(ulong), 0UL, fd_frag_meta_ts_comp( fd_tickcount() ), fd_frag_meta_ts_comp( fd_tickcount() ) );
812 0 : ctx->shredcap_out->chunk = fd_dcache_compact_next( ctx->shredcap_out->chunk, sizeof(fd_hash_t) + sizeof(ulong), ctx->shredcap_out->chunk0, ctx->shredcap_out->wmark );
813 0 : }
814 :
815 : /* Try to move the bank hash comparison watermark forward */
816 0 : for( ulong cmp_slot = bank_hash_cmp->watermark + 1; cmp_slot < fd_eslot_slot( eslot ); cmp_slot++ ) {
817 0 : if( FD_UNLIKELY( !ctx->enable_bank_hash_cmp ) ) {
818 0 : bank_hash_cmp->watermark = cmp_slot;
819 0 : break;
820 0 : }
821 0 : int rc = fd_bank_hash_cmp_check( bank_hash_cmp, cmp_slot );
822 0 : switch ( rc ) {
823 0 : case -1:
824 : /* Mismatch */
825 0 : FD_LOG_WARNING(( "Bank hash mismatch on slot: %lu. Halting.", cmp_slot ));
826 0 : break;
827 0 : case 0:
828 : /* Not ready */
829 0 : break;
830 0 : case 1:
831 : /* Match*/
832 0 : bank_hash_cmp->watermark = cmp_slot;
833 0 : break;
834 0 : default:;
835 0 : }
836 0 : }
837 :
838 0 : fd_bank_hash_cmp_unlock( bank_hash_cmp );
839 0 : }
840 :
841 : /**********************************************************************/
842 : /* Leader bank management */
843 : /**********************************************************************/
844 :
845 : static fd_bank_t *
846 : prepare_leader_bank( fd_replay_tile_t * ctx,
847 : ulong slot,
848 : fd_hash_t const * parent_block_id,
849 0 : fd_stem_context_t * stem ) {
850 :
851 : /* When we are leader we assume that there is no equivocation
852 : possible. */
853 0 : fd_eslot_t curr_eslot = fd_eslot( slot, 0UL );
854 :
855 0 : fd_eslot_ele_t * ele = fd_eslot_mgr_ele_query_merkle_root( ctx->eslot_mgr, parent_block_id );
856 0 : if( FD_UNLIKELY( !ele ) ) {
857 0 : FD_LOG_CRIT(( "invariant violation: eslot entry not found for merkle root %s", FD_BASE58_ENC_32_ALLOCA( parent_block_id->uc ) ));
858 0 : }
859 0 : fd_eslot_t parent_eslot = ele->eslot;
860 0 : ulong parent_slot = fd_eslot_slot( parent_eslot );
861 :
862 0 : fd_eslot_mgr_ele_insert_leader( ctx->eslot_mgr, slot, parent_eslot );
863 :
864 0 : fd_bank_t * bank = fd_banks_clone_from_parent( ctx->banks, curr_eslot, parent_eslot );
865 0 : if( FD_UNLIKELY( !bank ) ) {
866 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for slot %lu", slot ));
867 0 : }
868 :
869 : /* prepare the funk transaction for the leader bank */
870 0 : fd_funk_txn_start_write( ctx->funk );
871 :
872 0 : fd_funk_txn_xid_t xid = { .ul = { slot, slot } };
873 0 : fd_funk_txn_xid_t parent_xid = { .ul = { parent_slot, parent_slot } };
874 :
875 0 : fd_funk_txn_map_t * txn_map = fd_funk_txn_map( ctx->funk );
876 0 : if( FD_UNLIKELY( !txn_map ) ) {
877 0 : FD_LOG_CRIT(( "invariant violation: funk_txn_map is NULL for slot %lu", slot ));
878 0 : }
879 :
880 0 : fd_funk_txn_t * parent_txn = fd_funk_txn_query( &parent_xid, txn_map );
881 :
882 0 : fd_funk_txn_t * funk_txn = fd_funk_txn_prepare( ctx->funk, parent_txn, &xid, 1 );
883 0 : if( FD_UNLIKELY( !funk_txn ) ) {
884 0 : FD_LOG_CRIT(( "invariant violation: funk_txn is NULL for slot %lu", slot ));
885 0 : }
886 :
887 0 : fd_funk_txn_end_write( ctx->funk );
888 :
889 0 : fd_bank_execution_fees_set( bank, 0UL );
890 0 : fd_bank_priority_fees_set( bank, 0UL );
891 0 : fd_bank_shred_cnt_set( bank, 0UL );
892 :
893 : /* Set the tick height. */
894 0 : fd_bank_tick_height_set( bank, fd_bank_max_tick_height_get( bank ) );
895 :
896 : /* Update block height. */
897 0 : fd_bank_block_height_set( bank, fd_bank_block_height_get( bank ) + 1UL );
898 :
899 0 : ulong * max_tick_height = fd_bank_max_tick_height_modify( bank );
900 0 : ulong ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
901 0 : if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS != fd_runtime_compute_max_tick_height( ticks_per_slot, slot, max_tick_height ) ) ) {
902 0 : FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", slot, ticks_per_slot ));
903 0 : }
904 :
905 0 : bank->flags |= fd_ulong_if( ctx->tx_metadata_storage, FD_BANK_FLAGS_EXEC_RECORDING, 0UL );
906 :
907 0 : fd_exec_slot_ctx_t slot_ctx = {
908 0 : .bank = bank,
909 0 : .funk = ctx->funk,
910 0 : .banks = ctx->banks,
911 0 : .funk_txn = funk_txn,
912 0 : };
913 :
914 0 : int is_epoch_boundary = 0;
915 0 : fd_runtime_block_pre_execute_process_new_epoch(
916 0 : &slot_ctx,
917 0 : ctx->capture_ctx,
918 0 : ctx->runtime_spad,
919 0 : &is_epoch_boundary );
920 0 : if( FD_UNLIKELY( is_epoch_boundary ) ) publish_stake_weights( ctx, stem, &slot_ctx, 1 );
921 :
922 0 : int res = fd_runtime_block_execute_prepare( &slot_ctx, ctx->runtime_spad );
923 0 : if( FD_UNLIKELY( res!=FD_RUNTIME_EXECUTE_SUCCESS ) ) {
924 0 : FD_LOG_CRIT(( "block prep execute failed" ));
925 0 : }
926 :
927 0 : bank->refcnt++;
928 :
929 0 : return bank;
930 0 : }
931 :
932 : static void
933 : fini_leader_bank( fd_replay_tile_t * ctx,
934 : fd_bank_t * bank,
935 0 : fd_stem_context_t * stem ) {
936 0 : FD_TEST( !(bank->flags&FD_BANK_FLAGS_FROZEN) );
937 0 : bank->flags |= FD_BANK_FLAGS_FROZEN;
938 :
939 0 : fd_eslot_t leader_eslot = fd_bank_eslot_get( bank );
940 0 : fd_eslot_t parent_eslot = fd_eslot( fd_bank_parent_slot_get( bank ), 0UL );
941 0 : fd_sched_block_add_done( ctx->sched, &leader_eslot, &parent_eslot );
942 :
943 0 : ulong curr_slot = fd_bank_slot_get( bank );
944 :
945 : /* Do hashing and other end-of-block processing */
946 0 : fd_funk_txn_map_t * txn_map = fd_funk_txn_map( ctx->funk );
947 0 : if( FD_UNLIKELY( !txn_map->map ) ) {
948 0 : FD_LOG_ERR(( "Could not find valid funk transaction map" ));
949 0 : }
950 0 : fd_funk_txn_xid_t xid = { .ul = { curr_slot, curr_slot } };
951 0 : fd_funk_txn_start_read( ctx->funk );
952 0 : fd_funk_txn_t * funk_txn = fd_funk_txn_query( &xid, txn_map );
953 0 : fd_funk_txn_end_read( ctx->funk );
954 0 : if( FD_UNLIKELY( !funk_txn ) ) {
955 0 : FD_LOG_ERR(( "Could not find valid funk transaction for slot %lu", curr_slot ));
956 0 : }
957 :
958 0 : fd_exec_slot_ctx_t slot_ctx = {
959 0 : .funk = ctx->funk,
960 0 : .banks = ctx->banks,
961 0 : .bank = bank,
962 0 : .funk_txn = funk_txn,
963 0 : };
964 :
965 0 : fd_runtime_block_execute_finalize( &slot_ctx );
966 :
967 0 : publish_slot_completed( ctx, stem, bank, 0 );
968 :
969 : /* Copy the vote tower of all the vote accounts into the buffer,
970 : which will be published in after_credit. */
971 0 : buffer_vote_towers( ctx, ctx->slot_ctx->funk_txn, ctx->slot_ctx->bank );
972 0 : }
973 :
974 : static void
975 : publish_root_advanced( fd_replay_tile_t * ctx,
976 0 : fd_stem_context_t * stem ) {
977 :
978 0 : fd_bank_t * consensus_root_bank = fd_banks_get_bank( ctx->banks, fd_eslot( ctx->consensus_root_slot, 0UL ) );
979 0 : FD_TEST( consensus_root_bank );
980 0 : consensus_root_bank->refcnt += ctx->resolv_tile_cnt;
981 :
982 0 : fd_replay_root_advanced_t * msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
983 0 : msg->bank_idx = fd_banks_get_pool_idx( ctx->banks, consensus_root_bank );
984 :
985 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_ROOT_ADVANCED, ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
986 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
987 0 : }
988 :
989 : static void
990 0 : init_after_snapshot( fd_replay_tile_t * ctx ) {
991 : /* Now that the snapshot has been loaded in, we have to refresh the
992 : stake delegations since the manifest does not contain the full set
993 : of data required for the stake delegations. See
994 : fd_stake_delegations.h for why this is required. */
995 :
996 0 : fd_stake_delegations_t * root_delegations = fd_banks_stake_delegations_root_query( ctx->slot_ctx->banks );
997 :
998 0 : fd_stake_delegations_refresh( root_delegations, ctx->funk, ctx->slot_ctx->funk_txn );
999 :
1000 : /* After both snapshots have been loaded in, we can determine if we should
1001 : start distributing rewards. */
1002 :
1003 0 : fd_rewards_recalculate_partitioned_rewards( ctx->slot_ctx, ctx->capture_ctx, ctx->runtime_spad );
1004 :
1005 0 : ulong snapshot_slot = fd_bank_slot_get( ctx->slot_ctx->bank );
1006 0 : if( FD_UNLIKELY( !snapshot_slot ) ) {
1007 : /* Genesis-specific setup. */
1008 : /* FIXME: This branch does not set up a new block exec ctx
1009 : properly. Needs to do whatever prepare_new_block_execution
1010 : does, but just hacking that in breaks stuff. */
1011 0 : fd_runtime_update_leaders( ctx->slot_ctx->bank,
1012 0 : fd_bank_slot_get( ctx->slot_ctx->bank ),
1013 0 : ctx->runtime_spad );
1014 :
1015 0 : ulong hashcnt_per_slot = fd_bank_hashes_per_tick_get( ctx->slot_ctx->bank ) * fd_bank_ticks_per_slot_get( ctx->slot_ctx->bank );
1016 0 : fd_hash_t * poh = fd_bank_poh_modify( ctx->slot_ctx->bank );
1017 0 : while( hashcnt_per_slot-- ) {
1018 0 : fd_sha256_hash( poh->hash, 32UL, poh->hash );
1019 0 : }
1020 :
1021 0 : FD_TEST( fd_runtime_block_execute_prepare( ctx->slot_ctx, ctx->runtime_spad ) == 0 );
1022 0 : fd_runtime_block_execute_finalize( ctx->slot_ctx );
1023 :
1024 0 : snapshot_slot = 0UL;
1025 :
1026 : /* Now setup exec tiles for execution */
1027 0 : ctx->exec_ready_bitset = fd_ulong_mask_lsb( (int)ctx->exec_cnt );
1028 0 : }
1029 :
1030 : /* Initialize consensus structures post-snapshot */
1031 :
1032 0 : fd_vote_states_t const * vote_states = fd_bank_vote_states_locking_query( ctx->slot_ctx->bank );
1033 :
1034 0 : fd_bank_hash_cmp_t * bank_hash_cmp = ctx->bank_hash_cmp;
1035 :
1036 0 : fd_vote_states_iter_t iter_[1];
1037 0 : for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, vote_states ); !fd_vote_states_iter_done( iter ); fd_vote_states_iter_next( iter ) ) {
1038 0 : fd_vote_state_ele_t const * vote_state = fd_vote_states_iter_ele( iter );
1039 0 : bank_hash_cmp->total_stake += vote_state->stake;
1040 0 : }
1041 0 : bank_hash_cmp->watermark = snapshot_slot;
1042 :
1043 0 : fd_bank_vote_states_end_locking_query( ctx->slot_ctx->bank );
1044 :
1045 : /* Now that the snapshot(s) are done loading, we can mark all of the
1046 : exec tiles as ready. */
1047 0 : ctx->exec_ready_bitset = fd_ulong_mask_lsb( (int)ctx->exec_cnt );
1048 :
1049 0 : if( FD_UNLIKELY( ctx->capture_ctx ) ) fd_solcap_writer_flush( ctx->capture_ctx->capture );
1050 :
1051 0 : ctx->consensus_root_slot = snapshot_slot;
1052 0 : }
1053 :
1054 : static int
1055 : maybe_become_leader( fd_replay_tile_t * ctx,
1056 0 : fd_stem_context_t * stem ) {
1057 0 : FD_TEST( ctx->is_booted );
1058 0 : if( FD_UNLIKELY( ctx->pack_out->idx==ULONG_MAX ) ) return 0;
1059 0 : if( FD_UNLIKELY( ctx->is_leader || ctx->next_leader_slot==ULONG_MAX ) ) return 0;
1060 :
1061 0 : FD_TEST( ctx->next_leader_slot>ctx->reset_slot );
1062 0 : long now = fd_log_wallclock();
1063 0 : long next_leader_timestamp = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_nanos) + ctx->reset_timestamp_nanos;
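  /* Worked example with illustrative numbers: if reset_slot=100,
     next_leader_slot=103 and slot_duration_nanos=400e6, then
     next_leader_timestamp = reset_timestamp_nanos + (103-100-1)*400ms
     = reset_timestamp_nanos + 0.8s, i.e. roughly the nominal start of
     slot 103, treating the reset timestamp as the end of slot 100. */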
1064 0 : if( FD_UNLIKELY( now<next_leader_timestamp ) ) return 0;
1065 :
1066 : /* TODO:
1067 : if( FD_UNLIKELY( ctx->halted_switching_key ) ) return 0; */
1068 :
1069 : /* If a prior leader is still in the process of publishing their slot,
1070 : delay ours to let them finish ... unless they are so delayed that
1071 : we risk getting skipped by the leader following us. 1.2 seconds
1072 : is a reasonable default here, although any value between 0 and 1.6
1073 : seconds could be justified. This is arbitrary and chosen by
1074 : intuition. */
1075 0 : if( FD_UNLIKELY( now<next_leader_timestamp+(long)(3.0*ctx->slot_duration_nanos) ) ) {
1076 : /* If the max_active_descendant is >= next_leader_slot, we waited
1077 : too long and a leader after us started publishing to try and skip
1078 : us. Just start our leader slot immediately, we might win ... */
1079 0 : if( FD_LIKELY( ctx->max_active_descendant>=ctx->reset_slot && ctx->max_active_descendant<ctx->next_leader_slot ) ) {
1080 : /* If one of the leaders between the reset slot and our leader
1081 : slot is in the process of publishing (they have a descendant
1082 : bank that is in progress of being replayed), then keep waiting.
1083 : We probably wouldn't get a leader slot out before they
1084 : finished.
1085 :
1086 : Unless... we are past the deadline to start our slot by more
1087 : than 1.2 seconds, in which case we should probably start it to
1088 : avoid getting skipped by the leader behind us. */
1089 0 : return 0;
1090 0 : }
1091 0 : }
1092 :
1093 0 : ctx->is_leader = 1;
1094 0 : ctx->highwater_leader_slot = fd_ulong_max( ctx->next_leader_slot, fd_ulong_if( ctx->highwater_leader_slot==ULONG_MAX, 0UL, ctx->highwater_leader_slot ) );
1095 :
1096 0 : FD_LOG_NOTICE(( "becoming leader for slot %lu, parent slot is %lu", ctx->next_leader_slot, ctx->reset_slot ));
1097 :
1098 : /* Acquires bank, sets up initial state, and refcnts it. */
1099 0 : fd_bank_t * bank = prepare_leader_bank( ctx, ctx->next_leader_slot, &ctx->reset_block_id, stem );
1100 :
1101 0 : fd_became_leader_t * msg = fd_chunk_to_laddr( ctx->pack_out->mem, ctx->pack_out->chunk );
1102 0 : msg->slot = ctx->next_leader_slot;
1103 0 : msg->slot_start_ns = now;
1104 0 : msg->slot_end_ns = now+(long)ctx->slot_duration_nanos;
1105 0 : msg->bank = NULL;
1106 0 : msg->bank_idx = fd_banks_get_pool_idx( ctx->banks, bank );
1107 0 : msg->ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1108 0 : msg->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
1109 0 : msg->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)msg->ticks_per_slot);
1110 :
1111 0 : if( FD_UNLIKELY( msg->hashcnt_per_tick==1UL ) ) {
1112 : /* Low power producer, maximum of one microblock per tick in the slot */
1113 0 : msg->max_microblocks_in_slot = msg->ticks_per_slot;
1114 0 : } else {
1115 : /* See the long comment in after_credit for this limit */
1116 0 : msg->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, msg->ticks_per_slot*(msg->hashcnt_per_tick-1UL) );
1117 0 : }
1118 :
1119 0 : msg->total_skipped_ticks = msg->ticks_per_slot*(ctx->next_leader_slot-ctx->reset_slot);
1120 0 : msg->epoch = fd_slot_to_epoch( fd_bank_epoch_schedule_query( ctx->slot_ctx->bank ), ctx->next_leader_slot, NULL );
1121 0 : fd_memset( msg->bundle, 0, sizeof(msg->bundle) );
1122 :
1123 0 : fd_cost_tracker_t const * cost_tracker = fd_bank_cost_tracker_locking_query( bank );
1124 :
1125 0 : msg->limits.slot_max_cost = cost_tracker->block_cost_limit;
1126 0 : msg->limits.slot_max_vote_cost = cost_tracker->vote_cost_limit;
1127 0 : msg->limits.slot_max_write_cost_per_acct = cost_tracker->account_cost_limit;
1128 :
1129 0 : fd_bank_cost_tracker_end_locking_query( bank );
1130 :
1131 0 : if( FD_UNLIKELY( msg->ticks_per_slot+msg->total_skipped_ticks>USHORT_MAX ) ) {
1132 : /* There can be at most USHORT_MAX skipped ticks, because the
1133 : parent_offset field in the shred data is only 2 bytes wide. */
1134 0 : FD_LOG_ERR(( "too many skipped ticks %lu for slot %lu, chain must halt", msg->ticks_per_slot+msg->total_skipped_ticks, ctx->next_leader_slot ));
1135 0 : }
1136 :
1137 0 : ulong sig = fd_disco_poh_sig( ctx->next_leader_slot, POH_PKT_TYPE_BECAME_LEADER, 0UL );
1138 0 : fd_stem_publish( stem, ctx->pack_out->idx, sig, ctx->pack_out->chunk, sizeof(fd_became_leader_t), 0UL, 0UL, 0UL );
1139 0 : ctx->pack_out->chunk = fd_dcache_compact_next( ctx->pack_out->chunk, sizeof(fd_became_leader_t), ctx->pack_out->chunk0, ctx->pack_out->wmark );
1140 :
1141 0 : ctx->next_leader_slot = ULONG_MAX;
1142 :
1143 0 : return 1;
1144 0 : }
1145 :
1146 : static void
1147 : unbecome_leader( fd_replay_tile_t * ctx,
1148 0 : fd_poh_leader_slot_ended_t const * slot_ended ) {
1149 :
1150 0 : FD_TEST( ctx->is_booted );
1151 0 : FD_TEST( ctx->is_leader );
1152 :
1153 0 : FD_TEST( ctx->highwater_leader_slot>=slot_ended->slot );
1154 0 : FD_TEST( ctx->next_leader_slot>ctx->highwater_leader_slot );
1155 0 : ctx->is_leader = 0;
1156 :
1157 : /* Update the poh hash in the bank. We will want to maintain a refcnt
1158 : on the bank until we have received the block id for the block after
1159 : it has been shredded. */
1160 0 : fd_bank_t * bank = fd_banks_get_bank( ctx->banks, fd_eslot( slot_ended->slot, 0UL ) );
1161 0 : if( FD_UNLIKELY( !bank ) ) {
1162 0 : FD_LOG_CRIT(( "bank for leader slot %lu not found", slot_ended->slot ));
1163 0 : }
1164 :
1165 0 : memcpy( fd_bank_poh_modify( bank ), slot_ended->blockhash, sizeof(fd_hash_t) );
1166 0 : }
1167 :
1168 : static void
1169 : publish_reset( fd_replay_tile_t * ctx,
1170 : fd_stem_context_t * stem,
1171 0 : fd_bank_t const * bank ) {
1172 0 : if( FD_LIKELY( ctx->pack_out->idx==ULONG_MAX ) ) return;
1173 :
1174 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last( fd_bank_block_hash_queue_query( bank ) );
1175 0 : FD_TEST( block_hash );
1176 :
1177 0 : fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->pack_out->mem, ctx->pack_out->chunk );
1178 :
1179 0 : reset->completed_slot = fd_bank_slot_get( ctx->slot_ctx->bank );
1180 0 : reset->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
1181 0 : reset->ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1182 0 : reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);
1183 0 : fd_memcpy( reset->completed_blockhash, block_hash->uc, sizeof(fd_hash_t) );
1184 :
1185 0 : ulong ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1186 0 : if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
1187 : /* Low power producer, maximum of one microblock per tick in the slot */
1188 0 : reset->max_microblocks_in_slot = ticks_per_slot;
1189 0 : } else {
1190 : /* See the long comment in after_credit for this limit */
1191 0 : reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
1192 0 : }
1193 0 : reset->next_leader_slot = ctx->next_leader_slot;
1194 :
1195 0 : ulong sig = fd_disco_poh_sig( ctx->next_leader_slot, POH_PKT_TYPE_FEAT_ACT_SLOT /* Rubbish .. but threads the needle correctly for now */, 0UL );
1196 0 : fd_stem_publish( stem, ctx->pack_out->idx, sig, ctx->pack_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1197 0 : ctx->pack_out->chunk = fd_dcache_compact_next( ctx->pack_out->chunk, sizeof(fd_poh_reset_t), ctx->pack_out->chunk0, ctx->pack_out->wmark );
1198 0 : }
1199 :
1200 : static void
1201 : boot_genesis( fd_replay_tile_t * ctx,
1202 : fd_stem_context_t * stem,
1203 : ulong in_idx,
1204 0 : ulong chunk ) {
1205 0 : FD_TEST( ctx->bootstrap );
1206 :
1207 0 : uchar const * lthash = (uchar*)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
1208 0 : uchar const * genesis_hash = (uchar*)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk )+sizeof(fd_lthash_value_t);
1209 :
1210 : // TODO: Do not pass the fd_types type between tiles, it has offsets
1211 : // that are unsafe and can't be validated as being in-bounds. Need to
1212 : // pass an actual owned genesis type.
1213 0 : fd_genesis_solana_global_t const * genesis = fd_type_pun( (uchar*)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk )+sizeof(fd_hash_t)+sizeof(fd_lthash_value_t) );
1214 :
1215 0 : fd_runtime_read_genesis( ctx->slot_ctx, fd_type_pun_const( genesis_hash ), fd_type_pun_const( lthash ), genesis, ctx->runtime_spad );
1216 :
1217 0 : publish_stake_weights( ctx, stem, ctx->slot_ctx, 0 );
1218 0 : publish_stake_weights( ctx, stem, ctx->slot_ctx, 1 );
1219 :
1220 : /* We call this after fd_runtime_read_genesis, which sets up the
1221 : bank state needed by init_after_snapshot. */
1222 0 : init_after_snapshot( ctx );
1223 :
1224 : /* Initialize store for genesis case, similar to snapshot case */
1225 0 : fd_hash_t genesis_block_id = { .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
1226 0 : fd_store_exacq( ctx->store );
1227 0 : if( FD_UNLIKELY( fd_store_root( ctx->store ) ) ) {
1228 0 : FD_LOG_CRIT(( "invariant violation: store root is not 0 for genesis" ));
1229 0 : }
1230 0 : fd_store_insert( ctx->store, 0, &genesis_block_id );
1231 0 : ctx->store->slot0 = 0UL; /* Genesis slot */
1232 0 : fd_store_exrel( ctx->store );
1233 :
1234 : /* Initialize eslot map. */
1235 0 : fd_eslot_mgr_ele_insert_initial( ctx->eslot_mgr, 0UL );
1236 :
1237 0 : ctx->consensus_root_slot = 0UL;
1238 0 : ctx->published_root_slot = 0UL;
1239 0 : fd_sched_block_add_done( ctx->sched, &(fd_sched_block_id_t){ .slot = 0UL, .prime = 0UL }, NULL );
1240 :
1241 0 : fd_bank_block_height_set( ctx->slot_ctx->bank, 1UL );
1242 :
1243 0 : ctx->reset_slot = 0UL;
1244 0 : ctx->reset_timestamp_nanos = fd_log_wallclock();
1245 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, 1UL, ctx->identity_pubkey );
1246 0 : ctx->consensus_root_slot = 0UL;
1247 :
1248 0 : publish_slot_completed( ctx, stem, ctx->slot_ctx->bank, 1 );
1249 0 : publish_root_advanced( ctx, stem );
1250 0 : publish_reset( ctx, stem, ctx->slot_ctx->bank );
1251 :
1252 0 : ctx->is_booted = 1;
1253 0 : maybe_become_leader( ctx, stem );
1254 0 : }
1255 :
1256 : static void
1257 : on_snapshot_message( fd_replay_tile_t * ctx,
1258 : fd_stem_context_t * stem,
1259 : ulong in_idx,
1260 : ulong chunk,
1261 0 : ulong sig ) {
1262 0 : ulong msg = fd_ssmsg_sig_message( sig );
1263 0 : if( FD_LIKELY( msg==FD_SSMSG_DONE ) ) {
1264 : /* An end of message notification indicates the snapshot is loaded.
1265 : Replay is able to start executing from this point onwards. */
1266 : /* TODO: replay should finish booting. Could make replay a
1267 : state machine and set the state here accordingly. */
1268 0 : ctx->is_booted = 1;
1269 :
1270 0 : ulong snapshot_slot = fd_bank_slot_get( ctx->slot_ctx->bank );
1271 : /* FIXME: This is a hack because the block id of the snapshot slot
1272 : is not provided in the snapshot. A possible solution is to get
1273 : the block id of the snapshot slot from repair. */
1274 0 : fd_hash_t manifest_block_id = { .ul = { FD_RUNTIME_INITIAL_BLOCK_ID } };
1275 :
1276 0 : fd_store_exacq( ctx->store );
1277 0 : FD_TEST( !fd_store_root( ctx->store ) );
1278 0 : fd_store_insert( ctx->store, 0, &manifest_block_id );
1279 0 : ctx->store->slot0 = snapshot_slot; /* FIXME manifest_block_id */
1280 0 : fd_store_exrel( ctx->store );
1281 :
1282 : /* Typically, when we cross an epoch boundary during normal
1283 : operation, we publish the stake weights for the new epoch. But
1284 : since we are starting from a snapshot, we need to publish two
1285 : epochs worth of stake weights: the previous epoch (which is
1286 : needed for voting on the current epoch), and the current epoch
1287 : (which is needed for voting on the next epoch). */
1288 0 : publish_stake_weights( ctx, stem, ctx->slot_ctx, 0 );
1289 0 : publish_stake_weights( ctx, stem, ctx->slot_ctx, 1 );
1290 :
1291 0 : fd_eslot_mgr_ele_insert_initial( ctx->eslot_mgr, snapshot_slot );
1292 0 : ctx->consensus_root_slot = snapshot_slot;
1293 0 : ctx->published_root_slot = snapshot_slot;
1294 0 : fd_sched_block_add_done( ctx->sched, &(fd_sched_block_id_t){ .slot = snapshot_slot&FD_ESLOT_SLOT_LSB_MASK, .prime = 0UL }, NULL );
1295 :
1296 0 : fd_features_restore( ctx->slot_ctx, ctx->runtime_spad );
1297 :
1298 0 : fd_runtime_update_leaders( ctx->slot_ctx->bank, fd_bank_slot_get( ctx->slot_ctx->bank ), ctx->runtime_spad );
1299 :
1300 : /* We call this after the snapshot manifest has been restored, which
1301 : sets up the bank state needed by init_after_snapshot. */
1302 0 : init_after_snapshot( ctx );
1303 :
1304 0 : ctx->slot_ctx->bank->flags |= FD_BANK_FLAGS_FROZEN;
1305 :
1306 0 : publish_slot_completed( ctx, stem, ctx->slot_ctx->bank, 1 );
1307 0 : publish_root_advanced( ctx, stem );
1308 :
1309 0 : return;
1310 0 : }
1311 :
1312 0 : switch( msg ) {
1313 0 : case FD_SSMSG_MANIFEST_FULL:
1314 0 : case FD_SSMSG_MANIFEST_INCREMENTAL: {
1315 : /* We may either receive a full snapshot manifest or an
1316 : incremental snapshot manifest. Note that this external message
1317 : id is only used temporarily because replay cannot yet receive
1318 : the firedancer-internal snapshot manifest message. */
1319 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark ) )
1320 0 : FD_LOG_ERR(( "chunk %lu from in %d corrupt, not in range [%lu,%lu]", chunk, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
1321 :
1322 0 : fd_ssload_recover( fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), ctx->slot_ctx );
1323 0 : break;
1324 0 : }
1325 0 : default: {
1326 0 : FD_LOG_ERR(( "Received unknown snapshot message with msg %lu", msg ));
1327 0 : return;
1328 0 : }
1329 0 : }
1330 :
1331 0 : return;
1332 0 : }
1333 :
1334 : /* Returns 1 if charge_busy. */
1335 : static int
1336 : replay( fd_replay_tile_t * ctx,
1337 0 : fd_stem_context_t * stem ) {
1338 :
1339 0 : if( FD_UNLIKELY( !ctx->is_booted ) ) return 0;
1340 :
1341 0 : if( ctx->block_draining ) {
1342 0 : fd_eslot_t eslot = fd_bank_eslot_get( ctx->slot_ctx->bank );
1343 0 : if( fd_sched_block_is_done( ctx->sched, &eslot ) ) {
1344 0 : ctx->block_draining = 0;
1345 0 : replay_block_finalize( ctx, stem );
1346 0 : return 1;
1347 0 : }
1348 0 : return 0;
1349 0 : }
1350 :
1351 0 : int charge_busy = 0;
1352 0 : while( ctx->exec_ready_bitset ) {
1353 0 : fd_sched_txn_ready_t ready_txn[ 1 ];
1354 0 : if( FD_LIKELY( fd_sched_txn_next_ready( ctx->sched, ready_txn ) ) ) {
1355 0 : FD_TEST( ready_txn->txn_id!=FD_SCHED_TXN_ID_NULL );
1356 0 : charge_busy = 1;
1357 0 : fd_eslot_t bank_eslot = fd_bank_eslot_get( ctx->slot_ctx->bank );
1358 :
1359 0 : if( FD_UNLIKELY( ready_txn->block_start ) ) {
1360 0 : replay_block_start( ctx,
1361 0 : stem,
1362 0 : ready_txn->block_id,
1363 0 : ready_txn->parent_block_id );
1364 0 : fd_sched_txn_done( ctx->sched, ready_txn->txn_id );
1365 0 : replay_ctx_switch( ctx, ready_txn->block_id );
1366 0 : continue;
1367 0 : }
1368 :
1369 0 : if( FD_UNLIKELY( ready_txn->block_end ) ) {
1370 0 : ctx->block_draining = 1;
1371 0 : fd_sched_txn_done( ctx->sched, ready_txn->txn_id );
1372 0 : break;
1373 0 : }
1374 :
1375 : /* We got a real transaction. See if we need to context switch. */
1376 0 : if( FD_UNLIKELY( bank_eslot.id!=ready_txn->block_id.id ) ) {
1377 : /* Context switch. */
1378 0 : replay_ctx_switch( ctx, ready_txn->block_id );
1379 0 : }
1380 :
1381 : /* Find an exec tile and mark it busy. */
1382 0 : int exec_idx = fd_ulong_find_lsb( ctx->exec_ready_bitset );
1383 0 : ctx->exec_ready_bitset = fd_ulong_pop_lsb( ctx->exec_ready_bitset );
1384 0 : ctx->exec_txn_id[ exec_idx ] = ready_txn->txn_id;
1385 :
1386 0 : fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, ready_txn->txn_id );
1387 :
1388 : /* FIXME: this should be done during txn parsing so that we don't
1389 : have to loop over all accounts a second time. */
1390 : /* Insert or reverify invoked programs for this epoch, if needed. */
1391 0 : fd_runtime_update_program_cache( ctx->slot_ctx, txn_p, ctx->runtime_spad );
1392 :
1393 : /* At this point, we are going to send the txn down the execution
1394 : pipeline. Increment the refcnt so we don't prematurely prune a
1395 : bank that's needed by an in-flight txn. */
1396 0 : ctx->slot_ctx->bank->refcnt++;
1397 :
1398 : /* Send. */
1399 0 : fd_replay_out_link_t * exec_out = &ctx->exec_out[ exec_idx ];
1400 0 : fd_exec_txn_msg_t * exec_msg = (fd_exec_txn_msg_t *)fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
1401 0 : memcpy( &exec_msg->txn, txn_p, sizeof(fd_txn_p_t) );
1402 0 : exec_msg->bank_idx = fd_banks_get_pool_idx( ctx->banks, ctx->slot_ctx->bank );
1403 0 : fd_stem_publish( stem, exec_out->idx, EXEC_NEW_TXN_SIG, exec_out->chunk, sizeof(fd_exec_txn_msg_t), 0UL, 0UL, 0UL );
1404 0 : exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(fd_exec_txn_msg_t), exec_out->chunk0, exec_out->wmark );
1405 0 : } else {
1406 : /* Nothing more the scheduler can offer. */
1407 0 : break;
1408 0 : }
1409 0 : }
1410 :
1411 0 : return charge_busy;
1412 0 : }
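/* Illustrative helper (an assumption-level sketch, not used by the
   tile): the context switch check in replay() above compares the
   packed ids of two fd_eslot_t keys; a dispatched transaction whose
   block id differs from the bank's current eslot forces a
   replay_ctx_switch(). */
FD_FN_UNUSED static int
example_needs_ctx_switch( fd_eslot_t bank_eslot,
                          fd_eslot_t txn_block_id ) {
  return bank_eslot.id!=txn_block_id.id;
}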
1413 :
1414 : static void
1415 : after_credit( fd_replay_tile_t * ctx,
1416 : fd_stem_context_t * stem,
1417 : int * opt_poll_in,
1418 0 : int * charge_busy ) {
1419 0 : if( FD_UNLIKELY( !ctx->is_booted ) ) return;
1420 :
1421 : /* Send any outstanding vote states to tower. TODO: Not sure why this
1422 : is here? Should happen when the slot completes instead? */
1423 0 : if( FD_UNLIKELY( ctx->vote_tower_out_idx<ctx->vote_tower_out_len ) ) {
1424 0 : *charge_busy = 1;
1425 0 : publish_next_vote_tower( ctx, stem );
1426 : /* Don't continue polling for fragments but instead skip to the next
1427 : iteration of the stem loop.
1428 :
1429 : This is necessary so that all the vote states for the end of a
1430 : particular slot are sent in one atomic block, and are not
1431 : interleaved with vote states at the end of other slots. */
1432 0 : *opt_poll_in = 0;
1433 0 : return;
1434 0 : }
1435 :
1436 0 : if( FD_UNLIKELY( maybe_become_leader( ctx, stem ) ) ) {
1437 0 : *charge_busy = 1;
1438 0 : *opt_poll_in = 0;
1439 0 : return;
1440 0 : }
1441 :
1442 0 : *charge_busy = replay( ctx, stem );
1443 0 : }
1444 :
1445 : static int
1446 : before_frag( fd_replay_tile_t * ctx,
1447 : ulong in_idx,
1448 : ulong seq,
1449 0 : ulong sig ) {
1450 0 : (void)seq;
1451 0 : (void)sig;
1452 :
1453 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_REPAIR ) ) {
1454 : /* If the transaction scheduler is full, there is nowhere for the
1455 : fragment to go and we cannot pull it off the incoming queue yet.
1456 : This will cause backpressure to the repair system. */
1457 0 : if( FD_UNLIKELY( !fd_sched_can_ingest( ctx->sched ) ) ) return -1;
1458 0 : }
1459 :
1460 0 : return 0;
1461 0 : }
1462 :
1463 : static void
1464 : process_txn_finalized( fd_replay_tile_t * ctx,
1465 0 : fd_writer_replay_txn_finalized_msg_t const * msg ) {
1466 0 : FD_TEST( !fd_ulong_extract_bit( ctx->exec_ready_bitset, msg->exec_tile_id ) );
1467 0 : ctx->exec_ready_bitset = fd_ulong_set_bit( ctx->exec_ready_bitset, msg->exec_tile_id );
1468 0 : ctx->slot_ctx->bank->refcnt--;
1469 0 : fd_sched_txn_done( ctx->sched, ctx->exec_txn_id[ msg->exec_tile_id ] );
1470 : /* Reference counter just decreased, and an exec tile just got freed
1471 : up. If there's a need to prune more aggressively, we could check
1472 : here whether more slots just became publishable and publish them.
1473 : Not publishing here shouldn't bloat the fork tree too much though. We
1474 : mark minority forks dead as soon as we can, and execution dispatch
1475 : stops on dead blocks. So shortly afterwards, dead blocks should be
1476 : eligible for pruning as in-flight transactions retire from the
1477 : execution pipeline. */
1478 :
1479 : /* Abort bad blocks. */
1480 0 : if( FD_UNLIKELY( fd_banks_is_bank_dead( ctx->slot_ctx->bank ) ) ) {
1481 0 : fd_eslot_t eslot = fd_bank_eslot_get( ctx->slot_ctx->bank );
1482 0 : fd_sched_block_abandon( ctx->sched, &eslot );
1483 0 : }
1484 0 : }
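/* An assumption-level toy (not used by the tile) showing the
   exec_ready_bitset protocol shared by replay() above and
   process_txn_finalized(): bit i set means exec tile i is idle.
   Dispatch claims the lowest idle tile by clearing its bit; the
   writer's txn-finalized message sets it again. */
FD_FN_UNUSED static void
example_exec_bitset( void ) {
  ulong bitset = 0x7UL;                                 /* hypothetical: exec tiles 0,1,2 idle */

  int exec_idx = fd_ulong_find_lsb( bitset );           /* claim exec tile 0 ...              */
  bitset       = fd_ulong_pop_lsb ( bitset );           /* ... and mark it busy (bitset==0x6) */

  /* ... the transaction executes on exec tile exec_idx ... */

  FD_TEST( !fd_ulong_extract_bit( bitset, exec_idx ) ); /* it must still be marked busy       */
  bitset = fd_ulong_set_bit( bitset, exec_idx );        /* release it (bitset==0x7 again)     */
  (void)bitset;
}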
1485 :
1486 : static void
1487 : process_solcap_account_update( fd_replay_tile_t * ctx,
1488 0 : fd_capture_ctx_account_update_msg_t const * msg ) {
1489 0 : if( FD_UNLIKELY( !ctx->capture_ctx || !ctx->capture_ctx->capture ) ) return;
1490 0 : if( FD_UNLIKELY( fd_bank_slot_get( ctx->slot_ctx->bank )<ctx->capture_ctx->solcap_start_slot ) ) return;
1491 :
1492 0 : uchar const * account_data = (uchar const *)fd_type_pun_const( msg )+sizeof(fd_capture_ctx_account_update_msg_t);
1493 0 : fd_solcap_write_account( ctx->capture_ctx->capture, &msg->pubkey, &msg->info, account_data, msg->data_sz );
1494 0 : }
1495 :
1496 : static void
1497 : funk_publish( fd_replay_tile_t * ctx,
1498 0 : ulong slot ) {
1499 0 : fd_funk_txn_start_write( ctx->funk );
1500 :
1501 0 : fd_funk_txn_xid_t xid = { .ul[0] = slot, .ul[1] = slot };
1502 0 : fd_funk_txn_map_t * txn_map = fd_funk_txn_map( ctx->funk );
1503 0 : fd_funk_txn_t * to_root_txn = fd_funk_txn_query( &xid, txn_map );
1504 :
1505 0 : if( FD_UNLIKELY( xid.ul[0]!=slot ) ) FD_LOG_CRIT(( "Invariant violation: xid.ul[0] != slot %lu %lu", xid.ul[0], slot ));
1506 :
1507 0 : FD_LOG_DEBUG(( "publishing slot=%lu xid=%lu", slot, xid.ul[0] ));
1508 :
1509 : /* This is the standard case. Publish all transactions up to and
1510 : including the watermark. This will publish any in-prep ancestors
1511 : of root_txn as well. */
1512 0 : if( FD_UNLIKELY( !fd_funk_txn_publish( ctx->funk, to_root_txn, 1 ) ) ) FD_LOG_CRIT(( "failed to funk publish slot %lu", slot ));
1513 :
1514 0 : fd_funk_txn_end_write( ctx->funk );
1515 0 : }
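/* Sketch (hypothetical helper, not used by the tile) of how this tile
   derives a funk transaction id from a slot: both words of the xid
   are the slot number, as in funk_publish above.  The equivocation
   FIXME in process_fec_set notes this will eventually need to be
   keyed by (slot, prime count) instead. */
FD_FN_UNUSED static fd_funk_txn_xid_t
example_xid_for_slot( ulong slot ) {
  fd_funk_txn_xid_t xid = { .ul = { slot, slot } };
  return xid;
}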
1516 :
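/* advance_published_root advances the locally published root towards
   the consensus root chosen by tower: it notifies the scheduler of
   the new root, prepares the most recent publishable bank (which may
   lag the consensus root while banks are still referenced by
   in-flight work), publishes the store and funk up to it, and prunes
   the eslot manager and banks behind it. */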
1517 : static void
1518 0 : advance_published_root( fd_replay_tile_t * ctx ) {
1519 0 : fd_eslot_ele_t * ele = fd_eslot_mgr_ele_query_merkle_root( ctx->eslot_mgr, &ctx->consensus_root );
1520 0 : if( FD_UNLIKELY( !ele ) ) FD_LOG_CRIT(( "invariant violation: eslot not found for consensus root %s", FD_BASE58_ENC_32_ALLOCA( &ctx->consensus_root ) ));
1521 0 : fd_sched_root_notify( ctx->sched, &ele->eslot );
1522 :
1523 : /* If the identity vote has been seen on a bank that should be rooted,
1524 : then we are now ready to produce blocks. */
1525 0 : if( !ctx->has_identity_vote_rooted ) {
1526 0 : fd_bank_t * root_bank = fd_banks_get_bank( ctx->banks, fd_eslot( ctx->consensus_root_slot, 0UL ) );
1527 0 : if( FD_LIKELY( !!root_bank ) ) {
1528 0 : if( FD_UNLIKELY( !ctx->has_identity_vote_rooted && fd_bank_has_identity_vote_get( root_bank ) ) ) {
1529 0 : ctx->has_identity_vote_rooted = 1;
1530 0 : }
1531 0 : }
1532 0 : }
1533 :
1534 0 : fd_eslot_t publishable_root;
1535 0 : if( FD_UNLIKELY( !fd_banks_publish_prepare( ctx->banks, fd_eslot( ctx->consensus_root_slot, 0UL ), &publishable_root ) ) ) return;
1536 :
1537 0 : fd_bank_t * bank = fd_banks_get_bank( ctx->banks, publishable_root );
1538 0 : FD_TEST( bank );
1539 :
1540 0 : fd_eslot_ele_t * publishable_root_ele = fd_eslot_mgr_ele_query_eslot( ctx->eslot_mgr, publishable_root );
1541 :
1542 0 : long exacq_start, exacq_end, exrel_end;
1543 0 : FD_STORE_EXCLUSIVE_LOCK( ctx->store, exacq_start, exacq_end, exrel_end ) {
1544 0 : fd_store_publish( ctx->store, &publishable_root_ele->merkle_root );
1545 0 : } FD_STORE_EXCLUSIVE_LOCK_END;
1546 :
1547 0 : fd_histf_sample( ctx->metrics.store_publish_wait, (ulong)fd_long_max( exacq_end-exacq_start, 0UL ) );
1548 0 : fd_histf_sample( ctx->metrics.store_publish_work, (ulong)fd_long_max( exrel_end-exacq_end, 0UL ) );
1549 :
1550 0 : ulong publishable_root_slot = fd_bank_slot_get( bank );
1551 :
1552 0 : funk_publish( ctx, publishable_root_slot );
1553 :
1554 0 : fd_sched_root_publish( ctx->sched, &ele->eslot );
1555 :
1556 0 : fd_eslot_mgr_publish( ctx->eslot_mgr, ctx->published_root_slot, publishable_root_slot );
1557 :
1558 0 : fd_banks_publish( ctx->banks, publishable_root );
1559 :
1560 0 : ctx->published_root_slot = publishable_root_slot;
1561 0 : }
1562 :
1563 : static void
1564 : process_tower_update( fd_replay_tile_t * ctx,
1565 : fd_stem_context_t * stem,
1566 0 : fd_tower_slot_done_t const * msg ) {
1567 :
1568 0 : ctx->reset_block_id = msg->reset_block_id;
1569 0 : ctx->reset_slot = msg->reset_slot;
1570 0 : ctx->reset_timestamp_nanos = fd_log_wallclock();
1571 0 : ulong min_leader_slot = fd_ulong_max( msg->reset_slot+1UL, fd_ulong_if( ctx->highwater_leader_slot==ULONG_MAX, 0UL, ctx->highwater_leader_slot ) );
1572 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, min_leader_slot, ctx->identity_pubkey );
1573 :
1574 0 : fd_eslot_ele_t * ele = fd_eslot_mgr_ele_query_merkle_root( ctx->eslot_mgr, &msg->reset_block_id );
1575 :
1576 0 : fd_bank_t * bank = fd_banks_get_bank( ctx->banks, ele->eslot );
1577 0 : if( FD_UNLIKELY( !bank ) ) FD_LOG_ERR(( "error looking for bank with slot %lu", (ulong)ele->eslot.slot ));
1578 0 : FD_TEST( bank );
1579 :
1580 0 : if( FD_LIKELY( ctx->pack_out->idx!=ULONG_MAX ) ) {
1581 0 : fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->pack_out->mem, ctx->pack_out->chunk );
1582 :
1583 0 : reset->completed_slot = ctx->reset_slot;
1584 0 : reset->hashcnt_per_tick = fd_bank_hashes_per_tick_get( bank );
1585 0 : reset->ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1586 0 : reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);
1587 :
1588 0 : fd_memcpy( reset->completed_block_id, &ele->merkle_root, sizeof(fd_hash_t) );
1589 :
1590 0 : fd_blockhashes_t const * block_hash_queue = fd_bank_block_hash_queue_query( bank );
1591 0 : fd_hash_t const * last_hash = fd_blockhashes_peek_last( block_hash_queue );
1592 0 : FD_TEST( last_hash );
1593 0 : fd_memcpy( reset->completed_blockhash, last_hash->uc, sizeof(fd_hash_t) );
1594 :
1595 0 : ulong ticks_per_slot = fd_bank_ticks_per_slot_get( bank );
1596 0 : if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
1597 : /* Low power producer, maximum of one microblock per tick in the slot */
1598 0 : reset->max_microblocks_in_slot = ticks_per_slot;
1599 0 : } else {
1600 : /* See the long comment in after_credit for this limit */
1601 0 : reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
1602 0 : }
1603 0 : reset->next_leader_slot = ctx->next_leader_slot;
1604 :
1605 0 : ulong sig = fd_disco_poh_sig( ctx->next_leader_slot, POH_PKT_TYPE_FEAT_ACT_SLOT /* Rubbish .. but threads the needle correctly for now */, 0UL );
1606 0 : fd_stem_publish( stem, ctx->pack_out->idx, sig, ctx->pack_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1607 0 : ctx->pack_out->chunk = fd_dcache_compact_next( ctx->pack_out->chunk, sizeof(fd_poh_reset_t), ctx->pack_out->chunk0, ctx->pack_out->wmark );
1608 0 : }
1609 :
1610 0 : FD_LOG_INFO(( "tower_update(reset_slot=%lu, next_leader_slot=%lu, vote_slot=%lu, new_root=%d, root_slot=%lu, root_block_id=%s)", msg->reset_slot, ctx->next_leader_slot, msg->vote_slot, msg->new_root, msg->root_slot, FD_BASE58_ENC_32_ALLOCA( &msg->root_block_id ) ));
1611 0 : maybe_become_leader( ctx, stem );
1612 :
1613 0 : if( FD_LIKELY( msg->new_root ) ) {
1614 :
1615 0 : FD_TEST( msg->root_slot>=ctx->consensus_root_slot );
1616 0 : ctx->consensus_root_slot = msg->root_slot;
1617 0 : ctx->consensus_root = msg->root_block_id;
1618 :
1619 0 : publish_root_advanced( ctx, stem );
1620 0 : advance_published_root( ctx );
1621 0 : }
1622 0 : }
1623 :
1624 : static void
1625 : process_fec_set( fd_replay_tile_t * ctx,
1626 : fd_stem_context_t * stem,
1627 0 : fd_reasm_fec_t * reasm_fec ) {
1628 :
1629 : /* If the incoming reasm_fec's slot is a slot that we were leader for,
1630 : then we need to do some special handling. If we were the leader
1631 : this means that we have already finished executing and packing the
1632 : block: the accounts database and bank are already updated. We are
1633 : now receiving the FEC sets for the block from the repair tile,
1634 : since the packed block has been shredded.
1635 :
1636 : The only thing left to do is to populate the block-id in the bank
1637 : and send a message to the tower tile so that we can correctly vote
1638 : on our leader block. We are also now free to remove a refcnt from
1639 : the bank. */
1640 :
1641 0 : if( fd_eslot_mgr_is_leader( ctx->eslot_mgr, reasm_fec->slot ) ) {
1642 0 : if( !!reasm_fec->slot_complete ) {
1643 : /* The block id for the slot is the merkle root for the last FEC
1644 : set. We need to update the fd_eslot_mgr_t entry and the
1645 : corresponding fd_bank_t with the new merkle root. */
1646 0 : fd_eslot_ele_t * ele = fd_eslot_mgr_ele_query_eslot( ctx->eslot_mgr, fd_eslot( reasm_fec->slot, 0UL ) );
1647 0 : if( FD_UNLIKELY( !ele ) ) {
1648 0 : FD_LOG_CRIT(( "eslot_mgr entry for leader slot %lu not found", reasm_fec->slot ));
1649 0 : }
1650 0 : fd_eslot_mgr_rekey_merkle_root( ctx->eslot_mgr, ele, &reasm_fec->key );
1651 :
1652 0 : fd_bank_t * bank = fd_banks_get_bank( ctx->banks, fd_eslot( reasm_fec->slot, 0UL ) );
1653 0 : if( FD_UNLIKELY( !bank ) ) {
1654 0 : FD_LOG_CRIT(( "bank for leader slot %lu not found", reasm_fec->slot ));
1655 0 : }
1656 0 : FD_LOG_WARNING(("FINI LEADER BANK %lu", reasm_fec->slot));
1657 0 : fini_leader_bank( ctx, bank, stem );
1658 0 : bank->refcnt--;
1659 0 : }
1660 : /* We don't want to replay the block again, so we will not add any
1661 : of the FEC sets for a leader block to the scheduler. */
1662 0 : return;
1663 0 : }
1664 :
1665 : /* Forks form a partial ordering over FEC sets. The Repair tile
1666 : delivers FEC sets in-order per fork, but FEC set ordering across
1667 : forks is arbitrary. */
1668 0 : fd_sched_fec_t sched_fec[ 1 ];
1669 :
1670 : /* Read FEC set from the store. This should happen before we try to
1671 : ingest the FEC set. This allows us to filter out frags that were
1672 : in-flight when we published away minority forks that the frags land
1673 : on. These frags would have no bank to execute against, because
1674 : their corresponding banks, or parent banks, have also been pruned
1675 : during publishing. A query against store will rightfully tell us
1676 : that the underlying data is not found, implying that this is for a
1677 : minority fork that we can safely ignore. */
1678 0 : long shacq_start, shacq_end, shrel_end;
1679 0 : FD_STORE_SHARED_LOCK( ctx->store, shacq_start, shacq_end, shrel_end ) {
1680 0 : fd_store_fec_t * store_fec = fd_store_query( ctx->store, &reasm_fec->key );
1681 0 : if( FD_UNLIKELY( !store_fec ) ) {
1682 : /* The only case in which a FEC is not found in the store after
1683 : repair has notified is if the FEC was on a minority fork that
1684 : has already been published away. In this case we abandon the
1685 : entire slice because it is no longer relevant. */
1686 0 : FD_LOG_WARNING(( "store fec for slot: %lu is on minority fork already pruned by publish. abandoning slice. root: %lu. pruned merkle: %s", reasm_fec->slot, ctx->consensus_root_slot, FD_BASE58_ENC_32_ALLOCA( &reasm_fec->key ) ));
1687 0 : return;
1688 0 : }
1689 0 : FD_TEST( store_fec );
1690 0 : sched_fec->fec = store_fec;
1691 0 : sched_fec->shred_cnt = reasm_fec->data_cnt;
1692 0 : } FD_STORE_SHARED_LOCK_END;
1693 :
1694 0 : fd_histf_sample( ctx->metrics.store_read_wait, (ulong)fd_long_max( shacq_end - shacq_start, 0UL ) );
1695 0 : fd_histf_sample( ctx->metrics.store_read_work, (ulong)fd_long_max( shrel_end - shacq_end, 0UL ) );
1696 :
1697 : /* Update the eslot_mgr with the incoming FEC. This will detect any
1698 : equivocation that may have occurred and return the corresponding
1699 : eslot that the scheduler should use. */
1700 0 : int is_equiv = 0;
1701 0 : fd_eslot_ele_t * ele = fd_eslot_mgr_ele_insert_fec( ctx->eslot_mgr,
1702 0 : reasm_fec->slot,
1703 0 : &reasm_fec->key,
1704 0 : &reasm_fec->cmr,
1705 0 : reasm_fec->fec_set_idx,
1706 0 : &is_equiv );
1707 0 : if( FD_UNLIKELY( is_equiv ) ) {
1708 : /* FIXME: There are two places where equivocation is still not
1709 : supported.
1710 : 1. The Accounts DB does not support equivocation yet. This is
1711 : a relatively simple fix and just involves keying the funk xid
1712 : by (slot, prime count) very similar to how the fd_eslot_t is
1713 : keyed.
1714 : 2. Mid-block equivocation is not yet supported. This is a little
1715 : tricky because it involves inserting all FEC sets up to the
1716 : FEC where mid-block equivocation was detected again into the
1717 : scheduler with the equivocated eslot. Another blocker for
1718 : this is that reasm fecs do not include the parent block id. */
1719 0 : FD_LOG_ERR(( "equivocation detected for slot %lu. This behavior is not yet fully supported.", reasm_fec->slot ));
1720 0 : }
1721 :
1722 0 : ulong parent_slot = reasm_fec->slot-reasm_fec->parent_off;
1723 0 : if( FD_UNLIKELY( ele->parent_eslot.slot!=parent_slot ) ) {
1724 0 : FD_LOG_ERR(( "parent_slot %lu != %lu", parent_slot, (ulong)ele->parent_eslot.slot ));
1725 0 : }
1726 :
1727 0 : sched_fec->is_last_in_batch = !!reasm_fec->data_complete;
1728 0 : sched_fec->is_last_in_block = !!reasm_fec->slot_complete;
1729 0 : sched_fec->block_id = ele->eslot;
1730 0 : sched_fec->parent_block_id = ele->parent_eslot;
1731 0 : sched_fec->alut_ctx->funk_txn = NULL; /* Corresponds to the root txn. */
1732 0 : sched_fec->alut_ctx->funk = ctx->funk;
1733 0 : sched_fec->alut_ctx->els = ctx->published_root_slot;
1734 0 : sched_fec->alut_ctx->runtime_spad = ctx->runtime_spad;
1735 0 : fd_sched_fec_ingest( ctx->sched, sched_fec );
1736 0 : }
1737 :
1738 : static void
1739 0 : process_resolv_slot_completed( fd_replay_tile_t * ctx, ulong bank_idx ) {
1740 0 : fd_bank_t * bank = fd_banks_get_bank_idx( ctx->banks, bank_idx );
1741 0 : FD_TEST( bank );
1742 :
1743 0 : bank->refcnt--;
1744 0 : }
1745 :
1746 : static inline int
1747 : returnable_frag( fd_replay_tile_t * ctx,
1748 : ulong in_idx,
1749 : ulong seq,
1750 : ulong sig,
1751 : ulong chunk,
1752 : ulong sz,
1753 : ulong ctl,
1754 : ulong tsorig,
1755 : ulong tspub,
1756 0 : fd_stem_context_t * stem ) {
1757 0 : (void)seq;
1758 0 : (void)ctl;
1759 0 : (void)tsorig;
1760 0 : (void)tspub;
1761 :
1762 0 : if( FD_UNLIKELY( sz!=0UL && (chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) )
1763 0 : FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
1764 :
1765 0 : switch( ctx->in_kind[ in_idx ] ) {
1766 0 : case IN_KIND_GENESIS:
1767 0 : boot_genesis( ctx, stem, in_idx, chunk );
1768 0 : break;
1769 0 : case IN_KIND_SNAP:
1770 0 : on_snapshot_message( ctx, stem, in_idx, chunk, sig );
1771 0 : break;
1772 0 : case IN_KIND_WRITER: {
1773 0 : process_txn_finalized( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
1774 0 : break;
1775 0 : }
1776 0 : case IN_KIND_CAPTURE: {
1777 0 : process_solcap_account_update( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
1778 0 : break;
1779 0 : }
1780 0 : case IN_KIND_POH: {
1781 0 : unbecome_leader( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
1782 0 : break;
1783 0 : }
1784 0 : case IN_KIND_RESOLV: {
1785 0 : fd_resolv_slot_exchanged_t * exchanged_slot = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
1786 0 : process_resolv_slot_completed( ctx, exchanged_slot->bank_idx );
1787 0 : break;
1788 0 : }
1789 0 : case IN_KIND_TOWER: {
1790 0 : process_tower_update( ctx, stem, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
1791 0 : break;
1792 0 : }
1793 0 : case IN_KIND_REPAIR: {
1794 0 : FD_TEST( sz==sizeof(fd_reasm_fec_t) );
1795 0 : process_fec_set( ctx, stem, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
1796 0 : break;
1797 0 : }
1798 0 : default:
1799 0 : FD_LOG_ERR(( "unhandled kind %d", ctx->in_kind[ in_idx ] ));
1800 0 : }
1801 :
1802 0 : return 0;
1803 0 : }
1804 :
1805 : static inline fd_replay_out_link_t
1806 : out1( fd_topo_t const * topo,
1807 : fd_topo_tile_t const * tile,
1808 0 : char const * name ) {
1809 0 : ulong idx = ULONG_MAX;
1810 :
1811 0 : for( ulong i=0UL; i<tile->out_cnt; i++ ) {
1812 0 : fd_topo_link_t const * link = &topo->links[ tile->out_link_id[ i ] ];
1813 0 : if( !strcmp( link->name, name ) ) {
1814 0 : if( FD_UNLIKELY( idx!=ULONG_MAX ) ) FD_LOG_ERR(( "tile %s:%lu had multiple output links named %s but expected one", tile->name, tile->kind_id, name ));
1815 0 : idx = i;
1816 0 : }
1817 0 : }
1818 :
1819 0 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) return (fd_replay_out_link_t){ .idx = ULONG_MAX, .mem = NULL, .chunk0 = 0, .wmark = 0, .chunk = 0 };
1820 :
1821 0 : void * mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ idx ] ].dcache_obj_id ].wksp_id ].wksp;
1822 0 : ulong chunk0 = fd_dcache_compact_chunk0( mem, topo->links[ tile->out_link_id[ idx ] ].dcache );
1823 0 : ulong wmark = fd_dcache_compact_wmark ( mem, topo->links[ tile->out_link_id[ idx ] ].dcache, topo->links[ tile->out_link_id[ idx ] ].mtu );
1824 :
1825 0 : return (fd_replay_out_link_t){ .idx = idx, .mem = mem, .chunk0 = chunk0, .wmark = wmark, .chunk = chunk0 };
1826 0 : }
1827 :
1828 : static void
1829 : privileged_init( fd_topo_t * topo,
1830 0 : fd_topo_tile_t * tile ) {
1831 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
1832 :
1833 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
1834 0 : fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
1835 :
1836 0 : if( FD_UNLIKELY( !strcmp( tile->replay.identity_key_path, "" ) ) ) FD_LOG_ERR(( "identity_key_path not set" ));
1837 :
1838 0 : ctx->identity_pubkey[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->replay.identity_key_path, /* pubkey only: */ 1 ) );
1839 0 : }
1840 :
1841 : static void
1842 : unprivileged_init( fd_topo_t * topo,
1843 0 : fd_topo_tile_t * tile ) {
1844 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
1845 :
1846 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
1847 0 : fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
1848 0 : void * sched_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), fd_sched_footprint() );
1849 0 : void * eslot_mgr_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_eslot_mgr_align(), fd_eslot_mgr_footprint( FD_BLOCK_MAX ) );
1850 0 : void * slot_ctx_mem = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_exec_slot_ctx_t), sizeof(fd_exec_slot_ctx_t) );
1851 0 : void * _capture_ctx = FD_SCRATCH_ALLOC_APPEND( l, FD_CAPTURE_CTX_ALIGN, FD_CAPTURE_CTX_FOOTPRINT );
1852 0 : void * spad_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_spad_align(), fd_spad_footprint( tile->replay.heap_size_gib<<30 ) );
1853 :
1854 0 : ulong store_obj_id = fd_pod_query_ulong( topo->props, "store", ULONG_MAX );
1855 0 : FD_TEST( store_obj_id!=ULONG_MAX );
1856 0 : ctx->store = fd_store_join( fd_topo_obj_laddr( topo, store_obj_id ) );
1857 0 : FD_TEST( ctx->store );
1858 :
1859 0 : ctx->vote_tower_out_idx = 0UL;
1860 0 : ctx->vote_tower_out_len = 0UL;
1861 :
1862 0 : ulong banks_obj_id = fd_pod_query_ulong( topo->props, "banks", ULONG_MAX );
1863 0 : FD_TEST( banks_obj_id!=ULONG_MAX );
1864 0 : ctx->banks = fd_banks_join( fd_topo_obj_laddr( topo, banks_obj_id ) );
1865 0 : FD_TEST( ctx->banks );
1866 :
1867 0 : fd_bank_t * bank = fd_banks_init_bank( ctx->banks, fd_eslot( 0UL, 0UL ) );
1868 0 : FD_TEST( bank );
1869 :
1870 0 : ctx->consensus_root_slot = ULONG_MAX;
1871 0 : ctx->consensus_root = (fd_hash_t){ .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
1872 :
1873 : /* Set some initial values for the bank: hardcoded features and the
1874 : cluster version. */
1875 0 : fd_cluster_version_t * cluster_version = fd_bank_cluster_version_modify( bank );
1876 0 : if( FD_UNLIKELY( sscanf( tile->replay.cluster_version, "%u.%u.%u", &cluster_version->major, &cluster_version->minor, &cluster_version->patch )!=3 ) ) {
1877 0 : FD_LOG_ERR(( "failed to decode cluster version, configured as \"%s\"", tile->replay.cluster_version ));
1878 0 : }
1879 :
1880 0 : fd_features_t * features = fd_bank_features_modify( bank );
1881 0 : fd_features_enable_cleaned_up( features, cluster_version );
1882 :
1883 0 : char const * one_off_features[ 16UL ];
1884 0 : FD_TEST( tile->replay.enable_features_cnt<=sizeof(one_off_features)/sizeof(one_off_features[0]) );
1885 0 : for( ulong i=0UL; i<tile->replay.enable_features_cnt; i++ ) one_off_features[ i ] = tile->replay.enable_features[i];
1886 0 : fd_features_enable_one_offs( features, one_off_features, (uint)tile->replay.enable_features_cnt, 0UL );
1887 :
1888 0 : FD_TEST( fd_funk_join( ctx->funk, fd_topo_obj_laddr( topo, tile->replay.funk_obj_id ) ) );
1889 :
1890 0 : ctx->tx_metadata_storage = tile->replay.tx_metadata_storage;
1891 :
1892 0 : ctx->bootstrap = tile->replay.bootstrap;
1893 0 : if( FD_UNLIKELY( ctx->bootstrap ) ) strncpy( ctx->genesis_path, tile->replay.genesis_path, sizeof(ctx->genesis_path) );
1894 :
1895 0 : ctx->capture_ctx = NULL;
1896 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) || strcmp( "", tile->replay.dump_proto_dir ) ) ) {
1897 0 : ctx->capture_ctx = fd_capture_ctx_join( fd_capture_ctx_new( _capture_ctx ) );
1898 0 : }
1899 :
1900 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) ) ) {
1901 0 : ctx->capture_ctx->checkpt_freq = ULONG_MAX;
1902 0 : ctx->capture_file = fopen( tile->replay.solcap_capture, "w+" );
1903 0 : if( FD_UNLIKELY( !ctx->capture_file ) ) FD_LOG_ERR(( "fopen(%s) failed (%d-%s)", tile->replay.solcap_capture, errno, fd_io_strerror( errno ) ));
1904 :
1905 0 : ctx->capture_ctx->capture_txns = 0;
1906 0 : ctx->capture_ctx->solcap_start_slot = tile->replay.capture_start_slot;
1907 0 : fd_solcap_writer_init( ctx->capture_ctx->capture, ctx->capture_file );
1908 0 : }
1909 :
1910 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.dump_proto_dir ) ) ) {
1911 0 : ctx->capture_ctx->dump_proto_output_dir = tile->replay.dump_proto_dir;
1912 0 : if( FD_LIKELY( tile->replay.dump_block_to_pb ) ) ctx->capture_ctx->dump_block_to_pb = tile->replay.dump_block_to_pb;
1913 0 : }
1914 :
1915 0 : ctx->exec_cnt = fd_topo_tile_name_cnt( topo, "exec" );
1916 :
1917 0 : FD_TEST( FD_PACK_MAX_BANK_TILES<=UCHAR_MAX ); /* Exec tile id needs to fit in a uchar for the writer tile txn done message. */
1918 0 : if( FD_UNLIKELY( ctx->exec_cnt>FD_PACK_MAX_BANK_TILES ) ) FD_LOG_ERR(( "replay tile has too many exec tiles %lu", ctx->exec_cnt ));
1919 :
1920 0 : ctx->exec_ready_bitset = 0UL;
1921 0 : ctx->is_booted = 0;
1922 :
1923 0 : ctx->sched = fd_sched_join( fd_sched_new( sched_mem ) );
1924 0 : FD_TEST( ctx->sched );
1925 :
1926 0 : ctx->eslot_mgr = fd_eslot_mgr_join( fd_eslot_mgr_new( eslot_mgr_mem, FD_BLOCK_MAX, 999UL ) );
1927 0 : FD_TEST( ctx->eslot_mgr );
1928 :
1929 0 : ctx->consensus_root_slot = ULONG_MAX;
1930 0 : ctx->published_root_slot = ULONG_MAX;
1931 :
1932 0 : ctx->block_draining = 0;
1933 :
1934 0 : ctx->enable_bank_hash_cmp = !!tile->replay.enable_bank_hash_cmp;
1935 :
1936 0 : ulong bank_hash_cmp_obj_id = fd_pod_query_ulong( topo->props, "bh_cmp", ULONG_MAX );
1937 0 : FD_TEST( bank_hash_cmp_obj_id!=ULONG_MAX );
1938 0 : ctx->bank_hash_cmp = fd_bank_hash_cmp_join( fd_bank_hash_cmp_new( fd_topo_obj_laddr( topo, bank_hash_cmp_obj_id ) ) );
1939 0 : FD_TEST( ctx->bank_hash_cmp );
1940 :
1941 : /* Now attach to the runtime spad which is part of the tile memory.
1942 : FIXME: Replace runtime spad with a non-stack allocator. */
1943 0 : ctx->runtime_spad = fd_spad_join( fd_spad_new( spad_mem, fd_spad_footprint( tile->replay.heap_size_gib<<30UL ) ) );
1944 0 : FD_TEST( ctx->runtime_spad );
1945 :
1946 0 : ctx->slot_ctx = fd_exec_slot_ctx_join( fd_exec_slot_ctx_new( slot_ctx_mem ) );
1947 0 : FD_TEST( ctx->slot_ctx );
1948 0 : ctx->slot_ctx->banks = ctx->banks;
1949 :
1950 0 : ctx->slot_ctx->bank = bank;
1951 0 : FD_TEST( ctx->slot_ctx->bank );
1952 :
1953 0 : ctx->slot_ctx->funk = ctx->funk;
1954 0 : ctx->slot_ctx->status_cache = NULL; /* TODO: Integrate status cache */
1955 0 : ctx->slot_ctx->capture_ctx = ctx->capture_ctx;
1956 :
1957 0 : ctx->has_identity_vote_rooted = 0;
1958 :
1959 0 : ctx->mleaders = fd_multi_epoch_leaders_join( fd_multi_epoch_leaders_new( ctx->mleaders_mem ) );
1960 0 : FD_TEST( ctx->mleaders );
1961 :
1962 0 : ctx->is_leader = 0;
1963 0 : ctx->reset_slot = 0UL;
1964 0 : ctx->reset_block_id = (fd_hash_t){ .ul[0] = FD_RUNTIME_INITIAL_BLOCK_ID };
1965 0 : ctx->reset_timestamp_nanos = 0UL;
1966 0 : ctx->next_leader_slot = ULONG_MAX;
1967 0 : ctx->highwater_leader_slot = ULONG_MAX;
1968 0 : ctx->slot_duration_nanos = 400L*1000L*1000L; /* TODO: Not fixed ... not always 400ms ... */
1969 0 : ctx->max_active_descendant = 0UL; /* TODO: Update this properly ... */
1970 :
1971 0 : ctx->resolv_tile_cnt = fd_topo_tile_name_cnt( topo, "resolv" );
1972 :
1973 0 : FD_TEST( tile->in_cnt<=sizeof(ctx->in)/sizeof(ctx->in[0]) );
1974 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
1975 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
1976 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
1977 :
1978 0 : if( FD_LIKELY( link->dcache ) ) {
1979 0 : ctx->in[ i ].mem = link_wksp->wksp;
1980 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
1981 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
1982 0 : ctx->in[ i ].mtu = link->mtu;
1983 0 : }
1984 :
1985 0 : if( !strcmp( link->name, "genesi_out" ) ) ctx->in_kind[ i ] = IN_KIND_GENESIS;
1986 0 : else if( !strcmp( link->name, "repair_repla" ) ) ctx->in_kind[ i ] = IN_KIND_REPAIR;
1987 0 : else if( !strcmp( link->name, "snap_out" ) ) ctx->in_kind[ i ] = IN_KIND_SNAP;
1988 0 : else if( !strcmp( link->name, "writ_repl" ) ) ctx->in_kind[ i ] = IN_KIND_WRITER;
1989 0 : else if( !strcmp( link->name, "tower_out" ) ) ctx->in_kind[ i ] = IN_KIND_TOWER;
1990 0 : else if( !strcmp( link->name, "capt_replay" ) ) ctx->in_kind[ i ] = IN_KIND_CAPTURE;
1991 0 : else if( !strcmp( link->name, "poh_replay" ) ) ctx->in_kind[ i ] = IN_KIND_POH;
1992 0 : else if( !strcmp( link->name, "resolv_repla" ) ) ctx->in_kind[ i ] = IN_KIND_RESOLV;
1993 0 : else FD_LOG_ERR(( "unexpected input link name %s", link->name ));
1994 0 : }
1995 :
1996 0 : *ctx->shredcap_out = out1( topo, tile, "replay_scap" );
1997 0 : *ctx->plugin_out = out1( topo, tile, "replay_plugi" );
1998 0 : *ctx->votes_plugin_out = out1( topo, tile, "votes_plugin" ); /* TODO: Delete this */
1999 0 : *ctx->stake_out = out1( topo, tile, "replay_stake" ); FD_TEST( ctx->stake_out->idx!=ULONG_MAX );
2000 0 : *ctx->replay_out = out1( topo, tile, "replay_out" );
2001 0 : *ctx->pack_out = out1( topo, tile, "replay_pack" );
2002 :
2003 0 : for( ulong i=0UL; i<ctx->exec_cnt; i++ ) {
2004 0 : ulong idx = fd_topo_find_tile_out_link( topo, tile, "replay_exec", i );
2005 0 : FD_TEST( idx!=ULONG_MAX );
2006 0 : fd_topo_link_t * link = &topo->links[ tile->out_link_id[ idx ] ];
2007 :
2008 0 : fd_replay_out_link_t * exec_out = &ctx->exec_out[ i ];
2009 0 : exec_out->idx = idx;
2010 0 : exec_out->mem = topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ].wksp;
2011 0 : exec_out->chunk0 = fd_dcache_compact_chunk0( exec_out->mem, link->dcache );
2012 0 : exec_out->wmark = fd_dcache_compact_wmark( exec_out->mem, link->dcache, link->mtu );
2013 0 : exec_out->chunk = exec_out->chunk0;
2014 0 : }
2015 :
2016 0 : fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) );
2017 :
2018 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_read_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_READ_WAIT ),
2019 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_READ_WAIT ) ) );
2020 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_read_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_READ_WORK ),
2021 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_READ_WORK ) ) );
2022 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_publish_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_PUBLISH_WAIT ),
2023 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_PUBLISH_WAIT ) ) );
2024 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_publish_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_PUBLISH_WORK ),
2025 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_PUBLISH_WORK ) ) );
2026 :
2027 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, 1UL );
2028 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
2029 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
2030 0 : }
2031 :
2032 : static ulong
2033 : populate_allowed_seccomp( fd_topo_t const * topo FD_FN_UNUSED,
2034 : fd_topo_tile_t const * tile FD_FN_UNUSED,
2035 : ulong out_cnt,
2036 0 : struct sock_filter * out ) {
2037 :
2038 0 : populate_sock_filter_policy_fd_replay_tile( out_cnt, out, (uint)fd_log_private_logfile_fd() );
2039 0 : return sock_filter_policy_fd_replay_tile_instr_cnt;
2040 0 : }
2041 :
2042 : static ulong
2043 : populate_allowed_fds( fd_topo_t const * topo FD_FN_UNUSED,
2044 : fd_topo_tile_t const * tile FD_FN_UNUSED,
2045 : ulong out_fds_cnt,
2046 0 : int * out_fds ) {
2047 :
2048 0 : if( FD_UNLIKELY( out_fds_cnt<2UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
2049 :
2050 0 : ulong out_cnt = 0UL;
2051 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
2052 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
2053 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
2054 0 : return out_cnt;
2055 0 : }
2056 :
2057 : /* TODO: This needs to get sized out correctly. */
2058 0 : #define STEM_BURST (128UL)
2059 :
2060 : /* TODO: calculate this properly/fix stem to work with larger numbers of links */
2061 : /* 1000 chosen empirically as anything larger slowed down replay times. Need to calculate
2062 : this properly. */
2063 0 : #define STEM_LAZY ((long)10e3)
2064 :
2065 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_replay_tile_t
2066 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_replay_tile_t)
2067 :
2068 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
2069 0 : #define STEM_CALLBACK_AFTER_CREDIT after_credit
2070 0 : #define STEM_CALLBACK_BEFORE_FRAG before_frag
2071 0 : #define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
2072 :
2073 : #include "../../disco/stem/fd_stem.c"
2074 :
2075 : fd_topo_run_tile_t fd_tile_replay = {
2076 : .name = "replay",
2077 : .populate_allowed_seccomp = populate_allowed_seccomp,
2078 : .populate_allowed_fds = populate_allowed_fds,
2079 : .scratch_align = scratch_align,
2080 : .scratch_footprint = scratch_footprint,
2081 : .privileged_init = privileged_init,
2082 : .unprivileged_init = unprivileged_init,
2083 : .run = stem_run,
2084 : };
|