Line data Source code
1 : #include "fd_replay_tile.h"
2 : #include "fd_replay_tile_private.h"
3 : #include "fd_sched.h"
4 : #include "fd_execrp.h"
5 : #include "generated/fd_replay_tile_seccomp.h"
6 :
7 : #include "../genesis/fd_genesi_tile.h"
8 : #include "../poh/fd_poh.h"
9 : #include "../poh/fd_poh_tile.h"
10 : #include "../tower/fd_tower_tile.h"
11 : #include "../resolv/fd_resolv_tile.h"
12 : #include "../restore/utils/fd_ssload.h"
13 :
14 : #include "../../disco/tiles.h"
15 : #include "../../disco/fd_txn_m.h"
16 : #include "../../disco/shred/fd_fec_set.h"
17 : #include "../../disco/shred/fd_shred_tile.h"
18 : #include "../../disco/pack/fd_pack.h"
19 : #include "../../discof/reasm/fd_reasm.h"
20 : #include "../../disco/keyguard/fd_keyload.h"
21 : #include "../../disco/genesis/fd_genesis_cluster.h"
22 : #include "../../discof/genesis/genesis_hash.h"
23 : #include "../../util/pod/fd_pod.h"
24 : #include "../../flamenco/rewards/fd_rewards.h"
25 : #include "../../flamenco/leaders/fd_multi_epoch_leaders.h"
26 : #include "../../flamenco/progcache/fd_progcache_admin.h"
27 : #include "../../flamenco/rewards/fd_rewards.h"
28 : #include "../../disco/metrics/fd_metrics.h"
29 : #include "../repair/fd_repair_tile.h"
30 : #include "../repair/fd_repair_tile.h"
31 : #include "../../flamenco/fd_flamenco_base.h"
32 : #include "../../flamenco/runtime/fd_runtime.h"
33 : #include "../../flamenco/runtime/fd_runtime_stack.h"
34 : #include "../../flamenco/runtime/sysvar/fd_sysvar_cache.h"
35 : #include "../../flamenco/runtime/sysvar/fd_sysvar_epoch_schedule.h"
36 : #include "../../flamenco/runtime/sysvar/fd_sysvar_rent.h"
37 : #include "../../flamenco/runtime/program/fd_precompiles.h"
38 : #include "../../flamenco/runtime/program/vote/fd_vote_state_versioned.h"
39 : #include "../../flamenco/runtime/program/vote/fd_vote_codec.h"
40 : #include "../../flamenco/runtime/tests/fd_dump_pb.h"
41 :
42 : /* Replay concepts:
43 :
44 : - Blocks are aggregations of entries aka. microblocks which are
45 : groupings of txns and are constructed by the block producer (see
46 : fd_pack).
47 :
48 : - Entries are grouped into entry batches by the block producer (see
49 : fd_pack / fd_shredder).
50 :
51 : - Entry batches are divided into chunks known as shreds by the block
52 : producer (see fd_shredder).
53 :
54 : - Shreds are grouped into forward-error-correction sets (FEC sets) by
55 : the block producer (see fd_shredder).
56 :
57 : - Shreds are transmitted to the rest of the cluster via the Turbine
58 : protocol (see fd_shredder / fd_shred).
59 :
60 : - Once enough shreds within a FEC set are received to recover the
61 : entirety of the shred data encoded by that FEC set, the receiver
62 : can "complete" the FEC set (see fd_fec_resolver).
63 :
64 : - If shreds in the FEC set are missing such that it can't complete,
65 : the receiver can use the Repair protocol to request missing shreds
66 : in FEC set (see fd_repair).
67 :
68 : - The current Repair protocol does not support requesting coding
69 : shreds. As a result, some FEC sets might actually be complete
70 : (i.e. contain all data shreds). Repair currently hacks around this
71 : by forcing completion, but the long-term solution is to add support
72 : for repairing coding shreds via Repair.
73 :
74 : - FEC sets are delivered in partial order to the Replay tile by the
75 : Repair tile. Currently Replay only supports replaying entry batches,
76 : so FEC sets need to be reassembled into an entry batch before they
77 : can be replayed. The new Dispatcher will change this by taking a FEC
78 : set as input instead. */
79 :
/* IN_KIND_* are small dense integer tags, one per kind of input this
   tile tells apart (presumably one per class of input link; the code
   that assigns them is outside this excerpt). */

#define IN_KIND_SNAP        (0)
#define IN_KIND_GENESIS     (1)
#define IN_KIND_IPECHO      (2)
#define IN_KIND_TOWER       (3)
#define IN_KIND_RESOLV      (4)
#define IN_KIND_POH         (5)
#define IN_KIND_EXECRP      (6)
#define IN_KIND_REPAIR      (7)
#define IN_KIND_TXSEND      (8)
#define IN_KIND_RPC         (9)
#define IN_KIND_GOSSIP_OUT (10)

/* Compile-time switch for extra debug logging (disabled). */

#define DEBUG_LOGGING 0

/* Whether the tile boots from genesis or from a snapshot, the first
   bank it produces is always the one with sequence number 0. */

#define FD_REPLAY_BOOT_BANK_SEQ (0UL)
97 :
98 : static inline ulong
99 0 : fd_block_id_ele_get_idx( fd_block_id_ele_t * ele_arr, fd_block_id_ele_t * ele ) {
100 0 : return (ulong)(ele - ele_arr);
101 0 : }
102 :
103 : FD_FN_CONST static inline ulong
104 0 : scratch_align( void ) {
105 0 : return 128UL;
106 0 : }
107 : FD_FN_PURE static inline ulong
108 0 : scratch_footprint( fd_topo_tile_t const * tile ) {
109 0 : ulong chain_cnt = fd_block_id_map_chain_cnt_est( tile->replay.max_live_slots );
110 :
111 0 : ulong l = FD_LAYOUT_INIT;
112 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
113 0 : l = FD_LAYOUT_APPEND( l, fd_runtime_stack_align(), fd_runtime_stack_footprint( FD_RUNTIME_MAX_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_STAKE_ACCOUNTS ) );
114 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_block_id_ele_t), sizeof(fd_block_id_ele_t) * tile->replay.max_live_slots );
115 0 : l = FD_LAYOUT_APPEND( l, fd_block_id_map_align(), fd_block_id_map_footprint( chain_cnt ) );
116 0 : l = FD_LAYOUT_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->replay.max_live_slots ) );
117 0 : l = FD_LAYOUT_APPEND( l, fd_accdb_align(), fd_accdb_footprint( tile->replay.max_live_slots ) );
118 0 : l = FD_LAYOUT_APPEND( l, fd_reasm_align(), fd_reasm_footprint( tile->replay.fec_max ) );
119 0 : l = FD_LAYOUT_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.sched_depth, tile->replay.max_live_slots ) );
120 0 : l = FD_LAYOUT_APPEND( l, fd_vote_tracker_align(), fd_vote_tracker_footprint() );
121 0 : l = FD_LAYOUT_APPEND( l, fd_capture_ctx_align(), fd_capture_ctx_footprint() );
122 0 : l = FD_LAYOUT_APPEND( l, alignof(fd_dump_proto_ctx_t), sizeof(fd_dump_proto_ctx_t) );
123 :
124 0 : if( FD_UNLIKELY( tile->replay.dump_block_to_pb ) ) {
125 0 : l = FD_LAYOUT_APPEND( l, fd_block_dump_context_align(), fd_block_dump_context_footprint() );
126 0 : }
127 :
128 0 : l = FD_LAYOUT_FINI( l, scratch_align() );
129 :
130 0 : return l;
131 0 : }
132 :
133 : static inline void
134 0 : metrics_write( fd_replay_tile_t * ctx ) {
135 0 : FD_MCNT_SET ( REPLAY, STORE_QUERY_ACQUIRED, ctx->metrics.store_query_acquire );
136 0 : FD_MCNT_SET ( REPLAY, STORE_QUERY_RELEASED, ctx->metrics.store_query_release );
137 0 : FD_MHIST_COPY( REPLAY, STORE_QUERY_WAIT_SECONDS, ctx->metrics.store_query_wait );
138 0 : FD_MHIST_COPY( REPLAY, STORE_QUERY_WORK_SECONDS, ctx->metrics.store_query_work );
139 0 : FD_MCNT_SET ( REPLAY, STORE_QUERIED, ctx->metrics.store_query_cnt );
140 0 : FD_MCNT_SET ( REPLAY, STORE_QUERY_MISSING, ctx->metrics.store_query_missing_cnt );
141 0 : FD_MGAUGE_SET( REPLAY, STORE_QUERY_MERKLE_ROOT_SAMPLE, ctx->metrics.store_query_mr );
142 0 : FD_MGAUGE_SET( REPLAY, STORE_QUERY_MISSING_MERKLE_ROOT_SAMPLE, ctx->metrics.store_query_missing_mr );
143 :
144 0 : FD_MGAUGE_SET( REPLAY, ROOT_SLOT, ctx->consensus_root_slot==ULONG_MAX ? 0UL : ctx->consensus_root_slot );
145 0 : ulong leader_slot = ctx->leader_bank ? ctx->leader_bank->f.slot : 0UL;
146 :
147 0 : if( FD_LIKELY( ctx->leader_bank ) ) {
148 0 : FD_MGAUGE_SET( REPLAY, NEXT_LEADER_SLOT, leader_slot );
149 0 : FD_MGAUGE_SET( REPLAY, LEADER_SLOT, leader_slot );
150 0 : } else {
151 0 : FD_MGAUGE_SET( REPLAY, NEXT_LEADER_SLOT, ctx->next_leader_slot==ULONG_MAX ? 0UL : ctx->next_leader_slot );
152 0 : FD_MGAUGE_SET( REPLAY, LEADER_SLOT, 0UL );
153 0 : }
154 0 : FD_MGAUGE_SET( REPLAY, RESET_SLOT, ctx->reset_slot==ULONG_MAX ? 0UL : ctx->reset_slot );
155 :
156 0 : FD_MGAUGE_SET( REPLAY, BANK_LIVE, fd_banks_pool_used_cnt( ctx->banks ) );
157 :
158 0 : ulong reasm_free = fd_reasm_free( ctx->reasm );
159 0 : FD_MGAUGE_SET( REPLAY, REASSEMBLY_FREE, reasm_free );
160 :
161 0 : FD_MCNT_SET( REPLAY, SLOT_REPLAYED, ctx->metrics.slots_total );
162 0 : FD_MCNT_SET( REPLAY, TXN_PROCESSED, ctx->metrics.transactions_total );
163 :
164 0 : FD_MGAUGE_SET( REPLAY, REASSEMBLY_LATEST_SLOT, ctx->metrics.reasm_latest_slot );
165 0 : FD_MGAUGE_SET( REPLAY, REASSEMBLY_LATEST_FEC_INDEX, ctx->metrics.reasm_latest_fec_idx );
166 :
167 0 : fd_sched_metrics_write( ctx->sched );
168 :
169 0 : FD_MCNT_SET( REPLAY, FEC_SCHED_FULL, ctx->metrics.sched_full );
170 0 : FD_MCNT_SET( REPLAY, FEC_REASSEMBLY_EMPTY, ctx->metrics.reasm_empty );
171 0 : FD_MCNT_SET( REPLAY, FEC_LEADER_BID_WAIT, ctx->metrics.leader_bid_wait );
172 0 : FD_MCNT_SET( REPLAY, FEC_BANK_FULL, ctx->metrics.banks_full );
173 0 : FD_MCNT_SET( REPLAY, STORAGE_ROOT_BEHIND, ctx->metrics.storage_root_behind );
174 :
175 0 : fd_progcache_admin_metrics_t const * pcm = &fd_progcache_admin_metrics_g;
176 0 : FD_MCNT_SET( REPLAY, PROGCACHE_ROOTED, pcm->root_cnt );
177 :
178 0 : fd_wksp_mon_t * wm = fd_wksp_mon_tick( ctx->progcache_wksp_mon, fd_tickcount() );
179 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_FREE_PARTITION, wm->free_cnt );
180 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_FREE_BYTES, wm->free_sz );
181 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_SIZE_BYTES, wm->wksp->data_max );
182 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_FREE_PARTITION_MAX_BYTES, wm->free_max_sz );
183 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_USED_PARTITION_MEDIAN_BYTES, wm->part_median_sz );
184 0 : FD_MGAUGE_SET( REPLAY, PROGCACHE_USED_PARTITION_MEAN_BYTES, wm->part_mean_sz );
185 :
186 0 : FD_ACCDB_METRICS_WRITE( REPLAY, fd_accdb_metrics( ctx->accdb ) );
187 0 : }
188 :
189 : static void
190 : publish_epoch_info( fd_replay_tile_t * ctx,
191 : fd_stem_context_t * stem,
192 : fd_bank_t * bank,
193 0 : int next_epoch ) {
194 0 : fd_epoch_schedule_t const * schedule = &bank->f.epoch_schedule;
195 0 : ulong epoch = fd_slot_to_epoch( schedule, bank->f.slot, NULL ) + fd_ulong_if( next_epoch, 1UL, 0UL );
196 :
197 0 : fd_features_t const * features = &bank->f.features;
198 :
199 0 : fd_runtime_stack_t * runtime_stack = ctx->runtime_stack;
200 :
201 0 : fd_epoch_info_msg_t * epoch_info_msg = fd_chunk_to_laddr( ctx->epoch_out->mem, ctx->epoch_out->chunk );
202 :
203 0 : epoch_info_msg->staked_vote_cnt = next_epoch ? runtime_stack->epoch_weights.next_stake_weights_cnt : runtime_stack->epoch_weights.stake_weights_cnt;
204 0 : epoch_info_msg->staked_id_cnt = next_epoch ? runtime_stack->epoch_weights.next_id_weights_cnt : runtime_stack->epoch_weights.id_weights_cnt;
205 0 : epoch_info_msg->epoch_schedule = *schedule;
206 0 : epoch_info_msg->features = *features;
207 0 : epoch_info_msg->epoch = epoch;
208 0 : epoch_info_msg->start_slot = fd_epoch_slot0( schedule, epoch );
209 0 : epoch_info_msg->slot_cnt = fd_epoch_slot_cnt( schedule, epoch );
210 0 : epoch_info_msg->excluded_id_stake = next_epoch ? runtime_stack->epoch_weights.next_id_weights_excluded : runtime_stack->epoch_weights.id_weights_excluded;
211 :
212 0 : fd_vote_stake_weight_t * stake_weights = fd_type_pun( epoch_info_msg + 1 );
213 0 : fd_vote_stake_weight_t * src_stake_weights = next_epoch ? runtime_stack->epoch_weights.next_stake_weights : runtime_stack->epoch_weights.stake_weights;
214 0 : memcpy( stake_weights, src_stake_weights, epoch_info_msg->staked_vote_cnt * sizeof(fd_vote_stake_weight_t) );
215 :
216 0 : fd_stake_weight_t * id_weights = fd_epoch_info_msg_id_weights( epoch_info_msg );
217 0 : fd_stake_weight_t * src_id_weights = next_epoch ? runtime_stack->epoch_weights.next_id_weights : runtime_stack->epoch_weights.id_weights;
218 0 : fd_memcpy( id_weights, src_id_weights, epoch_info_msg->staked_id_cnt * sizeof(fd_stake_weight_t) );
219 :
220 0 : ulong epoch_info_sz = fd_epoch_info_msg_sz( epoch_info_msg->staked_vote_cnt , epoch_info_msg->staked_id_cnt );
221 :
222 0 : ulong epoch_info_sig = 4UL;
223 0 : fd_stem_publish( stem, ctx->epoch_out->idx, epoch_info_sig, ctx->epoch_out->chunk, epoch_info_sz, 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
224 0 : ctx->epoch_out->chunk = fd_dcache_compact_next( ctx->epoch_out->chunk, epoch_info_sz, ctx->epoch_out->chunk0, ctx->epoch_out->wmark );
225 :
226 0 : fd_multi_epoch_leaders_epoch_msg_init( ctx->mleaders, epoch_info_msg );
227 0 : fd_multi_epoch_leaders_epoch_msg_fini( ctx->mleaders );
228 0 : }
229 :
230 : /**********************************************************************/
231 : /* Transaction execution state machine helpers */
232 : /**********************************************************************/
233 :
234 : static void
235 : replay_block_start( fd_replay_tile_t * ctx,
236 : ulong bank_idx,
237 : ulong parent_bank_idx,
238 0 : ulong slot ) {
239 0 : long before = fd_log_wallclock();
240 :
241 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
242 0 : FD_CHECK_CRIT( bank, "invariant violation: bank is NULL" );
243 0 : FD_CHECK_CRIT( bank->state==FD_BANK_STATE_INIT, "invariant violation: bank is not in correct state" );
244 :
245 0 : bank->preparation_begin_nanos = before;
246 :
247 0 : fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, parent_bank_idx );
248 0 : FD_CHECK_CRIT( parent_bank, "invariant violation: parent bank is NULL" );
249 0 : FD_CHECK_CRIT( parent_bank->state==FD_BANK_STATE_FROZEN, "invariant violation: parent bank is not in correct state" );
250 :
251 : /* Clone the bank from the parent. We must special case the first
252 : slot that is executed as the snapshot does not provide a parent
253 : block id. */
254 :
255 0 : bank = fd_banks_clone_from_parent( ctx->banks, bank_idx );
256 0 : if( FD_UNLIKELY( !bank ) ) {
257 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", bank_idx ));
258 0 : }
259 0 : bank->f.slot = slot;
260 0 : bank->txncache_fork_id = fd_txncache_attach_child ( ctx->txncache, parent_bank->txncache_fork_id );
261 0 : bank->progcache_fork_id = fd_progcache_attach_child( ctx->progcache, parent_bank->progcache_fork_id );
262 0 : bank->accdb_fork_id = fd_accdb_attach_child ( ctx->accdb, parent_bank->accdb_fork_id );
263 :
264 0 : ulong new_epoch = fd_slot_to_epoch( &parent_bank->f.epoch_schedule, slot, NULL );
265 0 : ulong root_epoch = fd_slot_to_epoch( &parent_bank->f.epoch_schedule, ctx->published_root_slot, NULL );
266 0 : if( FD_UNLIKELY( new_epoch>root_epoch+1UL ) ) {
267 0 : FD_LOG_CRIT(( "firedancer replay does not support replaying more than one epoch ahead of the current root" ));
268 0 : }
269 :
270 : /* Update required runtime state and handle potential boundary. */
271 :
272 0 : int is_epoch_boundary = 0;
273 0 : fd_runtime_block_execute_prepare( ctx->banks, bank, ctx->accdb, ctx->runtime_stack, ctx->capture_ctx, &is_epoch_boundary );
274 :
275 0 : ulong max_tick_height;
276 0 : if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS!=fd_runtime_compute_max_tick_height( parent_bank->f.ticks_per_slot, slot, &max_tick_height ) ) ) {
277 0 : FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", slot, parent_bank->f.ticks_per_slot ));
278 0 : }
279 0 : bank->f.max_tick_height = max_tick_height;
280 0 : fd_sched_set_poh_params( ctx->sched, bank->idx, bank->f.tick_height, bank->f.max_tick_height, bank->f.hashes_per_tick, &parent_bank->f.poh );
281 :
282 0 : FD_LOG_DEBUG(( "replay_block_start: bank_idx=%lu slot=%lu parent_bank_idx=%lu", bank_idx, slot, parent_bank_idx ));
283 0 : }
284 :
285 : static void
286 0 : cost_tracker_snap( fd_bank_t * bank, fd_replay_slot_completed_t * slot_info ) {
287 0 : if( FD_LIKELY( bank->cost_tracker_pool_idx!=ULONG_MAX ) ) {
288 0 : fd_cost_tracker_t const * cost_tracker = fd_bank_cost_tracker_query( bank );
289 0 : if( FD_UNLIKELY( cost_tracker->block_cost_limit==0UL ) ) {
290 0 : memset( &slot_info->cost_tracker, -1 /* ULONG_MAX */, sizeof(slot_info->cost_tracker) );
291 0 : } else {
292 0 : slot_info->cost_tracker.block_cost = cost_tracker->block_cost;
293 0 : slot_info->cost_tracker.vote_cost = cost_tracker->vote_cost;
294 0 : slot_info->cost_tracker.allocated_accounts_data_size = cost_tracker->allocated_accounts_data_size;
295 0 : slot_info->cost_tracker.block_cost_limit = cost_tracker->block_cost_limit;
296 0 : slot_info->cost_tracker.vote_cost_limit = cost_tracker->vote_cost_limit;
297 0 : slot_info->cost_tracker.account_cost_limit = cost_tracker->account_cost_limit;
298 0 : }
299 0 : } else {
300 0 : memset( &slot_info->cost_tracker, -1 /* ULONG_MAX */, sizeof(slot_info->cost_tracker) );
301 0 : }
302 0 : }
303 :
304 : static void
305 : publish_slot_completed( fd_replay_tile_t * ctx,
306 : fd_stem_context_t * stem,
307 : fd_bank_t * bank,
308 : int is_initial,
309 : int is_leader,
310 : ulong execution_fees_pre_settle,
311 0 : ulong priority_fees_pre_settle ) {
312 :
313 0 : ulong slot = bank->f.slot;
314 :
315 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank->idx ];
316 :
317 : /* HACKY: hacky way of checking if we should send a null parent block
318 : id */
319 0 : fd_hash_t parent_block_id = {0};
320 0 : if( FD_LIKELY( !is_initial ) ) {
321 0 : parent_block_id = ctx->block_id_arr[ bank->parent_idx ].latest_mr;
322 0 : }
323 :
324 0 : fd_hash_t const * bank_hash = &bank->f.bank_hash;
325 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last_hash( &bank->f.block_hash_queue );
326 0 : FD_TEST( block_hash );
327 :
328 0 : if( FD_LIKELY( !is_initial ) ) fd_txncache_finalize_fork( ctx->txncache, bank->txncache_fork_id, 0UL, block_hash->uc );
329 :
330 0 : fd_epoch_schedule_t const * epoch_schedule = &bank->f.epoch_schedule;
331 0 : ulong slot_idx;
332 0 : ulong epoch = fd_slot_to_epoch( epoch_schedule, slot, &slot_idx );
333 :
334 0 : ctx->metrics.slots_total++;
335 0 : ctx->metrics.transactions_total = bank->f.txn_count;
336 :
337 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
338 0 : slot_info->slot = slot;
339 0 : slot_info->root_slot = ctx->consensus_root_slot;
340 0 : slot_info->storage_slot = ctx->published_root_slot;
341 0 : slot_info->epoch = epoch;
342 0 : slot_info->slot_in_epoch = slot_idx;
343 0 : slot_info->slots_per_epoch = fd_epoch_slot_cnt( epoch_schedule, epoch );
344 0 : slot_info->block_height = bank->f.block_height;
345 0 : slot_info->parent_slot = bank->f.parent_slot;
346 0 : slot_info->block_id = block_id_ele->latest_mr;
347 0 : slot_info->parent_block_id = parent_block_id;
348 0 : slot_info->bank_hash = *bank_hash;
349 0 : slot_info->block_hash = *block_hash;
350 0 : slot_info->transaction_count = bank->f.txn_count;
351 :
352 0 : fd_inflation_t inflation = bank->f.inflation;
353 0 : slot_info->inflation.foundation = inflation.foundation;
354 0 : slot_info->inflation.foundation_term = inflation.foundation_term;
355 0 : slot_info->inflation.terminal = inflation.terminal;
356 0 : slot_info->inflation.initial = inflation.initial;
357 0 : slot_info->inflation.taper = inflation.taper;
358 :
359 0 : fd_rent_t rent = bank->f.rent;
360 0 : slot_info->rent.burn_percent = rent.burn_percent;
361 0 : slot_info->rent.lamports_per_uint8_year = rent.lamports_per_uint8_year;
362 0 : slot_info->rent.exemption_threshold = rent.exemption_threshold;
363 :
364 0 : slot_info->first_fec_set_received_nanos = bank->first_fec_set_received_nanos;
365 0 : slot_info->preparation_begin_nanos = bank->preparation_begin_nanos;
366 0 : slot_info->first_transaction_scheduled_nanos = bank->first_transaction_scheduled_nanos;
367 0 : slot_info->last_transaction_finished_nanos = bank->last_transaction_finished_nanos;
368 0 : slot_info->completion_time_nanos = fd_log_wallclock();
369 0 : if( !slot_info->first_transaction_scheduled_nanos ) { /* edge case: empty slot */
370 0 : slot_info->first_transaction_scheduled_nanos = slot_info->last_transaction_finished_nanos;
371 0 : }
372 :
373 : /* refcnt should be incremented by 1 for each consumer that uses
374 : `bank_idx`. Each consumer should decrement the bank's refcnt once
375 : they are done using the bank. */
376 0 : bank->refcnt++; /* tower_tile */
377 0 : if( FD_LIKELY( ctx->rpc_enabled ) ) bank->refcnt++; /* rpc tile */
378 0 : slot_info->bank_idx = bank->idx;
379 0 : slot_info->bank_seq = bank->bank_seq;
380 0 : slot_info->accdb_fork_id = bank->accdb_fork_id;
381 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for tower, rpc", bank->idx, slot, bank->refcnt ));
382 :
383 0 : fd_bank_t * parent_bank = fd_banks_get_parent( ctx->banks, bank );
384 0 : if( FD_LIKELY( parent_bank ) ) {
385 0 : ulong total_txn_cnt = bank->f.txn_count;
386 0 : ulong nonvote_txn_cnt = bank->f.nonvote_txn_count;
387 0 : ulong failed_txn_cnt = bank->f.failed_txn_count;
388 0 : ulong nonvote_failed_txn_cnt = bank->f.nonvote_failed_txn_count;
389 :
390 0 : slot_info->nonvote_success = nonvote_txn_cnt - nonvote_failed_txn_cnt;
391 0 : slot_info->nonvote_failed = nonvote_failed_txn_cnt;
392 0 : slot_info->vote_failed = failed_txn_cnt - nonvote_failed_txn_cnt;
393 0 : slot_info->vote_success = total_txn_cnt - nonvote_txn_cnt - slot_info->vote_failed;
394 0 : } else {
395 0 : slot_info->vote_failed = ULONG_MAX;
396 0 : slot_info->vote_success = ULONG_MAX;
397 0 : slot_info->nonvote_success = ULONG_MAX;
398 0 : slot_info->nonvote_failed = ULONG_MAX;
399 0 : }
400 :
401 0 : slot_info->is_leader = is_leader;
402 0 : slot_info->transaction_fee = execution_fees_pre_settle;
403 0 : slot_info->transaction_fee -= (slot_info->transaction_fee>>1); /* burn */
404 0 : slot_info->priority_fee = priority_fees_pre_settle;
405 0 : slot_info->tips = bank->f.tips;
406 0 : slot_info->shred_cnt = bank->f.shred_cnt;
407 :
408 0 : FD_BASE58_ENCODE_32_BYTES( ctx->block_id_arr[ bank->idx ].latest_mr.uc, block_id_b58 );
409 0 : FD_BASE58_ENCODE_32_BYTES( bank->f.bank_hash.uc, bank_hash_b58 );
410 0 : FD_BASE58_ENCODE_32_BYTES( bank->f.poh.uc, poh_hash_b58 );
411 0 : FD_LOG_DEBUG(( "finished replaying slot %lu with (block id %s, bank hash %s, PoH hash %s, transactions %lu, votes %lu, shreds %lu, CUs used %lu, fees %lu) "
412 0 : "and timings [since parent fini %ld ns, started prepare %ld ns, started dispatching transactions %ld ns, finished executing transactions %ld ns, finished block %ld ns]",
413 0 : bank->f.slot, block_id_b58,
414 0 : bank_hash_b58,
415 0 : poh_hash_b58,
416 0 : bank->f.txn_count,
417 0 : bank->f.txn_count - bank->f.nonvote_txn_count,
418 0 : bank->f.shred_cnt,
419 0 : bank->f.total_compute_units_used,
420 0 : execution_fees_pre_settle + priority_fees_pre_settle,
421 0 : !!parent_bank ? parent_bank->block_completed_nanos - bank->first_fec_set_received_nanos : LONG_MAX,
422 0 : bank->preparation_begin_nanos - bank->first_fec_set_received_nanos,
423 0 : bank->first_transaction_scheduled_nanos - bank->preparation_begin_nanos,
424 0 : bank->last_transaction_finished_nanos - bank->first_transaction_scheduled_nanos,
425 0 : bank->block_completed_nanos - bank->last_transaction_finished_nanos ));
426 :
427 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_SLOT_COMPLETED, ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
428 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_slot_completed_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
429 0 : }
430 :
431 : static void
432 : publish_slot_dead( fd_replay_tile_t * ctx,
433 : fd_stem_context_t * stem,
434 : ulong slot,
435 0 : fd_hash_t const * block_id ) {
436 0 : fd_replay_slot_dead_t * slot_dead = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
437 0 : slot_dead->slot = slot;
438 0 : slot_dead->block_id = *block_id;
439 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_SLOT_DEAD, ctx->replay_out->chunk, sizeof(fd_replay_slot_dead_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
440 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_slot_dead_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
441 0 : }
442 :
443 : static void
444 : publish_txn_executed( fd_replay_tile_t * ctx,
445 : fd_stem_context_t * stem,
446 0 : ulong txn_idx ) {
447 0 : fd_sched_txn_info_t * txn_info = fd_sched_get_txn_info( ctx->sched, txn_idx );
448 0 : fd_replay_txn_executed_t * txn_executed = fd_type_pun( fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk ) );
449 0 : *txn_executed->txn = *fd_sched_get_txn( ctx->sched, txn_idx );
450 0 : txn_executed->txn_err = txn_info->txn_err;
451 0 : txn_executed->is_committable = !!(txn_info->flags&FD_SCHED_TXN_IS_COMMITTABLE);
452 0 : txn_executed->is_fees_only = !!(txn_info->flags&FD_SCHED_TXN_IS_FEES_ONLY);
453 0 : txn_executed->tick_parsed = txn_info->tick_parsed;
454 0 : txn_executed->tick_sigverify_disp = txn_info->tick_sigverify_disp;
455 0 : txn_executed->tick_sigverify_done = txn_info->tick_sigverify_done;
456 0 : txn_executed->tick_exec_disp = txn_info->tick_exec_disp;
457 0 : txn_executed->tick_exec_done = txn_info->tick_exec_done;
458 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_TXN_EXECUTED, ctx->replay_out->chunk, sizeof(*txn_executed), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
459 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(*txn_executed), ctx->replay_out->chunk0, ctx->replay_out->wmark );
460 0 : }
461 :
462 : static void
463 : replay_block_finalize( fd_replay_tile_t * ctx,
464 : fd_stem_context_t * stem,
465 0 : fd_bank_t * bank ) {
466 0 : bank->last_transaction_finished_nanos = fd_log_wallclock();
467 :
468 : /* Set poh hash in bank. */
469 0 : fd_hash_t * poh = fd_sched_get_poh( ctx->sched, bank->idx );
470 0 : bank->f.poh = *poh;
471 :
472 : /* Set shred count in bank. */
473 0 : bank->f.shred_cnt = fd_sched_get_shred_cnt( ctx->sched, bank->idx );
474 :
475 0 : ulong execution_fees_pre_settle = bank->f.execution_fees;
476 0 : ulong priority_fees_pre_settle = bank->f.priority_fees;
477 :
478 : /* Do hashing and other end-of-block processing. */
479 0 : fd_runtime_block_execute_finalize( bank, ctx->accdb, ctx->capture_ctx );
480 :
481 : /* Copy out cost tracker fields before freezing */
482 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
483 0 : cost_tracker_snap( bank, slot_info );
484 :
485 : /* fetch identity / vote balance updates infrequently */
486 0 : slot_info->identity_balance = ULONG_MAX;
487 0 : if( FD_UNLIKELY( bank->f.slot%4096==0UL ) ) {
488 0 : slot_info->identity_balance = fd_accdb_lamports( ctx->accdb, bank->accdb_fork_id, ctx->identity_pubkey->uc );
489 0 : }
490 :
491 : /* Mark the bank as frozen. */
492 0 : fd_banks_mark_bank_frozen( bank );
493 0 : bank->block_completed_nanos = fd_log_wallclock();
494 :
495 : /**********************************************************************/
496 : /* Bank hash comparison, and halt if there's a mismatch after replay */
497 : /**********************************************************************/
498 :
499 : /* Must be last so we can measure completion time correctly, even
500 : though we could technically do this before the hash cmp and vote
501 : tower stuff. */
502 0 : publish_slot_completed( ctx, stem, bank, 0, 0 /* is_leader */, execution_fees_pre_settle, priority_fees_pre_settle );
503 :
504 : /* If enabled, dump the block to a file and reset the dumping
505 : context state */
506 0 : if( FD_UNLIKELY( ctx->dump_proto_ctx && ctx->dump_proto_ctx->dump_block_to_pb ) ) {
507 0 : fd_dump_block_to_protobuf( ctx->block_dump_ctx, ctx->banks, bank, ctx->accdb, ctx->dump_proto_ctx, ctx->runtime_stack );
508 0 : fd_block_dump_context_reset( ctx->block_dump_ctx );
509 0 : }
510 0 : }
511 :
512 : /**********************************************************************/
513 : /* Leader bank management */
514 : /**********************************************************************/
515 :
516 : static fd_bank_t *
517 : prepare_leader_bank( fd_replay_tile_t * ctx,
518 : ulong slot,
519 : long now,
520 0 : fd_hash_t const * parent_block_id ) {
521 0 : long before = fd_log_wallclock();
522 :
523 : /* Make sure that we are not already leader. */
524 0 : FD_TEST( ctx->leader_bank==NULL );
525 :
526 0 : fd_block_id_ele_t * parent_ele = fd_block_id_map_ele_query( ctx->block_id_map, parent_block_id, NULL, ctx->block_id_arr );
527 0 : if( FD_UNLIKELY( !parent_ele ) ) {
528 0 : FD_BASE58_ENCODE_32_BYTES( parent_block_id->key, parent_block_id_b58 );
529 0 : FD_LOG_CRIT(( "invariant violation: parent bank index not found for merkle root %s", parent_block_id_b58 ));
530 0 : }
531 0 : ulong parent_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, parent_ele );
532 :
533 0 : fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, parent_bank_idx );
534 0 : if( FD_UNLIKELY( !parent_bank ) ) {
535 0 : FD_LOG_CRIT(( "invariant violation: parent bank not found for bank index %lu", parent_bank_idx ));
536 0 : }
537 :
538 0 : ctx->leader_bank = fd_banks_new_bank( ctx->banks, parent_bank_idx, now, 1 );
539 0 : if( FD_UNLIKELY( !ctx->leader_bank ) ) {
540 0 : FD_LOG_CRIT(( "invariant violation: leader bank is NULL for slot %lu", slot ));
541 0 : }
542 :
543 0 : ctx->leader_bank = fd_banks_clone_from_parent( ctx->banks, ctx->leader_bank->idx );
544 0 : if( FD_UNLIKELY( !ctx->leader_bank ) ) {
545 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for slot %lu", slot ));
546 0 : }
547 :
548 0 : ctx->leader_bank->preparation_begin_nanos = before;
549 :
550 0 : ctx->leader_bank->f.slot = slot;
551 :
552 0 : ctx->leader_bank->txncache_fork_id = fd_txncache_attach_child ( ctx->txncache, parent_bank->txncache_fork_id );
553 0 : ctx->leader_bank->progcache_fork_id = fd_progcache_attach_child( ctx->progcache, parent_bank->progcache_fork_id );
554 0 : ctx->leader_bank->accdb_fork_id = fd_accdb_attach_child ( ctx->accdb, parent_bank->accdb_fork_id );
555 :
556 0 : int is_epoch_boundary = 0;
557 0 : fd_runtime_block_execute_prepare( ctx->banks, ctx->leader_bank, ctx->accdb, ctx->runtime_stack, ctx->capture_ctx, &is_epoch_boundary );
558 :
559 0 : ulong max_tick_height;
560 0 : if( FD_UNLIKELY( FD_RUNTIME_EXECUTE_SUCCESS!=fd_runtime_compute_max_tick_height( parent_bank->f.ticks_per_slot, slot, &max_tick_height ) ) ) {
561 0 : FD_LOG_CRIT(( "couldn't compute tick height/max tick height slot %lu ticks_per_slot %lu", slot, parent_bank->f.ticks_per_slot ));
562 0 : }
563 0 : ctx->leader_bank->f.max_tick_height = max_tick_height;
564 :
565 : /* Now that a bank has been created for the leader slot, increment the
566 : reference count until we are done with the leader slot. */
567 0 : ctx->leader_bank->refcnt++;
568 :
569 0 : return ctx->leader_bank;
570 0 : }
571 :
572 : static inline void
573 0 : maybe_switch_identity( fd_replay_tile_t * ctx ) {
574 :
575 0 : if( FD_LIKELY( fd_keyswitch_state_query( ctx->keyswitch )!=FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) return;
576 :
577 : /* Switch identity */
578 :
579 0 : FD_LOG_DEBUG(( "keyswitch: switching identity" ));
580 :
581 0 : memcpy( ctx->identity_pubkey, ctx->keyswitch->bytes, 32UL );
582 :
583 0 : fd_node_info_write_begin( ctx->node_info );
584 0 : ctx->node_info->info.identity = *ctx->identity_pubkey;
585 0 : fd_node_info_write_end ( ctx->node_info );
586 :
587 0 : fd_keyswitch_state( ctx->keyswitch, FD_KEYSWITCH_STATE_COMPLETED );
588 :
589 : /* The next leader slot will be incorrect now that the identity has
590 : switched. The next leader slot normally gets updated based on the
591 : reset slot returned by tower. */
592 0 : ulong min_leader_slot = fd_ulong_max( ctx->reset_slot+1UL, fd_ulong_if( ctx->highwater_leader_slot==ULONG_MAX, 0UL, ctx->highwater_leader_slot+1UL ) );
593 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, min_leader_slot, ctx->identity_pubkey );
594 0 : if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
595 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
596 0 : } else {
597 0 : ctx->next_leader_tickcount = LONG_MAX;
598 0 : }
599 :
600 0 : ctx->identity_vote_rooted = 0;
601 0 : ctx->identity_idx++;
602 0 : fd_vote_tracker_reset( ctx->vote_tracker );
603 0 : }
604 :
605 : static int
606 : try_fini_leader( fd_replay_tile_t * ctx,
607 0 : fd_stem_context_t * stem ) {
608 :
609 : /* If we are leader, we can only unbecome the leader iff we have
610 : received the poh hash from the poh tile and block id from reasm.
611 : We have to do an additional check against the slot of the leader
612 : bank because we lazily remove entries from the block id arr. */
613 :
614 0 : if( FD_LIKELY( !ctx->is_leader ) ) return 0;
615 0 : if( !ctx->recv_poh ) return 0;
616 0 : if( !ctx->block_id_arr[ ctx->leader_bank->idx ].block_id_seen ) return 0;
617 0 : if( ctx->block_id_arr[ ctx->leader_bank->idx ].slot!=ctx->leader_bank->f.slot ) return 0;
618 :
619 0 : ctx->leader_bank->last_transaction_finished_nanos = fd_log_wallclock();
620 :
621 0 : ulong curr_slot = ctx->leader_bank->f.slot;
622 :
623 0 : fd_sched_block_add_done( ctx->sched, ctx->leader_bank->idx, ctx->leader_bank->parent_idx, curr_slot );
624 :
625 0 : ulong execution_fees_pre_settle = ctx->leader_bank->f.execution_fees;
626 0 : ulong priority_fees_pre_settle = ctx->leader_bank->f.priority_fees;
627 :
628 0 : fd_runtime_block_execute_finalize( ctx->leader_bank, ctx->accdb, ctx->capture_ctx );
629 :
630 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
631 0 : cost_tracker_snap( ctx->leader_bank, slot_info );
632 0 : slot_info->identity_balance = ULONG_MAX;
633 0 : if( FD_UNLIKELY( curr_slot%4096==0UL ) ) {
634 0 : slot_info->identity_balance = fd_accdb_lamports( ctx->accdb, ctx->leader_bank->accdb_fork_id, ctx->identity_pubkey->uc );
635 0 : }
636 :
637 0 : fd_banks_mark_bank_frozen( ctx->leader_bank );
638 0 : ctx->leader_bank->block_completed_nanos = fd_log_wallclock();
639 :
640 0 : publish_slot_completed( ctx, stem, ctx->leader_bank, 0, 1 /* is_leader */, execution_fees_pre_settle, priority_fees_pre_settle );
641 :
642 : /* The reference on the bank is finally no longer needed. */
643 0 : ctx->leader_bank->refcnt--;
644 :
645 : /* We are no longer leader so we can clear the bank index we use for
646 : being the leader. */
647 0 : ctx->leader_bank = NULL;
648 0 : ctx->recv_poh = 0;
649 0 : ctx->is_leader = 0;
650 :
651 0 : maybe_switch_identity( ctx );
652 :
653 0 : return 1;
654 0 : }
655 :
656 : static void
657 : publish_root_advanced( fd_replay_tile_t * ctx,
658 0 : fd_stem_context_t * stem ) {
659 :
660 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, ctx->consensus_root_bank_idx );
661 0 : if( FD_UNLIKELY( !bank ) ) {
662 0 : FD_LOG_CRIT(( "invariant violation: consensus root bank is NULL at bank index %lu", ctx->consensus_root_bank_idx ));
663 0 : }
664 :
665 0 : if( FD_UNLIKELY( bank->f.epoch>fd_slot_to_epoch( &bank->f.epoch_schedule, bank->f.parent_slot, NULL ) )) {
666 0 : fd_runtime_update_next_leaders( bank, ctx->runtime_stack );
667 0 : publish_epoch_info( ctx, stem, bank, 1 );
668 0 : }
669 :
670 0 : if( ctx->rpc_enabled ) {
671 0 : bank->refcnt++;
672 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for rpc", bank->idx, bank->f.slot, bank->refcnt ));
673 0 : }
674 :
675 : /* Increment the reference count on the consensus root bank to account
676 : for the number of resolv tiles that are waiting on it. */
677 0 : bank->refcnt += ctx->resolv_tile_cnt;
678 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for resolv", bank->idx, bank->f.slot, bank->refcnt ));
679 :
680 0 : fd_replay_root_advanced_t * msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
681 0 : msg->bank_idx = bank->idx;
682 0 : msg->slot = bank->f.slot;
683 0 : msg->bank_hash = bank->f.bank_hash;
684 :
685 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_ROOT_ADVANCED, ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
686 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_root_advanced_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
687 0 : }
688 :
689 : static void
690 : init_after_snapshot( fd_replay_tile_t * ctx,
691 0 : fd_stem_context_t * stem ) {
692 : /* Now that the snapshot has been loaded in, we have to refresh the
693 : stake delegations since the manifest does not contain the full set
694 : of data required for the stake delegations. See
695 : fd_stake_delegations.h for why this is required. */
696 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_SEQ );
697 0 : if( FD_UNLIKELY( !bank ) ) {
698 0 : FD_LOG_CRIT(( "invariant violation: replay bank is NULL at bank index %lu", FD_REPLAY_BOOT_BANK_SEQ ));
699 0 : }
700 :
701 0 : char const * one_offs[ 16UL ];
702 0 : for( ulong i=0UL; i<ctx->enable_features_cnt; i++ ) one_offs[ i ] = ctx->enable_features[ i ];
703 0 : fd_features_enable_one_offs( &bank->f.features, one_offs, (uint)ctx->enable_features_cnt, 0UL );
704 :
705 0 : fd_runtime_update_next_leaders( bank, ctx->runtime_stack );
706 0 : fd_runtime_update_leaders( bank, ctx->runtime_stack );
707 :
708 : /* Typically, when we cross an epoch boundary during normal
709 : operation, we publish the stake weights for the new epoch. But
710 : since we are starting from a snapshot, we need to publish two
711 : epochs worth of stake weights: the previous epoch (which is
712 : needed for voting on the current epoch), and the current epoch
713 : (which is needed for voting on the next epoch). */
714 0 : publish_epoch_info( ctx, stem, bank, 0 );
715 0 : publish_epoch_info( ctx, stem, bank, 1 );
716 :
717 0 : fd_progcache_reset( ctx->progcache );
718 0 : bank->progcache_fork_id = fd_progcache_fork_id_initial();
719 :
720 0 : bank->f.warmup_cooldown_rate_epoch = fd_slot_to_epoch( &bank->f.epoch_schedule, bank->f.features.reduce_stake_warmup_cooldown, NULL );
721 0 : fd_stake_delegations_t * root_delegations = fd_banks_stake_delegations_root_query( ctx->banks );
722 0 : fd_stake_history_t stake_history_[1];
723 0 : fd_stake_history_t const * stake_history = fd_sysvar_cache_stake_history_view( &bank->f.sysvar_cache, stake_history_ );
724 0 : fd_stake_delegations_refresh(
725 0 : root_delegations,
726 0 : bank->f.epoch,
727 0 : stake_history, /* may be NULL */
728 0 : &bank->f.warmup_cooldown_rate_epoch,
729 0 : ctx->accdb,
730 0 : bank->accdb_fork_id );
731 0 : bank->f.total_effective_stake = root_delegations->effective_stake;
732 0 : bank->f.total_activating_stake = root_delegations->activating_stake;
733 0 : bank->f.total_deactivating_stake = root_delegations->deactivating_stake;
734 :
735 0 : fd_top_votes_t * top_votes_t_2 = fd_bank_top_votes_t_2_modify( bank );
736 0 : fd_top_votes_refresh( top_votes_t_2, ctx->accdb, bank->accdb_fork_id );
737 :
738 : /* After both snapshots have been loaded in, we can determine if we should
739 : start distributing rewards. */
740 :
741 0 : fd_rewards_recalculate_partitioned_rewards( ctx->banks, bank, ctx->accdb, ctx->runtime_stack, ctx->capture_ctx );
742 :
743 : /* Signals fd_sleep_until_replay_started */
744 0 : FD_MGAUGE_SET( REPLAY, RUNTIME_STATUS, 1UL );
745 0 : }
746 :
747 : static inline int
748 : try_become_leader( fd_replay_tile_t * ctx,
749 0 : fd_stem_context_t * stem ) {
750 0 : FD_TEST( ctx->is_booted );
751 0 : if( FD_LIKELY( ctx->next_leader_slot==ULONG_MAX || ctx->is_leader || (!ctx->identity_vote_rooted && ctx->wait_for_vote_to_start_leader) || ctx->replay_out->idx==ULONG_MAX || !ctx->wfs_complete ) ) {
752 0 : return 0;
753 0 : }
754 0 : if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) ) ) return 0;
755 0 : if( FD_UNLIKELY( ctx->halt_leader ) ) return 0;
756 0 : if( !ctx->supports_leader ) return 0;
757 :
758 0 : FD_TEST( ctx->next_leader_slot>ctx->reset_slot );
759 0 : long now = fd_tickcount();
760 0 : if( FD_LIKELY( now<ctx->next_leader_tickcount ) ) return 0;
761 :
762 : /* If a prior leader is still in the process of publishing their slot,
763 : delay ours to let them finish ... unless they are so delayed that
764 : we risk getting skipped by the leader following us. 1.2 seconds
765 : is a reasonable default here, although any value between 0 and 1.6
766 : seconds could be considered reasonable. This is arbitrary and
767 : chosen due to intuition. */
768 0 : if( FD_UNLIKELY( now<ctx->next_leader_tickcount+(long)(3.0*ctx->slot_duration_ticks) ) ) {
769 0 : FD_TEST( ctx->reset_bank );
770 :
771 : /* TODO: Make the max_active_descendant calculation more efficient
772 : by caching it in the bank structure and updating it as banks are
773 : created and completed. */
774 0 : ulong max_active_descendant = 0UL;
775 0 : ulong child_idx = ctx->reset_bank->child_idx;
776 0 : while( child_idx!=ULONG_MAX ) {
777 0 : fd_bank_t * child_bank = fd_banks_bank_query( ctx->banks, child_idx );
778 0 : max_active_descendant = fd_ulong_max( max_active_descendant, child_bank->f.slot );
779 0 : child_idx = child_bank->sibling_idx;
780 0 : }
781 :
782 : /* If the max_active_descendant is >= next_leader_slot, we waited
783 : too long and a leader after us started publishing to try and skip
784 : us. Just start our leader slot immediately, we might win ... */
785 0 : if( FD_LIKELY( max_active_descendant>=ctx->reset_slot && max_active_descendant<ctx->next_leader_slot ) ) {
786 : /* If one of the leaders between the reset slot and our leader
787 : slot is in the process of publishing (they have a descendant
788 : bank that is in progress of being replayed), then keep waiting.
789 : We probably wouldn't get a leader slot out before they
790 : finished.
791 :
792 : Unless... we are past the deadline to start our slot by more
793 : than 1.2 seconds, in which case we should probably start it to
794 : avoid getting skipped by the leader behind us. */
795 0 : return 0;
796 0 : }
797 0 : }
798 :
799 : /* If we haven't started replaying the prior block, but we have
800 : finished replaying the second to last slot of the prior
801 : leader (and that leader is not us), we should give the prior leader
802 : a little more time. */
803 0 : if( FD_UNLIKELY( ctx->next_leader_slot==ctx->reset_slot+2UL && now<ctx->next_leader_tickcount+(long)(1.0*ctx->slot_duration_ticks) ) ) {
804 :
805 0 : fd_pubkey_t const * reset_leader = fd_multi_epoch_leaders_get_leader_for_slot( ctx->mleaders, ctx->reset_slot );
806 0 : if( FD_UNLIKELY( reset_leader && !fd_memeq( reset_leader, ctx->identity_pubkey, 32UL ) ) ) return 0;
807 0 : }
808 :
809 0 : long now_nanos = fd_log_wallclock();
810 :
811 0 : ctx->is_leader = 1;
812 0 : ctx->recv_poh = 0;
813 :
814 0 : FD_TEST( ctx->highwater_leader_slot==ULONG_MAX || ctx->highwater_leader_slot<ctx->next_leader_slot );
815 0 : ctx->highwater_leader_slot = ctx->next_leader_slot;
816 :
817 0 : FD_LOG_INFO(( "becoming leader for slot %lu, parent slot is %lu", ctx->next_leader_slot, ctx->reset_slot ));
818 :
819 0 : fd_bank_t * bank = prepare_leader_bank( ctx, ctx->next_leader_slot, now_nanos, &ctx->reset_block_id );
820 :
821 0 : fd_bundle_crank_tip_payment_config_t config[1] = { 0 };
822 0 : fd_pubkey_t tip_receiver_owner = {0};
823 :
824 0 : if( FD_UNLIKELY( ctx->bundle.enabled ) ) {
825 0 : fd_acct_addr_t tip_payment_config[1];
826 0 : fd_acct_addr_t tip_receiver[1];
827 0 : fd_bundle_crank_get_addresses( ctx->bundle.gen, bank->f.epoch, tip_payment_config, tip_receiver );
828 :
829 0 : fd_acc_t tip_config_acc = fd_accdb_read_one( ctx->accdb, bank->accdb_fork_id, tip_payment_config->b );
830 0 : if( FD_UNLIKELY( !tip_config_acc.lamports ) ) {
831 0 : FD_BASE58_ENCODE_32_BYTES( tip_payment_config->b, tip_config_acc_b58 );
832 0 : FD_LOG_WARNING(( "tip payment config account %s does not exist", tip_config_acc_b58 ));
833 0 : fd_accdb_unread_one( ctx->accdb, &tip_config_acc );
834 0 : } else if( FD_UNLIKELY( tip_config_acc.data_len<sizeof(fd_bundle_crank_tip_payment_config_t) ) ) {
835 0 : FD_LOG_HEXDUMP_WARNING(( "invalid tip payment config account data", tip_config_acc.data, tip_config_acc.data_len ));
836 0 : fd_accdb_unread_one( ctx->accdb, &tip_config_acc );
837 0 : } else {
838 0 : memcpy( config, tip_config_acc.data, sizeof(fd_bundle_crank_tip_payment_config_t) );
839 0 : fd_accdb_unread_one( ctx->accdb, &tip_config_acc );
840 0 : }
841 :
842 : /* It is possible that the tip receiver account does not exist yet
843 : if it is the first time in an epoch. */
844 0 : fd_acc_t tip_receiver_acc = fd_accdb_read_one( ctx->accdb, bank->accdb_fork_id, tip_receiver->b );
845 0 : if( FD_LIKELY( tip_receiver_acc.lamports ) ) {
846 0 : fd_memcpy( tip_receiver_owner.uc, tip_receiver_acc.owner, 32UL );
847 0 : }
848 0 : fd_accdb_unread_one( ctx->accdb, &tip_receiver_acc );
849 0 : }
850 :
851 :
852 0 : fd_became_leader_t * msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
853 0 : msg->slot = ctx->next_leader_slot;
854 0 : msg->slot_start_ns = now_nanos;
855 0 : msg->slot_end_ns = now_nanos+(long)ctx->slot_duration_nanos;
856 0 : msg->bank = NULL;
857 0 : msg->bank_idx = bank->idx;
858 0 : msg->ticks_per_slot = bank->f.ticks_per_slot;
859 0 : msg->hashcnt_per_tick = bank->f.hashes_per_tick;
860 0 : msg->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)msg->ticks_per_slot);
861 0 : msg->bundle->config[0] = config[0];
862 0 : memcpy( msg->bundle->last_blockhash, bank->f.poh.hash, sizeof(fd_hash_t) );
863 0 : memcpy( msg->bundle->tip_receiver_owner, tip_receiver_owner.uc, sizeof(fd_pubkey_t) );
864 :
865 0 : if( FD_UNLIKELY( msg->hashcnt_per_tick==1UL ) ) {
866 : /* Low power producer, maximum of one microblock per tick in the slot */
867 0 : msg->max_microblocks_in_slot = msg->ticks_per_slot;
868 0 : } else {
869 : /* See the long comment in after_credit for this limit */
870 0 : msg->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, msg->ticks_per_slot*(msg->hashcnt_per_tick-1UL) );
871 0 : }
872 :
873 0 : msg->total_skipped_ticks = msg->ticks_per_slot*(ctx->next_leader_slot-ctx->reset_slot);
874 0 : msg->epoch = fd_slot_to_epoch( &bank->f.epoch_schedule, ctx->next_leader_slot, NULL );
875 :
876 0 : fd_cost_tracker_t const * cost_tracker = fd_bank_cost_tracker_query( bank );
877 :
878 0 : msg->limits.slot_max_cost = ctx->larger_max_cost_per_block ? LARGER_MAX_COST_PER_BLOCK : cost_tracker->block_cost_limit;
879 0 : msg->limits.slot_max_vote_cost = cost_tracker->vote_cost_limit;
880 0 : msg->limits.slot_max_write_cost_per_acct = cost_tracker->account_cost_limit;
881 :
882 0 : if( FD_UNLIKELY( msg->ticks_per_slot+msg->total_skipped_ticks>USHORT_MAX ) ) {
883 : /* There can be at most USHORT_MAX skipped ticks, because the
884 : parent_offset field in the shred data is only 2 bytes wide. */
885 0 : FD_LOG_ERR(( "too many skipped ticks %lu for slot %lu, chain must halt", msg->ticks_per_slot+msg->total_skipped_ticks, ctx->next_leader_slot ));
886 0 : }
887 :
888 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_BECAME_LEADER, ctx->replay_out->chunk, sizeof(fd_became_leader_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
889 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_became_leader_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
890 :
891 0 : ctx->next_leader_slot = ULONG_MAX;
892 0 : ctx->next_leader_tickcount = LONG_MAX;
893 :
894 0 : return 1;
895 0 : }
896 :
897 : static void
898 : process_poh_message( fd_replay_tile_t * ctx,
899 0 : fd_poh_leader_slot_ended_t const * slot_ended ) {
900 :
901 0 : FD_TEST( ctx->is_booted );
902 0 : FD_TEST( ctx->is_leader );
903 0 : FD_TEST( ctx->leader_bank!=NULL );
904 :
905 0 : FD_TEST( ctx->highwater_leader_slot>=slot_ended->slot );
906 0 : FD_TEST( ctx->next_leader_slot>ctx->highwater_leader_slot );
907 :
908 : /* Update the poh hash in the bank. We will want to maintain a refcnt
909 : on the bank until we have received the block id for the block after
910 : it has been shredded. */
911 :
912 0 : memcpy( &ctx->leader_bank->f.poh, slot_ended->blockhash, sizeof(fd_hash_t) );
913 :
914 0 : ctx->recv_poh = 1;
915 0 : }
916 :
917 : static void
918 : publish_reset( fd_replay_tile_t * ctx,
919 : fd_stem_context_t * stem,
920 0 : fd_bank_t * bank ) {
921 0 : if( FD_UNLIKELY( ctx->replay_out->idx==ULONG_MAX ) ) return;
922 :
923 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last_hash( &bank->f.block_hash_queue );
924 0 : FD_TEST( block_hash );
925 :
926 0 : fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
927 :
928 0 : reset->bank_idx = bank->idx;
929 0 : reset->timestamp = fd_log_wallclock();
930 0 : reset->completed_slot = bank->f.slot;
931 0 : reset->hashcnt_per_tick = bank->f.hashes_per_tick;
932 0 : reset->ticks_per_slot = bank->f.ticks_per_slot;
933 0 : reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);
934 0 : fd_memcpy( reset->completed_block_id, ctx->reset_block_id.uc, sizeof(fd_hash_t) );
935 0 : fd_memcpy( reset->completed_blockhash, block_hash->uc, sizeof(fd_hash_t) );
936 :
937 0 : ulong ticks_per_slot = bank->f.ticks_per_slot;
938 0 : if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
939 : /* Low power producer, maximum of one microblock per tick in the slot */
940 0 : reset->max_microblocks_in_slot = ticks_per_slot;
941 0 : } else {
942 : /* See the long comment in after_credit for this limit */
943 0 : reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
944 0 : }
945 0 : reset->next_leader_slot = ctx->next_leader_slot;
946 0 : reset->wfs_paused = !ctx->wfs_complete;
947 :
948 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_RESET, ctx->replay_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
949 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_poh_reset_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
950 0 : }
951 :
952 : static void
953 : store_xinsert( fd_store_t * store,
954 0 : fd_hash_t const * merkle_root ) {
955 0 : fd_store_pool_t pool = {
956 0 : .pool = fd_wksp_laddr_fast( fd_store_wksp( store ), store->pool_mem_gaddr ),
957 0 : .ele = fd_wksp_laddr_fast( fd_store_wksp( store ), store->pool_ele_gaddr ),
958 0 : .ele_max = store->fec_max
959 0 : };
960 0 : fd_store_fec_t * fec = fd_store_pool_acquire( &pool );
961 0 : if( FD_UNLIKELY( !fec ) ) FD_LOG_CRIT(( "fd_store_pool_acquire failed" ));
962 0 : fec->key.merkle_root = *merkle_root;
963 0 : fec->key.part_idx = 0;
964 0 : fec->next = fd_store_pool_idx_null();
965 0 : fec->data_sz = 0UL;
966 :
967 0 : FD_STORE_XLOCK_BEGIN( store ) {
968 0 : fd_store_map_ele_insert( fd_wksp_laddr_fast( fd_store_wksp( store ), store->map_gaddr ), fec, pool.ele );
969 0 : } FD_STORE_XLOCK_END;
970 0 : }
971 :
972 : static void
973 : boot_genesis( fd_replay_tile_t * ctx,
974 : fd_stem_context_t * stem,
975 0 : fd_genesis_meta_t const * meta ) {
976 : /* If we are bootstrapping, we can't wait to wait for our identity
977 : vote to be rooted as this creates a circular dependency. */
978 0 : ctx->identity_vote_rooted = 1;
979 :
980 0 : uchar const * genesis_blob = (uchar const *)( meta+1 );
981 0 : FD_TEST( meta->bootstrap && meta->has_lthash );
982 0 : FD_TEST( fd_genesis_parse( ctx->genesis, genesis_blob, meta->blob_sz ) );
983 :
984 0 : fd_bank_t * bank = fd_banks_init_bank( ctx->banks );
985 0 : FD_TEST( bank );
986 0 : bank->f.slot = 0UL;
987 0 : FD_TEST( bank->idx==FD_REPLAY_BOOT_BANK_SEQ );
988 :
989 0 : static const fd_accdb_fork_id_t accdb_root = { .val = USHORT_MAX };
990 0 : bank->accdb_fork_id = fd_accdb_attach_child( ctx->accdb, accdb_root );
991 :
992 0 : fd_runtime_read_genesis( ctx->banks, bank, ctx->accdb, NULL, &meta->genesis_hash, &meta->lthash, ctx->genesis, genesis_blob, ctx->runtime_stack );
993 :
994 0 : bank->txncache_fork_id = fd_txncache_attach_child ( ctx->txncache, (fd_txncache_fork_id_t){USHORT_MAX} );
995 0 : bank->progcache_fork_id = fd_progcache_attach_child( ctx->progcache, fd_progcache_fork_id_initial() );
996 :
997 0 : fd_hash_t const * block_hash = fd_blockhashes_peek_last_hash( &bank->f.block_hash_queue );
998 0 : fd_txncache_finalize_fork( ctx->txncache, bank->txncache_fork_id, 0UL, block_hash->uc );
999 :
1000 : /* We call this after fd_runtime_read_genesis, which sets up the
1001 : slot_bank needed in blockstore_init. */
1002 0 : init_after_snapshot( ctx, stem );
1003 :
1004 0 : ctx->published_root_slot = 0UL;
1005 0 : fd_sched_block_add_done( ctx->sched, bank->idx, ULONG_MAX, 0UL );
1006 :
1007 0 : bank->f.block_height = 1UL;
1008 :
1009 0 : ctx->consensus_root = ctx->initial_block_id;
1010 0 : ctx->consensus_root_slot = 0UL;
1011 0 : ctx->consensus_root_bank_idx = 0UL;
1012 0 : ctx->published_root_slot = 0UL;
1013 0 : ctx->published_root_bank_idx = 0UL;
1014 :
1015 0 : ctx->reset_slot = 0UL;
1016 0 : ctx->reset_bank = bank;
1017 0 : ctx->reset_block_id = ctx->initial_block_id;
1018 0 : ctx->reset_timestamp_nanos = fd_log_wallclock();
1019 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, 1UL, ctx->identity_pubkey );
1020 0 : if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
1021 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
1022 0 : } else {
1023 0 : ctx->next_leader_tickcount = LONG_MAX;
1024 0 : }
1025 :
1026 0 : ctx->is_booted = 1;
1027 0 : try_become_leader( ctx, stem );
1028 :
1029 0 : fd_hash_t initial_block_id = ctx->initial_block_id;
1030 0 : fd_reasm_fec_t * fec = fd_reasm_init( ctx->reasm, &initial_block_id, 0 /* genesis slot */ );
1031 0 : fec->bank_idx = bank->idx;
1032 0 : fec->bank_seq = bank->bank_seq;
1033 0 : store_xinsert( ctx->store, &initial_block_id );
1034 :
1035 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ 0 ];
1036 0 : block_id_ele->latest_mr = initial_block_id;
1037 0 : block_id_ele->slot = 0UL;
1038 :
1039 0 : FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );
1040 :
1041 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
1042 0 : cost_tracker_snap( bank, slot_info );
1043 :
1044 0 : slot_info->identity_balance = fd_accdb_lamports( ctx->accdb, bank->accdb_fork_id, ctx->identity_pubkey->uc );
1045 :
1046 0 : publish_slot_completed( ctx, stem, bank, 1, 0 /* is_leader */, 0, 0 );
1047 0 : publish_root_advanced( ctx, stem );
1048 0 : publish_reset( ctx, stem, bank );
1049 0 : }
1050 :
1051 : static inline void
1052 0 : maybe_verify_cluster_type( fd_replay_tile_t * ctx ) {
1053 0 : if( FD_UNLIKELY( !ctx->is_booted || !ctx->has_genesis_hash ) ) {
1054 0 : return;
1055 0 : }
1056 :
1057 0 : FD_BASE58_ENCODE_32_BYTES( ctx->genesis_hash->uc, hash_cstr );
1058 0 : ulong cluster = fd_genesis_cluster_identify( hash_cstr );
1059 : /* Map pyth-related clusters to unkwown. */
1060 0 : switch( cluster ) {
1061 0 : case FD_CLUSTER_PYTHNET:
1062 0 : case FD_CLUSTER_PYTHTEST:
1063 0 : cluster = FD_CLUSTER_UNKNOWN;
1064 0 : }
1065 :
1066 0 : if( FD_UNLIKELY( cluster!=ctx->cluster_type ) ) {
1067 0 : FD_LOG_ERR(( "Your genesis.bin file at `%s` has a genesis hash of `%s` which means the cluster is %s "
1068 0 : "but the snapshot you loaded is for a different cluster %s. If you are trying to join the "
1069 0 : "%s cluster, you can delete the genesis.bin file and restart the node to download the correct "
1070 0 : "genesis file automatically.",
1071 0 : ctx->genesis_path,
1072 0 : hash_cstr,
1073 0 : fd_genesis_cluster_name( cluster ),
1074 0 : fd_genesis_cluster_name( ctx->cluster_type ),
1075 0 : fd_genesis_cluster_name( cluster ) ));
1076 0 : }
1077 0 : }
1078 :
1079 : static void
1080 : on_snapshot_message( fd_replay_tile_t * ctx,
1081 : fd_stem_context_t * stem,
1082 : ulong in_idx,
1083 : ulong chunk,
1084 0 : ulong sig ) {
1085 0 : ulong msg = fd_ssmsg_sig_message( sig );
1086 0 : if( FD_LIKELY( msg==FD_SSMSG_DONE ) ) {
1087 : /* An end of message notification indicates the snapshot is loaded.
1088 : Replay is able to start executing from this point onwards. */
1089 : /* TODO: replay should finish booting. Could make replay a
1090 : state machine and set the state here accordingly. */
1091 0 : ctx->is_booted = 1;
1092 :
1093 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_SEQ );
1094 0 : if( FD_UNLIKELY( !bank ) ) {
1095 0 : FD_LOG_CRIT(( "invariant violation: bank is NULL for bank index %lu", FD_REPLAY_BOOT_BANK_SEQ ));
1096 0 : }
1097 :
1098 0 : static const fd_accdb_fork_id_t accdb_root = { .val = USHORT_MAX };
1099 0 : bank->accdb_fork_id = fd_accdb_attach_child( ctx->accdb, accdb_root );
1100 :
1101 0 : ulong snapshot_slot = bank->f.slot;
1102 :
1103 0 : fd_hash_t bank_hash = bank->f.bank_hash;
1104 0 : if( FD_UNLIKELY( ctx->wfs_enabled && memcmp( ctx->expected_bank_hash.uc, bank_hash.uc, sizeof(fd_hash_t) ) ) ) {
1105 0 : FD_BASE58_ENCODE_32_BYTES( ctx->expected_bank_hash.uc, expected_bank_hash_cstr );
1106 0 : FD_BASE58_ENCODE_32_BYTES( bank_hash.uc, actual_bank_hash_cstr );
1107 0 : FD_LOG_ERR(( "[consensus.wait_for_supermajority_with_bank_hash] expected_bank_hash=%s does not match snapshot slot"
1108 0 : "=%lu bank_hash=%s. If you are loading a snapshot from the network, check that the slot matches the "
1109 0 : "cluster restart slot. ", expected_bank_hash_cstr, snapshot_slot, actual_bank_hash_cstr ));
1110 0 : }
1111 0 : if( FD_UNLIKELY( ctx->wfs_enabled ) ) {
1112 0 : FD_LOG_NOTICE(( "waiting for supermajority at snapshot slot %lu", snapshot_slot ));
1113 0 : }
1114 :
1115 : /* Manifest message must arrive before DONE */
1116 0 : if( FD_UNLIKELY( !ctx->has_expected_genesis_timestamp ) ) {
1117 0 : FD_LOG_CRIT(( "snapshot DONE received before manifest" ));
1118 0 : }
1119 :
1120 : /* FIXME: This is a hack when the block id of the snapshot slot
1121 : is not provided in the snapshot (Agave versions <4.1). A
1122 : possible solution is to get the block id of the snapshot slot
1123 : from repair. */
1124 0 : fd_hash_t manifest_block_id = ctx->has_manifest_block_id ? ctx->manifest_block_id : ctx->initial_block_id;
1125 :
1126 0 : FD_TEST( fd_sysvar_cache_restore( bank, ctx->accdb ) );
1127 : /* Agave zeroes manifest rent_params; reload from sysvar account */
1128 0 : FD_TEST( fd_sysvar_rent_read( ctx->accdb, bank->accdb_fork_id, &bank->f.rent ) );
1129 :
1130 0 : ctx->consensus_root = manifest_block_id;
1131 0 : ctx->consensus_root_slot = snapshot_slot;
1132 0 : ctx->consensus_root_bank_idx = 0UL;
1133 0 : ctx->published_root_slot = ctx->consensus_root_slot;
1134 0 : ctx->published_root_bank_idx = 0UL;
1135 :
1136 0 : ctx->reset_slot = snapshot_slot;
1137 0 : ctx->reset_bank = bank;
1138 0 : ctx->reset_block_id = manifest_block_id;
1139 0 : ctx->reset_timestamp_nanos = fd_log_wallclock();
1140 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, 1UL, ctx->identity_pubkey );
1141 0 : if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
1142 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
1143 0 : } else {
1144 0 : ctx->next_leader_tickcount = LONG_MAX;
1145 0 : }
1146 :
1147 0 : fd_sched_block_add_done( ctx->sched, bank->idx, ULONG_MAX, snapshot_slot );
1148 0 : FD_TEST( bank->idx==0UL );
1149 :
1150 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ 0 ];
1151 0 : block_id_ele->latest_mr = manifest_block_id;
1152 0 : block_id_ele->slot = snapshot_slot;
1153 0 : block_id_ele->block_id_seen = 1;
1154 0 : block_id_ele->latest_fec_idx = 0U;
1155 0 : FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );
1156 :
1157 : /* We call this after fd_runtime_read_genesis, which sets up the
1158 : slot_bank needed in blockstore_init. */
1159 0 : init_after_snapshot( ctx, stem );
1160 :
1161 0 : fd_replay_slot_completed_t * slot_info = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
1162 0 : cost_tracker_snap( bank, slot_info );
1163 :
1164 0 : slot_info->identity_balance = fd_accdb_lamports( ctx->accdb, bank->accdb_fork_id, ctx->identity_pubkey->uc );
1165 :
1166 0 : publish_slot_completed( ctx, stem, bank, 1, 0 /* is_leader */, 0, 0 );
1167 0 : publish_root_advanced( ctx, stem );
1168 :
1169 0 : fd_reasm_fec_t * fec = fd_reasm_init( ctx->reasm, &manifest_block_id, snapshot_slot );
1170 0 : fec->bank_idx = bank->idx;
1171 0 : fec->bank_seq = bank->bank_seq;
1172 0 : store_xinsert( ctx->store, &manifest_block_id );
1173 :
1174 0 : ctx->cluster_type = bank->f.cluster_type;
1175 :
1176 0 : maybe_verify_cluster_type( ctx );
1177 :
1178 0 : return;
1179 0 : }
1180 :
1181 0 : switch( msg ) {
1182 0 : case FD_SSMSG_MANIFEST_FULL:
1183 0 : case FD_SSMSG_MANIFEST_INCREMENTAL: {
1184 : /* We may either receive a full snapshot manifest or an
1185 : incremental snapshot manifest. Note that this external message
1186 : id is only used temporarily because replay cannot yet receive
1187 : the firedancer-internal snapshot manifest message. */
1188 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark ) )
1189 0 : FD_LOG_ERR(( "chunk %lu from in %d corrupt, not in range [%lu,%lu]", chunk, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
1190 :
1191 : /* Malformed manifests are rejected recoverably by snapin via
1192 : fd_ssload_manifest_validate. If recover fails here, then the
1193 : bank is partially mutated, and we must abort. */
1194 0 : if( FD_UNLIKELY( fd_ssload_recover( fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ),
1195 0 : ctx->banks,
1196 0 : fd_banks_bank_query( ctx->banks, FD_REPLAY_BOOT_BANK_SEQ ),
1197 0 : ctx->blockhash_seed ) ) ) {
1198 0 : FD_LOG_ERR(( "Snapshot manifest recovery failed, aborting." ));
1199 0 : }
1200 :
1201 0 : fd_snapshot_manifest_t const * manifest = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
1202 : /* hard_fork_cnt already validated by fd_ssload_recover. */
1203 0 : ctx->hard_fork_cnt = manifest->hard_fork_cnt;
1204 0 : for( ulong i=0UL; i<manifest->hard_fork_cnt; i++ ) {
1205 0 : ctx->hard_forks[ i ] = manifest->hard_forks[ i ];
1206 0 : }
1207 0 : ctx->has_expected_genesis_timestamp = 1;
1208 0 : ctx->expected_genesis_timestamp = manifest->creation_time_seconds;
1209 0 : ctx->has_manifest_block_id = manifest->has_block_id;
1210 0 : if( manifest->has_block_id ) memcpy( ctx->manifest_block_id.uc, manifest->block_id, 32UL );
1211 0 : break;
1212 0 : }
1213 0 : default: {
1214 0 : FD_LOG_ERR(( "Received unknown snapshot message with msg %lu", msg ));
1215 0 : return;
1216 0 : }
1217 0 : }
1218 :
1219 0 : return;
1220 0 : }
1221 :
1222 : static void
1223 : dispatch_task( fd_replay_tile_t * ctx,
1224 : fd_stem_context_t * stem,
1225 0 : fd_sched_task_t * task ) {
1226 :
1227 0 : switch( task->task_type ) {
1228 0 : case FD_SCHED_TT_TXN_EXEC: {
1229 0 : fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, task->txn_exec->txn_idx );
1230 :
1231 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->txn_exec->bank_idx );
1232 0 : FD_TEST( bank );
1233 :
1234 : /* Add the transaction to the block dumper if necessary. This
1235 : logic doesn't need to be fork-aware since it's only meant to
1236 : be used in backtest. */
1237 0 : if( FD_UNLIKELY( ctx->dump_proto_ctx && ctx->dump_proto_ctx->dump_block_to_pb ) ) {
1238 0 : fd_dump_block_to_protobuf_collect_tx( ctx->block_dump_ctx, txn_p );
1239 0 : }
1240 :
1241 0 : bank->refcnt++;
1242 :
1243 0 : if( FD_UNLIKELY( !bank->first_transaction_scheduled_nanos ) ) bank->first_transaction_scheduled_nanos = fd_log_wallclock();
1244 :
1245 0 : fd_replay_out_link_t * exec_out = ctx->exec_out;
1246 0 : fd_execrp_txn_exec_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
1247 0 : memcpy( exec_msg->txn, txn_p, sizeof(fd_txn_p_t) );
1248 0 : exec_msg->bank_idx = task->txn_exec->bank_idx;
1249 0 : exec_msg->txn_idx = task->txn_exec->txn_idx;
1250 0 : if( FD_UNLIKELY( ctx->capture_ctx ) ) {
1251 0 : exec_msg->capture_txn_idx = ctx->capture_ctx->current_txn_idx++;
1252 0 : }
1253 0 : fd_stem_publish( stem, exec_out->idx, (FD_EXECRP_TT_TXN_EXEC<<32) | task->txn_exec->exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1254 0 : exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
1255 0 : break;
1256 0 : }
1257 0 : case FD_SCHED_TT_TXN_SIGVERIFY: {
1258 0 : fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, task->txn_sigverify->txn_idx );
1259 :
1260 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->txn_sigverify->bank_idx );
1261 0 : FD_TEST( bank );
1262 0 : bank->refcnt++;
1263 :
1264 0 : fd_replay_out_link_t * exec_out = ctx->exec_out;
1265 0 : fd_execrp_txn_sigverify_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
1266 0 : memcpy( exec_msg->txn, txn_p, sizeof(fd_txn_p_t) );
1267 0 : exec_msg->bank_idx = task->txn_sigverify->bank_idx;
1268 0 : exec_msg->txn_idx = task->txn_sigverify->txn_idx;
1269 0 : fd_stem_publish( stem, exec_out->idx, (FD_EXECRP_TT_TXN_SIGVERIFY<<32) | task->txn_sigverify->exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, 0UL );
1270 0 : exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
1271 0 : break;
1272 0 : };
1273 0 : case FD_SCHED_TT_POH_HASH: {
1274 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->poh_hash->bank_idx );
1275 0 : FD_TEST( bank );
1276 0 : bank->refcnt++;
1277 :
1278 0 : fd_replay_out_link_t * exec_out = ctx->exec_out;
1279 0 : fd_execrp_poh_hash_msg_t * exec_msg = fd_chunk_to_laddr( exec_out->mem, exec_out->chunk );
1280 0 : exec_msg->bank_idx = task->poh_hash->bank_idx;
1281 0 : exec_msg->mblk_idx = task->poh_hash->mblk_idx;
1282 0 : exec_msg->hashcnt = task->poh_hash->hashcnt;
1283 0 : memcpy( exec_msg->hash, task->poh_hash->hash, sizeof(fd_hash_t) );
1284 0 : fd_stem_publish( stem, exec_out->idx, (FD_EXECRP_TT_POH_HASH<<32) | task->poh_hash->exec_idx, exec_out->chunk, sizeof(*exec_msg), 0UL, 0UL, 0UL );
1285 0 : exec_out->chunk = fd_dcache_compact_next( exec_out->chunk, sizeof(*exec_msg), exec_out->chunk0, exec_out->wmark );
1286 0 : break;
1287 0 : };
1288 0 : default: {
1289 0 : FD_LOG_CRIT(( "unexpected task type %lu", task->task_type ));
1290 0 : }
1291 0 : }
1292 0 : }
1293 :
1294 : static void
1295 : mark_bank_dead( fd_replay_tile_t * ctx,
1296 : fd_stem_context_t * stem,
1297 0 : ulong bank_idx ) {
1298 0 : ulong dead_idxs[ FD_BANKS_MAX_BANKS ];
1299 0 : ulong dead_idxs_cnt = 0UL;
1300 0 : fd_banks_mark_bank_dead( ctx->banks, bank_idx, dead_idxs, &dead_idxs_cnt );
1301 :
1302 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ bank_idx ];
1303 0 : if( block_id_ele->block_id_seen ) publish_slot_dead( ctx, stem, block_id_ele->slot, &block_id_ele->latest_mr );
1304 :
1305 0 : for( ulong i=0UL; i<dead_idxs_cnt; i++ ) {
1306 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ dead_idxs[ i ] ];
1307 0 : fd_reasm_fec_t * fec = fd_reasm_query( ctx->reasm, &block_id_ele->latest_mr );
1308 0 : if( FD_LIKELY( fec ) ) fec->bank_dead = 1;
1309 0 : }
1310 0 : }
1311 :
1312 : static int
1313 : replay( fd_replay_tile_t * ctx,
1314 0 : fd_stem_context_t * stem ) {
1315 :
1316 0 : if( FD_UNLIKELY( !ctx->is_booted ) ) return 0;
1317 :
1318 0 : int charge_busy = 0;
1319 0 : fd_sched_task_t task[ 1 ];
1320 0 : if( FD_UNLIKELY( !fd_sched_task_next_ready( ctx->sched, task ) ) ) {
1321 0 : return charge_busy; /* Nothing to execute or do. */
1322 0 : }
1323 :
1324 0 : charge_busy = 1;
1325 :
1326 0 : switch( task->task_type ) {
1327 0 : case FD_SCHED_TT_BLOCK_START: {
1328 0 : replay_block_start( ctx, task->block_start->bank_idx, task->block_start->parent_bank_idx, task->block_start->slot );
1329 0 : fd_sched_task_done( ctx->sched, FD_SCHED_TT_BLOCK_START, ULONG_MAX, ULONG_MAX, NULL );
1330 0 : break;
1331 0 : }
1332 0 : case FD_SCHED_TT_BLOCK_END: {
1333 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, task->block_end->bank_idx );
1334 0 : if( FD_LIKELY( bank->state==FD_BANK_STATE_REPLAYABLE ) ) replay_block_finalize( ctx, stem, bank );
1335 0 : fd_sched_task_done( ctx->sched, FD_SCHED_TT_BLOCK_END, ULONG_MAX, ULONG_MAX, NULL );
1336 0 : break;
1337 0 : }
1338 0 : case FD_SCHED_TT_TXN_EXEC:
1339 0 : case FD_SCHED_TT_TXN_SIGVERIFY:
1340 0 : case FD_SCHED_TT_POH_HASH: {
1341 : /* Common case: we have a transaction we need to execute. */
1342 0 : dispatch_task( ctx, stem, task );
1343 0 : break;
1344 0 : }
1345 0 : case FD_SCHED_TT_MARK_DEAD: {
1346 0 : mark_bank_dead( ctx, stem, task->mark_dead->bank_idx );
1347 0 : break;
1348 0 : }
1349 0 : default: {
1350 0 : FD_LOG_CRIT(( "unexpected task type %lu", task->task_type ));
1351 0 : }
1352 0 : }
1353 :
1354 0 : return charge_busy;
1355 0 : }
1356 :
1357 : static int
1358 : can_process_fec( fd_replay_tile_t * ctx,
1359 0 : int * evict_banks_out ) {
1360 : /* We can process a FEC set if a few conditions are met:
1361 : - sched has capacity
1362 : - reasm has a FEC in its out queue ready to be processed */
1363 :
1364 0 : if( FD_UNLIKELY( fd_sched_can_ingest_cnt( ctx->sched )==0UL ) ) {
1365 0 : FD_TEST( !fd_sched_is_drained( ctx->sched ) );
1366 0 : ctx->metrics.sched_full++;
1367 0 : return 0;
1368 0 : }
1369 :
1370 0 : fd_reasm_fec_t * fec;
1371 0 : if( FD_UNLIKELY( (fec = fd_reasm_peek( ctx->reasm ))==NULL ) ) {
1372 0 : ctx->metrics.reasm_empty++;
1373 0 : return 0;
1374 0 : }
1375 :
1376 0 : fd_reasm_fec_t * parent = fd_reasm_parent( ctx->reasm, fec );
1377 0 : FD_TEST( parent ); /* FEC must be connected */
1378 :
1379 0 : ctx->metrics.reasm_latest_slot = fec->slot;
1380 0 : ctx->metrics.reasm_latest_fec_idx = fec->fec_set_idx;
1381 :
1382 0 : if( FD_UNLIKELY( ctx->is_leader && fec->fec_set_idx==0U && parent->bank_idx==ctx->leader_bank->idx ) ) {
1383 : /* This guards against a rare race where we receive the FEC set for
1384 : the slot right after our leader rotation before we freeze the
1385 : bank for the last slot in our leader rotation. Leader slot
1386 : freezing happens only after if we've received the final PoH hash
1387 : from the poh tile as well as the final FEC set for the leader
1388 : slot. So the race happens when FEC sets are delivered and
1389 : processed sooner than the PoH hash, aka when the
1390 : poh=>shred=>replay path for the block id beats the poh=>replay
1391 : path for the poh hash. To mitigate this race, we must block on
1392 : ingesting the FEC set for the ensuing slot before the leader
1393 : bank freezes, because that would violate ordering invariants in
1394 : banks and sched. */
1395 0 : FD_TEST( ctx->block_id_arr[ ctx->leader_bank->idx ].block_id_seen );
1396 0 : FD_TEST( !ctx->recv_poh );
1397 0 : ctx->metrics.leader_bid_wait++;
1398 0 : return 0;
1399 0 : }
1400 :
1401 : /* Should we evict banks if there are no more free banks? The answer
1402 : is it depends. Eviction should only happen if we can make no
1403 : forward replay progress. This can only happen if:
1404 : 1. banks are full
1405 : 2. sched is drained: pending txns could complete a block and
1406 : eventually advance the root.
1407 : AND
1408 : 3. next reasm FEC start a new block. A fec that chains off of a
1409 : bank that is already allocated can be processed. A FEC can
1410 : trigger a new block in two ways:
1411 : - fec_set_idx==0: we don't have any free banks to provision a new
1412 : bank for this FEC.
1413 : - equivocation: a FEC may be in the middle of a block, but if
1414 : it's the first equivocating FEC detected, we need to allocate a
1415 : new bank for the version of the block. */
1416 0 : int is_new_block = fec->fec_set_idx==0;
1417 0 : int is_eqvoc = fec->eqvoc && !parent->eqvoc;
1418 0 : if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) && (is_new_block || is_eqvoc) ) ) {
1419 0 : ctx->metrics.banks_full++;
1420 0 : if( FD_UNLIKELY( fd_sched_is_drained( ctx->sched ) ) ) *evict_banks_out = 1;
1421 0 : return 0;
1422 0 : }
1423 :
1424 : /* Otherwise, banks may not be full, so we can always create a new
1425 : bank if needed. Or, if banks are full, the current fec set's
1426 : ancestor (idx 0) already created a bank for this slot. */
1427 0 : return 1;
1428 0 : }
1429 :
1430 : /* Returns 0 on successful FEC ingestion, 1 if the block got marked
1431 : dead. insert_fec_set assumes that all FECs that are inserted are
1432 : directly connected to a parent FEC. Every block that is replayed
1433 : has initial fec set idx 0 up to and including a FEC with
1434 : slot_complete set. The caller is responsible for ensuring this. */
1435 : static int
1436 : insert_fec_set( fd_replay_tile_t * ctx,
1437 : fd_stem_context_t * stem,
1438 0 : fd_reasm_fec_t * reasm_fec ) {
1439 :
1440 : /* First, read FEC set from the store. If it's not there that means
1441 : that the FEC is on a minority fork which has been pruned away.
1442 : This means we shouldn't have a bank for the corresponding block so
1443 : we should just ignore and discard the FEC set. */
1444 :
1445 0 : ulong wait = (ulong)fd_log_wallclock();
1446 0 : ulong work = wait;
1447 0 : FD_STORE_SLOCK_BEGIN( ctx->store ) {
1448 0 : ctx->metrics.store_query_acquire++;
1449 0 : work = (ulong)fd_log_wallclock();
1450 0 : fd_histf_sample( ctx->metrics.store_query_wait, work - wait );
1451 :
1452 0 : fd_store_fec_t * store_fec = fd_store_query( ctx->store, &reasm_fec->key );
1453 0 : ctx->metrics.store_query_cnt++;
1454 0 : if( FD_UNLIKELY( !store_fec && !reasm_fec->is_leader ) ) {
1455 : /* The only case in which a FEC is not found in the store is either
1456 : if the FEC is from our own leader block or after repair has
1457 : notified is if the FEC was on a minority fork that has already
1458 : been published away. In this case we abandon the entire slice
1459 : because it is no longer relevant. If the FEC is from our own
1460 : leader block, process the FEC so we can unbecome leader. */
1461 0 : ctx->metrics.store_query_missing_cnt++;
1462 0 : ctx->metrics.store_query_missing_mr = reasm_fec->key.ul[0];
1463 0 : FD_BASE58_ENCODE_32_BYTES( reasm_fec->key.key, key_b58 );
1464 0 : FD_LOG_WARNING(( "store fec for slot: %lu is on minority fork already pruned by publish. abandoning slice. root: %lu. pruned merkle: %s", reasm_fec->slot, ctx->consensus_root_slot, key_b58 ));
1465 0 : return 1;
1466 0 : }
1467 :
1468 0 : long now = fd_log_wallclock();
1469 :
1470 : /* Assign parent bank idx + seq no to the FEC */
1471 0 : reasm_fec->parent_bank_idx = fd_reasm_parent( ctx->reasm, reasm_fec )->bank_idx;
1472 0 : fd_bank_t * parent_bank = fd_banks_bank_query( ctx->banks, reasm_fec->parent_bank_idx );
1473 :
1474 0 : if( FD_UNLIKELY( reasm_fec->fec_set_idx==0U ) ) {
1475 : /* Provision new bank if not leader. Assign bank idx and seq no
1476 : to the FEC. Remove stale block id map entry if any and update
1477 : pool element. */
1478 0 : fd_bank_t * bank = reasm_fec->is_leader ? ctx->leader_bank : fd_banks_new_bank( ctx->banks, reasm_fec->parent_bank_idx, now, 0 );
1479 0 : reasm_fec->bank_idx = bank->idx;
1480 0 : reasm_fec->bank_seq = bank->bank_seq;
1481 :
1482 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
1483 0 : if( FD_LIKELY( fd_block_id_map_ele_query( ctx->block_id_map, &block_id_ele->latest_mr, NULL, ctx->block_id_arr )==block_id_ele ) ) {
1484 0 : FD_TEST( fd_block_id_map_ele_remove( ctx->block_id_map, &block_id_ele->latest_mr, NULL, ctx->block_id_arr ) );
1485 0 : }
1486 0 : block_id_ele->block_id_seen = 0;
1487 0 : block_id_ele->slot = reasm_fec->slot;
1488 0 : block_id_ele->latest_fec_idx = 0U;
1489 0 : block_id_ele->latest_mr = reasm_fec->key;
1490 0 : } else { /* FEC for the middle or end of a block */
1491 : /* Assign bank idx + seqno to the FEC. Update block id pool ele. */
1492 0 : reasm_fec->bank_idx = reasm_fec->parent_bank_idx;
1493 0 : reasm_fec->bank_seq = parent_bank->bank_seq;
1494 :
1495 0 : FD_TEST( reasm_fec->bank_idx!=ULONG_MAX );
1496 :
1497 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
1498 0 : block_id_ele->latest_fec_idx = reasm_fec->fec_set_idx;
1499 0 : block_id_ele->latest_mr = reasm_fec->key;
1500 0 : }
1501 :
1502 : /* If the FEC set is a slot complete, this means we have finally seen
1503 : the block id (block's last mr). */
1504 0 : if( FD_UNLIKELY( reasm_fec->slot_complete ) ) {
1505 0 : fd_block_id_ele_t * block_id_ele = &ctx->block_id_arr[ reasm_fec->bank_idx ];
1506 0 : block_id_ele->block_id_seen = 1;
1507 0 : block_id_ele->latest_mr = reasm_fec->key;
1508 0 : block_id_ele->latest_fec_idx = reasm_fec->fec_set_idx;
1509 0 : FD_TEST( fd_block_id_map_ele_insert( ctx->block_id_map, block_id_ele, ctx->block_id_arr ) );
1510 0 : }
1511 :
1512 : /* For leader FECs, don't insert the FEC into the scheduler. */
1513 0 : if( FD_UNLIKELY( reasm_fec->is_leader ) ) return 0;
1514 :
1515 : /* Forks form a partial ordering over FEC sets. The Repair tile
1516 : delivers FEC sets in-order per fork, but FEC set ordering across
1517 : forks is arbitrary */
1518 0 : fd_sched_fec_t sched_fec[ 1 ];
1519 :
1520 : # if DEBUG_LOGGING
1521 : FD_BASE58_ENCODE_32_BYTES( reasm_fec->key.key, key_b58 );
1522 : FD_BASE58_ENCODE_32_BYTES( reasm_fec->cmr.key, cmr_b58 );
1523 : FD_LOG_INFO(( "replay processing FEC set for slot %lu fec_set_idx %u, mr %s cmr %s", reasm_fec->slot, reasm_fec->fec_set_idx, key_b58, cmr_b58 ));
1524 : # endif
1525 :
1526 0 : sched_fec->shred_cnt = reasm_fec->data_cnt;
1527 0 : sched_fec->is_last_in_batch = !!reasm_fec->data_complete;
1528 0 : sched_fec->is_last_in_block = !!reasm_fec->slot_complete;
1529 0 : sched_fec->bank_idx = reasm_fec->bank_idx;
1530 0 : sched_fec->parent_bank_idx = reasm_fec->parent_bank_idx;
1531 0 : sched_fec->slot = reasm_fec->slot;
1532 0 : sched_fec->parent_slot = reasm_fec->slot - reasm_fec->parent_off;
1533 0 : sched_fec->is_first_in_block = reasm_fec->fec_set_idx==0U;
1534 0 : sched_fec->fec = store_fec;
1535 0 : sched_fec->data = fd_store_fec_data( ctx->store, store_fec );
1536 0 : sched_fec->alut_ctx->fork_id = fd_banks_bank_query( ctx->banks, ctx->consensus_root_bank_idx )->accdb_fork_id;
1537 0 : sched_fec->alut_ctx->accdb = ctx->accdb;
1538 0 : sched_fec->alut_ctx->els = ctx->published_root_slot;
1539 :
1540 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, sched_fec->bank_idx );
1541 0 : if( sched_fec->is_first_in_block ) {
1542 0 : bank->refcnt++;
1543 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for sched", bank->idx, sched_fec->slot, bank->refcnt ));
1544 0 : }
1545 :
1546 0 : if( FD_UNLIKELY( !fd_sched_fec_ingest( ctx->sched, sched_fec ) ) ) {
1547 0 : mark_bank_dead( ctx, stem, sched_fec->bank_idx );
1548 0 : return 1;
1549 0 : }
1550 :
1551 0 : } FD_STORE_SLOCK_END;
1552 :
1553 0 : ctx->metrics.store_query_release++;
1554 0 : fd_histf_sample( ctx->metrics.store_query_work, (ulong)fd_log_wallclock() - work );
1555 0 : return 0;
1556 0 : }
1557 :
1558 : static void
1559 : backfill_fec_sets( fd_replay_tile_t * ctx,
1560 : fd_stem_context_t * stem,
1561 0 : fd_reasm_fec_t * reasm_fec ) {
1562 0 : fd_reasm_fec_t * parent = fd_reasm_parent( ctx->reasm, reasm_fec );
1563 0 : FD_TEST( !!parent );
1564 :
1565 0 : fd_reasm_fec_t * path[ FD_BANKS_MAX_BANKS ];
1566 0 : ulong path_cnt = 0UL;
1567 0 : path[ path_cnt++ ] = reasm_fec;
1568 :
1569 : /* Collect all of the slot completes starting from the current FEC
1570 : iterating through the tree until we hit a FEC that is a slot
1571 : complete that corresponds to a valid bank. */
1572 0 : for( fd_reasm_fec_t * curr = reasm_fec;; ) {
1573 0 : curr = fd_reasm_parent( ctx->reasm, curr );
1574 0 : FD_TEST( curr );
1575 0 : if( FD_LIKELY( !curr->slot_complete ) ) continue;
1576 :
1577 0 : fd_bank_t * curr_bank = curr->bank_idx==ULONG_MAX ? NULL : fd_banks_bank_query( ctx->banks, curr->bank_idx );
1578 0 : if( FD_LIKELY( curr_bank && curr_bank->bank_seq==curr->bank_seq ) ) break;
1579 :
1580 0 : FD_TEST( path_cnt<FD_BANKS_MAX_BANKS );
1581 0 : path[ path_cnt++ ] = curr;
1582 0 : }
1583 :
1584 : /* For each bank's worth of FECs, insert all of the FECs into the
1585 : scheduler. Ensure that only a full bank's worth of FECs are
1586 : inserted at a time. If there's no capacity in the banks or the
1587 : scheduler, backoff for now and try again later. */
1588 0 : for( ulong i=path_cnt; i>0UL; i-- ) {
1589 0 : fd_reasm_fec_t * leaf = path[ i-1 ];
1590 :
1591 : /* If there's no capacity in the sched or banks, return early and
1592 : drop the FEC. We have inserted as much as we can for now. */
1593 0 : if( FD_UNLIKELY( fd_sched_can_ingest_cnt( ctx->sched ) < (leaf->fec_set_idx/FD_FEC_SHRED_CNT + 1) ) ) return;
1594 0 : if( FD_UNLIKELY( fd_banks_is_full( ctx->banks ) ) ) return;
1595 :
1596 : /* Gather all FECs for this slot */
1597 0 : fd_reasm_fec_t * slot_fecs[ FD_FEC_BLK_MAX ];
1598 0 : fd_reasm_fec_t * curr = leaf;
1599 0 : for(;;) {
1600 0 : slot_fecs[ curr->fec_set_idx/FD_FEC_SHRED_CNT ] = curr;
1601 0 : if( curr->fec_set_idx==0U ) break;
1602 0 : curr = fd_reasm_parent( ctx->reasm, curr );
1603 0 : FD_TEST( curr );
1604 0 : }
1605 0 : FD_LOG_NOTICE(( "backfilling FEC sets for slot %lu from fec_set_idx %u to fec_set_idx %u", leaf->slot, leaf->fec_set_idx, curr->fec_set_idx ));
1606 :
1607 0 : for( ulong j=0UL; j<=leaf->fec_set_idx/FD_FEC_SHRED_CNT; j++ ) {
1608 0 : if( FD_UNLIKELY( insert_fec_set( ctx, stem, slot_fecs[ j ] ) ) ) return;
1609 0 : }
1610 0 : }
1611 0 : }
1612 :
1613 : static void
1614 : process_fec_set( fd_replay_tile_t * ctx,
1615 : fd_stem_context_t * stem,
1616 0 : fd_reasm_fec_t * reasm_fec ) {
1617 :
1618 0 : fd_reasm_fec_t * parent = fd_reasm_parent( ctx->reasm, reasm_fec );
1619 0 : if( FD_UNLIKELY( parent->bank_dead ) ) {
1620 : /* Inherit the dead flag from the parent. If a dead slot is
1621 : completed, we publish the slot as dead. Don't insert FECs for
1622 : dead slots. */
1623 0 : reasm_fec->bank_dead = 1;
1624 0 : if( FD_UNLIKELY( reasm_fec->slot_complete ) ) publish_slot_dead( ctx, stem, reasm_fec->slot, &reasm_fec->key );
1625 0 : FD_LOG_DEBUG(( "dropping FEC set (slot=%lu, fec_set_idx=%u) because parent bank is marked dead", reasm_fec->slot, reasm_fec->fec_set_idx ));
1626 0 : return;
1627 0 : }
1628 :
1629 : /* An invariant from reasm is that if we receive a FEC set that is
1630 : both with eqvoc and confirmed set, we know that we must replay the
1631 : slot associated with this FEC. equivocation when fec_set_idx == 0
1632 : gets handled cleanly. */
1633 0 : int eqvoc_detected = reasm_fec->fec_set_idx!=0 && (reasm_fec->eqvoc && !parent->eqvoc);
1634 0 : if( FD_UNLIKELY( eqvoc_detected ) ) FD_TEST( reasm_fec->confirmed && parent->confirmed );
1635 :
1636 : /* We can detect if a bank has not replayed if the bank index tagged
1637 : to the FEC set is no longer valid or the bank sequence number for
1638 : the same bank is different (the bank has been recycled). This is
1639 : either due to the parent bank being evicted, or in reasm, the
1640 : parent is marked eqvoc (and not replayed), but the child gets
1641 : confirmed and delivered. */
1642 0 : fd_bank_t * parent_fec_bank = parent->bank_idx==ULONG_MAX ? NULL : fd_banks_bank_query( ctx->banks, parent->bank_idx );
1643 0 : int parent_bank_invalid = !parent_fec_bank || parent_fec_bank->bank_seq!=parent->bank_seq;
1644 :
1645 : /* If the upcoming FEC is either the start of an equivocating chain,
1646 : chains off of a bank that was evicted, OR is the child of an
1647 : equivocating chain whose parent was gated from getting replayed, we
1648 : must backfill any FECs into the scheduler. This backfill must
1649 : start from a FEC with fec_set_idx==0 with a parent FEC
1650 : corresponding to a valid bank. */
1651 0 : if( FD_LIKELY( !parent_bank_invalid && !eqvoc_detected ) ) {
1652 0 : insert_fec_set( ctx, stem, reasm_fec );
1653 0 : } else {
1654 0 : backfill_fec_sets( ctx, stem, reasm_fec );
1655 0 : }
1656 0 : }
1657 :
1658 : static int
1659 0 : try_advance_published_root( fd_replay_tile_t * ctx ) {
1660 :
1661 0 : if( FD_LIKELY( ctx->consensus_root_bank_idx==ctx->published_root_bank_idx ) ) return 0;
1662 :
1663 0 : fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &ctx->consensus_root, NULL, ctx->block_id_arr );
1664 0 : if( FD_UNLIKELY( !block_id_ele ) ) {
1665 0 : FD_BASE58_ENCODE_32_BYTES( ctx->consensus_root.key, consensus_root_b58 );
1666 0 : FD_LOG_CRIT(( "invariant violation: block id ele not found for consensus root %s", consensus_root_b58 ));
1667 0 : }
1668 0 : ulong target_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );
1669 :
1670 : /* If the identity vote has been seen on a bank that should be rooted,
1671 : then we are now ready to produce blocks. */
1672 0 : if( FD_UNLIKELY( !ctx->identity_vote_rooted ) ) {
1673 0 : fd_bank_t * root_bank = fd_banks_bank_query( ctx->banks, target_bank_idx );
1674 0 : if( FD_UNLIKELY( !root_bank ) ) FD_LOG_CRIT(( "invariant violation: root bank not found for bank index %lu", target_bank_idx ));
1675 0 : if( root_bank->f.identity_vote_idx==ctx->identity_idx ) ctx->identity_vote_rooted = 1;
1676 0 : }
1677 :
1678 0 : ulong advanceable_root_idx = ULONG_MAX;
1679 0 : if( FD_UNLIKELY( !fd_banks_advance_root_prepare( ctx->banks, target_bank_idx, &advanceable_root_idx ) ) ) {
1680 0 : ctx->metrics.storage_root_behind++;
1681 0 : return 0;
1682 0 : }
1683 :
1684 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, advanceable_root_idx );
1685 0 : FD_TEST( bank );
1686 :
1687 0 : if( FD_UNLIKELY( advanceable_root_idx >= ctx->block_id_len ) ) {
1688 0 : FD_LOG_CRIT(( "invariant violation: advanceable root ele out of bounds [0, %lu) index %lu", ctx->block_id_len, advanceable_root_idx ));
1689 0 : }
1690 0 : fd_block_id_ele_t * advanceable_root_ele = &ctx->block_id_arr[ advanceable_root_idx ];
1691 :
1692 0 : ulong advanceable_root_slot = bank->f.slot;
1693 0 : fd_txncache_advance_root ( ctx->txncache, bank->txncache_fork_id );
1694 0 : fd_progcache_advance_root( ctx->progcache, bank->progcache_fork_id );
1695 0 : fd_accdb_advance_root ( ctx->accdb, bank->accdb_fork_id );
1696 0 : fd_sched_advance_root( ctx->sched, advanceable_root_idx );
1697 0 : fd_banks_advance_root( ctx->banks, advanceable_root_idx );
1698 :
1699 : /* Reasm also prunes from the store during its publish. */
1700 :
1701 0 : fd_reasm_publish( ctx->reasm, &advanceable_root_ele->latest_mr, ctx->store );
1702 :
1703 0 : ctx->published_root_slot = advanceable_root_slot;
1704 0 : ctx->published_root_bank_idx = advanceable_root_idx;
1705 :
1706 0 : return 1;
1707 0 : }
1708 :
1709 : static int
1710 0 : try_prune_sched( fd_replay_tile_t * ctx ) {
1711 0 : ulong bank_idx;
1712 0 : int pruned = 0;
1713 0 : while( (bank_idx=fd_sched_pruned_block_next( ctx->sched ) )!=ULONG_MAX ) {
1714 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
1715 0 : FD_TEST( bank );
1716 0 : bank->refcnt--;
1717 0 : FD_LOG_DEBUG(( "bank (idx=%lu) refcnt decremented to %lu for sched", bank->idx, bank->refcnt ));
1718 0 : pruned = 1;
1719 0 : }
1720 0 : return pruned;
1721 0 : }
1722 :
1723 : static int
1724 0 : try_prune_bank( fd_replay_tile_t * ctx ) {
1725 0 : fd_banks_prune_cancel_info_t cancel_info[ 1 ];
1726 0 : int pruned = fd_banks_prune_one_dead_bank( ctx->banks, cancel_info );
1727 0 : switch( pruned ) {
1728 0 : case 2: { /* pruning bank + cancellation is needed */
1729 0 : fd_txncache_cancel_fork( ctx->txncache, cancel_info->txncache_fork_id );
1730 0 : fd_progcache_cancel_fork( ctx->progcache, cancel_info->progcache_fork_id );
1731 0 : fd_accdb_purge ( ctx->accdb, cancel_info->accdb_fork_id );
1732 0 : __attribute__((fallthrough));
1733 0 : }
1734 0 : case 1: { /* pruning bank + no cancellation is needed */
1735 : /* A sched block exists, and can be marked dead, for a bank as
1736 : soon as its first FEC has been ingested, which can happen
1737 : before the bank ever set itself up for actual execution (e.g. a
1738 : block that parses as bad on its very first FEC). So always
1739 : instruct sched to prune the block whenever banks prunes the
1740 : bank. The txncache/progcache/accdb forks, on the other hand,
1741 : are only created once the bank started actual execution. */
1742 0 : fd_sched_cancel( ctx->sched, cancel_info->bank_idx );
1743 0 : return 1;
1744 0 : }
1745 0 : case 0: /* no bank to prune */
1746 0 : return 0;
1747 0 : default:
1748 0 : FD_LOG_ERR(( "unreachable" ));
1749 0 : }
1750 0 : }
1751 :
1752 : static int
1753 : try_evict_reasm( fd_replay_tile_t * ctx,
1754 0 : fd_stem_context_t * stem ) {
1755 :
1756 : /* if reasm_evicted is set, publish starting from reasm_evicted down
1757 : to the leaf node to repair so repair can re-request for it.
1758 : reasm_evicted gets set when reasm tries to insert a FEC and there
1759 : is no remaining capacity. */
1760 0 : if( FD_LIKELY( !ctx->reasm_evicted ) ) return 0;
1761 :
1762 : /* Publish a notification to the repair tile that the Replay tile no
1763 : longer has the FEC that was evicted. This will make sure that the
1764 : repair tile will re-request the FEC if it eventually gets
1765 : confirmed so that Replay can still make forward progress. */
1766 0 : fd_replay_fec_evicted_t evicted = (fd_replay_fec_evicted_t){ .mr = ctx->reasm_evicted->key, .slot = ctx->reasm_evicted->slot, .fec_set_idx = ctx->reasm_evicted->fec_set_idx, .bank_idx = ctx->reasm_evicted->bank_idx };
1767 0 : fd_memcpy( fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk ), &evicted, sizeof(fd_replay_fec_evicted_t) );
1768 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_REASM_EVICTED, ctx->replay_out->chunk, sizeof(fd_replay_fec_evicted_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
1769 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_fec_evicted_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
1770 :
1771 : /* eviction policy only evicts chains of nodes until there is a
1772 : fork, so guaranteed that the evict path is always the left-child
1773 : TODO: This should be abstracted away. */
1774 0 : fd_reasm_pool_release( ctx->reasm, ctx->reasm_evicted );
1775 0 : ctx->reasm_evicted = fd_reasm_child( ctx->reasm, ctx->reasm_evicted ); /* indexes into pool, safe to use */
1776 0 : return 1;
1777 0 : }
1778 :
1779 : static int
1780 : try_evict_frontier( fd_replay_tile_t * ctx,
1781 0 : fd_stem_context_t * stem ) {
1782 : /* Mark a frontier eviction victim bank as dead. As refcnts on said
1783 : banks are drained, they will be pruned away. If we are trying to
1784 : mark dead (evict) the frontier it is important that no replay is
1785 : occurring; otherwise, our list of banks to evict will be stale. */
1786 0 : if( FD_UNLIKELY( !ctx->frontier_cnt ) ) return 0;
1787 :
1788 0 : ulong bank_idx = ctx->frontier_indices[ --ctx->frontier_cnt ];
1789 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
1790 0 : FD_TEST( !!bank && bank->child_idx==ULONG_MAX );
1791 0 : mark_bank_dead( ctx, stem, bank->idx );
1792 0 : fd_sched_block_abandon( ctx->sched, bank->idx );
1793 0 : return 1;
1794 0 : }
1795 :
1796 : static int
1797 : try_process_fec( fd_replay_tile_t * ctx,
1798 0 : fd_stem_context_t * stem ) {
1799 :
1800 : /* If the reassembler has a fec that is ready, we should process it
1801 : and pass it to the scheduler.
1802 :
1803 : We would also like to pace FEC ingestion such that we keep the exec
1804 : tiles busy. If there's a pending frag from one of the exec tiles,
1805 : we would like to know about that asap, because that could unblock
1806 : dispatching. So we ingest FEC sets only if we are sure that there
1807 : are no more exec tile notifications to process. This delays FEC
1808 : ingestion just enough so as to keep the exec tiles as busy as we
1809 : can, and prevents us from being stuck ingesting a backlog of FEC
1810 : sets, especially when there is a pending completion notification
1811 : about a single-transaction chokepoint in the replay dispatcher DAG.
1812 : Except that when we are leader or the reasm buffer is getting full,
1813 : we prioritize FEC processing. In the leader case, this is so we
1814 : can get to the leader FEC sets asap and freeze the leader bank on
1815 : time. In the reasm full case, this is so we don't prematurely
1816 : trigger eviction. */
1817 0 : int evict_banks = 0;
1818 0 : if( FD_LIKELY( (ctx->execrp_idle_cnt>=2UL*ctx->in_cnt || ctx->is_leader || fd_reasm_free( ctx->reasm )<=1UL) &&
1819 0 : can_process_fec( ctx, &evict_banks ) ) ) {
1820 0 : fd_reasm_fec_t * fec = fd_reasm_pop( ctx->reasm );
1821 0 : process_fec_set( ctx, stem, fec );
1822 0 : ctx->execrp_idle_cnt = 0UL;
1823 0 : return 1;
1824 0 : }
1825 :
1826 : /* If we need to evict banks, just gather the frontier set of banks.
1827 : Eventually these banks will be marked dead and pruned away. */
1828 0 : if( FD_UNLIKELY( evict_banks ) ) {
1829 0 : FD_LOG_WARNING(( "banks are full and partially executed frontier banks are being evicted" ));
1830 0 : fd_banks_get_replay_frontier( ctx->banks, ctx->frontier_indices, &ctx->frontier_cnt );
1831 0 : return 1;
1832 0 : }
1833 :
1834 0 : return 0;
1835 0 : }
1836 :
1837 : static void
1838 : after_credit( fd_replay_tile_t * ctx,
1839 : fd_stem_context_t * stem,
1840 : int * opt_poll_in,
1841 0 : int * charge_busy ) {
1842 0 : if( FD_UNLIKELY( !ctx->is_booted || !ctx->wfs_complete ) ) return;
1843 :
1844 : /* The overall priority for the replay tile in order is:
1845 : 1. Make sure replay has room to progress:
1846 : a. evicting pending FECs from the reassembler
1847 : b. queueing up frontier banks for frontier eviction if needed
1848 : c. clearing any pending bank eviction victims.
1849 : 2. Drain outstanding bank references from the scheduler. This
1850 : happens after a block gets completed or a fork gets pruned.
1851 : 3. Advance the root. If the consensus root has been advanced, but
1852 : the storage root is behind, to advance it.
1853 : 4. Replay. If there is work to do for replay, do it. This is
1854 : more important than ingesting more FEC sets.
1855 : 5. If replay has nothing to do, ingest more FEC sets.
1856 : WARNING: The ordering here is VERY load bearing and it should not
1857 : be changed without extreme caution. */
1858 :
1859 0 : if( FD_UNLIKELY( try_evict_reasm( ctx, stem ) ) ) {
1860 0 : *charge_busy = 1;
1861 0 : *opt_poll_in = 0;
1862 0 : return;
1863 0 : }
1864 :
1865 0 : if( FD_UNLIKELY( try_evict_frontier( ctx, stem ) ) ) {
1866 0 : *charge_busy = 1;
1867 0 : *opt_poll_in = 0;
1868 0 : return;
1869 0 : }
1870 :
1871 0 : if( FD_UNLIKELY( try_prune_bank( ctx ) ) ) {
1872 0 : *charge_busy = 1;
1873 0 : *opt_poll_in = 0;
1874 0 : return;
1875 0 : }
1876 :
1877 0 : if( FD_UNLIKELY( try_become_leader( ctx, stem ) ) ) {
1878 0 : *charge_busy = 1;
1879 0 : *opt_poll_in = 0;
1880 0 : return;
1881 0 : }
1882 :
1883 0 : if( FD_UNLIKELY( try_fini_leader( ctx, stem ) ) ) {
1884 0 : *charge_busy = 1;
1885 0 : *opt_poll_in = 0;
1886 0 : return;
1887 0 : }
1888 :
1889 0 : if( FD_UNLIKELY( try_prune_sched( ctx ) ) ) {
1890 0 : *charge_busy = 1;
1891 0 : *opt_poll_in = 0;
1892 0 : return;
1893 0 : }
1894 :
1895 0 : if( FD_UNLIKELY( try_advance_published_root( ctx ) ) ) {
1896 0 : *charge_busy = 1;
1897 0 : *opt_poll_in = 0;
1898 0 : return;
1899 0 : }
1900 :
1901 0 : if( FD_LIKELY( replay( ctx, stem ) ) ) {
1902 0 : *charge_busy = 1;
1903 0 : *opt_poll_in = 0;
1904 0 : return;
1905 0 : }
1906 :
1907 0 : if( FD_LIKELY( try_process_fec( ctx, stem ) ) ) {
1908 0 : *charge_busy = 1;
1909 0 : *opt_poll_in = 0;
1910 0 : return;
1911 0 : }
1912 :
1913 0 : ctx->execrp_idle_cnt++;
1914 0 : }
1915 :
1916 : static int
1917 : before_frag( fd_replay_tile_t * ctx,
1918 : ulong in_idx,
1919 : ulong seq FD_PARAM_UNUSED,
1920 0 : ulong sig ) {
1921 :
1922 0 : if( FD_UNLIKELY( ctx->in_kind[ in_idx ]==IN_KIND_GOSSIP_OUT && sig!=FD_GOSSIP_UPDATE_TAG_WFS_DONE ) ) return 1;
1923 0 : return 0;
1924 0 : }
1925 :
1926 : static void
1927 : process_exec_task_done( fd_replay_tile_t * ctx,
1928 : fd_stem_context_t * stem,
1929 : fd_execrp_task_done_msg_t * msg,
1930 0 : ulong sig ) {
1931 :
1932 0 : ulong exec_tile_idx = sig&0xFFFFFFFFUL;
1933 :
1934 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, msg->bank_idx );
1935 0 : FD_TEST( bank );
1936 0 : bank->refcnt--;
1937 :
1938 0 : switch( sig>>32 ) {
1939 0 : case FD_EXECRP_TT_TXN_EXEC: {
1940 0 : ulong txn_idx = msg->txn_exec->txn_idx;
1941 0 : if( FD_UNLIKELY( !ctx->identity_vote_rooted ) ) {
1942 : /* Query the txn signature against our recently generated vote
1943 : txn signatures. If the query is successful, then we have
1944 : seen our own vote transaction land and this should be marked
1945 : in the bank. We go through this exercise until we've seen
1946 : our vote rooted. */
1947 0 : fd_txn_p_t * txn_p = fd_sched_get_txn( ctx->sched, txn_idx );
1948 :
1949 0 : fd_pubkey_t * identity_pubkey_out = NULL;
1950 0 : if( fd_vote_tracker_query_sig( ctx->vote_tracker, fd_type_pun_const( txn_p->payload+TXN( txn_p )->signature_off ), &identity_pubkey_out ) && fd_pubkey_eq( identity_pubkey_out, ctx->identity_pubkey ) ) {
1951 0 : bank->f.identity_vote_idx = ctx->identity_idx;
1952 0 : }
1953 0 : }
1954 0 : if( FD_UNLIKELY( !msg->txn_exec->is_committable && bank->state!=FD_BANK_STATE_DEAD) ) {
1955 : /* Every transaction in a valid block has to execute.
1956 : Otherwise, we should mark the block as dead. */
1957 0 : mark_bank_dead( ctx, stem, bank->idx );
1958 0 : fd_sched_block_abandon( ctx->sched, bank->idx );
1959 0 : }
1960 0 : int res = fd_sched_task_done( ctx->sched, FD_SCHED_TT_TXN_EXEC, txn_idx, exec_tile_idx, NULL );
1961 0 : FD_TEST( res==0 );
1962 0 : fd_sched_txn_info_t * txn_info = fd_sched_get_txn_info( ctx->sched, txn_idx );
1963 0 : txn_info->flags |= FD_SCHED_TXN_EXEC_DONE;
1964 0 : if( FD_LIKELY( !(txn_info->flags&FD_SCHED_TXN_SIGVERIFY_DONE)||!txn_info->txn_err ) ) { /* Set execution status if sigverify hasn't happened yet or if sigverify was a success. */
1965 0 : txn_info->txn_err = msg->txn_exec->txn_err;
1966 0 : txn_info->flags |= fd_ulong_if( msg->txn_exec->is_committable, FD_SCHED_TXN_IS_COMMITTABLE, 0UL );
1967 0 : txn_info->flags |= fd_ulong_if( msg->txn_exec->is_fees_only, FD_SCHED_TXN_IS_FEES_ONLY, 0UL );
1968 0 : }
1969 0 : if( FD_UNLIKELY( (txn_info->flags&FD_SCHED_TXN_REPLAY_DONE)==FD_SCHED_TXN_REPLAY_DONE ) ) { /* UNLIKELY because generally exec happens before sigverify. */
1970 0 : publish_txn_executed( ctx, stem, txn_idx );
1971 0 : }
1972 0 : break;
1973 0 : }
1974 0 : case FD_EXECRP_TT_TXN_SIGVERIFY: {
1975 0 : ulong txn_idx = msg->txn_sigverify->txn_idx;
1976 0 : fd_sched_txn_info_t * txn_info = fd_sched_get_txn_info( ctx->sched, txn_idx );
1977 0 : txn_info->flags |= FD_SCHED_TXN_SIGVERIFY_DONE;
1978 0 : if( FD_UNLIKELY( msg->txn_sigverify->err ) ) {
1979 0 : txn_info->txn_err = FD_RUNTIME_TXN_ERR_SIGNATURE_FAILURE;
1980 0 : txn_info->flags &= ~FD_SCHED_TXN_IS_COMMITTABLE;
1981 0 : txn_info->flags &= ~FD_SCHED_TXN_IS_FEES_ONLY;
1982 0 : }
1983 0 : if( FD_UNLIKELY( msg->txn_sigverify->err && bank->state!=FD_BANK_STATE_DEAD ) ) {
1984 : /* Every transaction in a valid block has to sigverify.
1985 : Otherwise, we should mark the block as dead. Also freeze the
1986 : bank if possible. */
1987 0 : mark_bank_dead( ctx, stem, bank->idx );
1988 0 : fd_sched_block_abandon( ctx->sched, bank->idx );
1989 0 : }
1990 0 : int res = fd_sched_task_done( ctx->sched, FD_SCHED_TT_TXN_SIGVERIFY, txn_idx, exec_tile_idx, NULL );
1991 0 : FD_TEST( res==0 );
1992 0 : if( FD_LIKELY( (txn_info->flags&FD_SCHED_TXN_REPLAY_DONE)==FD_SCHED_TXN_REPLAY_DONE ) ) {
1993 0 : publish_txn_executed( ctx, stem, txn_idx );
1994 0 : }
1995 0 : break;
1996 0 : }
1997 0 : case FD_EXECRP_TT_POH_HASH: {
1998 0 : int res = fd_sched_task_done( ctx->sched, FD_SCHED_TT_POH_HASH, ULONG_MAX, exec_tile_idx, msg->poh_hash );
1999 0 : if( FD_UNLIKELY( res<0 && bank->state!=FD_BANK_STATE_DEAD ) ) {
2000 0 : mark_bank_dead( ctx, stem, bank->idx );
2001 0 : }
2002 0 : break;
2003 0 : }
2004 0 : default: FD_LOG_CRIT(( "unexpected sig 0x%lx", sig ));
2005 0 : }
2006 :
2007 : /* Reference counter just decreased, and an exec tile just got freed
2008 : up. If there's a need to be more aggressively pruning, we could
2009 : check here if more slots just became publishable and publish. Not
2010 : publishing here shouldn't bloat the fork tree too much though. We
2011 : mark minority forks dead as soon as we can, and execution dispatch
2012 : stops on dead blocks. So shortly afterwards, dead blocks should be
2013 : eligible for pruning as in-flight transactions retire from the
2014 : execution pipeline. */
2015 :
2016 0 : }
2017 :
2018 : static void
2019 : process_tower_slot_done( fd_replay_tile_t * ctx,
2020 : fd_stem_context_t * stem,
2021 : fd_tower_slot_done_t const * msg,
2022 0 : ulong seq ) {
2023 0 : fd_bank_t * replay_bank = fd_banks_bank_query( ctx->banks, msg->replay_bank_idx );
2024 0 : if( FD_UNLIKELY( !replay_bank ) ) FD_LOG_CRIT(( "invariant violation: bank not found for bank index %lu", msg->replay_bank_idx ));
2025 0 : replay_bank->refcnt--;
2026 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt decremented to %lu for tower", replay_bank->idx, msg->replay_slot, replay_bank->refcnt ));
2027 :
2028 0 : ctx->reset_block_id = msg->reset_block_id;
2029 0 : ctx->reset_slot = msg->reset_slot;
2030 0 : ctx->reset_timestamp_nanos = fd_log_wallclock();
2031 0 : ulong min_leader_slot = fd_ulong_max( msg->reset_slot+1UL, fd_ulong_if( ctx->highwater_leader_slot==ULONG_MAX, 0UL, ctx->highwater_leader_slot+1UL ) );
2032 0 : ctx->next_leader_slot = fd_multi_epoch_leaders_get_next_slot( ctx->mleaders, min_leader_slot, ctx->identity_pubkey );
2033 0 : if( FD_LIKELY( ctx->next_leader_slot != ULONG_MAX ) ) {
2034 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
2035 0 : } else {
2036 0 : ctx->next_leader_tickcount = LONG_MAX;
2037 0 : }
2038 :
2039 0 : fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->reset_block_id, NULL, ctx->block_id_arr );
2040 0 : if( FD_UNLIKELY( !block_id_ele ) ) {
2041 0 : FD_BASE58_ENCODE_32_BYTES( msg->reset_block_id.key, reset_block_id_b58 );
2042 0 : FD_LOG_CRIT(( "invariant violation: block id ele doesn't exist for reset block id: %s, slot: %lu", reset_block_id_b58, msg->reset_slot ));
2043 0 : }
2044 0 : ulong reset_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );
2045 :
2046 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, reset_bank_idx );
2047 0 : if( FD_UNLIKELY( !bank ) ) {
2048 0 : FD_LOG_CRIT(( "invariant violation: bank not found for bank index %lu", reset_bank_idx ));
2049 0 : }
2050 :
2051 0 : if( FD_LIKELY( msg->root_slot!=ULONG_MAX ) ) FD_TEST( msg->root_slot<=msg->reset_slot );
2052 0 : ctx->reset_bank = bank;
2053 :
2054 0 : if( FD_LIKELY( ctx->replay_out->idx!=ULONG_MAX ) ) {
2055 0 : fd_poh_reset_t * reset = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
2056 :
2057 0 : reset->bank_idx = bank->idx;
2058 0 : reset->timestamp = ctx->reset_timestamp_nanos;
2059 0 : reset->completed_slot = ctx->reset_slot;
2060 0 : reset->hashcnt_per_tick = bank->f.hashes_per_tick;
2061 0 : reset->ticks_per_slot = bank->f.ticks_per_slot;
2062 0 : reset->tick_duration_ns = (ulong)(ctx->slot_duration_nanos/(double)reset->ticks_per_slot);
2063 :
2064 0 : fd_memcpy( reset->completed_block_id, &block_id_ele->latest_mr, sizeof(fd_hash_t) );
2065 :
2066 0 : fd_blockhashes_t const * block_hash_queue = &bank->f.block_hash_queue;
2067 0 : fd_hash_t const * last_hash = fd_blockhashes_peek_last_hash( block_hash_queue );
2068 0 : FD_TEST( last_hash );
2069 0 : fd_memcpy( reset->completed_blockhash, last_hash->uc, sizeof(fd_hash_t) );
2070 :
2071 0 : ulong ticks_per_slot = bank->f.ticks_per_slot;
2072 0 : if( FD_UNLIKELY( reset->hashcnt_per_tick==1UL ) ) {
2073 : /* Low power producer, maximum of one microblock per tick in the slot */
2074 0 : reset->max_microblocks_in_slot = ticks_per_slot;
2075 0 : } else {
2076 : /* See the long comment in after_credit for this limit */
2077 0 : reset->max_microblocks_in_slot = fd_ulong_min( MAX_MICROBLOCKS_PER_SLOT, ticks_per_slot*(reset->hashcnt_per_tick-1UL) );
2078 0 : }
2079 0 : reset->next_leader_slot = ctx->next_leader_slot;
2080 0 : reset->wfs_paused = !ctx->wfs_complete;
2081 :
2082 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_RESET, ctx->replay_out->chunk, sizeof(fd_poh_reset_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
2083 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_poh_reset_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
2084 0 : }
2085 :
2086 0 : FD_LOG_INFO(( "tower_slot_done(reset_slot=%lu, next_leader_slot=%lu, vote_slot=%lu, replay_slot=%lu, root_slot=%lu, seqno=%lu)", msg->reset_slot, ctx->next_leader_slot, msg->vote_slot, msg->replay_slot, msg->root_slot, seq ));
2087 0 : try_become_leader( ctx, stem );
2088 :
2089 0 : if( FD_LIKELY( msg->root_slot!=ULONG_MAX ) ) {
2090 :
2091 0 : FD_TEST( msg->root_slot>=ctx->consensus_root_slot );
2092 0 : fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->root_block_id, NULL, ctx->block_id_arr );
2093 0 : FD_TEST( block_id_ele );
2094 0 : ctx->consensus_root_slot = msg->root_slot;
2095 0 : ctx->consensus_root = msg->root_block_id;
2096 0 : ctx->consensus_root_bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );
2097 :
2098 0 : publish_root_advanced( ctx, stem );
2099 :
2100 0 : fd_sched_root_notify( ctx->sched, ctx->consensus_root_bank_idx );
2101 0 : }
2102 :
2103 0 : ulong distance = 0UL;
2104 0 : fd_bank_t * parent = bank;
2105 0 : while( parent ) {
2106 0 : if( FD_UNLIKELY( parent->idx==ctx->consensus_root_bank_idx ) ) break;
2107 0 : parent = fd_banks_get_parent( ctx->banks, parent );
2108 0 : distance++;
2109 0 : }
2110 :
2111 0 : FD_MGAUGE_SET( REPLAY, ROOT_DISTANCE, distance );
2112 0 : }
2113 :
2114 : static void
2115 : process_fec_complete( fd_replay_tile_t * ctx,
2116 : ulong sig,
2117 0 : fd_fec_complete_t * complete_msg ) {
2118 0 : fd_shred_t const * shred = &complete_msg->last_shred_hdr;
2119 :
2120 0 : fd_hash_t const * merkle_root = &complete_msg->merkle_root;
2121 0 : fd_hash_t const * chained_merkle_root = &complete_msg->chained_merkle_root;
2122 0 : int is_leader_fec = sig == REPAIR_SIG_FEC_LEADER;
2123 0 : int data_complete = !!( shred->data.flags & FD_SHRED_DATA_FLAG_DATA_COMPLETE );
2124 0 : int slot_complete = !!( shred->data.flags & FD_SHRED_DATA_FLAG_SLOT_COMPLETE );
2125 :
2126 0 : if( FD_UNLIKELY( sig==REPAIR_SIG_FEC_INVALID ) ) {
2127 : /* FEC set detected as invalid based on duplicate confirmations.
2128 : Nothing to do except remove from store. If the FEC set is not in
2129 : reasm, we can directly remove from store. If the FEC set is in
2130 : reasm, then we let reasm_publish handle it. */
2131 0 : if( FD_LIKELY( !fd_reasm_query( ctx->reasm, merkle_root ) ) ) {
2132 0 : fd_store_remove( ctx->store, merkle_root );
2133 0 : }
2134 0 : return;
2135 0 : }
2136 :
2137 0 : if( FD_UNLIKELY( shred->slot - shred->data.parent_off == fd_reasm_slot0( ctx->reasm ) && shred->fec_set_idx == 0) ) {
2138 0 : chained_merkle_root = &fd_reasm_root( ctx->reasm )->key;
2139 0 : }
2140 :
2141 0 : if( FD_UNLIKELY( fd_reasm_query( ctx->reasm, merkle_root ) ) ) return;
2142 0 : fd_reasm_fec_t * fec = fd_reasm_insert( ctx->reasm, merkle_root, chained_merkle_root, shred->slot, shred->fec_set_idx, shred->data.parent_off, (ushort)(shred->idx - shred->fec_set_idx + 1), data_complete, slot_complete, is_leader_fec, ctx->store, &ctx->reasm_evicted );
2143 :
2144 0 : if( FD_UNLIKELY( !fec ) ) {
2145 : /* reasm failed to insert. We don't want to just put this back on
2146 : the returnable_frag queue because it's unclear whether this FEC
2147 : is truly something we want to process. Therefore our best option
2148 : is to punt it and "go around." Either the FEC was invalid and
2149 : was rejected or reasm_insert populates its last pool element with
2150 : the data of the failed insert, so we make sure to publish the
2151 : failed insert data to repair in after_credit. */
2152 0 : fd_store_remove( ctx->store, merkle_root );
2153 0 : return;
2154 0 : }
2155 0 : }
2156 :
2157 : static void
2158 0 : process_resolv_slot_completed( fd_replay_tile_t * ctx, ulong bank_idx ) {
2159 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
2160 0 : FD_TEST( bank );
2161 0 : bank->refcnt--;
2162 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt decremented to %lu for resolv", bank->idx, bank->f.slot, bank->refcnt ));
2163 0 : }
2164 :
2165 : static void
2166 : process_vote_txn_sent( fd_replay_tile_t * ctx,
2167 0 : fd_txn_m_t * txnm ) {
2168 : /* The send tile has signed and sent a vote. Add this vote to the
2169 : vote tracker. We go through this exercise until the client has
2170 : seen a vote corresponding to the current identity rooted. */
2171 0 : if( FD_UNLIKELY( !ctx->identity_vote_rooted ) ) {
2172 0 : uchar * payload = (uchar *)txnm + sizeof(fd_txn_m_t);
2173 0 : uchar txn_mem[ FD_TXN_MAX_SZ ] __attribute__((aligned(alignof(fd_txn_t))));
2174 0 : fd_txn_t * txn = (fd_txn_t *)txn_mem;
2175 0 : if( FD_UNLIKELY( !fd_txn_parse( payload, txnm->payload_sz, txn_mem, NULL ) ) ) {
2176 0 : FD_LOG_CRIT(( "Could not parse txn from send tile" ));
2177 0 : }
2178 : /* The identity of the validator that the signed the vote will
2179 : always be the first signer in the vote transaction. */
2180 0 : fd_pubkey_t * vote_identity = fd_type_pun( payload+txn->acct_addr_off );
2181 0 : fd_vote_tracker_insert( ctx->vote_tracker, vote_identity, fd_type_pun_const( payload+txn->signature_off ) );
2182 0 : }
2183 0 : }
2184 :
2185 : static inline void
2186 0 : maybe_verify_shred_version( fd_replay_tile_t * ctx ) {
2187 0 : if( FD_LIKELY( ctx->expected_shred_version && ctx->ipecho_shred_version ) ) {
2188 0 : if( FD_UNLIKELY( ctx->expected_shred_version!=ctx->ipecho_shred_version ) ) {
2189 0 : FD_LOG_ERR(( "shred version mismatch: expected %u but got %u from ipecho", ctx->expected_shred_version, ctx->ipecho_shred_version ) );
2190 0 : }
2191 0 : }
2192 :
2193 0 : if( FD_LIKELY( ctx->has_genesis_hash && ctx->hard_fork_cnt!=ULONG_MAX && (ctx->expected_shred_version || ctx->ipecho_shred_version) ) ) {
2194 0 : ushort expected_shred_version = ctx->expected_shred_version ? ctx->expected_shred_version : ctx->ipecho_shred_version;
2195 :
2196 0 : ushort actual_shred_version = compute_shred_version( ctx->genesis_hash->uc, ctx->hard_forks, ctx->hard_fork_cnt );
2197 :
2198 0 : if( FD_UNLIKELY( expected_shred_version!=actual_shred_version ) ) {
2199 0 : FD_BASE58_ENCODE_32_BYTES( ctx->genesis_hash->uc, genesis_hash_b58 );
2200 0 : FD_LOG_ERR(( "Your genesis.bin file at `%s` combined with the hard_forks from the loaded snapshot have produced "
2201 0 : "a shred version of %hu but the entrypoint you connected to on boot reported a shred version of %hu. "
2202 0 : "This likely means that the genesis.bin file you have is for a different cluster than the one you "
2203 0 : "are trying to connect to, you can delete it and restart the node to download the correct genesis "
2204 0 : "file automatically.", ctx->genesis_path, actual_shred_version, expected_shred_version ));
2205 0 : }
2206 0 : }
2207 0 : }
2208 :
2209 : static inline void
2210 0 : maybe_verify_genesis_timestamp( fd_replay_tile_t * ctx ) {
2211 0 : if( FD_LIKELY( !ctx->has_expected_genesis_timestamp || !ctx->has_genesis_timestamp ) ) return;
2212 0 : if( FD_LIKELY( ctx->genesis_timestamp==ctx->expected_genesis_timestamp ) ) return;
2213 :
2214 0 : FD_LOG_ERR(( "Your genesis.bin file at `%s` has a genesis timestamp of %lu but the snapshot you loaded has a genesis "
2215 0 : "timestamp of %lu. This either means that the genesis.bin file you have is for a different cluster than "
2216 0 : "the one you are trying to connect to, or you have loaded a snapshot for the wrong cluster. In either "
2217 0 : "case, you can delete the problematic file and restart the node to download the correct one automatically.",
2218 0 : ctx->genesis_path, ctx->genesis_timestamp, ctx->expected_genesis_timestamp ));
2219 0 : }
2220 :
2221 : static void
2222 : update_metric_identity_balance( fd_replay_tile_t * ctx,
2223 : fd_accdb_fork_id_t fork_id,
2224 0 : fd_pubkey_t const * identity ) {
2225 0 : ulong identity_balance = fd_accdb_lamports( ctx->accdb, fork_id, identity->uc );
2226 0 : FD_MGAUGE_SET( REPLAY, IDENTITY_BALANCE_LAMPORTS, identity_balance );
2227 0 : }
2228 :
2229 : static void
2230 : update_metric_epoch_credits( fd_replay_tile_t * ctx,
2231 : fd_bank_t const * bank,
2232 : fd_accdb_fork_id_t fork_id,
2233 0 : fd_pubkey_t const * vote_key ) {
2234 0 : ulong epoch_credits = 0UL;
2235 0 : fd_acc_t ro = fd_accdb_read_one( ctx->accdb, fork_id, vote_key->uc );
2236 0 : if( FD_LIKELY( ro.lamports ) ) {
2237 0 : fd_vote_state_versioned_t vsv[1];
2238 0 : if( FD_LIKELY( fd_vote_state_versioned_deserialize( vsv, ro.data, ro.data_len ) ) ) {
2239 0 : fd_vote_epoch_credits_t const * ec = fd_vsv_get_epoch_credits( vsv );
2240 0 : if( !deq_fd_vote_epoch_credits_t_empty( ec ) ) {
2241 0 : fd_vote_epoch_credits_t const * last_ec = deq_fd_vote_epoch_credits_t_peek_tail_const( ec );
2242 0 : if( last_ec->epoch==bank->f.epoch ) {
2243 0 : epoch_credits = last_ec->credits;
2244 0 : }
2245 0 : }
2246 0 : }
2247 0 : }
2248 0 : fd_accdb_unread_one( ctx->accdb, &ro );
2249 :
2250 0 : FD_MGAUGE_SET( REPLAY, EPOCH_CREDITS, epoch_credits );
2251 0 : }
2252 :
2253 : static void
2254 : update_metric_active_stake( fd_bank_t const * bank,
2255 0 : fd_pubkey_t const * vote_key ) {
2256 0 : ulong my_active_stake = 0UL;
2257 0 : ulong tot_active_stake = bank->f.total_epoch_stake;
2258 :
2259 0 : ulong stake = 0UL;
2260 0 : if( FD_FEATURE_ACTIVE_BANK( bank, validator_admission_ticket ) ) {
2261 0 : fd_top_votes_t const * top_votes = fd_bank_top_votes_t_1_query( bank );
2262 0 : fd_top_votes_query( top_votes, vote_key, NULL, &stake, NULL, NULL, NULL, NULL );
2263 0 : } else {
2264 0 : fd_vote_stakes_t * vote_stakes = fd_bank_vote_stakes( bank );
2265 0 : fd_vote_stakes_query_t_1( vote_stakes, bank->vote_stakes_fork_id, vote_key, &stake, NULL, NULL );
2266 0 : }
2267 0 : my_active_stake = stake;
2268 :
2269 0 : FD_MGAUGE_SET( REPLAY, ACTIVE_STAKE_LAMPORTS, my_active_stake );
2270 0 : FD_MGAUGE_SET( REPLAY, CLUSTER_ACTIVE_STAKE_LAMPORTS, tot_active_stake );
2271 0 : }
2272 :
2273 : static void
2274 : update_metric_balances( fd_replay_tile_t * ctx,
2275 0 : fd_bank_t * bank ) {
2276 0 : fd_accdb_fork_id_t fork_id = bank->accdb_fork_id;
2277 0 : fd_node_info_t node_info[1]; fd_node_info_read( node_info, ctx->node_info );
2278 0 : if( !fd_pubkey_check_zero( &node_info->identity ) ) {
2279 0 : update_metric_identity_balance( ctx, fork_id, &node_info->identity );
2280 0 : }
2281 :
2282 0 : if( !fd_pubkey_check_zero( &node_info->vote_account ) ) {
2283 0 : update_metric_epoch_credits( ctx, bank, fork_id, &node_info->vote_account );
2284 0 : update_metric_active_stake ( bank, &node_info->vote_account );
2285 0 : }
2286 0 : }
2287 :
2288 : static void
2289 : process_tower_optimistic_confirmed( fd_replay_tile_t * ctx,
2290 : fd_stem_context_t * stem,
2291 0 : fd_tower_slot_confirmed_t const * msg ) {
2292 :
2293 0 : fd_block_id_ele_t * block_id_ele = fd_block_id_map_ele_query( ctx->block_id_map, &msg->block_id, NULL, ctx->block_id_arr );
2294 0 : if( FD_UNLIKELY( !block_id_ele ) ) {
2295 0 : FD_BASE58_ENCODE_32_BYTES( msg->block_id.key, block_id_b58 );
2296 0 : FD_LOG_WARNING(( "missing bank for confirmed block_id: %s level %d", block_id_b58, msg->level ));
2297 0 : return;
2298 0 : }
2299 :
2300 0 : ulong bank_idx = fd_block_id_ele_get_idx( ctx->block_id_arr, block_id_ele );
2301 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, bank_idx );
2302 :
2303 :
2304 0 : if( FD_UNLIKELY( !bank ) ) {
2305 0 : FD_BASE58_ENCODE_32_BYTES( msg->block_id.key, block_id_cstr );
2306 0 : FD_LOG_WARNING(( "failed to query optimistically confirmed bank for block id %s", block_id_cstr ));
2307 0 : return;
2308 0 : }
2309 :
2310 0 : if( ctx->rpc_enabled ) {
2311 0 : bank->refcnt++;
2312 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt incremented to %lu for rpc", bank->idx, bank->f.slot, bank->refcnt ));
2313 0 : }
2314 :
2315 0 : fd_replay_oc_advanced_t * replay_msg = fd_chunk_to_laddr( ctx->replay_out->mem, ctx->replay_out->chunk );
2316 0 : replay_msg->bank_idx = bank_idx;
2317 0 : replay_msg->slot = msg->slot;
2318 :
2319 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_OC_ADVANCED, ctx->replay_out->chunk, sizeof(fd_replay_oc_advanced_t), 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
2320 0 : ctx->replay_out->chunk = fd_dcache_compact_next( ctx->replay_out->chunk, sizeof(fd_replay_oc_advanced_t), ctx->replay_out->chunk0, ctx->replay_out->wmark );
2321 :
2322 0 : update_metric_balances( ctx, bank );
2323 0 : }
2324 :
2325 : static inline int
2326 : returnable_frag( fd_replay_tile_t * ctx,
2327 : ulong in_idx,
2328 : ulong seq,
2329 : ulong sig,
2330 : ulong chunk,
2331 : ulong sz,
2332 : ulong ctl,
2333 : ulong tsorig,
2334 : ulong tspub,
2335 0 : fd_stem_context_t * stem ) {
2336 0 : (void)seq;
2337 0 : (void)ctl;
2338 0 : (void)tsorig;
2339 0 : (void)tspub;
2340 :
2341 0 : if( FD_UNLIKELY( sz!=0UL && (chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) )
2342 0 : FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
2343 :
2344 0 : switch( ctx->in_kind[in_idx] ) {
2345 0 : case IN_KIND_GENESIS: {
2346 0 : fd_genesis_meta_t const * meta = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
2347 0 : ctx->has_genesis_hash = 1;
2348 0 : ctx->has_genesis_timestamp = 1;
2349 0 : ctx->genesis_timestamp = meta->creation_time_seconds;
2350 0 : *ctx->genesis_hash = meta->genesis_hash;
2351 0 : fd_node_info_write_begin( ctx->node_info );
2352 0 : ctx->node_info->info.genesis_hash = *ctx->genesis_hash;
2353 0 : fd_node_info_write_end( ctx->node_info );
2354 0 : if( FD_LIKELY( meta->bootstrap ) ) {
2355 0 : boot_genesis( ctx, stem, meta );
2356 0 : } else {
2357 0 : uchar const * genesis_blob = (uchar const *)( meta+1 );
2358 0 : FD_TEST( fd_genesis_parse( ctx->genesis, genesis_blob, meta->blob_sz ) );
2359 0 : }
2360 0 : ctx->has_genesis_timestamp = 1;
2361 0 : ctx->genesis_timestamp = ctx->genesis->creation_time;
2362 :
2363 0 : maybe_verify_cluster_type( ctx );
2364 0 : maybe_verify_shred_version( ctx );
2365 0 : maybe_verify_genesis_timestamp( ctx );
2366 0 : break;
2367 0 : }
2368 0 : case IN_KIND_IPECHO: {
2369 0 : FD_TEST( sig && sig<=USHORT_MAX );
2370 0 : ctx->ipecho_shred_version = (ushort)sig;
2371 0 : maybe_verify_shred_version( ctx );
2372 0 : break;
2373 0 : }
2374 0 : case IN_KIND_SNAP: {
2375 0 : on_snapshot_message( ctx, stem, in_idx, chunk, sig );
2376 0 : maybe_verify_shred_version( ctx );
2377 0 : maybe_verify_genesis_timestamp( ctx );
2378 0 : break;
2379 0 : }
2380 0 : case IN_KIND_EXECRP: {
2381 0 : process_exec_task_done( ctx, stem, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), sig );
2382 0 : ctx->execrp_idle_cnt = 0UL;
2383 0 : break;
2384 0 : }
2385 0 : case IN_KIND_POH: {
2386 0 : process_poh_message( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
2387 0 : break;
2388 0 : }
2389 0 : case IN_KIND_RESOLV: {
2390 0 : fd_resolv_slot_exchanged_t * exchanged_slot = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
2391 0 : process_resolv_slot_completed( ctx, exchanged_slot->bank_idx );
2392 0 : break;
2393 0 : }
2394 0 : case IN_KIND_TOWER: {
2395 0 : if( FD_LIKELY( sig==FD_TOWER_SIG_SLOT_DONE ) ) {
2396 0 : process_tower_slot_done( ctx, stem, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), seq );
2397 0 : } else if( FD_LIKELY( sig==FD_TOWER_SIG_SLOT_CONFIRMED ) ) {
2398 0 : fd_tower_slot_confirmed_t const * msg = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
2399 0 : if( msg->level==FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC && !msg->fwd ) process_tower_optimistic_confirmed( ctx, stem, msg );
2400 0 : if( msg->level==FD_TOWER_SLOT_CONFIRMED_DUPLICATE ) fd_reasm_confirm( ctx->reasm, &msg->block_id );
2401 0 : } else if( FD_LIKELY( sig==FD_TOWER_SIG_SLOT_IGNORED ) ) {
2402 0 : fd_tower_slot_ignored_t const * msg = fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
2403 0 : fd_tower_slot_done_t ignored = {
2404 0 : .replay_slot = msg->slot,
2405 0 : .replay_bank_idx = msg->bank_idx,
2406 0 : .vote_slot = ULONG_MAX,
2407 0 : .reset_slot = ctx->reset_slot, /* Use most recent reset slot */
2408 0 : .reset_block_id = ctx->reset_block_id,
2409 0 : .root_slot = ULONG_MAX
2410 0 : };
2411 0 : process_tower_slot_done( ctx, stem, &ignored, seq );
2412 0 : }
2413 0 : break;
2414 0 : }
2415 0 : case IN_KIND_REPAIR: {
2416 : /* Store and reasm follow the invariant that any FEC in the
2417 : shred->out link, repair->out link, or reasm must be present in
2418 : store. If any FEC is rejected at this point, it must be
2419 : removed from store. See topology.c for more details. */
2420 0 : if( FD_UNLIKELY( sig==REPAIR_SIG_FEC || sig==REPAIR_SIG_FEC_LEADER || sig==REPAIR_SIG_FEC_INVALID ) ) {
2421 0 : process_fec_complete( ctx, sig, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
2422 0 : }
2423 0 : break;
2424 0 : }
2425 0 : case IN_KIND_TXSEND: {
2426 0 : process_vote_txn_sent( ctx, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
2427 0 : break;
2428 0 : }
2429 0 : case IN_KIND_GOSSIP_OUT: {
2430 0 : FD_TEST( sig==FD_GOSSIP_UPDATE_TAG_WFS_DONE );
2431 0 : ctx->wfs_complete = 1;
2432 :
2433 : /* Recalculate next_leader_tickcount relative to now. The
2434 : original value was computed at boot time (in boot_genesis or
2435 : on_snapshot_message). */
2436 0 : if( FD_LIKELY( ctx->next_leader_slot!=ULONG_MAX ) ) {
2437 0 : ctx->next_leader_tickcount = (long)((double)(ctx->next_leader_slot-ctx->reset_slot-1UL)*ctx->slot_duration_ticks) + fd_tickcount();
2438 0 : } else {
2439 0 : ctx->next_leader_tickcount = LONG_MAX;
2440 0 : }
2441 :
2442 0 : FD_LOG_NOTICE(( "Done waiting for supermajority. More than 80 percent of cluster stake has joined." ));
2443 0 : if( FD_LIKELY( ctx->replay_out->idx!=ULONG_MAX ) ) {
2444 0 : fd_stem_publish( stem, ctx->replay_out->idx, REPLAY_SIG_WFS_DONE, ctx->replay_out->chunk, 0UL, 0UL, 0UL, fd_frag_meta_ts_comp( fd_tickcount() ) );
2445 0 : }
2446 0 : break;
2447 0 : }
2448 0 : case IN_KIND_RPC: {
2449 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, sig );
2450 0 : FD_TEST( bank );
2451 0 : bank->refcnt--;
2452 0 : FD_LOG_DEBUG(( "bank (idx=%lu, slot=%lu) refcnt decremented to %lu for %s", bank->idx, bank->f.slot, bank->refcnt, ctx->in_kind[ in_idx ]==IN_KIND_RPC ? "rpc" : "gui" ));
2453 0 : break;
2454 0 : }
2455 0 : default:
2456 0 : FD_LOG_ERR(( "unhandled kind %d", ctx->in_kind[ in_idx ] ));
2457 0 : }
2458 :
2459 0 : return 0;
2460 0 : }
2461 :
2462 : static inline fd_replay_out_link_t
2463 : out1( fd_topo_t const * topo,
2464 : fd_topo_tile_t const * tile,
2465 0 : char const * name ) {
2466 0 : ulong idx = ULONG_MAX;
2467 :
2468 0 : for( ulong i=0UL; i<tile->out_cnt; i++ ) {
2469 0 : fd_topo_link_t const * link = &topo->links[ tile->out_link_id[ i ] ];
2470 0 : if( !strcmp( link->name, name ) ) {
2471 0 : if( FD_UNLIKELY( idx!=ULONG_MAX ) ) FD_LOG_ERR(( "tile %s:%lu had multiple output links named %s but expected one", tile->name, tile->kind_id, name ));
2472 0 : idx = i;
2473 0 : }
2474 0 : }
2475 :
2476 0 : if( FD_UNLIKELY( idx==ULONG_MAX ) ) return (fd_replay_out_link_t){ .idx = ULONG_MAX, .mem = NULL, .chunk0 = 0, .wmark = 0, .chunk = 0 };
2477 :
2478 0 : void * mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ idx ] ].dcache_obj_id ].wksp_id ].wksp;
2479 0 : ulong chunk0 = fd_dcache_compact_chunk0( mem, topo->links[ tile->out_link_id[ idx ] ].dcache );
2480 0 : ulong wmark = fd_dcache_compact_wmark ( mem, topo->links[ tile->out_link_id[ idx ] ].dcache, topo->links[ tile->out_link_id[ idx ] ].mtu );
2481 :
2482 0 : return (fd_replay_out_link_t){ .idx = idx, .mem = mem, .chunk0 = chunk0, .wmark = wmark, .chunk = chunk0 };
2483 0 : }
2484 :
2485 : static void
2486 : privileged_init( fd_topo_t const * topo,
2487 0 : fd_topo_tile_t const * tile ) {
2488 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
2489 :
2490 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
2491 0 : fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
2492 :
2493 0 : if( FD_UNLIKELY( !strcmp( tile->replay.identity_key_path, "" ) ) ) FD_LOG_ERR(( "identity_key_path not set" ));
2494 :
2495 0 : ctx->identity_pubkey[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->replay.identity_key_path, /* pubkey only: */ 1 ) );
2496 0 : ctx->identity_idx = 0UL;
2497 :
2498 0 : ctx->bundle.enabled = tile->replay.bundle.enabled;
2499 0 : if( FD_UNLIKELY( !tile->replay.bundle.vote_account_path[0] ) ) {
2500 0 : ctx->bundle.enabled = 0;
2501 0 : }
2502 :
2503 0 : if( FD_UNLIKELY( ctx->bundle.enabled ) ) {
2504 0 : if( FD_UNLIKELY( !fd_base58_decode_32( tile->replay.bundle.vote_account_path, ctx->bundle.vote_account.uc ) ) ) {
2505 0 : const uchar * vote_key = fd_keyload_load( tile->replay.bundle.vote_account_path, /* pubkey only: */ 1 );
2506 0 : fd_memcpy( ctx->bundle.vote_account.uc, vote_key, 32UL );
2507 0 : }
2508 0 : }
2509 :
2510 0 : FD_TEST( fd_rng_secure( &ctx->rng_seed, sizeof(ctx->rng_seed) ) );
2511 0 : FD_TEST( fd_rng_secure( &ctx->blockhash_seed, sizeof(ulong) ) );
2512 0 : FD_TEST( fd_rng_secure( &ctx->reasm_seed, sizeof(ulong) ) );
2513 0 : FD_TEST( fd_rng_secure( &ctx->vote_tracker_seed, sizeof(ulong) ) );
2514 0 : FD_TEST( fd_rng_secure( &ctx->block_id_map_seed, sizeof(ulong) ) );
2515 0 : FD_TEST( fd_rng_secure( &ctx->initial_block_id, sizeof(fd_hash_t) ) );
2516 0 : FD_TEST( fd_rng_secure( &ctx->runtime_stack_seed, sizeof(ulong) ) );
2517 0 : }
2518 :
2519 : static void
2520 : unprivileged_init( fd_topo_t const * topo,
2521 0 : fd_topo_tile_t const * tile ) {
2522 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
2523 :
2524 0 : ulong chain_cnt = fd_block_id_map_chain_cnt_est( tile->replay.max_live_slots );
2525 :
2526 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
2527 0 : fd_replay_tile_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_replay_tile_t), sizeof(fd_replay_tile_t) );
2528 0 : void * runtime_stack_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_runtime_stack_align(), fd_runtime_stack_footprint( FD_RUNTIME_MAX_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_STAKE_ACCOUNTS ) );
2529 0 : void * block_id_arr_mem = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_block_id_ele_t), sizeof(fd_block_id_ele_t) * tile->replay.max_live_slots );
2530 0 : void * block_id_map_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_block_id_map_align(), fd_block_id_map_footprint( chain_cnt ) );
2531 0 : void * _txncache = FD_SCRATCH_ALLOC_APPEND( l, fd_txncache_align(), fd_txncache_footprint( tile->replay.max_live_slots ) );
2532 0 : void * _accdb = FD_SCRATCH_ALLOC_APPEND( l, fd_accdb_align(), fd_accdb_footprint( tile->replay.max_live_slots ) );
2533 0 : void * reasm_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_reasm_align(), fd_reasm_footprint( tile->replay.fec_max ) );
2534 0 : void * sched_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_sched_align(), fd_sched_footprint( tile->replay.sched_depth, tile->replay.max_live_slots ) );
2535 0 : void * vote_tracker_mem = FD_SCRATCH_ALLOC_APPEND( l, fd_vote_tracker_align(), fd_vote_tracker_footprint() );
2536 0 : void * _capture_ctx = FD_SCRATCH_ALLOC_APPEND( l, fd_capture_ctx_align(), fd_capture_ctx_footprint() );
2537 0 : void * dump_proto_ctx_mem = FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_dump_proto_ctx_t), sizeof(fd_dump_proto_ctx_t) );
2538 0 : void * block_dump_ctx = NULL;
2539 0 : if( FD_UNLIKELY( tile->replay.dump_block_to_pb ) ) {
2540 0 : block_dump_ctx = FD_SCRATCH_ALLOC_APPEND( l, fd_block_dump_context_align(), fd_block_dump_context_footprint() );
2541 0 : }
2542 :
2543 0 : ctx->runtime_stack = fd_runtime_stack_join( fd_runtime_stack_new( runtime_stack_mem, FD_RUNTIME_MAX_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_VOTE_ACCOUNTS, FD_RUNTIME_EXPECTED_STAKE_ACCOUNTS, ctx->runtime_stack_seed ) );
2544 0 : FD_TEST( ctx->runtime_stack );
2545 :
2546 0 : ctx->wksp = topo->workspaces[ topo->objs[ tile->tile_obj_id ].wksp_id ].wksp;
2547 :
2548 0 : ulong store_obj_id = fd_pod_query_ulong( topo->props, "store", ULONG_MAX );
2549 0 : FD_TEST( store_obj_id!=ULONG_MAX );
2550 0 : ctx->store = fd_store_join( fd_topo_obj_laddr( topo, store_obj_id ) );
2551 0 : FD_TEST( ctx->store );
2552 :
2553 0 : ulong banks_obj_id = fd_pod_query_ulong( topo->props, "banks", ULONG_MAX );
2554 0 : FD_TEST( banks_obj_id!=ULONG_MAX );
2555 :
2556 0 : ctx->banks = fd_banks_join( fd_topo_obj_laddr( topo, banks_obj_id ) );
2557 0 : FD_TEST( ctx->banks );
2558 :
2559 0 : ulong node_info_obj_id = fd_pod_query_ulong( topo->props, "node_info", ULONG_MAX );
2560 0 : FD_TEST( node_info_obj_id!=ULONG_MAX );
2561 0 : ctx->node_info = fd_node_info_box_join( fd_topo_obj_laddr( topo, node_info_obj_id ) );
2562 0 : FD_TEST( ctx->node_info );
2563 0 : fd_node_info_write_begin( ctx->node_info );
2564 0 : ctx->node_info->info.identity = *ctx->identity_pubkey;
2565 0 : fd_node_info_write_end( ctx->node_info );
2566 :
2567 0 : FD_MGAUGE_SET( REPLAY, BANK_LIVE_MAX, fd_banks_pool_max_cnt( ctx->banks ) );
2568 :
2569 0 : ctx->frontier_cnt = 0UL;
2570 :
2571 0 : ctx->consensus_root_slot = ULONG_MAX;
2572 0 : ctx->consensus_root = ctx->initial_block_id;
2573 0 : ctx->published_root_slot = ULONG_MAX;
2574 :
2575 0 : ctx->expected_shred_version = tile->replay.expected_shred_version;
2576 0 : ctx->ipecho_shred_version = 0;
2577 0 : fd_memcpy( ctx->genesis_path, tile->replay.genesis_path, sizeof(ctx->genesis_path) );
2578 0 : ctx->has_genesis_hash = 0;
2579 0 : ctx->has_genesis_timestamp = 0;
2580 0 : ctx->has_expected_genesis_timestamp = 0;
2581 0 : ctx->cluster_type = FD_CLUSTER_UNKNOWN;
2582 0 : ctx->hard_fork_cnt = ULONG_MAX;
2583 0 : ctx->has_manifest_block_id = 0;
2584 :
2585 0 : if( FD_UNLIKELY( ctx->bundle.enabled ) ) {
2586 0 : if( FD_UNLIKELY( !fd_bundle_crank_gen_init( ctx->bundle.gen,
2587 0 : (fd_acct_addr_t const *)tile->replay.bundle.tip_distribution_program_addr,
2588 0 : (fd_acct_addr_t const *)tile->replay.bundle.tip_payment_program_addr,
2589 0 : (fd_acct_addr_t const *)ctx->bundle.vote_account.uc,
2590 0 : (fd_acct_addr_t const *)ctx->bundle.vote_account.uc, "NAN", 0UL ) ) ) {
2591 0 : FD_LOG_ERR(( "failed to initialize bundle crank gen" ));
2592 0 : }
2593 0 : }
2594 :
2595 0 : FD_TEST( tile->replay.enable_features_cnt<=sizeof(ctx->enable_features)/sizeof(ctx->enable_features[0]) );
2596 0 : ctx->enable_features_cnt = tile->replay.enable_features_cnt;
2597 0 : for( ulong i=0UL; i<tile->replay.enable_features_cnt; i++ ) {
2598 0 : fd_memcpy( ctx->enable_features[ i ], tile->replay.enable_features[ i ], FD_BASE58_ENCODED_32_SZ );
2599 0 : }
2600 :
2601 0 : ulong progcache_obj_id; FD_TEST( (progcache_obj_id = fd_pod_query_ulong( topo->props, "progcache", ULONG_MAX ) )!=ULONG_MAX );
2602 0 : FD_TEST( fd_progcache_shmem_join( ctx->progcache, fd_topo_obj_laddr( topo, progcache_obj_id ) ) );
2603 :
2604 0 : fd_wksp_t * progcache_wksp = fd_wksp_containing( ctx->progcache->shmem );
2605 0 : FD_TEST( progcache_wksp );
2606 0 : fd_wksp_mon_init( ctx->progcache_wksp_mon, progcache_wksp, FD_WKSP_MON_DEFAULT_RATE, fd_tickcount() );
2607 :
2608 0 : void * _txncache_shmem = fd_topo_obj_laddr( topo, tile->replay.txncache_obj_id );
2609 0 : fd_txncache_shmem_t * txncache_shmem = fd_txncache_shmem_join( _txncache_shmem );
2610 0 : FD_TEST( txncache_shmem );
2611 0 : ctx->txncache = fd_txncache_join( fd_txncache_new( _txncache, txncache_shmem ) );
2612 0 : FD_TEST( ctx->txncache );
2613 :
2614 0 : void * _accdb_shmem = fd_topo_obj_laddr( topo, tile->replay.accdb_obj_id );
2615 0 : fd_accdb_shmem_t * accdb_shmem = fd_accdb_shmem_join( _accdb_shmem );
2616 0 : FD_TEST( accdb_shmem );
2617 0 : ctx->accdb = fd_accdb_join( fd_accdb_new( _accdb, accdb_shmem, FD_ACCDB_FD_RW, 0UL, NULL ) );
2618 0 : FD_TEST( ctx->accdb );
2619 :
2620 0 : ctx->capture_ctx = NULL;
2621 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) ) ) {
2622 0 : ctx->capture_ctx = fd_capture_ctx_join( fd_capture_ctx_new( _capture_ctx ) );
2623 0 : ctx->capture_ctx->solcap_start_slot = tile->replay.capture_start_slot;
2624 0 : ctx->capture_ctx->capture_solcap = 1;
2625 0 : }
2626 :
2627 0 : ctx->dump_proto_ctx = NULL;
2628 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.dump_proto_dir ) ) ) {
2629 0 : ctx->dump_proto_ctx = dump_proto_ctx_mem;
2630 0 : ctx->dump_proto_ctx->dump_proto_output_dir = tile->replay.dump_proto_dir;
2631 0 : if( FD_LIKELY( tile->replay.dump_block_to_pb ) ) {
2632 0 : ctx->dump_proto_ctx->dump_block_to_pb = !!tile->replay.dump_block_to_pb;
2633 0 : }
2634 0 : }
2635 :
2636 0 : if( FD_UNLIKELY( tile->replay.dump_block_to_pb ) ) {
2637 0 : ctx->block_dump_ctx = fd_block_dump_context_join( fd_block_dump_context_new( block_dump_ctx ) );
2638 0 : } else {
2639 0 : ctx->block_dump_ctx = NULL;
2640 0 : }
2641 :
2642 0 : ctx->is_booted = 0;
2643 :
2644 0 : ctx->larger_max_cost_per_block = tile->replay.larger_max_cost_per_block;
2645 :
2646 0 : FD_TEST( fd_rng_new( ctx->rng, ctx->rng_seed, 0UL ) );
2647 :
2648 0 : ctx->reasm = fd_reasm_join( fd_reasm_new( reasm_mem, tile->replay.fec_max, ctx->reasm_seed ) );
2649 0 : FD_TEST( ctx->reasm );
2650 0 : ctx->reasm_evicted = NULL;
2651 :
2652 0 : ctx->sched = fd_sched_join( fd_sched_new( sched_mem, ctx->rng, tile->replay.sched_depth, tile->replay.max_live_slots, fd_topo_tile_name_cnt( topo, "execrp" ) ) );
2653 0 : FD_TEST( ctx->sched );
2654 :
2655 0 : ctx->in_cnt = tile->in_cnt;
2656 0 : ctx->execrp_idle_cnt = 0UL;
2657 :
2658 0 : ctx->vote_tracker = fd_vote_tracker_join( fd_vote_tracker_new( vote_tracker_mem, ctx->vote_tracker_seed ) );
2659 0 : FD_TEST( ctx->vote_tracker );
2660 :
2661 0 : ctx->identity_vote_rooted = 0;
2662 :
2663 0 : ctx->wait_for_vote_to_start_leader = tile->replay.wait_for_vote_to_start_leader;
2664 :
2665 0 : ctx->wfs_enabled = memcmp( tile->replay.wait_for_supermajority_with_bank_hash.uc, ((fd_pubkey_t){ 0 }).uc, sizeof(fd_pubkey_t) );
2666 0 : ctx->expected_bank_hash = tile->replay.wait_for_supermajority_with_bank_hash;
2667 0 : ctx->wfs_complete = !ctx->wfs_enabled;
2668 :
2669 0 : ctx->mleaders = fd_multi_epoch_leaders_join( fd_multi_epoch_leaders_new( ctx->mleaders_mem ) );
2670 0 : FD_TEST( ctx->mleaders );
2671 :
2672 0 : ctx->is_leader = 0;
2673 0 : ctx->supports_leader = fd_topo_find_tile( topo, "pack", 0UL )!=ULONG_MAX;
2674 0 : ctx->reset_slot = 0UL;
2675 0 : ctx->reset_bank = NULL;
2676 0 : ctx->reset_block_id = ctx->initial_block_id;
2677 0 : ctx->reset_timestamp_nanos = 0UL;
2678 0 : ctx->next_leader_slot = ULONG_MAX;
2679 0 : ctx->next_leader_tickcount = LONG_MAX;
2680 0 : ctx->highwater_leader_slot = ULONG_MAX;
2681 0 : ctx->slot_duration_nanos = 350L*1000L*1000L; /* TODO: Not fixed ... not always 350ms ... */
2682 0 : ctx->slot_duration_ticks = (double)ctx->slot_duration_nanos*fd_tempo_tick_per_ns( NULL );
2683 0 : ctx->leader_bank = NULL;
2684 :
2685 0 : ctx->block_id_len = tile->replay.max_live_slots;
2686 0 : ctx->block_id_arr = (fd_block_id_ele_t *)block_id_arr_mem;
2687 0 : ctx->block_id_map = fd_block_id_map_join( fd_block_id_map_new( block_id_map_mem, chain_cnt, ctx->block_id_map_seed ) );
2688 0 : FD_TEST( ctx->block_id_map );
2689 0 : for( ulong i=0UL; i<tile->replay.max_live_slots; i++ ) ctx->block_id_arr[ i ].block_id_seen = 0;
2690 :
2691 0 : ctx->resolv_tile_cnt = fd_topo_tile_name_cnt( topo, "resolv" );
2692 :
2693 0 : ctx->keyswitch = fd_keyswitch_join( fd_topo_obj_laddr( topo, tile->id_keyswitch_obj_id ) );
2694 0 : FD_TEST( ctx->keyswitch );
2695 0 : ctx->halt_leader = 0;
2696 :
2697 0 : FD_TEST( tile->in_cnt<=sizeof(ctx->in)/sizeof(ctx->in[0]) );
2698 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
2699 0 : fd_topo_link_t const * link = &topo->links[ tile->in_link_id[ i ] ];
2700 0 : fd_topo_wksp_t const * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
2701 :
2702 0 : if( FD_LIKELY( link->dcache ) ) {
2703 0 : ctx->in[ i ].mem = link_wksp->wksp;
2704 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
2705 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
2706 0 : ctx->in[ i ].mtu = link->mtu;
2707 0 : }
2708 :
2709 0 : if( !strcmp( link->name, "genesi_out" ) ) ctx->in_kind[ i ] = IN_KIND_GENESIS;
2710 0 : else if( !strcmp( link->name, "ipecho_out" ) ) ctx->in_kind[ i ] = IN_KIND_IPECHO;
2711 0 : else if( !strcmp( link->name, "snapin_manif" ) ) ctx->in_kind[ i ] = IN_KIND_SNAP;
2712 0 : else if( !strcmp( link->name, "execrp_replay" ) ) ctx->in_kind[ i ] = IN_KIND_EXECRP;
2713 0 : else if( !strcmp( link->name, "tower_out" ) ) ctx->in_kind[ i ] = IN_KIND_TOWER;
2714 0 : else if( !strcmp( link->name, "poh_replay" ) ) ctx->in_kind[ i ] = IN_KIND_POH;
2715 0 : else if( !strcmp( link->name, "resolv_replay" ) ) ctx->in_kind[ i ] = IN_KIND_RESOLV;
2716 0 : else if( !strcmp( link->name, "shred_out" ) ) ctx->in_kind[ i ] = IN_KIND_REPAIR;
2717 0 : else if( !strcmp( link->name, "repair_out" ) ) ctx->in_kind[ i ] = IN_KIND_REPAIR;
2718 0 : else if( !strcmp( link->name, "txsend_out" ) ) ctx->in_kind[ i ] = IN_KIND_TXSEND;
2719 0 : else if( !strcmp( link->name, "rpc_replay" ) ) ctx->in_kind[ i ] = IN_KIND_RPC;
2720 0 : else if( !strcmp( link->name, "gossip_out" ) ) ctx->in_kind[ i ] = IN_KIND_GOSSIP_OUT;
2721 0 : else FD_LOG_ERR(( "unexpected input link name %s", link->name ));
2722 0 : }
2723 :
2724 0 : *ctx->epoch_out = out1( topo, tile, "replay_epoch" ); FD_TEST( ctx->epoch_out->idx!=ULONG_MAX );
2725 0 : *ctx->replay_out = out1( topo, tile, "replay_out" ); FD_TEST( ctx->replay_out->idx!=ULONG_MAX );
2726 0 : *ctx->exec_out = out1( topo, tile, "replay_execrp" ); FD_TEST( ctx->exec_out->idx!=ULONG_MAX );
2727 :
2728 0 : ctx->rpc_enabled = fd_topo_find_tile( topo, "rpc", 0UL )!=ULONG_MAX;
2729 :
2730 0 : if( FD_UNLIKELY( strcmp( "", tile->replay.solcap_capture ) ) ) {
2731 0 : ulong idx = fd_topo_find_tile_out_link( topo, tile, "cap_repl", 0UL );
2732 0 : FD_TEST( idx!=ULONG_MAX );
2733 0 : fd_topo_link_t const * link = &topo->links[ tile->out_link_id[ idx ] ];
2734 :
2735 :
2736 0 : fd_capture_link_buf_t * cap_repl_out = ctx->cap_repl_out;
2737 0 : cap_repl_out->base.vt = &fd_capture_link_buf_vt;
2738 0 : cap_repl_out->idx = idx;
2739 0 : cap_repl_out->mem = topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ].wksp;
2740 0 : cap_repl_out->chunk0 = fd_dcache_compact_chunk0( cap_repl_out->mem, link->dcache );
2741 0 : cap_repl_out->wmark = fd_dcache_compact_wmark( cap_repl_out->mem, link->dcache, link->mtu );
2742 0 : cap_repl_out->chunk = cap_repl_out->chunk0;
2743 0 : cap_repl_out->mcache = link->mcache;
2744 0 : cap_repl_out->depth = fd_mcache_depth( link->mcache );
2745 0 : cap_repl_out->seq = 0UL;
2746 :
2747 0 : ctx->capture_ctx->capctx_type.buf = cap_repl_out;
2748 0 : ctx->capture_ctx->capture_link = &cap_repl_out->base;
2749 0 : ctx->capture_ctx->current_txn_idx = 0UL;
2750 :
2751 :
2752 0 : ulong consumer_tile_idx = fd_topo_find_tile( topo, "solcap", 0UL );
2753 0 : fd_topo_tile_t const * consumer_tile = &topo->tiles[ consumer_tile_idx ];
2754 0 : cap_repl_out->fseq = NULL;
2755 0 : for( ulong j = 0UL; j < consumer_tile->in_cnt; j++ ) {
2756 0 : if( FD_UNLIKELY( consumer_tile->in_link_id[ j ] == link->id ) ) {
2757 0 : cap_repl_out->fseq = fd_fseq_join( fd_topo_obj_laddr( topo, consumer_tile->in_link_fseq_obj_id[ j ] ) );
2758 0 : FD_TEST( cap_repl_out->fseq );
2759 0 : break;
2760 0 : }
2761 0 : }
2762 0 : }
2763 :
2764 0 : fd_memset( &ctx->metrics, 0, sizeof(ctx->metrics) );
2765 :
2766 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_query_wait, FD_MHIST_SECONDS_MIN( REPLAY, STORE_QUERY_WAIT_SECONDS ),
2767 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_QUERY_WAIT_SECONDS ) ) );
2768 0 : fd_histf_join( fd_histf_new( ctx->metrics.store_query_work, FD_MHIST_SECONDS_MIN( REPLAY, STORE_QUERY_WORK_SECONDS ),
2769 0 : FD_MHIST_SECONDS_MAX( REPLAY, STORE_QUERY_WORK_SECONDS ) ) );
2770 :
2771 : /* Ensure precompiles are available, crash fast otherwise */
2772 0 : fd_precompiles();
2773 :
2774 0 : ulong scratch_top = FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
2775 0 : if( FD_UNLIKELY( scratch_top > (ulong)scratch + scratch_footprint( tile ) ) )
2776 0 : FD_LOG_ERR(( "scratch overflow %lu %lu %lu", scratch_top - (ulong)scratch - scratch_footprint( tile ), scratch_top, (ulong)scratch + scratch_footprint( tile ) ));
2777 0 : }
2778 :
2779 : static ulong
2780 : populate_allowed_seccomp( fd_topo_t const * topo FD_FN_UNUSED,
2781 : fd_topo_tile_t const * tile FD_FN_UNUSED,
2782 : ulong out_cnt,
2783 0 : struct sock_filter * out ) {
2784 :
2785 0 : populate_sock_filter_policy_fd_replay_tile( out_cnt, out, (uint)fd_log_private_logfile_fd(), FD_ACCDB_FD_RW );
2786 0 : return sock_filter_policy_fd_replay_tile_instr_cnt;
2787 0 : }
2788 :
2789 : static ulong
2790 : populate_allowed_fds( fd_topo_t const * topo FD_FN_UNUSED,
2791 : fd_topo_tile_t const * tile FD_FN_UNUSED,
2792 : ulong out_fds_cnt,
2793 0 : int * out_fds ) {
2794 :
2795 0 : if( FD_UNLIKELY( out_fds_cnt<3UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
2796 :
2797 0 : ulong out_cnt = 0UL;
2798 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
2799 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
2800 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
2801 0 : out_fds[ out_cnt++ ] = FD_ACCDB_FD_RW; /* accounts db */
2802 :
2803 0 : return out_cnt;
2804 0 : }
2805 :
2806 : static inline void
2807 0 : during_housekeeping( fd_replay_tile_t * ctx ) {
2808 0 : if( FD_UNLIKELY( fd_keyswitch_state_query( ctx->keyswitch )==FD_KEYSWITCH_STATE_UNHALT_PENDING ) ) {
2809 0 : FD_CHECK_CRIT( ctx->halt_leader, "state machine corruption" );
2810 0 : FD_LOG_DEBUG(( "keyswitch: unhalting leader" ));
2811 0 : ctx->halt_leader = 0;
2812 0 : fd_keyswitch_state( ctx->keyswitch, FD_KEYSWITCH_STATE_COMPLETED );
2813 0 : }
2814 :
2815 0 : if( FD_UNLIKELY( fd_keyswitch_state_query( ctx->keyswitch )==FD_KEYSWITCH_STATE_SWITCH_PENDING ) ) {
2816 0 : FD_LOG_DEBUG(( "keyswitch: halting leader" ));
2817 0 : ctx->halt_leader = 1;
2818 0 : if( !ctx->is_leader ) maybe_switch_identity( ctx );
2819 0 : }
2820 0 : }
2821 :
2822 : #undef DEBUG_LOGGING
2823 :
2824 : /* counting carefully, after_credit can generate at most 7 frags and
2825 : returnable_frag boot_genesis can also generate at most 7 frags, so 14
2826 : is a conservative bound. */
2827 0 : #define STEM_BURST (14UL)
2828 :
2829 : /* fd_tempo_lazy_default( 16384 ) where 16384 is the minimum out-link
2830 : depth (i.e. cr_max) but excludes replay_epoch, which is so infrequent
2831 : credit availability is a non-issue. */
2832 0 : #define STEM_LAZY ((long)36865)
2833 :
2834 0 : #define STEM_CALLBACK_CONTEXT_TYPE fd_replay_tile_t
2835 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(fd_replay_tile_t)
2836 :
2837 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
2838 0 : #define STEM_CALLBACK_AFTER_CREDIT after_credit
2839 0 : #define STEM_CALLBACK_BEFORE_FRAG before_frag
2840 0 : #define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
2841 0 : #define STEM_CALLBACK_DURING_HOUSEKEEPING during_housekeeping
2842 :
2843 : #include "../../disco/stem/fd_stem.c"
2844 :
2845 : fd_topo_run_tile_t fd_tile_replay = {
2846 : .name = "replay",
2847 : .populate_allowed_seccomp = populate_allowed_seccomp,
2848 : .populate_allowed_fds = populate_allowed_fds,
2849 : .scratch_align = scratch_align,
2850 : .scratch_footprint = scratch_footprint,
2851 : .privileged_init = privileged_init,
2852 : .unprivileged_init = unprivileged_init,
2853 : .run = stem_run,
2854 : };
|