Line data Source code
1 : #include "fd_tower_tile.h"
2 : #include "generated/fd_tower_tile_seccomp.h"
3 :
4 : #include "../../choreo/ghost/fd_ghost.h"
5 : #include "../../choreo/hfork/fd_hfork.h"
6 : #include "../../choreo/notar/fd_notar.h"
7 : #include "../../choreo/tower/fd_tower.h"
8 : #include "../../choreo/tower/fd_tower_accts.h"
9 : #include "../../choreo/tower/fd_tower_forks.h"
10 : #include "../../choreo/tower/fd_tower_serde.h"
11 : #include "../../disco/fd_txn_p.h"
12 : #include "../../disco/keyguard/fd_keyload.h"
13 : #include "../../disco/metrics/fd_metrics.h"
14 : #include "../../disco/topo/fd_topo.h"
15 : #include "../../disco/fd_txn_m.h"
16 : #include "../../choreo/tower/fd_epoch_stakes.h"
17 : #include "../../discof/restore/utils/fd_ssmsg.h"
18 : #include "../../discof/replay/fd_exec.h"
19 : #include "../../discof/replay/fd_replay_tile.h"
20 : #include "../../flamenco/accdb/fd_accdb_impl_v1.h"
21 : #include "../../flamenco/runtime/fd_bank.h"
22 : #include "../../util/pod/fd_pod.h"
23 :
24 : #include <errno.h>
25 : #include <fcntl.h>
26 :
27 : /* The tower tile is responsible for two things:
28 :
29 : 1. running the fork choice (fd_ghost) and TowerBFT (fd_tower) rules
30 : after replaying a block.
31 : 2. listening to gossip (duplicate shred and vote messages) and
32 : monitoring for duplicate or duplicate confirmed blocks (fd_notar).
33 :
34 : Tower signals to other tiles about events that occur as a result of
35 : those two above events, such as what block to vote on, what block to
36 : reset onto as leader, what block got rooted, what blocks are
37 : duplicates and what blocks are confirmed.
38 :
39 : In general, tower uses the block_id as the identifier for blocks. The
40 : block_id is the merkle root of the last FEC set for a block. This is
41 : guaranteed to be unique for a given block and is the canonical
42 : identifier over the slot number because unlike slot numbers, if a
43 : leader equivocates (produces multiple blocks for the same slot) the
44 : block_id can disambiguate the blocks.
45 :
46 : However, the block_id was only introduced into the Solana protocol
47 : recently, and TowerBFT still uses the "legacy" identifier of slot
48 : numbers for blocks. So the tile (and relevant modules) will use
49 : block_id when possible to interface with the protocol, but otherwise
50 : fall back to slot number when block_id is unsupported. */
51 :
/* Compile-time switch.  When nonzero, the ghost tree and our tower are
   printed at the end of every replay_slot_completed. */

#define LOGGING 0

/* Values stored in ctx->in_kind to tag each input link.
   NOTE(review): presumably assigned per input link during tile init
   (dedup, exec and replay producers) -- confirm against the init code. */

#define IN_KIND_DEDUP  (0)
#define IN_KIND_EXEC   (1)
#define IN_KIND_REPLAY (2)

/* Max number of signatures on a vote txn: the validator identity and
   the vote authority. */

#define VOTE_TXN_SIG_MAX (2UL)
59 :
60 : struct notif {
61 : ulong slot;
62 : int kind;
63 : };
64 : typedef struct notif notif_t;
65 :
66 : #define DEQUE_NAME notif
67 0 : #define DEQUE_T notif_t
68 : #include "../../util/tmpl/fd_deque_dynamic.c"
69 :
70 : static const fd_hash_t manifest_block_id = { .ul = { 0xf17eda2ce7b1d } }; /* FIXME manifest_block_id */
71 :
72 : typedef struct {
73 : fd_wksp_t * mem;
74 : ulong chunk0;
75 : ulong wmark;
76 : ulong mtu;
77 : } in_ctx_t;
78 :
79 : typedef struct {
80 : fd_wksp_t * wksp; /* workspace */
81 :
82 : ulong seed; /* map seed */
83 : int checkpt_fd;
84 : int restore_fd;
85 : fd_pubkey_t identity_key[1];
86 : fd_pubkey_t vote_account[1];
87 : uchar our_vote_acct[FD_VOTE_STATE_DATA_MAX]; /* buffer for reading back our own vote acct data */
88 :
89 : /* structures owned by tower tile */
90 :
91 : fd_forks_t * forks;
92 : fd_ghost_t * ghost;
93 : fd_hfork_t * hfork;
94 : fd_notar_t * notar;
95 : fd_tower_t * tower;
96 : fd_tower_t * tower_spare; /* spare tower used during processing */
97 : notif_t * notif; /* deque of confirmation notifications queued for publishing */
98 : fd_tower_accts_t * tower_accts; /* deque of accts, stake, and pubkey for the currently replayed slot */
99 : fd_epoch_stakes_t * slot_stakes; /* tracks the stakes for each voter in the epoch per fork */
100 :
101 : /* external joins owned by replay tile */
102 :
103 : fd_banks_t * banks;
104 : fd_accdb_user_t accdb[1];
105 :
106 : /* frag-related structures (consume and publish) */
107 :
108 : uchar vote_txn[FD_TPU_PARSED_MTU];
109 : fd_sha512_t * vote_sha[VOTE_TXN_SIG_MAX];
110 : fd_compact_tower_sync_serde_t compact_tower_sync_serde;
111 : fd_snapshot_manifest_t manifest;
112 : fd_replay_slot_completed_t replay_slot_completed;
113 :
114 : /* slot watermarks */
115 :
116 : ulong init_slot; /* initial slot from genesis or snapshot */
117 : ulong root_slot; /* monotonically increasing contiguous tower root slot */
118 : ulong conf_slot; /* monotonically increasing contiguous confirmed slot */
119 :
120 : /* in/out link setup */
121 :
122 : int in_kind[ 64UL ];
123 : in_ctx_t in [ 64UL ];
124 :
125 : fd_wksp_t * out_mem;
126 : ulong out_chunk0;
127 : ulong out_wmark;
128 : ulong out_chunk;
129 :
130 : /* metrics */
131 :
132 : struct ctx_metrics_t {
133 : ulong vote_txn_invalid;
134 : ulong vote_txn_ignored;
135 : ulong vote_txn_mismatch;
136 :
137 : ulong ancestor_rollback;
138 : ulong sibling_confirmed;
139 : ulong same_fork;
140 : ulong switch_pass;
141 : ulong switch_fail;
142 : ulong lockout_fail;
143 : ulong threshold_fail;
144 : ulong propagated_fail;
145 :
146 : fd_hfork_metrics_t hard_forks;
147 : } metrics;
148 : } ctx_t;
149 :
150 : FD_FN_CONST static inline ulong
151 0 : scratch_align( void ) {
152 0 : return 128UL;
153 0 : }
154 :
155 : FD_FN_PURE static inline ulong
156 0 : scratch_footprint( FD_PARAM_UNUSED fd_topo_tile_t const * tile ) {
157 0 : ulong slot_max = tile->tower.max_live_slots;
158 0 : FD_LOG_DEBUG(( "hfork footprint %lu", fd_hfork_footprint( slot_max, FD_VOTER_MAX ) ));
159 0 : ulong l = FD_LAYOUT_INIT;
160 0 : l = FD_LAYOUT_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
161 0 : l = FD_LAYOUT_APPEND( l, fd_ghost_align(), fd_ghost_footprint( 2*slot_max, FD_VOTER_MAX ) );
162 0 : l = FD_LAYOUT_APPEND( l, fd_hfork_align(), fd_hfork_footprint( slot_max, FD_VOTER_MAX ) );
163 0 : l = FD_LAYOUT_APPEND( l, fd_notar_align(), fd_notar_footprint( tile->tower.max_vote_lookahead ) );
164 0 : l = FD_LAYOUT_APPEND( l, fd_tower_align(), fd_tower_footprint() );
165 0 : l = FD_LAYOUT_APPEND( l, fd_tower_accts_align(), fd_tower_accts_footprint( FD_VOTER_MAX ) );
166 0 : l = FD_LAYOUT_APPEND( l, fd_forks_align(), fd_forks_footprint( slot_max, FD_VOTER_MAX ) );
167 0 : l = FD_LAYOUT_APPEND( l, fd_tower_align(), fd_tower_footprint() ); /* ctx->tower_spare */
168 0 : l = FD_LAYOUT_APPEND( l, fd_epoch_stakes_align(), fd_epoch_stakes_footprint( slot_max ) );
169 0 : l = FD_LAYOUT_APPEND( l, notif_align(), notif_footprint( slot_max ) );
170 0 : return FD_LAYOUT_FINI( l, scratch_align() );
171 0 : }
172 :
173 : static inline void
174 0 : metrics_write( ctx_t * ctx ) {
175 0 : FD_MCNT_SET( TOWER, VOTE_TXN_INVALID, ctx->metrics.vote_txn_invalid );
176 0 : FD_MCNT_SET( TOWER, VOTE_TXN_IGNORED, ctx->metrics.vote_txn_ignored );
177 0 : FD_MCNT_SET( TOWER, VOTE_TXN_MISMATCH, ctx->metrics.vote_txn_mismatch );
178 :
179 0 : FD_MCNT_SET( TOWER, ANCESTOR_ROLLBACK, ctx->metrics.ancestor_rollback );
180 0 : FD_MCNT_SET( TOWER, SIBLING_CONFIRMED, ctx->metrics.sibling_confirmed );
181 0 : FD_MCNT_SET( TOWER, SAME_FORK, ctx->metrics.same_fork );
182 0 : FD_MCNT_SET( TOWER, SWITCH_PASS, ctx->metrics.switch_pass );
183 0 : FD_MCNT_SET( TOWER, SWITCH_FAIL, ctx->metrics.switch_fail );
184 0 : FD_MCNT_SET( TOWER, LOCKOUT_FAIL, ctx->metrics.lockout_fail );
185 0 : FD_MCNT_SET( TOWER, THRESHOLD_FAIL, ctx->metrics.threshold_fail );
186 0 : FD_MCNT_SET( TOWER, PROPAGATED_FAIL, ctx->metrics.propagated_fail );
187 :
188 0 : FD_MCNT_SET( TOWER, HARD_FORKS_SEEN, ctx->metrics.hard_forks.seen );
189 0 : FD_MCNT_SET( TOWER, HARD_FORKS_PRUNED, ctx->metrics.hard_forks.pruned );
190 :
191 0 : FD_MGAUGE_SET( TOWER, HARD_FORKS_ACTIVE, ctx->metrics.hard_forks.active );
192 0 : }
193 :
194 : static void
195 : publish_slot_confirmed( ctx_t * ctx,
196 : fd_stem_context_t * stem,
197 : ulong tsorig,
198 : ulong slot,
199 : fd_hash_t const * block_id,
200 : ulong bank_idx,
201 0 : int kind ) {
202 0 : fd_tower_slot_confirmed_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
203 0 : msg->slot = slot;
204 0 : msg->block_id = *block_id;
205 0 : msg->bank_idx = bank_idx;
206 0 : msg->kind = kind;
207 0 : fd_stem_publish( stem, 0UL, FD_TOWER_SIG_SLOT_CONFIRMED, ctx->out_chunk, sizeof(fd_tower_slot_confirmed_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
208 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_confirmed_t), ctx->out_chunk0, ctx->out_wmark );
209 0 : }
210 :
211 : static void
212 : contiguous_confirm( ctx_t * ctx,
213 : ulong slot,
214 : ulong wmark,
215 0 : int kind ) {
216 :
217 : /* For optimistic and rooted confirmations, confirming a slot means
218 : all ancestors are confirmed too, so we need to publish any skipped
219 : ancestors (confirmations can be out-of-order and roots can be
220 : skipped due to lockout). */
221 :
222 0 : ulong cnt = 0;
223 0 : ulong ancestor = slot;
224 0 : while( FD_UNLIKELY( ancestor > wmark ) ) {
225 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, ancestor );
226 0 : if( FD_UNLIKELY( !fork ) ) break; /* rooted past this ancestor */
227 0 : if( FD_UNLIKELY( !notif_avail( ctx->notif ) ) ) FD_LOG_CRIT(( "attempted to confirm %lu slots more than slot max %lu", cnt, notif_max( ctx->notif ) ));
228 0 : notif_push_tail( ctx->notif, (notif_t){ .slot = ancestor, .kind = kind } );
229 0 : cnt++;
230 0 : ancestor = fork->parent_slot;
231 0 : }
232 0 : }
233 :
234 : static void
235 : notar_confirm( ctx_t * ctx,
236 : fd_stem_context_t * stem,
237 : ulong tsorig,
238 0 : fd_notar_blk_t * notar_blk ) {
239 :
240 : /* Record any confirmations in our tower forks structure and also
241 : publish slot_confirmed frags indicating confirmations to consumers.
242 :
243 : See documentation in fd_tower_tile.h for guarantees. */
244 :
245 0 : if( FD_LIKELY( notar_blk->dup_conf && !notar_blk->dup_notif ) ) {
246 0 : publish_slot_confirmed( ctx, stem, tsorig, notar_blk->slot, ¬ar_blk->block_id, ULONG_MAX, FD_TOWER_SLOT_CONFIRMED_DUPLICATE );
247 0 : notar_blk->dup_notif = 1;
248 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, notar_blk->slot ); /* ensure fork exists */
249 0 : if( FD_UNLIKELY( !fork ) ) return; /* a slot can be duplicate confirmed by gossip votes before replay */
250 0 : fd_forks_confirmed( fork, ¬ar_blk->block_id );
251 0 : }
252 0 : if( FD_LIKELY( notar_blk->opt_conf ) ) {
253 0 : if( FD_UNLIKELY( !notar_blk->opt_notif ) ) {
254 0 : publish_slot_confirmed( ctx, stem, tsorig, notar_blk->slot, ¬ar_blk->block_id, ULONG_MAX, FD_TOWER_SLOT_CONFIRMED_CLUSTER ); /* a slot can be cluster confirmed by gossip votes before replay */
255 0 : notar_blk->opt_notif = 1;
256 0 : }
257 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, notar_blk->slot );
258 0 : if( FD_UNLIKELY( fork && notar_blk->slot > ctx->conf_slot ) ) {
259 0 : contiguous_confirm( ctx, notar_blk->slot, ctx->conf_slot, FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC );
260 0 : ctx->conf_slot = notar_blk->slot;
261 0 : }
262 0 : }
263 0 : }
264 :
265 : static void
266 : count_vote_txn( ctx_t * ctx,
267 : fd_stem_context_t * stem,
268 : ulong tsorig,
269 : fd_txn_t const * txn,
270 0 : uchar const * payload ) {
271 :
272 : /* Count vote txns from resolv and replay. Note these txns have
273 : already been parsed and sigverified, so the only thing tower needs
274 : to do is filter for votes.
275 :
276 : We are a little stricter than Agave here when validating the vote
277 : because we use the same validation as pack ie. is_simple_vote which
278 : includes a check that there are at most two signers, whereas
279 : Agave's gossip vote parser does not perform that same check (the
280 : only two signers are the identity key and vote authority, which may
281 : optionally be the same).
282 :
283 : Being a little stricter here is ok because even if we drop some
284 : votes with extraneous signers that Agave would consider valid
285 : (unlikely), gossip votes are in general considered unreliable and
286 : ultimately consensus is reached through replaying the vote txns.
287 :
288 : The remaining checks mirror Agave as closely as possible (and are
289 : documented throughout below). */
290 :
291 0 : if( FD_UNLIKELY( !fd_txn_is_simple_vote_transaction( txn, payload ) ) ) { ctx->metrics.vote_txn_invalid++; return; }
292 :
293 : /* TODO check the authorized voter for this vote account (from epoch
294 : stakes) is one of the signers */
295 :
296 : /* Filter any non-tower sync votes. */
297 :
298 0 : fd_txn_instr_t const * instr = &txn->instr[0];
299 0 : uchar const * instr_data = payload + instr->data_off;
300 0 : uint kind = fd_uint_load_4_fast( instr_data );
301 0 : if( FD_UNLIKELY( kind != FD_VOTE_IX_KIND_TOWER_SYNC && kind != FD_VOTE_IX_KIND_TOWER_SYNC_SWITCH ) ) { ctx->metrics.vote_txn_ignored++; return; };
302 :
303 : /* Deserialize the CompactTowerSync. */
304 :
305 0 : int err = fd_compact_tower_sync_deserialize( &ctx->compact_tower_sync_serde, instr_data + sizeof(uint), instr->data_sz - sizeof(uint) );
306 0 : if( FD_UNLIKELY( err == -1 ) ) { ctx->metrics.vote_txn_invalid++; return; }
307 0 : ulong slot = ctx->compact_tower_sync_serde.root;
308 0 : fd_tower_remove_all( ctx->tower_spare );
309 0 : for( ulong i = 0; i < ctx->compact_tower_sync_serde.lockouts_cnt; i++ ) {
310 0 : slot += ctx->compact_tower_sync_serde.lockouts[i].offset;
311 0 : fd_tower_push_tail( ctx->tower_spare, (fd_tower_vote_t){ .slot = slot, .conf = ctx->compact_tower_sync_serde.lockouts[i].confirmation_count } );
312 0 : }
313 0 : if( FD_UNLIKELY( 0==memcmp( &ctx->compact_tower_sync_serde.block_id, &hash_null, sizeof(fd_hash_t) ) ) ) { ctx->metrics.vote_txn_invalid++; return; };
314 :
315 0 : fd_pubkey_t const * accs = (fd_pubkey_t const *)fd_type_pun_const( payload + txn->acct_addr_off );
316 0 : fd_pubkey_t const * vote_acc = NULL;
317 0 : if( FD_UNLIKELY( txn->signature_cnt==1 ) ) vote_acc = (fd_pubkey_t const *)fd_type_pun_const( &accs[1] ); /* identity and authority same, account idx 1 is the vote account address */
318 0 : else vote_acc = (fd_pubkey_t const *)fd_type_pun_const( &accs[2] ); /* identity and authority diff, account idx 2 is the vote account address */
319 :
320 : /* Return early if their tower is empty. */
321 :
322 0 : if( FD_UNLIKELY( fd_tower_empty( ctx->tower_spare ) ) ) { ctx->metrics.vote_txn_ignored++; return; };
323 :
324 : /* The vote txn contains a block id and bank hash for their last vote
325 : slot in the tower. Agave always counts the last vote.
326 :
327 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L476-L487 */
328 :
329 0 : fd_tower_vote_t const * their_last_vote = fd_tower_peek_tail_const( ctx->tower_spare );
330 0 : fd_hash_t const * their_block_id = &ctx->compact_tower_sync_serde.block_id;
331 0 : fd_hash_t const * their_bank_hash = &ctx->compact_tower_sync_serde.hash;
332 :
333 : /* Similar to what Agave does in cluster_info_vote_listener, we use
334 : the stake associated with a vote account as of our current root
335 : (which could potentially be a different epoch than the vote we are
336 : counting or when we observe the vote). They default stake to 0 for
337 : voters who are not found. */
338 :
339 0 : ulong total_stake = fd_ghost_root( ctx->ghost )->total_stake;
340 :
341 0 : fd_voter_stake_key_t stake_key = { .vote_account = *vote_acc, .slot = ctx->root_slot };
342 0 : fd_voter_stake_t * stake = fd_voter_stake_map_ele_query( ctx->slot_stakes->voter_stake_map, &stake_key, NULL, ctx->slot_stakes->voter_stake_pool );
343 :
344 0 : fd_hfork_count_vote( ctx->hfork, vote_acc, their_block_id, their_bank_hash, their_last_vote->slot, stake ? stake->stake : 0, total_stake, &ctx->metrics.hard_forks );
345 :
346 0 : fd_notar_blk_t * notar_blk = fd_notar_count_vote( ctx->notar, total_stake, vote_acc, their_last_vote->slot, their_block_id );
347 0 : if( FD_LIKELY( notar_blk ) ) notar_confirm( ctx, stem, tsorig, notar_blk );
348 :
349 0 : fd_tower_forks_t * fork = fd_tower_forks_query( ctx->forks->tower_forks, their_last_vote->slot, NULL );
350 0 : if( FD_UNLIKELY( !fork ) ) { ctx->metrics.vote_txn_ignored++; return; /* we haven't replayed this slot yet */ };
351 :
352 0 : fd_hash_t const * our_block_id = fd_forks_canonical_block_id( ctx->forks, their_last_vote->slot );
353 0 : if( FD_UNLIKELY( 0!=memcmp( our_block_id, their_block_id, sizeof(fd_hash_t) ) ) ) { ctx->metrics.vote_txn_mismatch++; return; }
354 :
355 : /* Agave decides to count intermediate vote slots in the tower only if
356 : 1. they've replayed the slot and 2. their replay bank hash matches
357 : the vote's bank hash. We do the same thing, but using block_ids.
358 :
359 : It's possible we haven't yet replayed this slot being voted on
360 : because gossip votes can be ahead of our replay.
361 :
362 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L483-L487 */
363 :
364 0 : int skipped_last_vote = 0;
365 0 : for( fd_tower_iter_t iter = fd_tower_iter_init_rev( ctx->tower_spare );
366 0 : !fd_tower_iter_done_rev( ctx->tower_spare, iter );
367 0 : iter = fd_tower_iter_prev ( ctx->tower_spare, iter ) ) {
368 0 : if( FD_UNLIKELY( !skipped_last_vote ) ) { skipped_last_vote = 1; continue; }
369 0 : fd_tower_vote_t const * their_intermediate_vote = fd_tower_iter_ele_const( ctx->tower_spare, iter );
370 :
371 : /* If we don't recognize an intermediate vote slot in their tower,
372 : it means their tower either:
373 :
374 : 1. Contains intermediate vote slots that are too old (older than
375 : our root) so we already pruned them for tower_forks. Normally
376 : if the descendant (last vote slot) is in tower forks, then all
377 : of its ancestors should be in there too.
378 :
379 : 2. Is invalid. Even though at this point we have successfully
380 : sigverified and deserialized their vote txn, the tower itself
381 : might still be invalid because unlike TPU vote txns, we have
382 : not plumbed through the vote program, but obviously gossip
383 : votes do not so we need to do some light validation here.
384 :
385 : We could throwaway this voter's tower, but we handle it the same
386 : way as Agave which is to just skip this intermediate vote slot:
387 :
388 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L513-L518 */
389 :
390 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, their_intermediate_vote->slot );
391 0 : if( FD_UNLIKELY( !fork ) ) { ctx->metrics.vote_txn_ignored++; continue; }
392 :
393 : /* Otherwise, we count the vote using our own block id for that slot
394 : (again, mirroring what Agave does albeit with bank hashes).
395 :
396 : Agave uses the current root bank's total stake when counting
397 : vote txns from gossip / replay:
398 :
399 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L500 */
400 :
401 :
402 0 : fd_notar_blk_t * notar_blk = fd_notar_count_vote( ctx->notar, total_stake, vote_acc, their_last_vote->slot, fd_forks_canonical_block_id( ctx->forks, their_intermediate_vote->slot ) );
403 0 : if( FD_LIKELY( notar_blk ) ) notar_confirm( ctx, stem, tsorig, notar_blk );
404 0 : }
405 0 : }
406 :
407 : ulong
408 : query_acct_stake_from_bank( fd_tower_accts_t * tower_accts_deque,
409 : fd_epoch_stakes_t * epoch_stakes,
410 : fd_bank_t * bank,
411 0 : ulong slot ) {
412 0 : ulong total_stake = 0;
413 0 : fd_vote_states_t const * vote_states = fd_bank_vote_states_locking_query( bank );
414 0 : fd_vote_states_iter_t iter_[1];
415 0 : ulong prev_voter_idx = ULONG_MAX;
416 0 : for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, vote_states );
417 0 : !fd_vote_states_iter_done( iter );
418 0 : fd_vote_states_iter_next( iter ) ) {
419 0 : fd_vote_state_ele_t const * vote_state = fd_vote_states_iter_ele( iter );
420 0 : if( FD_UNLIKELY( vote_state->stake_t_2 == 0 ) ) continue; /* skip unstaked vote accounts */
421 0 : fd_pubkey_t const * vote_account_pubkey = &vote_state->vote_account;
422 0 : fd_tower_accts_push_tail( tower_accts_deque, (fd_tower_accts_t){ .addr = *vote_account_pubkey, .stake = vote_state->stake_t_2 } );
423 0 : prev_voter_idx = fd_epoch_stakes_slot_stakes_add( epoch_stakes, slot, vote_account_pubkey, vote_state->stake_t_2, prev_voter_idx );
424 0 : total_stake += vote_state->stake_t_2;
425 0 : }
426 0 : fd_bank_vote_states_end_locking_query( bank );
427 0 : return total_stake;
428 0 : }
429 :
430 : /* query accdb for the vote state (vote account data) of the given vote
431 : account address as of xid. Returns 1 if found, 0 otherwise. */
432 :
433 : static int
434 : query_vote_state_from_accdb( fd_accdb_user_t * accdb,
435 : fd_funk_txn_xid_t const * xid,
436 : fd_pubkey_t const * vote_acc,
437 0 : uchar buf[static FD_VOTE_STATE_DATA_MAX] ) {
438 0 : for(;;) {
439 0 : fd_accdb_peek_t peek[1];
440 0 : if( FD_UNLIKELY( !fd_accdb_peek( accdb, peek, xid, vote_acc->uc ) ) ) return 0;
441 :
442 0 : ulong data_sz = fd_accdb_ref_data_sz( peek->acc );
443 0 : if( FD_UNLIKELY( data_sz > FD_VOTE_STATE_DATA_MAX ) ) {
444 0 : FD_BASE58_ENCODE_32_BYTES( vote_acc->uc, acc_cstr );
445 0 : FD_LOG_CRIT(( "vote account %s exceeds FD_VOTE_STATE_DATA_MAX. dlen %lu > %lu", acc_cstr, data_sz, FD_VOTE_STATE_DATA_MAX ));
446 0 : }
447 0 : fd_memcpy( buf, fd_accdb_ref_data_const( peek->acc ), data_sz );
448 :
449 0 : if( FD_LIKELY( fd_accdb_peek_test( peek ) ) ) break;
450 0 : FD_SPIN_PAUSE();
451 0 : }
452 0 : return 1;
453 0 : }
454 :
455 : static void
456 : replay_slot_completed( ctx_t * ctx,
457 : fd_replay_slot_completed_t * slot_completed,
458 : ulong tsorig,
459 0 : fd_stem_context_t * stem ) {
460 :
461 : /* Initialize slot watermarks on the first replay_slot_completed. */
462 :
463 0 : if( FD_UNLIKELY( ctx->init_slot == ULONG_MAX ) ) {
464 0 : ctx->init_slot = slot_completed->slot;
465 0 : ctx->root_slot = slot_completed->slot;
466 0 : ctx->conf_slot = slot_completed->slot;
467 0 : }
468 :
469 : /* This is a temporary patch for equivocation. */
470 :
471 0 : if( FD_UNLIKELY( fd_forks_query( ctx->forks, slot_completed->slot ) ) ) {
472 0 : FD_BASE58_ENCODE_32_BYTES( slot_completed->block_id.uc, block_id );
473 0 : FD_LOG_WARNING(( "tower ignoring replay of equivocating slot %lu %s", slot_completed->slot, block_id ));
474 0 : return;
475 0 : }
476 :
477 : /* Initialize the xid. */
478 :
479 0 : fd_funk_txn_xid_t xid = { .ul = { slot_completed->slot, slot_completed->bank_idx } };
480 :
481 : /* Query our on-chain vote acct and reconcile with our local tower. */
482 :
483 0 : int found = query_vote_state_from_accdb( ctx->accdb, &xid, ctx->vote_account, ctx->our_vote_acct );
484 0 : if( FD_LIKELY( found ) ) {
485 0 : fd_tower_reconcile( ctx->tower, ctx->root_slot, ctx->our_vote_acct );
486 : /* Sanity check that most recent vote in tower exists in tower forks */
487 0 : fd_tower_vote_t const * last_vote = fd_tower_peek_tail_const( ctx->tower );
488 0 : FD_TEST( !last_vote || fd_forks_query( ctx->forks, last_vote->slot ) );
489 0 : }
490 :
491 : /* Insert the vote acct addrs and stakes from the bank into accts. */
492 :
493 0 : fd_tower_accts_remove_all( ctx->tower_accts );
494 0 : fd_bank_t * bank = fd_banks_bank_query( ctx->banks, slot_completed->bank_idx );
495 0 : if( FD_UNLIKELY( !bank ) ) FD_LOG_CRIT(( "invariant violation: bank %lu is missing", slot_completed->bank_idx ));
496 0 : ulong total_stake = query_acct_stake_from_bank( ctx->tower_accts, ctx->slot_stakes, bank, slot_completed->slot );
497 :
498 : /* Insert the just replayed block into forks. */
499 :
500 0 : FD_TEST( !fd_forks_query( ctx->forks, slot_completed->slot ) );
501 0 : fd_tower_forks_t * fork = fd_forks_insert( ctx->forks, slot_completed->slot, slot_completed->parent_slot );
502 0 : fork->parent_slot = slot_completed->parent_slot;
503 0 : fork->confirmed = 0;
504 0 : fork->voted = 0;
505 0 : fork->replayed_block_id = slot_completed->block_id;
506 0 : fork->bank_idx = slot_completed->bank_idx;
507 0 : fd_forks_replayed( ctx->forks, fork, slot_completed->bank_idx, &slot_completed->block_id );
508 0 : fd_forks_lockouts_clear( ctx->forks, slot_completed->parent_slot );
509 :
510 : /* Insert the just replayed block into ghost. */
511 :
512 0 : fd_hash_t const * parent_block_id = &slot_completed->parent_block_id;
513 0 : if( FD_UNLIKELY( slot_completed->parent_slot==ctx->init_slot ) ) parent_block_id = &manifest_block_id;
514 0 : if( FD_UNLIKELY( slot_completed->slot ==ctx->init_slot ) ) parent_block_id = NULL;
515 0 : fd_ghost_blk_t * ghost_blk = fd_ghost_insert( ctx->ghost, &slot_completed->block_id, parent_block_id, slot_completed->slot );
516 0 : ghost_blk->total_stake = total_stake;
517 :
518 : /* Iterate vote accounts. */
519 :
520 0 : for( fd_tower_accts_iter_t iter = fd_tower_accts_iter_init( ctx->tower_accts );
521 0 : !fd_tower_accts_iter_done( ctx->tower_accts, iter );
522 0 : iter = fd_tower_accts_iter_next( ctx->tower_accts, iter ) ) {
523 0 : fd_tower_accts_t * acct = fd_tower_accts_iter_ele( ctx->tower_accts, iter );
524 0 : fd_pubkey_t const * vote_acc = &acct->addr;
525 :
526 0 : if( FD_UNLIKELY( !query_vote_state_from_accdb( ctx->accdb, &xid, vote_acc, acct->data ) ) ) {
527 0 : FD_BASE58_ENCODE_32_BYTES( vote_acc->uc, acc_cstr );
528 0 : FD_LOG_CRIT(( "vote account in bank->vote_states not found. slot %lu address: %s", slot_completed->slot, acc_cstr ));
529 0 : };
530 :
531 : /* 1. Update forks with lockouts. */
532 :
533 0 : fd_forks_lockouts_add( ctx->forks, slot_completed->slot, &acct->addr, acct );
534 :
535 : /* 2. Count the last vote slot in the vote state towards ghost. */
536 :
537 0 : ulong vote_slot = fd_voter_vote_slot( acct->data );
538 0 : if( FD_UNLIKELY( vote_slot==ULONG_MAX ) ) continue; /* hasn't voted */
539 0 : if( FD_UNLIKELY( vote_slot < fd_ghost_root( ctx->ghost )->slot ) ) continue; /* vote too old */
540 :
541 : /* We search up the ghost ancestry to find the ghost block for this
542 : vote slot. In Agave, they look this value up using a hashmap of
543 : slot->block_id ("fork progress"), but that approach only works
544 : because they dump and repair (so there's only ever one canonical
545 : block id). We retain multiple block ids, both the original and
546 : confirmed one. */
547 :
548 0 : fd_ghost_blk_t * ancestor_blk = fd_ghost_slot_ancestor( ctx->ghost, ghost_blk, vote_slot ); /* FIXME potentially slow */
549 :
550 : /* It is impossible for ancestor to be missing, because these are
551 : vote accounts on a given fork, not vote txns across forks. So we
552 : know these towers must contain slots we know about (as long as
553 : they are >= root, which we checked above). */
554 :
555 0 : if( FD_UNLIKELY( !ancestor_blk ) ) FD_LOG_CRIT(( "missing ancestor. replay slot %lu vote slot %lu voter %s", slot_completed->slot, vote_slot, FD_BASE58_ENC_32_ALLOCA( &acct->addr ) ));
556 :
557 0 : fd_ghost_count_vote( ctx->ghost, ancestor_blk, &acct->addr, acct->stake, vote_slot );
558 0 : }
559 :
560 : /* Insert the just replayed block into hard fork detector. */
561 :
562 0 : fd_hfork_record_our_bank_hash( ctx->hfork, &slot_completed->block_id, &slot_completed->bank_hash, fd_ghost_root( ctx->ghost )->total_stake );
563 :
564 : /* fd_notar requires some bookkeeping when there is a new epoch. */
565 :
566 0 : if( FD_UNLIKELY( ctx->notar->epoch==ULONG_MAX || slot_completed->epoch > ctx->notar->epoch ) ) {
567 0 : fd_notar_advance_epoch( ctx->notar, ctx->tower_accts, slot_completed->epoch );
568 0 : }
569 :
570 : /* Check if gossip votes already confirmed the fork's block_id (gossip
571 : can be ahead of replay - this is tracked by fd_notar). */
572 :
573 0 : fd_notar_slot_t * notar_slot = fd_notar_slot_query( ctx->notar->slot_map, slot_completed->slot, NULL );
574 0 : if( FD_UNLIKELY( notar_slot )) { /* optimize for replay keeping up (being ahead of gossip votes) */
575 0 : for( ulong i = 0; i < notar_slot->block_ids_cnt; i++ ) {
576 0 : fd_notar_blk_t * notar_blk = fd_notar_blk_query( ctx->notar->blk_map, notar_slot->block_ids[i], NULL );
577 0 : FD_TEST( notar_blk ); /* block_ids_cnt corrupt */
578 0 : if( FD_LIKELY( notar_blk->dup_conf ) ) {
579 0 : fork->confirmed = 1;
580 0 : fork->confirmed_block_id = notar_blk->block_id;
581 0 : break;
582 0 : }
583 0 : }
584 0 : }
585 :
586 : /* We replayed an unconfirmed duplicate, warn for now. Follow-up PR
587 : will implement eviction and repair of the correct one. */
588 :
589 0 : if( FD_UNLIKELY( fork->confirmed && 0!=memcmp( &fork->confirmed_block_id, &fork->replayed_block_id, sizeof(fd_hash_t) ) ) ) {
590 0 : FD_LOG_WARNING(( "replayed an unconfirmed duplicate %lu. ours %s. confirmed %s.", slot_completed->slot, FD_BASE58_ENC_32_ALLOCA( &slot_completed->block_id ), FD_BASE58_ENC_32_ALLOCA( &fork->confirmed_block_id ) ));
591 0 : }
592 :
593 : /* Determine reset, vote, and root slots. There may not be a vote or
594 : root slot but there is always a reset slot. */
595 :
596 0 : fd_tower_out_t out = fd_tower_vote_and_reset( ctx->tower, ctx->tower_accts, ctx->slot_stakes, ctx->forks, ctx->ghost, ctx->notar );
597 :
598 : /* Write out metrics for vote / reset reasons. */
599 :
600 0 : ctx->metrics.ancestor_rollback += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_ANCESTOR_ROLLBACK );
601 0 : ctx->metrics.sibling_confirmed += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SIBLING_CONFIRMED );
602 0 : ctx->metrics.same_fork += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SAME_FORK );
603 0 : ctx->metrics.switch_pass += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_PASS );
604 0 : ctx->metrics.switch_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_FAIL );
605 0 : ctx->metrics.lockout_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_LOCKOUT_FAIL );
606 0 : ctx->metrics.threshold_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_THRESHOLD_FAIL );
607 0 : ctx->metrics.propagated_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_PROPAGATED_FAIL );
608 :
609 : /* Update forks if there is a vote slot. */
610 :
611 0 : if( FD_LIKELY( out.vote_slot!=ULONG_MAX ) ) {
612 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, out.vote_slot );
613 0 : FD_TEST( fork ); /* we must have replayed every slot we voted for */
614 0 : fd_forks_voted( fork, &out.vote_block_id );
615 0 : }
616 :
617 : /* Publish according structures if there is a root */
618 :
619 0 : if( FD_UNLIKELY( out.root_slot!=ULONG_MAX ) ) {
620 :
621 : /* forks */
622 :
623 0 : for(ulong slot = ctx->root_slot; slot < out.root_slot; slot++ ) {
624 0 : fd_tower_forks_t * fork = fd_forks_query ( ctx->forks, slot );
625 0 : if( FD_LIKELY( fork ) ) fd_forks_remove( ctx->forks, slot );
626 0 : fd_epoch_stakes_slot_t * slot_stakes = fd_epoch_stakes_slot_map_query ( ctx->slot_stakes->slot_stakes_map, slot, NULL );
627 0 : if( FD_LIKELY( slot_stakes ) ) fd_epoch_stakes_slot_stakes_remove( ctx->slot_stakes, slot_stakes );
628 0 : }
629 :
630 : /* ghost */
631 :
632 0 : fd_ghost_blk_t * newr = fd_ghost_query( ctx->ghost, &out.root_block_id );
633 0 : if( FD_UNLIKELY( !newr ) ) { /* a block id we rooted is missing from ghost */
634 0 : FD_BASE58_ENCODE_32_BYTES( out.root_block_id.uc, block_id_cstr );
635 0 : FD_LOG_CRIT(( "missing root block id %s at slot %lu", block_id_cstr, out.root_slot ));
636 0 : }
637 0 : fd_ghost_publish( ctx->ghost, newr );
638 :
639 : /* notar */
640 :
641 0 : fd_notar_advance_wmark( ctx->notar, out.root_slot );
642 :
643 : /* Rooting implies optimistic confirmation in the Firedancer API, so
644 : we need to make sure to publish the optimistic frags before the
645 : rooted frags. In most cases this is a no-op because gossip votes
646 : already triggered optimistic confirmation.
647 :
648 : TODO include replay votes in optimistic conf vote counting. */
649 :
650 0 : contiguous_confirm( ctx, out.root_slot, ctx->conf_slot, FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC );
651 0 : contiguous_confirm( ctx, out.root_slot, ctx->root_slot, FD_TOWER_SLOT_CONFIRMED_ROOTED );
652 :
653 : /* Update slot watermarks. */
654 :
655 0 : ctx->conf_slot = out.root_slot;
656 0 : ctx->root_slot = out.root_slot;
657 0 : }
658 :
659 : /* Publish a slot_done frag to tower_out. */
660 :
661 0 : fd_tower_slot_done_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
662 0 : msg->replay_slot = slot_completed->slot;
663 0 : msg->active_fork_cnt = fd_tower_leaves_pool_used( ctx->forks->tower_leaves_pool );
664 0 : msg->vote_slot = out.vote_slot;
665 0 : msg->reset_slot = out.reset_slot;
666 0 : msg->reset_block_id = out.reset_block_id;
667 0 : msg->root_slot = out.root_slot;
668 0 : msg->root_block_id = out.root_block_id;
669 0 : msg->replay_bank_idx = slot_completed->bank_idx;
670 :
671 : /* Populate slot_done with a vote txn representing our current tower
672 : (regardless of whether there was a new vote slot or not).
673 :
674 : TODO only do this on refresh_last_vote? */
675 :
676 0 : fd_lockout_offset_t lockouts[FD_TOWER_VOTE_MAX];
677 0 : fd_txn_p_t txn[1];
678 0 : fd_tower_to_vote_txn( ctx->tower, ctx->root_slot, lockouts, &slot_completed->bank_hash, &slot_completed->block_hash, ctx->identity_key, ctx->identity_key, ctx->vote_account, txn );
679 0 : FD_TEST( !fd_tower_empty( ctx->tower ) );
680 0 : FD_TEST( txn->payload_sz && txn->payload_sz<=FD_TPU_MTU );
681 0 : fd_memcpy( msg->vote_txn, txn->payload, txn->payload_sz );
682 0 : msg->vote_txn_sz = txn->payload_sz;
683 :
684 0 : fd_stem_publish( stem, 0UL, FD_TOWER_SIG_SLOT_DONE, ctx->out_chunk, sizeof(fd_tower_slot_done_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
685 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_done_t), ctx->out_chunk0, ctx->out_wmark );
686 :
687 : # if LOGGING
688 : fd_ghost_print( ctx->ghost, fd_ghost_root( ctx->ghost ) );
689 : fd_tower_print( ctx->tower, fd_ghost_root( ctx->ghost )->slot );
690 : # endif
691 0 : }
692 :
693 : static inline void
694 : after_credit( ctx_t * ctx,
695 : fd_stem_context_t * stem,
696 : int * opt_poll_in,
697 0 : int * charge_busy ) {
698 0 : while( FD_LIKELY( !notif_empty( ctx->notif ) ) ) {
699 0 : notif_t ancestor = notif_pop_tail( ctx->notif );
700 0 : fd_tower_forks_t * fork = fd_tower_forks_query( ctx->forks->tower_forks, ancestor.slot, NULL );
701 0 : if( FD_UNLIKELY( !fork ) ) FD_LOG_CRIT(( "missing fork for ancestor %lu", ancestor.slot ));
702 0 : publish_slot_confirmed( ctx, stem, fd_frag_meta_ts_comp( fd_tickcount() ), ancestor.slot, fd_forks_canonical_block_id( ctx->forks, ancestor.slot ), fork->bank_idx, ancestor.kind );
703 0 : *opt_poll_in = 0; /* drain the confirmations */
704 0 : *charge_busy = 1;
705 0 : }
706 0 : }
707 :
708 : static inline int
709 : returnable_frag( ctx_t * ctx,
710 : ulong in_idx,
711 : ulong seq FD_PARAM_UNUSED,
712 : ulong sig,
713 : ulong chunk,
714 : ulong sz,
715 : ulong ctl FD_PARAM_UNUSED,
716 : ulong tsorig,
717 : ulong tspub FD_PARAM_UNUSED,
718 0 : fd_stem_context_t * stem ) {
719 :
720 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) )
721 0 : FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
722 :
723 0 : switch( ctx->in_kind[ in_idx ] ) {
724 0 : case IN_KIND_DEDUP: {
725 0 : if( FD_UNLIKELY( ctx->root_slot==ULONG_MAX ) ) return 1;
726 0 : fd_txn_m_t * txnm = (fd_txn_m_t *)fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
727 0 : FD_TEST( txnm->payload_sz<=FD_TPU_MTU );
728 0 : FD_TEST( txnm->txn_t_sz<=FD_TXN_MAX_SZ );
729 0 : count_vote_txn( ctx, stem, tsorig, fd_txn_m_txn_t_const( txnm ), fd_txn_m_payload_const( txnm ) );
730 0 : return 0;
731 0 : }
732 0 : case IN_KIND_EXEC: {
733 0 : if( FD_LIKELY( (sig>>32)==FD_EXEC_TT_TXN_EXEC ) ) {
734 0 : fd_exec_txn_exec_msg_t * msg = fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
735 0 : count_vote_txn( ctx, stem, tsorig, TXN(&msg->txn), msg->txn.payload );
736 0 : }
737 0 : return 0;
738 0 : }
739 0 : case IN_KIND_REPLAY: {
740 0 : if( FD_LIKELY( sig==REPLAY_SIG_SLOT_COMPLETED ) ) {
741 0 : fd_memcpy( &ctx->replay_slot_completed, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), sizeof(fd_replay_slot_completed_t) );
742 0 : replay_slot_completed( ctx, &ctx->replay_slot_completed, tsorig, stem );
743 0 : } else if ( FD_LIKELY( sig==REPLAY_SIG_SLOT_DEAD ) ) {
744 0 : fd_replay_slot_dead_t * slot_dead = (fd_replay_slot_dead_t *)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
745 0 : fd_hfork_record_our_bank_hash( ctx->hfork, &slot_dead->block_id, NULL, fd_ghost_root( ctx->ghost )->total_stake );
746 0 : }
747 0 : return 0;
748 0 : }
749 0 : default: {
750 0 : FD_LOG_ERR(( "unexpected input kind %d", ctx->in_kind[ in_idx ] ));
751 0 : }
752 0 : }
753 0 : }
754 :
755 : static void
756 : privileged_init( fd_topo_t * topo,
757 0 : fd_topo_tile_t * tile ) {
758 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
759 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
760 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
761 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
762 :
763 0 : FD_TEST( fd_rng_secure( &ctx->seed, sizeof(ctx->seed) ) );
764 :
765 0 : if( FD_UNLIKELY( !strcmp( tile->tower.identity_key, "" ) ) ) FD_LOG_ERR(( "identity_key_path not set" ));
766 0 : ctx->identity_key[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->tower.identity_key, /* pubkey only: */ 1 ) );
767 :
768 : /* The vote key can be specified either directly as a base58 encoded
769 : pubkey, or as a file path. We first try to decode as a pubkey. */
770 :
771 0 : uchar * vote_key = fd_base58_decode_32( tile->tower.vote_account, ctx->vote_account->uc );
772 0 : if( FD_UNLIKELY( !vote_key ) ) ctx->vote_account[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->tower.vote_account, /* pubkey only: */ 1 ) );
773 :
774 : /* The tower file is used to checkpt and restore the state of the
775 : local tower. */
776 :
777 0 : char path[ PATH_MAX ];
778 0 : FD_TEST( fd_cstr_printf_check( path, sizeof(path), NULL, "%s/tower-1_9-%s.bin.new", tile->tower.base_path, FD_BASE58_ENC_32_ALLOCA( ctx->identity_key->uc ) ) );
779 0 : ctx->checkpt_fd = open( path, O_WRONLY|O_CREAT|O_TRUNC, 0600 );
780 0 : if( FD_UNLIKELY( -1==ctx->checkpt_fd ) ) FD_LOG_ERR(( "open(`%s`) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
781 :
782 0 : FD_TEST( fd_cstr_printf_check( path, sizeof(path), NULL, "%s/tower-1_9-%s.bin", tile->tower.base_path, FD_BASE58_ENC_32_ALLOCA( ctx->identity_key->uc ) ) );
783 0 : ctx->restore_fd = open( path, O_RDONLY );
784 0 : if( FD_UNLIKELY( -1==ctx->restore_fd && errno!=ENOENT ) ) FD_LOG_ERR(( "open(`%s`) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
785 0 : }
786 :
787 : static void
788 : unprivileged_init( fd_topo_t * topo,
789 0 : fd_topo_tile_t * tile ) {
790 0 : ulong slot_max = tile->tower.max_live_slots;
791 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
792 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
793 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
794 0 : void * ghost = FD_SCRATCH_ALLOC_APPEND( l, fd_ghost_align(), fd_ghost_footprint( 2*slot_max, FD_VOTER_MAX ) );
795 0 : void * hfork = FD_SCRATCH_ALLOC_APPEND( l, fd_hfork_align(), fd_hfork_footprint( slot_max, FD_VOTER_MAX ) );
796 0 : void * notar = FD_SCRATCH_ALLOC_APPEND( l, fd_notar_align(), fd_notar_footprint( tile->tower.max_vote_lookahead ) );
797 0 : void * tower = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() );
798 0 : void * accts = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_accts_align(), fd_tower_accts_footprint( FD_VOTER_MAX ) );
799 0 : void * forks = FD_SCRATCH_ALLOC_APPEND( l, fd_forks_align(), fd_forks_footprint( slot_max, FD_VOTER_MAX ) );
800 0 : void * spare = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() );
801 0 : void * stake = FD_SCRATCH_ALLOC_APPEND( l, fd_epoch_stakes_align(), fd_epoch_stakes_footprint( slot_max ) );
802 0 : void * notif = FD_SCRATCH_ALLOC_APPEND( l, notif_align(), notif_footprint( slot_max ) );
803 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
804 :
805 0 : ctx->wksp = topo->workspaces[ topo->objs[ tile->tile_obj_id ].wksp_id ].wksp;
806 0 : ctx->ghost = fd_ghost_join ( fd_ghost_new ( ghost, 2*slot_max, FD_VOTER_MAX, 42UL ) ); /* FIXME seed */
807 0 : ctx->hfork = fd_hfork_join ( fd_hfork_new ( hfork, slot_max, FD_VOTER_MAX, ctx->seed, tile->tower.hard_fork_fatal ) );
808 0 : ctx->notar = fd_notar_join ( fd_notar_new ( notar, tile->tower.max_vote_lookahead ) );
809 0 : ctx->tower = fd_tower_join ( fd_tower_new ( tower ) );
810 0 : ctx->tower_accts = fd_tower_accts_join ( fd_tower_accts_new ( accts, FD_VOTER_MAX ) );
811 0 : ctx->forks = fd_forks_join ( fd_forks_new ( forks, slot_max, FD_VOTER_MAX ) );
812 0 : ctx->tower_spare = fd_tower_join ( fd_tower_new ( spare ) );
813 0 : ctx->slot_stakes = fd_epoch_stakes_join( fd_epoch_stakes_new( stake, slot_max ) );
814 0 : ctx->notif = notif_join ( notif_new ( notif, slot_max ) );
815 0 : FD_TEST( ctx->ghost );
816 0 : FD_TEST( ctx->hfork );
817 0 : FD_TEST( ctx->notar );
818 0 : FD_TEST( ctx->tower );
819 0 : FD_TEST( ctx->forks );
820 0 : FD_TEST( ctx->tower_spare );
821 0 : FD_TEST( ctx->tower_accts );
822 0 : FD_TEST( ctx->slot_stakes );
823 0 : FD_TEST( ctx->notif );
824 :
825 0 : for( ulong i = 0; i<VOTE_TXN_SIG_MAX; i++ ) {
826 0 : fd_sha512_t * sha = fd_sha512_join( fd_sha512_new( FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sha512_t), sizeof(fd_sha512_t) ) ) );
827 0 : FD_TEST( sha );
828 0 : ctx->vote_sha[i] = sha;
829 0 : }
830 :
831 0 : ctx->init_slot = ULONG_MAX;
832 0 : ctx->root_slot = ULONG_MAX;
833 0 : ctx->conf_slot = ULONG_MAX;
834 :
835 0 : memset( &ctx->metrics, 0, sizeof( struct ctx_metrics_t ) );
836 :
837 0 : ulong banks_obj_id = fd_pod_query_ulong( topo->props, "banks", ULONG_MAX );
838 0 : FD_TEST( banks_obj_id!=ULONG_MAX );
839 0 : ctx->banks = fd_banks_join( fd_topo_obj_laddr( topo, banks_obj_id ) );
840 0 : FD_TEST( ctx->banks );
841 :
842 0 : ulong funk_obj_id = fd_pod_query_ulong( topo->props, "funk", ULONG_MAX );
843 0 : FD_TEST( funk_obj_id!=ULONG_MAX );
844 0 : FD_TEST( fd_accdb_user_v1_init( ctx->accdb, fd_topo_obj_laddr( topo, funk_obj_id ) ) );
845 :
846 0 : FD_TEST( tile->in_cnt<sizeof(ctx->in_kind)/sizeof(ctx->in_kind[0]) );
847 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
848 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
849 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
850 :
851 0 : if ( FD_LIKELY( !strcmp( link->name, "dedup_resolv" ) ) ) ctx->in_kind[ i ] = IN_KIND_DEDUP;
852 0 : else if( FD_LIKELY( !strcmp( link->name, "replay_exec" ) ) ) ctx->in_kind[ i ] = IN_KIND_EXEC;
853 0 : else if( FD_LIKELY( !strcmp( link->name, "replay_out" ) ) ) ctx->in_kind[ i ] = IN_KIND_REPLAY;
854 0 : else FD_LOG_ERR(( "tower tile has unexpected input link %lu %s", i, link->name ));
855 :
856 0 : ctx->in[ i ].mem = link_wksp->wksp;
857 0 : ctx->in[ i ].mtu = link->mtu;
858 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
859 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
860 0 : }
861 :
862 0 : ctx->out_mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ 0 ] ].dcache_obj_id ].wksp_id ].wksp;
863 0 : ctx->out_chunk0 = fd_dcache_compact_chunk0( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache );
864 0 : ctx->out_wmark = fd_dcache_compact_wmark ( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache, topo->links[ tile->out_link_id[ 0 ] ].mtu );
865 0 : ctx->out_chunk = ctx->out_chunk0;
866 0 : }
867 :
868 : static ulong
869 : populate_allowed_seccomp( fd_topo_t const * topo,
870 : fd_topo_tile_t const * tile,
871 : ulong out_cnt,
872 0 : struct sock_filter * out ) {
873 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
874 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
875 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
876 :
877 0 : populate_sock_filter_policy_fd_tower_tile( out_cnt, out, (uint)fd_log_private_logfile_fd(), (uint)ctx->checkpt_fd, (uint)ctx->restore_fd );
878 0 : return sock_filter_policy_fd_tower_tile_instr_cnt;
879 0 : }
880 :
881 : static ulong
882 : populate_allowed_fds( fd_topo_t const * topo,
883 : fd_topo_tile_t const * tile,
884 : ulong out_fds_cnt,
885 0 : int * out_fds ) {
886 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
887 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
888 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
889 :
890 0 : if( FD_UNLIKELY( out_fds_cnt<4UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
891 :
892 0 : ulong out_cnt = 0UL;
893 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
894 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
895 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
896 0 : if( FD_LIKELY( ctx->checkpt_fd!=-1 ) ) out_fds[ out_cnt++ ] = ctx->checkpt_fd;
897 0 : if( FD_LIKELY( ctx->restore_fd!=-1 ) ) out_fds[ out_cnt++ ] = ctx->restore_fd;
898 0 : return out_cnt;
899 0 : }
900 :
901 0 : #define STEM_BURST (3UL) /* dup conf + cluster conf + slot_done */
902 :
903 0 : #define STEM_CALLBACK_CONTEXT_TYPE ctx_t
904 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(ctx_t)
905 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
906 0 : #define STEM_CALLBACK_AFTER_CREDIT after_credit
907 0 : #define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
908 :
909 : #include "../../disco/stem/fd_stem.c"
910 :
911 : fd_topo_run_tile_t fd_tile_tower = {
912 : .name = "tower",
913 : .populate_allowed_seccomp = populate_allowed_seccomp,
914 : .populate_allowed_fds = populate_allowed_fds,
915 : .scratch_align = scratch_align,
916 : .scratch_footprint = scratch_footprint,
917 : .unprivileged_init = unprivileged_init,
918 : .privileged_init = privileged_init,
919 : .run = stem_run,
920 : };
|