Line data Source code
1 : #include "fd_tower_tile.h"
2 : #include "generated/fd_tower_tile_seccomp.h"
3 :
4 : #include "../../choreo/ghost/fd_ghost.h"
5 : #include "../../choreo/hfork/fd_hfork.h"
6 : #include "../../choreo/notar/fd_notar.h"
7 : #include "../../choreo/tower/fd_tower.h"
8 : #include "../../choreo/tower/fd_tower_accts.h"
9 : #include "../../choreo/tower/fd_tower_forks.h"
10 : #include "../../choreo/tower/fd_tower_serde.h"
11 : #include "../../disco/fd_txn_p.h"
12 : #include "../../disco/keyguard/fd_keyload.h"
13 : #include "../../disco/metrics/fd_metrics.h"
14 : #include "../../disco/topo/fd_topo.h"
15 : #include "../../disco/fd_txn_m.h"
16 : #include "../../choreo/tower/fd_epoch_stakes.h"
17 : #include "../../discof/fd_accdb_topo.h"
18 : #include "../../discof/restore/utils/fd_ssmsg.h"
19 : #include "../../discof/replay/fd_exec.h"
20 : #include "../../discof/replay/fd_replay_tile.h"
21 : #include "../../flamenco/accdb/fd_accdb_sync.h"
22 : #include "../../flamenco/accdb/fd_accdb_pipe.h"
23 : #include "../../flamenco/runtime/fd_bank.h"
24 : #include "../../util/pod/fd_pod.h"
25 :
26 : #include <errno.h>
27 : #include <fcntl.h>
28 : #include <unistd.h>
29 :
30 : /* The tower tile is responsible for two things:
31 :
32 : 1. running the fork choice (fd_ghost) and TowerBFT (fd_tower) rules
33 : after replaying a block.
34 : 2. listening to gossip (duplicate shred and vote messages) and
35 : monitoring for duplicate or duplicate confirmed blocks (fd_notar).
36 :
37 : Tower signals to other tiles about events that occur as a result of
38 : those two above events, such as what block to vote on, what block to
39 : reset onto as leader, what block got rooted, what blocks are
40 : duplicates and what blocks are confirmed.
41 :
42 : In general, tower uses the block_id as the identifier for blocks. The
43 : block_id is the merkle root of the last FEC set for a block. This is
44 : guaranteed to be unique for a given block and is the canonical
45 : identifier over the slot number because unlike slot numbers, if a
46 : leader equivocates (produces multiple blocks for the same slot) the
47 : block_id can disambiguate the blocks.
48 :
49 : However, the block_id was only introduced into the Solana protocol
50 : recently, and TowerBFT still uses the "legacy" identifier of slot
51 : numbers for blocks. So the tile (and relevant modules) will use
52 : block_id when possible to interface with the protocol but otherwise
53 : fall back to slot number when block_id is unsupported. */
54 :
/* Kinds of input link consumed by this tile.  NOTE(review): presumably
   these are the values stored in ctx_t in_kind[]; the code that
   assigns them is not visible in this part of the file -- confirm. */
55 0 : #define IN_KIND_DEDUP (0)
56 0 : #define IN_KIND_EXEC (1)
57 0 : #define IN_KIND_REPLAY (2)
58 :
/* Number of entries in ctx_t vote_sha[] (one per possible vote txn
   signer). */
59 0 : #define VOTE_TXN_SIG_MAX (2UL) /* validator identity and vote authority */
60 :
/* notif_t is one confirmation notification queued in ctx_t notif.
   slot is the confirmed slot and kind is one of the
   FD_TOWER_SLOT_CONFIRMED_* values pushed by contiguous_confirm and
   notar_confirm.  block_id is only populated by notar_confirm
   (DUPLICATE and CLUSTER kinds); contiguous_confirm leaves it
   zero-initialized. */
61 : struct notif {
62 : ulong slot;
63 : int kind;
64 : fd_hash_t block_id; /* for notar confirmations only */
65 : };
66 : typedef struct notif notif_t;
67 :
/* Instantiate the fd_deque_dynamic template for notif_t.  This
   provides the notif_* API used in this file (notif_align,
   notif_footprint, notif_avail, notif_max, notif_push_head,
   notif_push_tail). */
68 : #define DEQUE_NAME notif
69 0 : #define DEQUE_T notif_t
70 : #include "../../util/tmpl/fd_deque_dynamic.c"
71 :
/* Placeholder block id.  replay_slot_completed substitutes it as the
   parent block id of any block whose parent slot equals
   ctx->init_slot. */
72 : static const fd_hash_t manifest_block_id = { .ul = { 0xf17eda2ce7b1d } }; /* FIXME manifest_block_id */
73 :
/* in_ctx_t holds per-input-link state; ctx_t keeps one per input link
   in in[].  NOTE(review): the code that fills and reads these fields
   is not visible in this part of the file.  Presumably mem is the
   workspace backing the link's data cache, chunk0/wmark bound the
   valid chunk range and mtu is the largest frag payload -- confirm
   against the init and during_frag code. */
74 : typedef struct {
75 : fd_wksp_t * mem;
76 : ulong chunk0;
77 : ulong wmark;
78 : ulong mtu;
79 : } in_ctx_t;
80 :
/* ctx_t is the tower tile's context.  scratch_footprint lays it out
   first in the tile's scratch region, followed by the footprints of
   the structures the pointer members below refer to. */
81 : typedef struct {
82 : fd_wksp_t * wksp; /* workspace */
83 :
84 : ulong seed; /* map seed */
85 : int checkpt_fd;
86 : int restore_fd;
87 : fd_pubkey_t identity_key[1];
88 : fd_pubkey_t vote_account[1];
89 : uchar our_vote_acct[FD_VOTE_STATE_DATA_MAX]; /* buffer for reading back our own vote acct data */
90 :
91 : /* structures owned by tower tile */
92 :
93 : fd_forks_t * forks;
94 : fd_ghost_t * ghost;
95 : fd_hfork_t * hfork;
96 : fd_notar_t * notar;
97 : fd_tower_t * tower;
/* count_vote_txn rebuilds tower_spare from each vote txn's
   CompactTowerSync before counting it. */
98 : fd_tower_t * tower_spare; /* spare tower used during processing */
/* notar_confirm pushes to the head of notif, contiguous_confirm pushes
   to the tail. */
99 : notif_t * notif; /* deque of confirmation notifications queued for publishing */
100 : fd_tower_accts_t * tower_accts; /* deque of accts, stake, and pubkey for the currently replayed slot */
101 : fd_epoch_stakes_t * slot_stakes; /* tracks the stakes for each voter in the epoch per fork */
102 :
103 : /* external joins owned by replay tile */
104 :
105 : fd_banks_t banks[1];
106 : fd_accdb_user_t accdb[1];
107 :
108 : /* frag-related structures (consume and publish) */
109 :
110 : uchar vote_txn[FD_TPU_PARSED_MTU];
111 : fd_sha512_t * vote_sha[VOTE_TXN_SIG_MAX];
112 : fd_compact_tower_sync_serde_t compact_tower_sync_serde;
113 : fd_snapshot_manifest_t manifest;
114 : fd_replay_slot_completed_t replay_slot_completed;
115 :
116 : /* slot watermarks */
117 :
/* replay_slot_completed treats init_slot==ULONG_MAX as "not yet
   initialized" and seeds all four watermarks from the first completed
   slot. */
118 : ulong init_slot; /* initial slot from genesis or snapshot */
119 : ulong root_slot; /* monotonically increasing contiguous tower root slot */
120 : ulong conf_slot; /* monotonically increasing contiguous confirmed slot */
121 : ulong supc_slot; /* monotonically increasing contiguous super slot */
122 :
123 : /* in/out link setup */
124 :
125 : int in_kind[ 64UL ];
126 : in_ctx_t in [ 64UL ];
127 :
/* out_chunk is the next chunk to publish into; each publish in this
   file advances it with fd_dcache_compact_next between out_chunk0 and
   out_wmark. */
128 : fd_wksp_t * out_mem;
129 : ulong out_chunk0;
130 : ulong out_wmark;
131 : ulong out_chunk;
132 :
133 : /* metrics */
134 :
/* Locally accumulated values; metrics_write copies them into the
   TOWER metrics. */
135 : struct ctx_metrics_t {
136 : ulong vote_txn_invalid;
137 : ulong vote_txn_ignored;
138 : ulong vote_txn_mismatch;
139 :
140 : ulong ancestor_rollback;
141 : ulong sibling_confirmed;
142 : ulong same_fork;
143 : ulong switch_pass;
144 : ulong switch_fail;
145 : ulong lockout_fail;
146 : ulong threshold_fail;
147 : ulong propagated_fail;
148 :
149 : ulong root_slot;
150 : ulong vote_slot;
151 : ulong reset_slot;
152 : ulong slot_ignored;
153 :
154 : fd_hfork_metrics_t hfork;
155 : } metrics;
156 :
157 : /* debug logging */
158 : int debug_fd;
159 : fd_io_buffered_ostream_t debug_ostream;
160 : uchar debug_buf[4096];
161 :
162 : } ctx_t;
163 :
164 : FD_FN_CONST static inline ulong
165 0 : scratch_align( void ) {
166 0 : return 128UL;
167 0 : }
168 :
169 : FD_FN_PURE static inline ulong
170 0 : scratch_footprint( FD_PARAM_UNUSED fd_topo_tile_t const * tile ) {
171 0 : ulong slot_max = tile->tower.max_live_slots;
172 0 : FD_LOG_DEBUG(( "hfork footprint %lu", fd_hfork_footprint( slot_max, FD_VOTER_MAX ) ));
173 0 : ulong l = FD_LAYOUT_INIT;
174 0 : l = FD_LAYOUT_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
175 0 : l = FD_LAYOUT_APPEND( l, fd_ghost_align(), fd_ghost_footprint( 2*slot_max, FD_VOTER_MAX ) );
176 0 : l = FD_LAYOUT_APPEND( l, fd_hfork_align(), fd_hfork_footprint( slot_max, FD_VOTER_MAX ) );
177 0 : l = FD_LAYOUT_APPEND( l, fd_notar_align(), fd_notar_footprint( tile->tower.max_vote_lookahead ) );
178 0 : l = FD_LAYOUT_APPEND( l, fd_tower_align(), fd_tower_footprint() );
179 0 : l = FD_LAYOUT_APPEND( l, fd_tower_accts_align(), fd_tower_accts_footprint( FD_VOTER_MAX ) );
180 0 : l = FD_LAYOUT_APPEND( l, fd_forks_align(), fd_forks_footprint( slot_max, FD_VOTER_MAX ) );
181 0 : l = FD_LAYOUT_APPEND( l, fd_tower_align(), fd_tower_footprint() ); /* ctx->tower_spare */
182 0 : l = FD_LAYOUT_APPEND( l, fd_epoch_stakes_align(), fd_epoch_stakes_footprint( slot_max ) );
183 0 : l = FD_LAYOUT_APPEND( l, notif_align(), notif_footprint( slot_max ) );
184 0 : return FD_LAYOUT_FINI( l, scratch_align() );
185 0 : }
186 :
187 : static inline void
188 0 : metrics_write( ctx_t * ctx ) {
189 0 : FD_MCNT_SET( TOWER, VOTE_TXN_INVALID, ctx->metrics.vote_txn_invalid );
190 0 : FD_MCNT_SET( TOWER, VOTE_TXN_IGNORED, ctx->metrics.vote_txn_ignored );
191 0 : FD_MCNT_SET( TOWER, VOTE_TXN_MISMATCH, ctx->metrics.vote_txn_mismatch );
192 :
193 0 : FD_MCNT_SET( TOWER, ANCESTOR_ROLLBACK, ctx->metrics.ancestor_rollback );
194 0 : FD_MCNT_SET( TOWER, SIBLING_CONFIRMED, ctx->metrics.sibling_confirmed );
195 0 : FD_MCNT_SET( TOWER, SAME_FORK, ctx->metrics.same_fork );
196 0 : FD_MCNT_SET( TOWER, SWITCH_PASS, ctx->metrics.switch_pass );
197 0 : FD_MCNT_SET( TOWER, SWITCH_FAIL, ctx->metrics.switch_fail );
198 0 : FD_MCNT_SET( TOWER, LOCKOUT_FAIL, ctx->metrics.lockout_fail );
199 0 : FD_MCNT_SET( TOWER, THRESHOLD_FAIL, ctx->metrics.threshold_fail );
200 0 : FD_MCNT_SET( TOWER, PROPAGATED_FAIL, ctx->metrics.propagated_fail );
201 :
202 0 : FD_MCNT_SET ( TOWER, HARD_FORKS_SEEN, ctx->metrics.hfork.seen );
203 0 : FD_MCNT_SET ( TOWER, HARD_FORKS_PRUNED, ctx->metrics.hfork.pruned );
204 0 : FD_MGAUGE_SET( TOWER, HARD_FORKS_ACTIVE, ctx->metrics.hfork.active );
205 0 : FD_MGAUGE_SET( TOWER, HARD_FORKS_MAX_WIDTH, ctx->metrics.hfork.max_width );
206 :
207 0 : FD_MGAUGE_SET( TOWER, ROOT_SLOT, ctx->metrics.root_slot );
208 0 : FD_MGAUGE_SET( TOWER, VOTE_SLOT, ctx->metrics.vote_slot );
209 0 : FD_MGAUGE_SET( TOWER, RESET_SLOT, ctx->metrics.reset_slot );
210 0 : FD_MCNT_SET ( TOWER, SLOT_IGNORED, ctx->metrics.slot_ignored );
211 0 : }
212 :
213 : static void
214 : publish_slot_confirmed( ctx_t * ctx,
215 : fd_stem_context_t * stem,
216 : ulong tsorig,
217 : ulong slot,
218 : fd_hash_t const * block_id,
219 : ulong bank_idx,
220 0 : int kind ) {
221 0 : fd_tower_slot_confirmed_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
222 0 : msg->slot = slot;
223 0 : msg->block_id = *block_id;
224 0 : msg->bank_idx = bank_idx;
225 0 : msg->kind = kind;
226 0 : fd_stem_publish( stem, 0UL, FD_TOWER_SIG_SLOT_CONFIRMED, ctx->out_chunk, sizeof(fd_tower_slot_confirmed_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
227 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_confirmed_t), ctx->out_chunk0, ctx->out_wmark );
228 0 : }
229 :
230 : static void
231 : contiguous_confirm( ctx_t * ctx,
232 : ulong slot,
233 : ulong wmark,
234 0 : int kind ) {
235 :
236 : /* For optimistic and rooted confirmations, confirming a slot means
237 : all ancestors are confirmed too, so we need to publish any skipped
238 : ancestors (confirmations can be out-of-order and roots can be
239 : skipped due to lockout). */
240 :
241 0 : ulong cnt = 0;
242 0 : ulong ancestor = slot;
243 0 : while( FD_UNLIKELY( ancestor > wmark ) ) {
244 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, ancestor );
245 0 : if( FD_UNLIKELY( !fork ) ) break; /* rooted past this ancestor */
246 0 : if( FD_UNLIKELY( !notif_avail( ctx->notif ) ) ) FD_LOG_CRIT(( "attempted to confirm %lu slots more than slot max %lu", cnt, notif_max( ctx->notif ) )); /* should be impossible */
247 0 : notif_push_tail( ctx->notif, (notif_t){ .slot = ancestor, .kind = kind } );
248 0 : cnt++;
249 0 : ancestor = fork->parent_slot;
250 0 : }
251 0 : }
252 :
253 : static void
254 : notar_confirm( ctx_t * ctx,
255 0 : fd_notar_blk_t * notar_blk ) {
256 :
257 : /* Record any confirmations in our tower forks structure and also
258 : publish slot_confirmed frags indicating confirmations to consumers.
259 :
260 : See documentation in fd_tower_tile.h for guarantees. */
261 :
262 0 : if( FD_LIKELY( notar_blk->dup_conf && !notar_blk->dup_notif ) ) {
263 0 : if( FD_UNLIKELY( !notif_avail( ctx->notif ) ) ) FD_LOG_CRIT(( "attempted to confirm more than slot max %lu", notif_max( ctx->notif ) )); /* should be impossible */
264 0 : notif_push_head( ctx->notif, (notif_t){ .slot = notar_blk->slot, .kind = FD_TOWER_SLOT_CONFIRMED_DUPLICATE, .block_id = notar_blk->block_id } );
265 0 : notar_blk->dup_notif = 1;
266 :
267 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, notar_blk->slot ); /* ensure fork exists */
268 0 : if( FD_UNLIKELY( !fork ) ) return; /* a slot can be duplicate confirmed by gossip votes before replay */
269 0 : fd_forks_confirmed( fork, ¬ar_blk->block_id );
270 0 : }
271 0 : if( FD_LIKELY( notar_blk->opt_conf ) ) {
272 0 : if( FD_UNLIKELY( !notar_blk->opt_notif ) ) {
273 0 : if( FD_UNLIKELY( !notif_avail( ctx->notif ) ) ) FD_LOG_CRIT(( "attempted to confirm more than slot max %lu", notif_max( ctx->notif ) )); /* should be impossible */
274 0 : notif_push_head( ctx->notif, (notif_t){ .slot = notar_blk->slot, .kind = FD_TOWER_SLOT_CONFIRMED_CLUSTER, .block_id = notar_blk->block_id } );
275 0 : notar_blk->opt_notif = 1;
276 0 : }
277 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, notar_blk->slot );
278 0 : if( FD_UNLIKELY( fork && notar_blk->slot > ctx->conf_slot ) ) {
279 0 : contiguous_confirm( ctx, notar_blk->slot, ctx->conf_slot, FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC );
280 0 : ctx->conf_slot = notar_blk->slot;
281 0 : }
282 0 : }
283 0 : if( FD_LIKELY( notar_blk->sup_conf ) ) {
284 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, notar_blk->slot );
285 0 : if( FD_UNLIKELY( fork && notar_blk->slot > ctx->supc_slot ) ) {
286 0 : contiguous_confirm( ctx, notar_blk->slot, ctx->supc_slot, FD_TOWER_SLOT_CONFIRMED_SUPER );
287 0 : ctx->supc_slot = notar_blk->slot;
288 0 : }
289 0 : }
290 0 : }
291 :
292 : static void
293 : count_vote_txn( ctx_t * ctx,
294 : fd_txn_t const * txn,
295 0 : uchar const * payload ) {
296 :
297 : /* Count vote txns from resolv and replay. Note these txns have
298 : already been parsed and sigverified, so the only thing tower needs
299 : to do is filter for votes.
300 :
301 : We are a little stricter than Agave here when validating the vote
302 : because we use the same validation as pack ie. is_simple_vote which
303 : includes a check that there are at most two signers, whereas
304 : Agave's gossip vote parser does not perform that same check (the
305 : only two signers are the identity key and vote authority, which may
306 : optionally be the same).
307 :
308 : Being a little stricter here is ok because even if we drop some
309 : votes with extraneous signers that Agave would consider valid
310 : (unlikely), gossip votes are in general considered unreliable and
311 : ultimately consensus is reached through replaying the vote txns.
312 :
313 : The remaining checks mirror Agave as closely as possible (and are
314 : documented throughout below). */
315 :
316 0 : if( FD_UNLIKELY( !fd_txn_is_simple_vote_transaction( txn, payload ) ) ) { ctx->metrics.vote_txn_invalid++; return; }
317 :
318 : /* TODO check the authorized voter for this vote account (from epoch
319 : stakes) is one of the signers */
320 :
321 : /* Filter any non-tower sync votes. */
322 :
323 0 : fd_txn_instr_t const * instr = &txn->instr[0];
324 0 : uchar const * instr_data = payload + instr->data_off;
325 0 : uint kind = fd_uint_load_4_fast( instr_data );
326 0 : if( FD_UNLIKELY( kind != FD_VOTE_IX_KIND_TOWER_SYNC && kind != FD_VOTE_IX_KIND_TOWER_SYNC_SWITCH ) ) { ctx->metrics.vote_txn_ignored++; return; };
327 :
328 : /* Deserialize the CompactTowerSync. */
329 :
330 0 : int err = fd_compact_tower_sync_deserialize( &ctx->compact_tower_sync_serde, instr_data + sizeof(uint), instr->data_sz - sizeof(uint) );
331 0 : if( FD_UNLIKELY( err == -1 ) ) { ctx->metrics.vote_txn_invalid++; return; }
332 0 : ulong slot = ctx->compact_tower_sync_serde.root;
333 0 : fd_tower_remove_all( ctx->tower_spare );
334 0 : for( ulong i = 0; i < ctx->compact_tower_sync_serde.lockouts_cnt; i++ ) {
335 0 : slot += ctx->compact_tower_sync_serde.lockouts[i].offset;
336 0 : fd_tower_push_tail( ctx->tower_spare, (fd_tower_vote_t){ .slot = slot, .conf = ctx->compact_tower_sync_serde.lockouts[i].confirmation_count } );
337 0 : }
338 0 : if( FD_UNLIKELY( 0==memcmp( &ctx->compact_tower_sync_serde.block_id, &hash_null, sizeof(fd_hash_t) ) ) ) { ctx->metrics.vote_txn_invalid++; return; };
339 :
340 0 : fd_pubkey_t const * accs = (fd_pubkey_t const *)fd_type_pun_const( payload + txn->acct_addr_off );
341 0 : fd_pubkey_t const * vote_acc = NULL;
342 0 : if( FD_UNLIKELY( txn->signature_cnt==1 ) ) vote_acc = (fd_pubkey_t const *)fd_type_pun_const( &accs[1] ); /* identity and authority same, account idx 1 is the vote account address */
343 0 : else vote_acc = (fd_pubkey_t const *)fd_type_pun_const( &accs[2] ); /* identity and authority diff, account idx 2 is the vote account address */
344 :
345 : /* Return early if their tower is empty. */
346 :
347 0 : if( FD_UNLIKELY( fd_tower_empty( ctx->tower_spare ) ) ) { ctx->metrics.vote_txn_ignored++; return; };
348 :
349 : /* The vote txn contains a block id and bank hash for their last vote
350 : slot in the tower. Agave always counts the last vote.
351 :
352 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L476-L487 */
353 :
354 0 : fd_tower_vote_t const * their_last_vote = fd_tower_peek_tail_const( ctx->tower_spare );
355 0 : fd_hash_t const * their_block_id = &ctx->compact_tower_sync_serde.block_id;
356 0 : fd_hash_t const * their_bank_hash = &ctx->compact_tower_sync_serde.hash;
357 :
358 : /* Similar to what Agave does in cluster_info_vote_listener, we use
359 : the stake associated with a vote account as of our current root
360 : (which could potentially be a different epoch than the vote we are
361 : counting or when we observe the vote). They default stake to 0 for
362 : voters who are not found. */
363 :
364 0 : ulong total_stake = fd_ghost_root( ctx->ghost )->total_stake;
365 :
366 0 : fd_voter_stake_key_t stake_key = { .vote_account = *vote_acc, .slot = ctx->root_slot };
367 0 : fd_voter_stake_t * stake = fd_voter_stake_map_ele_query( ctx->slot_stakes->voter_stake_map, &stake_key, NULL, ctx->slot_stakes->voter_stake_pool );
368 :
369 0 : fd_hfork_count_vote( ctx->hfork, vote_acc, their_block_id, their_bank_hash, their_last_vote->slot, stake ? stake->stake : 0, total_stake, &ctx->metrics.hfork );
370 :
371 0 : fd_notar_blk_t * notar_blk = fd_notar_count_vote( ctx->notar, total_stake, vote_acc, their_last_vote->slot, their_block_id );
372 0 : if( FD_LIKELY( notar_blk ) ) notar_confirm( ctx, notar_blk );
373 :
374 0 : fd_tower_forks_t * fork = fd_tower_forks_query( ctx->forks->tower_forks, their_last_vote->slot, NULL );
375 0 : if( FD_UNLIKELY( !fork ) ) { ctx->metrics.vote_txn_ignored++; return; /* we haven't replayed this slot yet */ };
376 :
377 0 : fd_hash_t const * our_block_id = fd_forks_canonical_block_id( ctx->forks, their_last_vote->slot );
378 0 : if( FD_UNLIKELY( 0!=memcmp( our_block_id, their_block_id, sizeof(fd_hash_t) ) ) ) { ctx->metrics.vote_txn_mismatch++; return; }
379 :
380 : /* Agave decides to count intermediate vote slots in the tower only if
381 : 1. they've replayed the slot and 2. their replay bank hash matches
382 : the vote's bank hash. We do the same thing, but using block_ids.
383 :
384 : It's possible we haven't yet replayed this slot being voted on
385 : because gossip votes can be ahead of our replay.
386 :
387 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L483-L487 */
388 :
389 0 : int skipped_last_vote = 0;
390 0 : for( fd_tower_iter_t iter = fd_tower_iter_init_rev( ctx->tower_spare );
391 0 : !fd_tower_iter_done_rev( ctx->tower_spare, iter );
392 0 : iter = fd_tower_iter_prev ( ctx->tower_spare, iter ) ) {
393 0 : if( FD_UNLIKELY( !skipped_last_vote ) ) { skipped_last_vote = 1; continue; }
394 0 : fd_tower_vote_t const * their_intermediate_vote = fd_tower_iter_ele_const( ctx->tower_spare, iter );
395 :
396 : /* If we don't recognize an intermediate vote slot in their tower,
397 : it means their tower either:
398 :
399 : 1. Contains intermediate vote slots that are too old (older than
400 : our root) so we already pruned them for tower_forks. Normally
401 : if the descendant (last vote slot) is in tower forks, then all
402 : of its ancestors should be in there too.
403 :
404 : 2. Is invalid. Even though at this point we have successfully
405 : sigverified and deserialized their vote txn, the tower itself
406 : might still be invalid because unlike TPU vote txns, we have
407 : not plumbed through the vote program, but obviously gossip
408 : votes do not so we need to do some light validation here.
409 :
410 : We could throwaway this voter's tower, but we handle it the same
411 : way as Agave which is to just skip this intermediate vote slot:
412 :
413 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L513-L518 */
414 :
415 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, their_intermediate_vote->slot );
416 0 : if( FD_UNLIKELY( !fork ) ) { ctx->metrics.vote_txn_ignored++; continue; }
417 :
418 : /* Otherwise, we count the vote using our own block id for that slot
419 : (again, mirroring what Agave does albeit with bank hashes).
420 :
421 : Agave uses the current root bank's total stake when counting
422 : vote txns from gossip / replay:
423 :
424 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L500 */
425 :
426 :
427 0 : fd_notar_blk_t * notar_blk = fd_notar_count_vote( ctx->notar, total_stake, vote_acc, their_intermediate_vote->slot, fd_forks_canonical_block_id( ctx->forks, their_intermediate_vote->slot ) );
428 0 : if( FD_LIKELY( notar_blk ) ) notar_confirm( ctx, notar_blk );
429 0 : }
430 0 : }
431 :
432 : ulong
433 : query_acct_stake_from_bank( fd_tower_accts_t * tower_accts_deque,
434 : fd_epoch_stakes_t * epoch_stakes,
435 : fd_bank_t * bank,
436 0 : ulong slot ) {
437 0 : ulong total_stake = 0;
438 0 : fd_vote_states_t const * vote_states = fd_bank_vote_states_locking_query( bank );
439 0 : fd_vote_states_iter_t iter_[1];
440 0 : ulong prev_voter_idx = ULONG_MAX;
441 0 : for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, vote_states );
442 0 : !fd_vote_states_iter_done( iter );
443 0 : fd_vote_states_iter_next( iter ) ) {
444 0 : fd_vote_state_ele_t const * vote_state = fd_vote_states_iter_ele( iter );
445 0 : if( FD_UNLIKELY( vote_state->stake_t_2 == 0 ) ) continue; /* skip unstaked vote accounts */
446 0 : fd_pubkey_t const * vote_account_pubkey = &vote_state->vote_account;
447 0 : fd_tower_accts_push_tail( tower_accts_deque, (fd_tower_accts_t){ .addr = *vote_account_pubkey, .stake = vote_state->stake_t_2 } );
448 0 : prev_voter_idx = fd_epoch_stakes_slot_stakes_add( epoch_stakes, slot, vote_account_pubkey, vote_state->stake_t_2, prev_voter_idx );
449 0 : total_stake += vote_state->stake_t_2;
450 0 : }
451 0 : fd_bank_vote_states_end_locking_query( bank );
452 0 : return total_stake;
453 0 : }
454 :
455 : static void
456 : replay_slot_completed( ctx_t * ctx,
457 : fd_replay_slot_completed_t * slot_completed,
458 : ulong tsorig,
459 0 : fd_stem_context_t * stem ) {
460 :
461 : /* Initialize slot watermarks on the first replay_slot_completed. */
462 :
463 0 : if( FD_UNLIKELY( ctx->init_slot == ULONG_MAX ) ) {
464 0 : ctx->init_slot = slot_completed->slot;
465 0 : ctx->root_slot = slot_completed->slot;
466 0 : ctx->conf_slot = slot_completed->slot;
467 0 : ctx->supc_slot = slot_completed->slot;
468 0 : }
469 :
470 0 : if( FD_UNLIKELY( 0==memcmp( &slot_completed->block_id.uc, &hash_null, sizeof(fd_hash_t) ) ) ) {
471 0 : FD_LOG_CRIT(( "replay_slot_completed slot %lu block id is null", slot_completed->slot ));
472 0 : }
473 :
474 : /* This is a temporary patch for equivocation. */
475 :
476 0 : if( FD_UNLIKELY( fd_forks_query( ctx->forks, slot_completed->slot ) ) ) {
477 0 : FD_BASE58_ENCODE_32_BYTES( slot_completed->block_id.uc, block_id );
478 0 : FD_LOG_WARNING(( "tower ignoring replay of equivocating slot %lu %s", slot_completed->slot, block_id ));
479 :
480 : /* Still need to return a message to replay so the refcnt on the bank is decremented. */
481 0 : fd_tower_slot_ignored_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
482 0 : msg->slot = slot_completed->slot;
483 0 : msg->bank_idx = slot_completed->bank_idx;
484 :
485 0 : fd_stem_publish( stem, 0UL, FD_TOWER_SIG_SLOT_IGNORED, ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
486 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), ctx->out_chunk0, ctx->out_wmark );
487 0 : return;
488 0 : }
489 :
490 : /* Initialize the xid. */
491 :
492 0 : fd_funk_txn_xid_t xid = { .ul = { slot_completed->slot, slot_completed->bank_idx } };
493 :
494 : /* Query our on-chain vote acct and reconcile with our local tower. */
495 :
496 0 : ulong our_vote_acct_bal = ULONG_MAX;
497 0 : int found = 0;
498 0 : fd_accdb_ro_t ro[1];
499 0 : if( FD_LIKELY( fd_accdb_open_ro( ctx->accdb, ro, &xid, ctx->vote_account ) ) ) {
500 : /* Copy account data */
501 0 : found = 1;
502 0 : ulong data_sz = fd_ulong_min( fd_accdb_ref_data_sz( ro ), FD_VOTE_STATE_DATA_MAX );
503 0 : our_vote_acct_bal = fd_accdb_ref_lamports( ro );
504 0 : fd_memcpy( ctx->our_vote_acct, fd_accdb_ref_data_const( ro ), data_sz );
505 0 : fd_accdb_close_ro( ctx->accdb, ro );
506 :
507 0 : fd_tower_reconcile( ctx->tower, ctx->root_slot, ctx->our_vote_acct );
508 : /* Sanity check that most recent vote in tower exists in tower forks */
509 0 : fd_tower_vote_t const * last_vote = fd_tower_peek_tail_const( ctx->tower );
510 0 : FD_TEST( !last_vote || fd_forks_query( ctx->forks, last_vote->slot ) );
511 0 : }
512 :
513 : /* Insert the vote acct addrs and stakes from the bank into accts. */
514 :
515 0 : fd_tower_accts_remove_all( ctx->tower_accts );
516 0 : fd_bank_t bank[1];
517 0 : if( FD_UNLIKELY( !fd_banks_bank_query( bank, ctx->banks, slot_completed->bank_idx ) ) ) FD_LOG_CRIT(( "invariant violation: bank %lu is missing", slot_completed->bank_idx ));
518 0 : ulong total_stake = query_acct_stake_from_bank( ctx->tower_accts, ctx->slot_stakes, bank, slot_completed->slot );
519 :
520 : /* Insert the just replayed block into forks. */
521 :
522 0 : FD_TEST( !fd_forks_query( ctx->forks, slot_completed->slot ) );
523 0 : fd_tower_forks_t * fork = fd_forks_insert( ctx->forks, slot_completed->slot, slot_completed->parent_slot );
524 0 : fork->parent_slot = slot_completed->parent_slot;
525 0 : fork->confirmed = 0;
526 0 : fork->voted = 0;
527 0 : fork->replayed_block_id = slot_completed->block_id;
528 0 : fork->bank_idx = slot_completed->bank_idx;
529 0 : fd_forks_replayed( ctx->forks, fork, slot_completed->bank_idx, &slot_completed->block_id );
530 0 : fd_forks_lockouts_clear( ctx->forks, slot_completed->parent_slot );
531 :
532 : /* Insert the just replayed block into ghost. */
533 :
534 0 : fd_hash_t const * parent_block_id = &slot_completed->parent_block_id;
535 0 : if( FD_UNLIKELY( slot_completed->parent_slot==ctx->init_slot ) ) parent_block_id = &manifest_block_id;
536 0 : if( FD_UNLIKELY( slot_completed->slot ==ctx->init_slot ) ) parent_block_id = NULL;
537 :
538 0 : if( FD_UNLIKELY( parent_block_id && !fd_ghost_query( ctx->ghost, parent_block_id ) ) ) {
539 :
540 : /* Rare occurrence where replay executes a block down a minority fork
541 : that we have pruned. Due to a race in reading frags, replay may
542 : believe the minority fork exists and is still executable, and
543 : executes the block and delivers it to tower. Tower should ignore
544 : this block as it's parent no longer exists. */
545 :
546 0 : FD_BASE58_ENCODE_32_BYTES( parent_block_id->uc, parent_block_id_cstr );
547 0 : FD_LOG_WARNING(( "replay likely lagging tower publish, executed slot %lu is missing parent block id %s, excluding from ghost", slot_completed->slot, parent_block_id_cstr ));
548 0 : ctx->metrics.slot_ignored++;
549 :
550 : /* Still need to return a message to replay so the refcnt on the
551 : bank is decremented. */
552 :
553 0 : fd_tower_slot_ignored_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
554 0 : msg->slot = slot_completed->slot;
555 0 : msg->bank_idx = slot_completed->bank_idx;
556 :
557 0 : fd_stem_publish( stem, 0UL, FD_TOWER_SIG_SLOT_IGNORED, ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
558 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), ctx->out_chunk0, ctx->out_wmark );
559 0 : return;
560 0 : }
561 :
562 0 : fd_ghost_blk_t * ghost_blk = fd_ghost_insert( ctx->ghost, &slot_completed->block_id, parent_block_id, slot_completed->slot );
563 0 : ghost_blk->total_stake = total_stake;
564 :
565 : /* Iterate vote accounts. */
566 :
567 0 : fd_tower_accts_t * tower_accts = ctx->tower_accts;
568 0 : fd_accdb_ro_pipe_t ro_pipe[1];
569 0 : fd_accdb_ro_pipe_init( ro_pipe, ctx->accdb, &xid );
570 0 : fd_tower_accts_iter_t iter_head = fd_tower_accts_iter_init( tower_accts );
571 0 : fd_tower_accts_iter_t iter_tail = fd_tower_accts_iter_init( tower_accts );
572 0 : for(;;) {
573 0 : if( FD_UNLIKELY( fd_tower_accts_iter_done( tower_accts, iter_head ) ) ) {
574 0 : fd_accdb_ro_pipe_flush( ro_pipe );
575 0 : }
576 :
577 0 : fd_accdb_ro_t * ro;
578 0 : while( (ro = fd_accdb_ro_pipe_poll( ro_pipe )) ) {
579 0 : fd_tower_accts_t * acct = fd_tower_accts_iter_ele( tower_accts, iter_tail );
580 0 : if( FD_UNLIKELY( !fd_accdb_ref_lamports( ro ) ) ) {
581 0 : FD_BASE58_ENCODE_32_BYTES( acct->addr.key, pubkey_b58 );
582 0 : FD_LOG_CRIT(( "vote account in bank->vote_states not found. slot %lu address %s", slot_completed->slot, pubkey_b58 ));
583 0 : }
584 0 : ulong data_sz = fd_ulong_min( fd_accdb_ref_data_sz( ro ), FD_VOTE_STATE_DATA_MAX );
585 0 : fd_memcpy( acct->data, fd_accdb_ref_data_const( ro ), data_sz );
586 :
587 : /* 1. Update forks with lockouts. */
588 :
589 0 : fd_forks_lockouts_add( ctx->forks, slot_completed->slot, &acct->addr, acct );
590 :
591 : /* 2. Count the last vote slot in the vote state towards ghost. */
592 :
593 0 : ulong vote_slot = fd_voter_vote_slot( acct->data );
594 0 : if( FD_LIKELY( vote_slot!=ULONG_MAX && /* has voted */
595 0 : vote_slot>=fd_ghost_root( ctx->ghost )->slot ) ) { /* vote not too old */
596 : /* We search up the ghost ancestry to find the ghost block for this
597 : vote slot. In Agave, they look this value up using a hashmap of
598 : slot->block_id ("fork progress"), but that approach only works
599 : because they dump and repair (so there's only ever one canonical
600 : block id). We retain multiple block ids, both the original and
601 : confirmed one. */
602 :
603 0 : fd_ghost_blk_t * ancestor_blk = fd_ghost_slot_ancestor( ctx->ghost, ghost_blk, vote_slot ); /* FIXME potentially slow */
604 :
605 : /* It is impossible for ancestor to be missing, because these are
606 : vote accounts on a given fork, not vote txns across forks. So we
607 : know these towers must contain slots we know about (as long as
608 : they are >= root, which we checked above). */
609 :
610 0 : if( FD_UNLIKELY( !ancestor_blk ) ) {
611 0 : FD_BASE58_ENCODE_32_BYTES( acct->addr.key, pubkey_b58 );
612 0 : FD_LOG_CRIT(( "missing ancestor. replay slot %lu vote slot %lu voter %s", slot_completed->slot, vote_slot, pubkey_b58 ));
613 0 : }
614 :
615 0 : fd_ghost_count_vote( ctx->ghost, ancestor_blk, &acct->addr, acct->stake, vote_slot );
616 0 : }
617 :
618 0 : if( FD_UNLIKELY( fd_tower_accts_iter_done( tower_accts, iter_tail ) ) ) {
619 0 : goto done_vote_iter;
620 0 : }
621 0 : iter_tail = fd_tower_accts_iter_next( tower_accts, iter_tail );
622 0 : }
623 :
624 0 : if( FD_UNLIKELY( fd_tower_accts_iter_done( tower_accts, iter_head ) ) ) break;
625 0 : fd_accdb_ro_pipe_enqueue( ro_pipe, fd_tower_accts_iter_ele( ctx->tower_accts, iter_head )->addr.key );
626 0 : iter_head = fd_tower_accts_iter_next( ctx->tower_accts, iter_head );
627 0 : }
628 0 : done_vote_iter:
629 0 : fd_accdb_ro_pipe_fini( ro_pipe );
630 :
631 : /* Insert the just replayed block into hard fork detector. */
632 :
633 0 : fd_hfork_record_our_bank_hash( ctx->hfork, &slot_completed->block_id, &slot_completed->bank_hash, fd_ghost_root( ctx->ghost )->total_stake );
634 :
635 : /* fd_notar requires some bookkeeping when there is a new epoch. */
636 :
637 0 : if( FD_UNLIKELY( ctx->notar->epoch==ULONG_MAX || slot_completed->epoch > ctx->notar->epoch ) ) {
638 0 : fd_notar_advance_epoch( ctx->notar, ctx->tower_accts, slot_completed->epoch );
639 0 : }
640 :
641 : /* Check if gossip votes already confirmed the fork's block_id (gossip
642 : can be ahead of replay - this is tracked by fd_notar). */
643 :
644 0 : fd_notar_slot_t * notar_slot = fd_notar_slot_query( ctx->notar->slot_map, slot_completed->slot, NULL );
645 0 : if( FD_UNLIKELY( notar_slot )) { /* optimize for replay keeping up (being ahead of gossip votes) */
646 0 : for( ulong i = 0; i < notar_slot->block_ids_cnt; i++ ) {
647 0 : fd_notar_blk_t * notar_blk = fd_notar_blk_query( ctx->notar->blk_map, notar_slot->block_ids[i], NULL );
648 0 : FD_TEST( notar_blk ); /* block_ids_cnt corrupt */
649 0 : if( FD_LIKELY( notar_blk->dup_conf ) ) {
650 0 : fork->confirmed = 1;
651 0 : fork->confirmed_block_id = notar_blk->block_id;
652 0 : break;
653 0 : }
654 0 : }
655 0 : }
656 :
657 : /* We replayed an unconfirmed duplicate, warn for now. Follow-up PR
658 : will implement eviction and repair of the correct one. */
659 :
660 0 : if( FD_UNLIKELY( fork->confirmed && 0!=memcmp( &fork->confirmed_block_id, &fork->replayed_block_id, sizeof(fd_hash_t) ) ) ) {
661 0 : FD_BASE58_ENCODE_32_BYTES( slot_completed->block_id.key, block_id_b58 );
662 0 : FD_BASE58_ENCODE_32_BYTES( fork->confirmed_block_id.key, confirmed_block_id_b58 );
663 0 : FD_LOG_WARNING(( "replayed an unconfirmed duplicate %lu. ours %s. confirmed %s.", slot_completed->slot, block_id_b58, confirmed_block_id_b58 ));
664 0 : }
665 :
666 : /* Determine reset, vote, and root slots. There may not be a vote or
667 : root slot but there is always a reset slot. */
668 :
669 0 : fd_tower_out_t out = fd_tower_vote_and_reset( ctx->tower, ctx->tower_accts, ctx->slot_stakes, ctx->forks, ctx->ghost, ctx->notar );
670 :
671 : /* Write out metrics for vote / reset reasons. */
672 :
673 0 : ctx->metrics.ancestor_rollback += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_ANCESTOR_ROLLBACK );
674 0 : ctx->metrics.sibling_confirmed += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SIBLING_CONFIRMED );
675 0 : ctx->metrics.same_fork += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SAME_FORK );
676 0 : ctx->metrics.switch_pass += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_PASS );
677 0 : ctx->metrics.switch_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_FAIL );
678 0 : ctx->metrics.lockout_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_LOCKOUT_FAIL );
679 0 : ctx->metrics.threshold_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_THRESHOLD_FAIL );
680 0 : ctx->metrics.propagated_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_PROPAGATED_FAIL );
681 :
682 : /* Update forks if there is a vote slot. */
683 :
684 0 : if( FD_LIKELY( out.vote_slot!=ULONG_MAX ) ) {
685 0 : ctx->metrics.vote_slot = out.vote_slot;
686 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, out.vote_slot );
687 0 : FD_TEST( fork ); /* we must have replayed every slot we voted for */
688 0 : fd_forks_voted( fork, &out.vote_block_id );
689 0 : }
690 :
691 : /* Publish according structures if there is a root */
692 :
693 0 : if( FD_UNLIKELY( out.root_slot!=ULONG_MAX ) ) {
694 0 : ctx->metrics.root_slot = out.root_slot;
695 :
696 0 : if( FD_UNLIKELY( 0==memcmp( &out.root_block_id, &hash_null, sizeof(fd_hash_t) ) ) ) {
697 0 : FD_LOG_CRIT(( "invariant violation: root block id is null at slot %lu", out.root_slot ));
698 0 : }
699 :
700 : /* forks */
701 :
702 0 : for(ulong slot = ctx->root_slot; slot < out.root_slot; slot++ ) {
703 0 : fd_tower_forks_t * fork = fd_forks_query ( ctx->forks, slot );
704 0 : if( FD_LIKELY( fork ) ) fd_forks_remove( ctx->forks, slot );
705 0 : fd_epoch_stakes_slot_t * slot_stakes = fd_epoch_stakes_slot_map_query ( ctx->slot_stakes->slot_stakes_map, slot, NULL );
706 0 : if( FD_LIKELY( slot_stakes ) ) fd_epoch_stakes_slot_stakes_remove( ctx->slot_stakes, slot_stakes );
707 0 : }
708 :
709 : /* ghost */
710 :
711 0 : fd_ghost_blk_t * newr = fd_ghost_query( ctx->ghost, &out.root_block_id );
712 0 : if( FD_UNLIKELY( !newr ) ) { /* a block id we rooted is missing from ghost */
713 0 : FD_BASE58_ENCODE_32_BYTES( out.root_block_id.uc, block_id_cstr );
714 0 : FD_LOG_CRIT(( "missing root block id %s at slot %lu", block_id_cstr, out.root_slot ));
715 0 : }
716 0 : fd_ghost_publish( ctx->ghost, newr );
717 :
718 : /* notar */
719 :
720 0 : fd_notar_advance_wmark( ctx->notar, out.root_slot );
721 :
722 : /* Rooting implies optimistic confirmation in the Firedancer API, so
723 : we need to make sure to publish weaker confirmation levels before
724 : publishing stronger ones. In most cases this is a no-op because
725 : gossip votes already triggered optimistic confirmation.
726 :
727 : TODO include replay votes in optimistic conf vote counting. */
728 :
729 0 : contiguous_confirm( ctx, out.root_slot, ctx->conf_slot, FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC );
730 0 : contiguous_confirm( ctx, out.root_slot, ctx->supc_slot, FD_TOWER_SLOT_CONFIRMED_SUPER );
731 0 : contiguous_confirm( ctx, out.root_slot, ctx->root_slot, FD_TOWER_SLOT_CONFIRMED_ROOTED );
732 :
733 : /* Update slot watermarks. */
734 :
735 0 : ctx->root_slot = out.root_slot;
736 0 : }
737 :
738 0 : ctx->metrics.reset_slot = out.reset_slot;
739 :
740 : /* Publish a slot_done frag to tower_out. */
741 :
742 0 : fd_tower_slot_done_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
743 0 : msg->replay_slot = slot_completed->slot;
744 0 : msg->active_fork_cnt = fd_tower_leaves_pool_used( ctx->forks->tower_leaves_pool );
745 0 : msg->vote_slot = out.vote_slot;
746 0 : msg->reset_slot = out.reset_slot;
747 0 : msg->reset_block_id = out.reset_block_id;
748 0 : msg->root_slot = out.root_slot;
749 0 : msg->root_block_id = out.root_block_id;
750 0 : msg->replay_bank_idx = slot_completed->bank_idx;
751 0 : msg->vote_acct_bal = our_vote_acct_bal;
752 :
753 : /* Populate slot_done with a vote txn representing our current tower
754 : (regardless of whether there was a new vote slot or not).
755 :
756 : TODO only do this on refresh_last_vote? */
757 :
758 0 : fd_lockout_offset_t lockouts[FD_TOWER_VOTE_MAX];
759 0 : fd_txn_p_t txn[1];
760 0 : fd_tower_to_vote_txn( ctx->tower, ctx->root_slot, lockouts, &slot_completed->bank_hash, &slot_completed->block_hash, ctx->identity_key, ctx->identity_key, ctx->vote_account, txn );
761 0 : FD_TEST( !fd_tower_empty( ctx->tower ) );
762 0 : FD_TEST( txn->payload_sz && txn->payload_sz<=FD_TPU_MTU );
763 0 : fd_memcpy( msg->vote_txn, txn->payload, txn->payload_sz );
764 0 : msg->vote_txn_sz = txn->payload_sz;
765 :
766 0 : msg->tower_cnt = 0UL;
767 0 : if( FD_LIKELY( found ) ) msg->tower_cnt = fd_tower_with_lat_from_vote_acc( msg->tower, ctx->our_vote_acct );
768 :
769 0 : fd_stem_publish( stem, 0UL, FD_TOWER_SIG_SLOT_DONE, ctx->out_chunk, sizeof(fd_tower_slot_done_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
770 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_done_t), ctx->out_chunk0, ctx->out_wmark );
771 :
772 0 : if( FD_UNLIKELY( ctx->debug_fd!=-1 ) ) {
773 : /* standard buf_sz used by below prints is ~3400 bytes, so buf_max of
774 : 4096 is sufficient to keep the debug file mostly up to date */
775 0 : fd_ghost_print( ctx->ghost, fd_ghost_root( ctx->ghost ), &ctx->debug_ostream );
776 0 : fd_tower_print( ctx->tower, fd_ghost_root( ctx->ghost )->slot, &ctx->debug_ostream );
777 0 : }
778 0 : }
779 :
780 : static inline void
781 : after_credit( ctx_t * ctx,
782 : fd_stem_context_t * stem,
783 : int * opt_poll_in FD_PARAM_UNUSED,
784 0 : int * charge_busy ) {
785 0 : if( FD_LIKELY( !notif_empty( ctx->notif ) ) ) {
786 :
787 : /* Contiguous confirmations are pushed to tail in order from child
788 : to ancestor, so we pop from tail to publish confirmations in
789 : order from ancestor to child. */
790 :
791 0 : notif_t ancestor = notif_pop_tail( ctx->notif );
792 0 : if( FD_UNLIKELY( ancestor.kind == FD_TOWER_SLOT_CONFIRMED_CLUSTER || ancestor.kind == FD_TOWER_SLOT_CONFIRMED_DUPLICATE ) ) {
793 :
794 : /* Duplicate confirmations and cluster confirmations were sourced
795 : from notar (through gossip txns and replay txns) so we need to
796 : use the block_id from the notif recorded at the time of the
797 : confirmation */
798 :
799 0 : publish_slot_confirmed( ctx, stem, fd_frag_meta_ts_comp( fd_tickcount() ), ancestor.slot, &ancestor.block_id, ULONG_MAX, ancestor.kind );
800 0 : } else {
801 0 : fd_tower_forks_t * fork = fd_tower_forks_query( ctx->forks->tower_forks, ancestor.slot, NULL );
802 0 : if( FD_UNLIKELY( !fork ) ) FD_LOG_CRIT(( "missing fork for ancestor %lu", ancestor.slot ));
803 0 : publish_slot_confirmed( ctx, stem, fd_frag_meta_ts_comp( fd_tickcount() ), ancestor.slot, fd_forks_canonical_block_id( ctx->forks, ancestor.slot ), fork->bank_idx, ancestor.kind );
804 0 : }
805 0 : *opt_poll_in = 0; /* drain the confirmations */
806 0 : *charge_busy = 1;
807 0 : }
808 0 : }
809 :
810 : static inline int
811 : returnable_frag( ctx_t * ctx,
812 : ulong in_idx,
813 : ulong seq FD_PARAM_UNUSED,
814 : ulong sig,
815 : ulong chunk,
816 : ulong sz,
817 : ulong ctl FD_PARAM_UNUSED,
818 : ulong tsorig,
819 : ulong tspub FD_PARAM_UNUSED,
820 0 : fd_stem_context_t * stem ) {
821 :
822 0 : if( FD_UNLIKELY( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) )
823 0 : FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
824 :
825 0 : switch( ctx->in_kind[ in_idx ] ) {
826 0 : case IN_KIND_DEDUP: {
827 0 : if( FD_UNLIKELY( ctx->root_slot==ULONG_MAX ) ) return 1;
828 0 : fd_txn_m_t * txnm = (fd_txn_m_t *)fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
829 0 : FD_TEST( txnm->payload_sz<=FD_TPU_MTU );
830 0 : FD_TEST( txnm->txn_t_sz<=FD_TXN_MAX_SZ );
831 0 : count_vote_txn( ctx, fd_txn_m_txn_t_const( txnm ), fd_txn_m_payload_const( txnm ) );
832 0 : return 0;
833 0 : }
834 0 : case IN_KIND_EXEC: {
835 0 : if( FD_LIKELY( (sig>>32)==FD_EXEC_TT_TXN_EXEC ) ) {
836 0 : fd_exec_txn_exec_msg_t * msg = fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
837 0 : count_vote_txn( ctx, TXN(msg->txn), msg->txn->payload );
838 0 : }
839 0 : return 0;
840 0 : }
841 0 : case IN_KIND_REPLAY: {
842 0 : if( FD_LIKELY( sig==REPLAY_SIG_SLOT_COMPLETED ) ) {
843 0 : fd_memcpy( &ctx->replay_slot_completed, fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ), sizeof(fd_replay_slot_completed_t) );
844 0 : replay_slot_completed( ctx, &ctx->replay_slot_completed, tsorig, stem );
845 0 : } else if ( FD_LIKELY( sig==REPLAY_SIG_SLOT_DEAD ) ) {
846 0 : fd_replay_slot_dead_t * slot_dead = (fd_replay_slot_dead_t *)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
847 0 : fd_hfork_record_our_bank_hash( ctx->hfork, &slot_dead->block_id, NULL, fd_ghost_root( ctx->ghost )->total_stake );
848 0 : }
849 0 : return 0;
850 0 : }
851 0 : default: {
852 0 : FD_LOG_ERR(( "unexpected input kind %d", ctx->in_kind[ in_idx ] ));
853 0 : }
854 0 : }
855 0 : }
856 :
857 : static void
858 : privileged_init( fd_topo_t * topo,
859 0 : fd_topo_tile_t * tile ) {
860 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
861 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
862 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
863 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
864 :
865 0 : FD_TEST( fd_rng_secure( &ctx->seed, sizeof(ctx->seed) ) );
866 :
867 0 : if( FD_UNLIKELY( !strcmp( tile->tower.identity_key, "" ) ) ) FD_LOG_ERR(( "identity_key_path not set" ));
868 0 : ctx->identity_key[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->tower.identity_key, /* pubkey only: */ 1 ) );
869 :
870 : /* The vote key can be specified either directly as a base58 encoded
871 : pubkey, or as a file path. We first try to decode as a pubkey. */
872 :
873 0 : uchar * vote_key = fd_base58_decode_32( tile->tower.vote_account, ctx->vote_account->uc );
874 0 : if( FD_UNLIKELY( !vote_key ) ) ctx->vote_account[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->tower.vote_account, /* pubkey only: */ 1 ) );
875 :
876 : /* The tower file is used to checkpt and restore the state of the
877 : local tower. */
878 :
879 0 : char path[ PATH_MAX ];
880 0 : FD_BASE58_ENCODE_32_BYTES( ctx->identity_key->uc, identity_key_b58 );
881 0 : FD_TEST( fd_cstr_printf_check( path, sizeof(path), NULL, "%s/tower-1_9-%s.bin.new", tile->tower.base_path, identity_key_b58 ) );
882 0 : ctx->checkpt_fd = open( path, O_WRONLY|O_CREAT|O_TRUNC, 0600 );
883 0 : if( FD_UNLIKELY( -1==ctx->checkpt_fd ) ) FD_LOG_ERR(( "open(`%s`) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
884 :
885 0 : FD_TEST( fd_cstr_printf_check( path, sizeof(path), NULL, "%s/tower-1_9-%s.bin", tile->tower.base_path, identity_key_b58 ) );
886 0 : ctx->restore_fd = open( path, O_RDONLY );
887 0 : if( FD_UNLIKELY( -1==ctx->restore_fd && errno!=ENOENT ) ) FD_LOG_ERR(( "open(`%s`) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
888 :
889 0 : if( FD_LIKELY( tile->tower.debug_logging ) ) {
890 0 : FD_TEST( fd_cstr_printf_check( path, sizeof(path), NULL, "%s/tower-debug.log", tile->tower.base_path ) );
891 0 : ctx->debug_fd = open( path, O_WRONLY|O_CREAT|O_APPEND, 0644 );
892 0 : if( FD_UNLIKELY( -1==ctx->debug_fd ) ) FD_LOG_ERR(( "open(`%s`) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
893 :
894 0 : if( FD_LIKELY( ctx->debug_fd!=-1 ) ) {
895 0 : int err = ftruncate( ctx->debug_fd, 0UL );
896 0 : if( FD_UNLIKELY( err ) ) FD_LOG_ERR(( "failed to truncate file (%i-%s)", errno, fd_io_strerror( errno ) ));
897 0 : FD_TEST( fd_io_buffered_ostream_init( &ctx->debug_ostream, ctx->debug_fd, ctx->debug_buf, sizeof(ctx->debug_buf) ) );
898 0 : }
899 0 : } else {
900 0 : ctx->debug_fd = -1;
901 0 : }
902 0 : }
903 :
904 : static void
905 : unprivileged_init( fd_topo_t * topo,
906 0 : fd_topo_tile_t * tile ) {
907 0 : ulong slot_max = tile->tower.max_live_slots;
908 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
909 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
910 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
911 0 : void * ghost = FD_SCRATCH_ALLOC_APPEND( l, fd_ghost_align(), fd_ghost_footprint( 2*slot_max, FD_VOTER_MAX ) );
912 0 : void * hfork = FD_SCRATCH_ALLOC_APPEND( l, fd_hfork_align(), fd_hfork_footprint( slot_max, FD_VOTER_MAX ) );
913 0 : void * notar = FD_SCRATCH_ALLOC_APPEND( l, fd_notar_align(), fd_notar_footprint( tile->tower.max_vote_lookahead ) );
914 0 : void * tower = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() );
915 0 : void * accts = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_accts_align(), fd_tower_accts_footprint( FD_VOTER_MAX ) );
916 0 : void * forks = FD_SCRATCH_ALLOC_APPEND( l, fd_forks_align(), fd_forks_footprint( slot_max, FD_VOTER_MAX ) );
917 0 : void * spare = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() );
918 0 : void * stake = FD_SCRATCH_ALLOC_APPEND( l, fd_epoch_stakes_align(), fd_epoch_stakes_footprint( slot_max ) );
919 0 : void * notif = FD_SCRATCH_ALLOC_APPEND( l, notif_align(), notif_footprint( slot_max ) );
920 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
921 :
922 0 : ctx->wksp = topo->workspaces[ topo->objs[ tile->tile_obj_id ].wksp_id ].wksp;
923 0 : ctx->ghost = fd_ghost_join ( fd_ghost_new ( ghost, 2*slot_max, FD_VOTER_MAX, 42UL ) ); /* FIXME seed */
924 0 : ctx->hfork = fd_hfork_join ( fd_hfork_new ( hfork, slot_max, FD_VOTER_MAX, ctx->seed, tile->tower.hard_fork_fatal ) );
925 0 : ctx->notar = fd_notar_join ( fd_notar_new ( notar, tile->tower.max_vote_lookahead ) );
926 0 : ctx->tower = fd_tower_join ( fd_tower_new ( tower ) );
927 0 : ctx->tower_accts = fd_tower_accts_join ( fd_tower_accts_new ( accts, FD_VOTER_MAX ) );
928 0 : ctx->forks = fd_forks_join ( fd_forks_new ( forks, slot_max, FD_VOTER_MAX ) );
929 0 : ctx->tower_spare = fd_tower_join ( fd_tower_new ( spare ) );
930 0 : ctx->slot_stakes = fd_epoch_stakes_join( fd_epoch_stakes_new( stake, slot_max ) );
931 0 : ctx->notif = notif_join ( notif_new ( notif, slot_max ) );
932 0 : FD_TEST( ctx->ghost );
933 0 : FD_TEST( ctx->hfork );
934 0 : FD_TEST( ctx->notar );
935 0 : FD_TEST( ctx->tower );
936 0 : FD_TEST( ctx->forks );
937 0 : FD_TEST( ctx->tower_spare );
938 0 : FD_TEST( ctx->tower_accts );
939 0 : FD_TEST( ctx->slot_stakes );
940 0 : FD_TEST( ctx->notif );
941 :
942 0 : for( ulong i = 0; i<VOTE_TXN_SIG_MAX; i++ ) {
943 0 : fd_sha512_t * sha = fd_sha512_join( fd_sha512_new( FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sha512_t), sizeof(fd_sha512_t) ) ) );
944 0 : FD_TEST( sha );
945 0 : ctx->vote_sha[i] = sha;
946 0 : }
947 :
948 0 : ctx->init_slot = ULONG_MAX;
949 0 : ctx->root_slot = ULONG_MAX;
950 0 : ctx->conf_slot = ULONG_MAX;
951 0 : ctx->supc_slot = ULONG_MAX;
952 :
953 0 : memset( &ctx->metrics, 0, sizeof( struct ctx_metrics_t ) );
954 :
955 0 : ulong banks_obj_id = fd_pod_query_ulong( topo->props, "banks", ULONG_MAX );
956 0 : FD_TEST( banks_obj_id!=ULONG_MAX );
957 0 : ulong banks_locks_obj_id = fd_pod_query_ulong( topo->props, "banks_locks", ULONG_MAX );
958 0 : FD_TEST( banks_locks_obj_id!=ULONG_MAX );
959 0 : FD_TEST( fd_banks_join( ctx->banks, fd_topo_obj_laddr( topo, banks_obj_id ), fd_topo_obj_laddr( topo, banks_locks_obj_id ) ) );
960 :
961 0 : fd_accdb_init_from_topo( ctx->accdb, topo, tile );
962 :
963 0 : FD_TEST( tile->in_cnt<sizeof(ctx->in_kind)/sizeof(ctx->in_kind[0]) );
964 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
965 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
966 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
967 :
968 0 : if ( FD_LIKELY( !strcmp( link->name, "dedup_resolv" ) ) ) ctx->in_kind[ i ] = IN_KIND_DEDUP;
969 0 : else if( FD_LIKELY( !strcmp( link->name, "replay_exec" ) ) ) ctx->in_kind[ i ] = IN_KIND_EXEC;
970 0 : else if( FD_LIKELY( !strcmp( link->name, "replay_out" ) ) ) ctx->in_kind[ i ] = IN_KIND_REPLAY;
971 0 : else FD_LOG_ERR(( "tower tile has unexpected input link %lu %s", i, link->name ));
972 :
973 0 : ctx->in[ i ].mem = link_wksp->wksp;
974 0 : ctx->in[ i ].mtu = link->mtu;
975 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
976 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
977 0 : }
978 :
979 0 : ctx->out_mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ 0 ] ].dcache_obj_id ].wksp_id ].wksp;
980 0 : ctx->out_chunk0 = fd_dcache_compact_chunk0( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache );
981 0 : ctx->out_wmark = fd_dcache_compact_wmark ( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache, topo->links[ tile->out_link_id[ 0 ] ].mtu );
982 0 : ctx->out_chunk = ctx->out_chunk0;
983 0 : }
984 :
985 : static ulong
986 : populate_allowed_seccomp( fd_topo_t const * topo,
987 : fd_topo_tile_t const * tile,
988 : ulong out_cnt,
989 0 : struct sock_filter * out ) {
990 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
991 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
992 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
993 :
994 0 : populate_sock_filter_policy_fd_tower_tile( out_cnt, out, (uint)fd_log_private_logfile_fd(), (uint)ctx->checkpt_fd, (uint)ctx->restore_fd, (uint)ctx->debug_fd );
995 0 : return sock_filter_policy_fd_tower_tile_instr_cnt;
996 0 : }
997 :
998 : static ulong
999 : populate_allowed_fds( fd_topo_t const * topo,
1000 : fd_topo_tile_t const * tile,
1001 : ulong out_fds_cnt,
1002 0 : int * out_fds ) {
1003 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
1004 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
1005 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
1006 :
1007 0 : if( FD_UNLIKELY( out_fds_cnt<4UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
1008 :
1009 0 : ulong out_cnt = 0UL;
1010 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
1011 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
1012 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
1013 0 : if( FD_LIKELY( ctx->checkpt_fd!=-1 ) ) out_fds[ out_cnt++ ] = ctx->checkpt_fd;
1014 0 : if( FD_LIKELY( ctx->restore_fd!=-1 ) ) out_fds[ out_cnt++ ] = ctx->restore_fd;
1015 0 : if( FD_LIKELY( ctx->debug_fd!=-1 ) ) out_fds[ out_cnt++ ] = ctx->debug_fd;
1016 0 : return out_cnt;
1017 0 : }
1018 :
/* Stem run loop configuration for the tower tile.  These macros are
   consumed by the fd_stem.c template included below. */

/* Context type handed to every callback, and its alignment. */

#define STEM_CALLBACK_CONTEXT_TYPE    ctx_t
#define STEM_CALLBACK_CONTEXT_ALIGN   alignof(ctx_t)

/* Callbacks implemented by this tile. */

#define STEM_CALLBACK_METRICS_WRITE   metrics_write
#define STEM_CALLBACK_AFTER_CREDIT    after_credit
#define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag

/* Burst: slot_conf AND (slot_done OR slot_ignored). */

#define STEM_BURST (2UL)

/* Lazy: see explanation in fd_pack. */

#define STEM_LAZY  (128L*3000L)
1028 :
1029 : #include "../../disco/stem/fd_stem.c"
1030 :
1031 : fd_topo_run_tile_t fd_tile_tower = {
1032 : .name = "tower",
1033 : .populate_allowed_seccomp = populate_allowed_seccomp,
1034 : .populate_allowed_fds = populate_allowed_fds,
1035 : .scratch_align = scratch_align,
1036 : .scratch_footprint = scratch_footprint,
1037 : .unprivileged_init = unprivileged_init,
1038 : .privileged_init = privileged_init,
1039 : .run = stem_run,
1040 : };
|