Line data Source code
1 : #include "fd_tower_tile.h"
2 : #include "generated/fd_tower_tile_seccomp.h"
3 :
4 : #include "../../choreo/eqvoc/fd_eqvoc.h"
5 : #include "../../choreo/ghost/fd_ghost.h"
6 : #include "../../choreo/hfork/fd_hfork.h"
7 : #include "../../choreo/notar/fd_notar.h"
8 : #include "../../choreo/tower/fd_tower.h"
9 : #include "../../choreo/tower/fd_tower_accts.h"
10 : #include "../../choreo/tower/fd_tower_forks.h"
11 : #include "../../choreo/tower/fd_tower_serde.h"
12 : #include "../../disco/fd_txn_p.h"
13 : #include "../../disco/keyguard/fd_keyload.h"
14 : #include "../../disco/metrics/fd_metrics.h"
15 : #include "../../disco/shred/fd_stake_ci.h"
16 : #include "../../disco/topo/fd_topo.h"
17 : #include "../../disco/fd_txn_m.h"
18 : #include "../../choreo/tower/fd_epoch_stakes.h"
19 : #include "../../discof/fd_accdb_topo.h"
20 : #include "../../discof/restore/utils/fd_ssmsg.h"
21 : #include "../../discof/replay/fd_execrp.h"
22 : #include "../../discof/replay/fd_replay_tile.h"
23 : #include "../../flamenco/accdb/fd_accdb_sync.h"
24 : #include "../../flamenco/accdb/fd_accdb_pipe.h"
25 : #include "../../flamenco/gossip/fd_gossip_types.h"
26 : #include "../../flamenco/runtime/fd_bank.h"
27 : #include "../../util/pod/fd_pod.h"
28 :
29 : #include <errno.h>
30 : #include <fcntl.h>
31 : #include <unistd.h>
32 :
33 : /* The tower tile is responsible for two things:
34 :
35 : 1. running the fork choice (fd_ghost) and TowerBFT (fd_tower) rules
36 : after replaying a block.
37 : 2. listening to gossip (duplicate shred and vote messages) and
38 : monitoring for duplicate or duplicate confirmed blocks (fd_notar).
39 :
40 : Tower signals to other tiles about events that occur as a result of
41 : those two above events, such as what block to vote on, what block to
42 : reset onto as leader, what block got rooted, what blocks are
43 : duplicates and what blocks are confirmed.
44 :
45 : In general, tower uses the block_id as the identifier for blocks. The
46 : block_id is the merkle root of the last FEC set for a block. This is
47 : guaranteed to be unique for a given block and is the canonical
48 : identifier over the slot number because unlike slot numbers, if a
49 : leader equivocates (produces multiple blocks for the same slot) the
50 : block_id can disambiguate the blocks.
51 :
52 : However, the block_id was only introduced into the Solana protocol
53 : recently, and TowerBFT still uses the "legacy" identifier of slot
54 : numbers for blocks. So the tile (and relevant modules) will use
55 : block_id when possible to interface with the protocol but otherwise
56 : fall back to slot number when block_id is unsupported. */
57 :
/* LOGGING is not referenced in the part of the file reviewed here;
   presumably a compile-time switch for extra diagnostics (TODO
   confirm). */

#define LOGGING 0

/* IN_KIND_* are small integer tags, one per kind of input link.
   Presumably these are the values stored in ctx_t.in_kind[] (TODO
   confirm against the tile's link setup code). */

#define IN_KIND_DEDUP  (0)
#define IN_KIND_EPOCH  (1)
#define IN_KIND_EXECRP (2)
#define IN_KIND_GOSSIP (3)
#define IN_KIND_IPECHO (4)
#define IN_KIND_REPLAY (5)
#define IN_KIND_SHRED  (6)

/* OUT_IDX is the output link index passed to fd_stem_publish by this
   tile. */

#define OUT_IDX 0

/* VOTE_TXN_SIG_MAX sizes the per-signature arrays in ctx_t. */

#define VOTE_TXN_SIG_MAX (2UL) /* validator identity and vote authority */
71 :
72 : struct notif {
73 : ulong slot;
74 : int kind;
75 : fd_hash_t block_id; /* for notar confirmations only */
76 : };
77 : typedef struct notif notif_t;
78 :
79 : #define DEQUE_NAME notif
80 0 : #define DEQUE_T notif_t
81 : #include "../../util/tmpl/fd_deque_dynamic.c"
82 :
83 : struct fd_auth_key {
84 : fd_pubkey_t key;
85 : ulong idx;
86 : uint hash;
87 : };
88 : typedef struct fd_auth_key fd_auth_key_t;
89 :
90 : #define MAP_NAME fd_auth_key_set
91 0 : #define MAP_T fd_auth_key_t
92 0 : #define MAP_LG_SLOT_CNT 5
93 0 : #define MAP_KEY key
94 0 : #define MAP_KEY_T fd_pubkey_t
95 0 : #define MAP_KEY_NULL (fd_pubkey_t){0}
96 0 : #define MAP_KEY_EQUAL(k0,k1) (!(memcmp((k0).key,(k1).key,sizeof(fd_pubkey_t))))
97 0 : #define MAP_KEY_INVAL(k) (MAP_KEY_EQUAL((k),MAP_KEY_NULL))
98 : #define MAP_KEY_EQUAL_IS_SLOW 1
99 0 : #define MAP_KEY_HASH(k) ((uint)fd_ulong_hash( fd_ulong_load_8( (k).uc ) ))
100 : #include "../../util/tmpl/fd_map.c"
101 :
102 : static const fd_hash_t manifest_block_id = { .ul = { 0xf17eda2ce7b1d } }; /* FIXME manifest_block_id */
103 :
104 : typedef struct {
105 : int mcache_only;
106 : fd_wksp_t * mem;
107 : ulong chunk0;
108 : ulong wmark;
109 : ulong mtu;
110 : } in_ctx_t;
111 :
112 : typedef struct {
113 : fd_wksp_t * wksp; /* workspace */
114 :
115 : ulong seed; /* map seed */
116 : int checkpt_fd;
117 : int restore_fd;
118 : fd_pubkey_t identity_key[1];
119 : fd_pubkey_t vote_account[1];
120 : fd_auth_key_t * auth_key_set;
121 : uchar our_vote_acct[FD_VOTE_STATE_DATA_MAX]; /* buffer for reading back our own vote acct data */
122 : ulong out_vote_acct_sz;
123 : int debug_logging;
124 :
125 : /* structures owned by tower tile */
126 :
127 : fd_eqvoc_t * eqvoc;
128 : fd_forks_t * forks;
129 : fd_ghost_t * ghost;
130 : fd_hfork_t * hfork;
131 : fd_notar_t * notar;
132 : fd_tower_t * tower;
133 :
134 : fd_tower_t * tower_spare; /* spare tower used during processing */
135 : fd_tower_accts_t * tower_accts; /* deque of (pubkey, stake, vote account data) for a given slot */
136 : fd_epoch_stakes_t * slot_stakes; /* tracks the stakes for each voter in the epoch per fork */
137 : notif_t * notif; /* deque of confirmation notifications queued for publishing */
138 : fd_stake_ci_t * stake_ci; /* stake ci from replay_epoch */
139 :
140 : /* external joins owned by replay tile */
141 :
142 : fd_banks_t banks[1];
143 : fd_accdb_user_t accdb[1];
144 :
145 : /* frag-related structures (consume and publish) */
146 :
147 : fd_gossip_duplicate_shred_t chunks[ FD_EQVOC_CHUNK_CNT ];
148 : fd_compact_tower_sync_serde_t compact_tower_sync_serde;
149 : fd_sha512_t * vote_sha[VOTE_TXN_SIG_MAX];
150 : uchar vote_txn[FD_TPU_PARSED_MTU];
151 :
152 : /* slot watermarks */
153 :
154 : ulong init_slot; /* initial slot from genesis or snapshot */
155 : ulong root_slot; /* monotonically increasing contiguous tower root slot */
156 : ulong conf_slot; /* monotonically increasing contiguous confirmed slot */
157 : ulong supc_slot; /* monotonically increasing contiguous super slot */
158 :
159 : /* in/out link setup */
160 :
161 : int in_kind[ 64UL ];
162 : in_ctx_t in [ 64UL ];
163 :
164 : fd_wksp_t * out_mem;
165 : ulong out_chunk0;
166 : ulong out_wmark;
167 : ulong out_chunk;
168 :
169 : /* metrics */
170 :
171 : struct ctx_metrics_t {
172 :
173 : ulong ancestor_rollback;
174 : ulong sibling_confirmed;
175 : ulong same_fork;
176 : ulong switch_pass;
177 : ulong switch_fail;
178 : ulong lockout_fail;
179 : ulong threshold_fail;
180 : ulong propagated_fail;
181 :
182 : ulong vote_txn_invalid;
183 : ulong vote_txn_ignored;
184 :
185 : ulong proof_err_chunk_cnt;
186 : ulong proof_err_chunk_idx;
187 : ulong proof_err_chunk_len;
188 :
189 : ulong proof_err_shred_ser;
190 : ulong proof_err_shred_slot;
191 : ulong proof_err_shred_version;
192 : ulong proof_err_shred_type;
193 : ulong proof_err_shred_merkle;
194 : ulong proof_err_shred_signature;
195 :
196 : ulong proof_verified_merkle;
197 : ulong proof_verified_meta;
198 : ulong proof_verified_last;
199 : ulong proof_verified_overlap;
200 : ulong proof_verified_chained;
201 :
202 : ulong proof_constructed;
203 :
204 : ulong cluster_root_slot;
205 : ulong cluster_vote_slot;
206 : ulong local_root_slot;
207 : ulong local_vote_slot;
208 : ulong replay_slot;
209 : ulong reset_slot;
210 :
211 : ulong replay_slot_processed_gauge;
212 : ulong replay_slot_ignored_gauge;
213 : ulong replay_slot_processed_cnt;
214 : ulong replay_slot_ignored_cnt;
215 :
216 : fd_hfork_metrics_t hfork;
217 : } metrics;
218 :
219 : } ctx_t;
220 :
221 : FD_FN_CONST static inline ulong
222 0 : scratch_align( void ) {
223 0 : return 128UL;
224 0 : }
225 :
226 : FD_FN_PURE static inline ulong
227 0 : scratch_footprint( FD_PARAM_UNUSED fd_topo_tile_t const * tile ) {
228 0 : ulong slot_max = tile->tower.max_live_slots;
229 0 : ulong l = FD_LAYOUT_INIT;
230 0 : l = FD_LAYOUT_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
231 0 : l = FD_LAYOUT_APPEND( l, fd_auth_key_set_align(), fd_auth_key_set_footprint() );
232 0 : l = FD_LAYOUT_APPEND( l, fd_eqvoc_align(), fd_eqvoc_footprint( slot_max * 1024, slot_max, FD_VOTER_MAX ) );
233 0 : l = FD_LAYOUT_APPEND( l, fd_ghost_align(), fd_ghost_footprint( 2*slot_max, FD_VOTER_MAX ) );
234 0 : l = FD_LAYOUT_APPEND( l, fd_hfork_align(), fd_hfork_footprint( slot_max, FD_VOTER_MAX ) );
235 0 : l = FD_LAYOUT_APPEND( l, fd_notar_align(), fd_notar_footprint( tile->tower.max_vote_lookahead ) );
236 0 : l = FD_LAYOUT_APPEND( l, fd_tower_align(), fd_tower_footprint() );
237 0 : l = FD_LAYOUT_APPEND( l, fd_tower_accts_align(), fd_tower_accts_footprint( FD_VOTER_MAX ) );
238 0 : l = FD_LAYOUT_APPEND( l, fd_forks_align(), fd_forks_footprint( slot_max, FD_VOTER_MAX ) );
239 0 : l = FD_LAYOUT_APPEND( l, fd_tower_align(), fd_tower_footprint() ); /* ctx->tower_spare */
240 0 : l = FD_LAYOUT_APPEND( l, fd_epoch_stakes_align(), fd_epoch_stakes_footprint( slot_max ) );
241 0 : l = FD_LAYOUT_APPEND( l, notif_align(), notif_footprint( slot_max ) );
242 0 : l = FD_LAYOUT_APPEND( l, fd_stake_ci_align(), fd_stake_ci_footprint() );
243 0 : return FD_LAYOUT_FINI( l, scratch_align() );
244 0 : }
245 :
246 : static inline void
247 0 : metrics_write( ctx_t * ctx ) {
248 0 : FD_MCNT_SET( TOWER, ANCESTOR_ROLLBACK, ctx->metrics.ancestor_rollback );
249 0 : FD_MCNT_SET( TOWER, SIBLING_CONFIRMED, ctx->metrics.sibling_confirmed );
250 0 : FD_MCNT_SET( TOWER, SAME_FORK, ctx->metrics.same_fork );
251 0 : FD_MCNT_SET( TOWER, SWITCH_PASS, ctx->metrics.switch_pass );
252 0 : FD_MCNT_SET( TOWER, SWITCH_FAIL, ctx->metrics.switch_fail );
253 0 : FD_MCNT_SET( TOWER, LOCKOUT_FAIL, ctx->metrics.lockout_fail );
254 0 : FD_MCNT_SET( TOWER, THRESHOLD_FAIL, ctx->metrics.threshold_fail );
255 0 : FD_MCNT_SET( TOWER, PROPAGATED_FAIL, ctx->metrics.propagated_fail );
256 :
257 0 : FD_MCNT_SET( TOWER, VOTE_TXN_INVALID, ctx->metrics.vote_txn_invalid );
258 0 : FD_MCNT_SET( TOWER, VOTE_TXN_IGNORED, ctx->metrics.vote_txn_ignored );
259 :
260 0 : FD_MCNT_SET( TOWER, PROOF_ERR_CHUNK_CNT, ctx->metrics.proof_err_chunk_cnt );
261 0 : FD_MCNT_SET( TOWER, PROOF_ERR_CHUNK_IDX, ctx->metrics.proof_err_chunk_idx );
262 0 : FD_MCNT_SET( TOWER, PROOF_ERR_CHUNK_LEN, ctx->metrics.proof_err_chunk_len );
263 :
264 0 : FD_MCNT_SET( TOWER, PROOF_ERR_SHRED_SER, ctx->metrics.proof_err_shred_ser );
265 0 : FD_MCNT_SET( TOWER, PROOF_ERR_SHRED_SLOT, ctx->metrics.proof_err_shred_slot );
266 0 : FD_MCNT_SET( TOWER, PROOF_ERR_SHRED_VERSION, ctx->metrics.proof_err_shred_version );
267 0 : FD_MCNT_SET( TOWER, PROOF_ERR_SHRED_TYPE, ctx->metrics.proof_err_shred_type );
268 0 : FD_MCNT_SET( TOWER, PROOF_ERR_SHRED_MERKLE, ctx->metrics.proof_err_shred_merkle );
269 0 : FD_MCNT_SET( TOWER, PROOF_ERR_SHRED_SIGNATURE, ctx->metrics.proof_err_shred_signature );
270 :
271 0 : FD_MCNT_SET( TOWER, PROOF_VERIFIED_MERKLE, ctx->metrics.proof_verified_merkle );
272 0 : FD_MCNT_SET( TOWER, PROOF_VERIFIED_META, ctx->metrics.proof_verified_meta );
273 0 : FD_MCNT_SET( TOWER, PROOF_VERIFIED_LAST, ctx->metrics.proof_verified_last );
274 0 : FD_MCNT_SET( TOWER, PROOF_VERIFIED_OVERLAP,ctx->metrics.proof_verified_overlap);
275 0 : FD_MCNT_SET( TOWER, PROOF_VERIFIED_CHAINED,ctx->metrics.proof_verified_chained);
276 :
277 0 : FD_MCNT_SET( TOWER, PROOF_CONSTRUCTED, ctx->metrics.proof_constructed );
278 :
279 0 : FD_MCNT_SET ( TOWER, HARD_FORKS_SEEN, ctx->metrics.hfork.seen );
280 0 : FD_MCNT_SET ( TOWER, HARD_FORKS_PRUNED, ctx->metrics.hfork.pruned );
281 0 : FD_MGAUGE_SET( TOWER, HARD_FORKS_ACTIVE, ctx->metrics.hfork.active );
282 0 : FD_MGAUGE_SET( TOWER, HARD_FORKS_MAX_WIDTH, ctx->metrics.hfork.max_width );
283 :
284 0 : FD_MGAUGE_SET( TOWER, CLUSTER_ROOT_SLOT, ctx->metrics.cluster_root_slot );
285 0 : FD_MGAUGE_SET( TOWER, CLUSTER_VOTE_SLOT, ctx->metrics.cluster_vote_slot );
286 0 : FD_MGAUGE_SET( TOWER, LOCAL_ROOT_SLOT, ctx->metrics.local_root_slot );
287 0 : FD_MGAUGE_SET( TOWER, LOCAL_VOTE_SLOT, ctx->metrics.local_vote_slot );
288 0 : FD_MGAUGE_SET( TOWER, RESET_SLOT, ctx->metrics.reset_slot );
289 :
290 0 : FD_MGAUGE_SET( TOWER, REPLAY_SLOT_PROCESSED, ctx->metrics.replay_slot_processed_gauge );
291 0 : FD_MGAUGE_SET( TOWER, REPLAY_SLOT_IGNORED, ctx->metrics.replay_slot_ignored_gauge );
292 0 : FD_MCNT_SET ( TOWER, REPLAY_SLOT_PROCESSED, ctx->metrics.replay_slot_processed_cnt );
293 0 : FD_MCNT_SET ( TOWER, REPLAY_SLOT_IGNORED, ctx->metrics.replay_slot_ignored_cnt );
294 0 : }
295 :
296 : static void
297 : publish_slot_confirmed( ctx_t * ctx,
298 : fd_stem_context_t * stem,
299 : ulong tsorig,
300 : ulong slot,
301 : fd_hash_t const * block_id,
302 : ulong bank_idx,
303 0 : int kind ) {
304 0 : fd_tower_slot_confirmed_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
305 0 : msg->slot = slot;
306 0 : msg->block_id = *block_id;
307 0 : msg->bank_idx = bank_idx;
308 0 : msg->kind = kind;
309 0 : fd_stem_publish( stem, OUT_IDX, FD_TOWER_SIG_SLOT_CONFIRMED, ctx->out_chunk, sizeof(fd_tower_slot_confirmed_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
310 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_confirmed_t), ctx->out_chunk0, ctx->out_wmark );
311 0 : }
312 :
313 : static void
314 : contiguous_confirm( ctx_t * ctx,
315 : ulong slot,
316 : ulong wmark,
317 0 : int kind ) {
318 :
319 : /* For optimistic and rooted confirmations, confirming a slot means
320 : all ancestors are confirmed too, so we need to publish any skipped
321 : ancestors (confirmations can be out-of-order and roots can be
322 : skipped due to lockout). */
323 :
324 0 : ulong cnt = 0;
325 0 : ulong ancestor = slot;
326 0 : while( FD_UNLIKELY( ancestor > wmark ) ) {
327 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, ancestor );
328 0 : if( FD_UNLIKELY( !fork ) ) break; /* rooted past this ancestor */
329 0 : if( FD_UNLIKELY( !notif_avail( ctx->notif ) ) ) FD_LOG_CRIT(( "attempted to confirm %lu slots more than slot max %lu", cnt, notif_max( ctx->notif ) )); /* should be impossible */
330 0 : notif_push_tail( ctx->notif, (notif_t){ .slot = ancestor, .kind = kind } );
331 0 : cnt++;
332 0 : ancestor = fork->parent_slot;
333 0 : }
334 0 : }
335 :
336 : static void
337 : notar_confirm( ctx_t * ctx,
338 0 : fd_notar_blk_t * notar_blk ) {
339 :
340 : /* Record any confirmations in our tower forks structure and also
341 : publish slot_confirmed frags indicating confirmations to consumers.
342 :
343 : See documentation in fd_tower_tile.h for guarantees. */
344 :
345 0 : if( FD_LIKELY( notar_blk->dup_conf && !notar_blk->dup_notif ) ) {
346 0 : if( FD_UNLIKELY( !notif_avail( ctx->notif ) ) ) FD_LOG_CRIT(( "attempted to confirm more than slot max %lu", notif_max( ctx->notif ) )); /* should be impossible */
347 0 : notif_push_head( ctx->notif, (notif_t){ .slot = notar_blk->slot, .kind = FD_TOWER_SLOT_CONFIRMED_DUPLICATE, .block_id = notar_blk->block_id } );
348 0 : notar_blk->dup_notif = 1;
349 :
350 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, notar_blk->slot ); /* ensure fork exists */
351 0 : if( FD_UNLIKELY( !fork ) ) return; /* a slot can be duplicate confirmed by gossip votes before replay */
352 0 : fd_forks_confirmed( fork, ¬ar_blk->block_id );
353 0 : }
354 0 : if( FD_LIKELY( notar_blk->opt_conf ) ) {
355 0 : if( FD_UNLIKELY( !notar_blk->opt_notif ) ) {
356 0 : if( FD_UNLIKELY( !notif_avail( ctx->notif ) ) ) FD_LOG_CRIT(( "attempted to confirm more than slot max %lu", notif_max( ctx->notif ) )); /* should be impossible */
357 0 : notif_push_head( ctx->notif, (notif_t){ .slot = notar_blk->slot, .kind = FD_TOWER_SLOT_CONFIRMED_CLUSTER, .block_id = notar_blk->block_id } );
358 0 : notar_blk->opt_notif = 1;
359 0 : }
360 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, notar_blk->slot );
361 0 : if( FD_UNLIKELY( fork && notar_blk->slot > ctx->conf_slot ) ) {
362 0 : contiguous_confirm( ctx, notar_blk->slot, ctx->conf_slot, FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC );
363 0 : ctx->conf_slot = notar_blk->slot;
364 0 : }
365 0 : }
366 0 : if( FD_LIKELY( notar_blk->sup_conf ) ) {
367 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, notar_blk->slot );
368 0 : if( FD_UNLIKELY( fork && notar_blk->slot > ctx->supc_slot ) ) {
369 0 : contiguous_confirm( ctx, notar_blk->slot, ctx->supc_slot, FD_TOWER_SLOT_CONFIRMED_SUPER );
370 0 : ctx->supc_slot = notar_blk->slot;
371 0 : }
372 0 : }
373 0 : }
374 :
375 : static void
376 : count_vote_txn( ctx_t * ctx,
377 : fd_txn_t const * txn,
378 0 : uchar const * payload ) {
379 :
380 : /* Count vote txns from resolv and replay. Note these txns have
381 : already been parsed and sigverified, so the only thing tower needs
382 : to do is filter for votes.
383 :
384 : We are a little stricter than Agave here when validating the vote
385 : because we use the same validation as pack ie. is_simple_vote which
386 : includes a check that there are at most two signers, whereas
387 : Agave's gossip vote parser does not perform that same check (the
388 : only two signers are the identity key and vote authority, which may
389 : optionally be the same).
390 :
391 : Being a little stricter here is ok because even if we drop some
392 : votes with extraneous signers that Agave would consider valid
393 : (unlikely), gossip votes are in general considered unreliable and
394 : ultimately consensus is reached through replaying the vote txns.
395 :
396 : The remaining checks mirror Agave as closely as possible (and are
397 : documented throughout below). */
398 :
399 0 : if( FD_UNLIKELY( !fd_txn_is_simple_vote_transaction( txn, payload ) ) ) { ctx->metrics.vote_txn_invalid++; return; }
400 :
401 : /* TODO check the authorized voter for this vote account (from epoch
402 : stakes) is one of the signers */
403 :
404 : /* Filter any non-TowerSync vote txns. */
405 :
406 0 : fd_txn_instr_t const * instr = &txn->instr[0];
407 0 : uchar const * instr_data = payload + instr->data_off;
408 0 : uint kind = fd_uint_load_4_fast( instr_data );
409 0 : if( FD_UNLIKELY( kind != FD_VOTE_IX_KIND_TOWER_SYNC && kind != FD_VOTE_IX_KIND_TOWER_SYNC_SWITCH ) ) { ctx->metrics.vote_txn_ignored++; return; };
410 :
411 : /* Deserialize the TowerSync out of the vote txn. */
412 :
413 0 : int err = fd_compact_tower_sync_deserialize( &ctx->compact_tower_sync_serde, instr_data + sizeof(uint), instr->data_sz - sizeof(uint) );
414 0 : if( FD_UNLIKELY( err == -1 ) ) { ctx->metrics.vote_txn_invalid++; return; }
415 0 : ulong slot = ctx->compact_tower_sync_serde.root;
416 0 : fd_tower_remove_all( ctx->tower_spare );
417 0 : for( ulong i = 0; i < ctx->compact_tower_sync_serde.lockouts_cnt; i++ ) {
418 0 : slot += ctx->compact_tower_sync_serde.lockouts[i].offset;
419 0 : fd_tower_push_tail( ctx->tower_spare, (fd_tower_vote_t){ .slot = slot, .conf = ctx->compact_tower_sync_serde.lockouts[i].confirmation_count } );
420 0 : }
421 0 : if( FD_UNLIKELY( 0==memcmp( &ctx->compact_tower_sync_serde.block_id, &hash_null, sizeof(fd_hash_t) ) ) ) { ctx->metrics.vote_txn_invalid++; return; };
422 :
423 0 : fd_pubkey_t const * accs = (fd_pubkey_t const *)fd_type_pun_const( payload + txn->acct_addr_off );
424 0 : fd_pubkey_t const * vote_acc = NULL;
425 0 : if( FD_UNLIKELY( txn->signature_cnt==1 ) ) vote_acc = (fd_pubkey_t const *)fd_type_pun_const( &accs[1] ); /* identity and authority same, account idx 1 is the vote account address */
426 0 : else vote_acc = (fd_pubkey_t const *)fd_type_pun_const( &accs[2] ); /* identity and authority diff, account idx 2 is the vote account address */
427 :
428 : /* Return early if their tower is empty. */
429 :
430 0 : if( FD_UNLIKELY( fd_tower_empty( ctx->tower_spare ) ) ) { ctx->metrics.vote_txn_ignored++; return; };
431 :
432 : /* The vote txn contains a block id and bank hash for their last vote
433 : slot in the tower. Agave always counts the last vote.
434 :
435 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L476-L487 */
436 :
437 0 : fd_tower_vote_t const * their_last_vote = fd_tower_peek_tail_const( ctx->tower_spare );
438 0 : fd_hash_t const * their_block_id = &ctx->compact_tower_sync_serde.block_id;
439 0 : fd_hash_t const * their_bank_hash = &ctx->compact_tower_sync_serde.hash;
440 :
441 : /* Similar to what Agave does in cluster_info_vote_listener, we use
442 : the stake associated with a vote account as of our current root
443 : (which could potentially be a different epoch than the vote we are
444 : counting or when we observe the vote). They default stake to 0 for
445 : voters who are not found. */
446 :
447 0 : ulong total_stake = fd_ghost_root( ctx->ghost )->total_stake;
448 :
449 0 : fd_voter_stake_key_t stake_key = { .vote_account = *vote_acc, .slot = ctx->root_slot };
450 0 : fd_voter_stake_t * stake = fd_voter_stake_map_ele_query( ctx->slot_stakes->voter_stake_map, &stake_key, NULL, ctx->slot_stakes->voter_stake_pool );
451 :
452 0 : fd_hfork_count_vote( ctx->hfork, &ctx->metrics.hfork, vote_acc, their_block_id, their_bank_hash, their_last_vote->slot, stake ? stake->stake : 0, total_stake );
453 :
454 0 : fd_notar_blk_t * notar_blk = fd_notar_count_vote( ctx->notar, total_stake, vote_acc, their_last_vote->slot, their_block_id );
455 0 : if( FD_LIKELY( notar_blk ) ) notar_confirm( ctx, notar_blk );
456 :
457 0 : fd_tower_forks_t * fork = fd_tower_forks_query( ctx->forks->tower_forks, their_last_vote->slot, NULL );
458 0 : if( FD_UNLIKELY( !fork ) ) { ctx->metrics.vote_txn_ignored++; return; }; /* we don't recognize this slot (likely replay lagging) */
459 :
460 0 : fd_hash_t const * our_block_id = fd_forks_canonical_block_id( ctx->forks, their_last_vote->slot );
461 0 : if( FD_UNLIKELY( 0!=memcmp( our_block_id, their_block_id, sizeof(fd_hash_t) ) ) ) { ctx->metrics.vote_txn_ignored++; return; } /* we don't recognize this block id */
462 :
463 : /* Agave decides to count intermediate vote slots in the tower only if
464 : 1. they've replayed the slot and 2. their replay bank hash matches
465 : the vote's bank hash. We do the same thing, but using block_ids.
466 :
467 : It's possible we haven't yet replayed this slot being voted on
468 : because gossip votes can be ahead of our replay.
469 :
470 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L483-L487 */
471 :
472 0 : int skipped_last_vote = 0;
473 0 : for( fd_tower_iter_t iter = fd_tower_iter_init_rev( ctx->tower_spare );
474 0 : !fd_tower_iter_done_rev( ctx->tower_spare, iter );
475 0 : iter = fd_tower_iter_prev ( ctx->tower_spare, iter ) ) {
476 0 : if( FD_UNLIKELY( !skipped_last_vote ) ) { skipped_last_vote = 1; continue; }
477 0 : fd_tower_vote_t const * their_intermediate_vote = fd_tower_iter_ele_const( ctx->tower_spare, iter );
478 :
479 : /* If we don't recognize an intermediate vote slot in their tower,
480 : it means their tower either:
481 :
482 : 1. Contains intermediate vote slots that are too old (older than
483 : our root) so we already pruned them for tower_forks. Normally
484 : if the descendant (last vote slot) is in tower forks, then all
485 : of its ancestors should be in there too.
486 :
487 : 2. Is invalid. Even though at this point we have successfully
488 : sigverified and deserialized their vote txn, the tower itself
489 : might still be invalid because unlike TPU vote txns, we have
490 : not plumbed through the vote program, but obviously gossip
491 : votes do not so we need to do some light validation here.
492 :
493 : We could throwaway this voter's tower, but we handle it the same
494 : way as Agave which is to just skip this intermediate vote slot:
495 :
496 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L513-L518 */
497 :
498 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, their_intermediate_vote->slot );
499 0 : if( FD_UNLIKELY( !fork ) ) { ctx->metrics.vote_txn_ignored++; continue; }
500 :
501 : /* Otherwise, we count the vote using our own block id for that slot
502 : (again, mirroring what Agave does albeit with bank hashes).
503 :
504 : Agave uses the current root bank's total stake when counting
505 : vote txns from gossip / replay:
506 :
507 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/cluster_info_vote_listener.rs#L500 */
508 :
509 :
510 0 : fd_notar_blk_t * notar_blk = fd_notar_count_vote( ctx->notar, total_stake, vote_acc, their_intermediate_vote->slot, fd_forks_canonical_block_id( ctx->forks, their_intermediate_vote->slot ) );
511 0 : if( FD_LIKELY( notar_blk ) ) notar_confirm( ctx, notar_blk );
512 0 : }
513 0 : }
514 :
515 : ulong
516 : query_acct_stake_from_bank( fd_tower_accts_t * tower_accts_deque,
517 : fd_epoch_stakes_t * epoch_stakes,
518 : fd_bank_t * bank,
519 0 : ulong slot ) {
520 0 : ulong total_stake = 0;
521 0 : fd_vote_states_t const * vote_states = fd_bank_vote_states_locking_query( bank );
522 0 : fd_vote_states_iter_t iter_[1];
523 0 : ulong prev_voter_idx = ULONG_MAX;
524 0 : for( fd_vote_states_iter_t * iter = fd_vote_states_iter_init( iter_, vote_states );
525 0 : !fd_vote_states_iter_done( iter );
526 0 : fd_vote_states_iter_next( iter ) ) {
527 0 : fd_vote_state_ele_t const * vote_state = fd_vote_states_iter_ele( iter );
528 0 : if( FD_UNLIKELY( vote_state->stake_t_2 == 0 ) ) continue; /* skip unstaked vote accounts */
529 0 : fd_pubkey_t const * vote_account_pubkey = &vote_state->vote_account;
530 0 : fd_tower_accts_push_tail( tower_accts_deque, (fd_tower_accts_t){ .addr = *vote_account_pubkey, .stake = vote_state->stake_t_2 } );
531 0 : prev_voter_idx = fd_epoch_stakes_slot_stakes_add( epoch_stakes, slot, vote_account_pubkey, vote_state->stake_t_2, prev_voter_idx );
532 0 : total_stake += vote_state->stake_t_2;
533 0 : }
534 0 : fd_bank_vote_states_end_locking_query( bank );
535 0 : return total_stake;
536 0 : }
537 :
538 : static int
539 : get_authority( ctx_t * ctx,
540 : ulong epoch,
541 : int vote_acc_found,
542 : fd_pubkey_t * authority_out,
543 0 : ulong * authority_idx_out ) {
544 :
545 0 : if( FD_UNLIKELY( !vote_acc_found ) ) return 0;
546 :
547 0 : fd_bincode_decode_ctx_t decode_ctx = {
548 0 : .data = ctx->our_vote_acct,
549 0 : .dataend = ctx->our_vote_acct + ctx->out_vote_acct_sz,
550 0 : };
551 :
552 0 : uchar __attribute__((aligned(FD_VOTE_STATE_VERSIONED_ALIGN))) vote_state_versioned[ FD_VOTE_STATE_VERSIONED_FOOTPRINT ];
553 :
554 0 : fd_vote_state_versioned_t * vsv = fd_vote_state_versioned_decode( vote_state_versioned, &decode_ctx );
555 0 : FD_CRIT( !vsv, "unable to decode vote state versioned" );
556 :
557 0 : fd_pubkey_t const * auth_voter = NULL;
558 0 : switch( vsv->discriminant ) {
559 0 : case fd_vote_state_versioned_enum_v0_23_5:
560 0 : auth_voter = &vsv->inner.v0_23_5.authorized_voter;
561 0 : break;
562 0 : case fd_vote_state_versioned_enum_v1_14_11:
563 0 : for( fd_vote_authorized_voters_treap_rev_iter_t iter = fd_vote_authorized_voters_treap_rev_iter_init( vsv->inner.v1_14_11.authorized_voters.treap, vsv->inner.v1_14_11.authorized_voters.pool );
564 0 : !fd_vote_authorized_voters_treap_rev_iter_done( iter );
565 0 : iter = fd_vote_authorized_voters_treap_rev_iter_next( iter, vsv->inner.v1_14_11.authorized_voters.pool ) ) {
566 0 : fd_vote_authorized_voter_t * ele = fd_vote_authorized_voters_treap_rev_iter_ele( iter, vsv->inner.v1_14_11.authorized_voters.pool );
567 0 : if( FD_LIKELY( ele->epoch<=epoch ) ) {
568 0 : auth_voter = &ele->pubkey;
569 0 : break;
570 0 : }
571 0 : }
572 0 : break;
573 0 : case fd_vote_state_versioned_enum_v3:
574 0 : for( fd_vote_authorized_voters_treap_rev_iter_t iter = fd_vote_authorized_voters_treap_rev_iter_init( vsv->inner.v3.authorized_voters.treap, vsv->inner.v3.authorized_voters.pool );
575 0 : !fd_vote_authorized_voters_treap_rev_iter_done( iter );
576 0 : iter = fd_vote_authorized_voters_treap_rev_iter_next( iter, vsv->inner.v3.authorized_voters.pool ) ) {
577 0 : fd_vote_authorized_voter_t * ele = fd_vote_authorized_voters_treap_rev_iter_ele( iter, vsv->inner.v3.authorized_voters.pool );
578 0 : if( FD_LIKELY( ele->epoch<=epoch ) ) {
579 0 : auth_voter = &ele->pubkey;
580 0 : break;
581 0 : }
582 0 : }
583 0 : break;
584 0 : case fd_vote_state_versioned_enum_v4:
585 0 : for( fd_vote_authorized_voters_treap_rev_iter_t iter = fd_vote_authorized_voters_treap_rev_iter_init( vsv->inner.v4.authorized_voters.treap, vsv->inner.v4.authorized_voters.pool );
586 0 : !fd_vote_authorized_voters_treap_rev_iter_done( iter );
587 0 : iter = fd_vote_authorized_voters_treap_rev_iter_next( iter, vsv->inner.v4.authorized_voters.pool ) ) {
588 0 : fd_vote_authorized_voter_t * ele = fd_vote_authorized_voters_treap_rev_iter_ele( iter, vsv->inner.v4.authorized_voters.pool );
589 0 : if( FD_LIKELY( ele->epoch<=epoch ) ) {
590 0 : auth_voter = &ele->pubkey;
591 0 : break;
592 0 : }
593 0 : }
594 0 : break;
595 0 : default:
596 0 : FD_LOG_CRIT(( "unsupported vote state versioned discriminant: %u", vsv->discriminant ));
597 0 : }
598 :
599 0 : FD_CRIT( !auth_voter, "unable to find authorized voter, likely corrupt vote account state" );
600 :
601 0 : if( fd_pubkey_eq( auth_voter, ctx->identity_key ) ) {
602 0 : *authority_idx_out = ULONG_MAX;
603 0 : *authority_out = *auth_voter;
604 0 : return 1;
605 0 : }
606 :
607 0 : fd_auth_key_t * auth_key = fd_auth_key_set_query( ctx->auth_key_set, *auth_voter, NULL );
608 0 : if( FD_LIKELY( auth_key ) ) {
609 0 : *authority_idx_out = auth_key->idx;
610 0 : *authority_out = *auth_voter;
611 0 : return 1;
612 0 : }
613 :
614 0 : return 0;
615 0 : }
616 :
617 : static void
618 : replay_slot_completed( ctx_t * ctx,
619 : fd_replay_slot_completed_t * slot_completed,
620 : ulong tsorig,
621 0 : fd_stem_context_t * stem ) {
622 :
623 : /* Initialize slot watermarks on the first replay_slot_completed. */
624 :
625 0 : if( FD_UNLIKELY( ctx->init_slot == ULONG_MAX ) ) {
626 0 : ctx->init_slot = slot_completed->slot;
627 0 : ctx->root_slot = slot_completed->slot;
628 0 : ctx->conf_slot = slot_completed->slot;
629 0 : ctx->supc_slot = slot_completed->slot;
630 0 : }
631 :
632 0 : if( FD_UNLIKELY( 0==memcmp( &slot_completed->block_id.uc, &hash_null, sizeof(fd_hash_t) ) ) ) {
633 0 : FD_LOG_CRIT(( "replay_slot_completed slot %lu block id is null", slot_completed->slot ));
634 0 : }
635 :
636 : /* This is a temporary patch for equivocation. */
637 :
638 0 : if( FD_UNLIKELY( fd_forks_query( ctx->forks, slot_completed->slot ) ) ) {
639 0 : FD_BASE58_ENCODE_32_BYTES( slot_completed->block_id.uc, block_id );
640 0 : FD_LOG_WARNING(( "tower ignoring replay of equivocating slot %lu %s", slot_completed->slot, block_id ));
641 :
642 : /* Still need to return a message to replay so the refcnt on the bank is decremented. */
643 0 : fd_tower_slot_ignored_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
644 0 : msg->slot = slot_completed->slot;
645 0 : msg->bank_idx = slot_completed->bank_idx;
646 :
647 0 : fd_stem_publish( stem, OUT_IDX, FD_TOWER_SIG_SLOT_IGNORED, ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
648 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), ctx->out_chunk0, ctx->out_wmark );
649 0 : return;
650 0 : }
651 :
652 : /* Initialize the xid. */
653 :
654 0 : fd_funk_txn_xid_t xid = { .ul = { slot_completed->slot, slot_completed->bank_idx } };
655 :
656 : /* Query our on-chain vote acct and reconcile with our local tower. */
657 :
658 0 : ulong our_vote_acct_bal = ULONG_MAX;
659 0 : int found = 0;
660 0 : fd_accdb_ro_t ro[1];
661 0 : if( FD_LIKELY( fd_accdb_open_ro( ctx->accdb, ro, &xid, ctx->vote_account ) ) ) {
662 : /* Copy account data */
663 0 : found = 1;
664 0 : ctx->out_vote_acct_sz = fd_ulong_min( fd_accdb_ref_data_sz( ro ), FD_VOTE_STATE_DATA_MAX );
665 0 : our_vote_acct_bal = fd_accdb_ref_lamports( ro );
666 0 : fd_memcpy( ctx->our_vote_acct, fd_accdb_ref_data_const( ro ), ctx->out_vote_acct_sz );
667 0 : fd_accdb_close_ro( ctx->accdb, ro );
668 :
669 0 : fd_tower_reconcile( ctx->tower, ctx->root_slot, ctx->our_vote_acct );
670 : /* Sanity check that most recent vote in tower exists in tower forks */
671 0 : fd_tower_vote_t const * last_vote = fd_tower_peek_tail_const( ctx->tower );
672 0 : FD_TEST( !last_vote || fd_forks_query( ctx->forks, last_vote->slot ) );
673 0 : }
674 :
675 : /* Insert the vote acct addrs and stakes from the bank into accts. */
676 :
677 0 : fd_tower_accts_remove_all( ctx->tower_accts );
678 0 : fd_bank_t bank[1];
679 0 : if( FD_UNLIKELY( !fd_banks_bank_query( bank, ctx->banks, slot_completed->bank_idx ) ) ) FD_LOG_CRIT(( "invariant violation: bank %lu is missing", slot_completed->bank_idx ));
680 0 : ulong total_stake = query_acct_stake_from_bank( ctx->tower_accts, ctx->slot_stakes, bank, slot_completed->slot );
681 :
682 : /* Insert the just replayed block into forks. */
683 :
684 0 : FD_TEST( !fd_forks_query( ctx->forks, slot_completed->slot ) );
685 0 : fd_tower_forks_t * fork = fd_forks_insert( ctx->forks, slot_completed->slot, slot_completed->parent_slot );
686 0 : fork->parent_slot = slot_completed->parent_slot;
687 0 : fork->confirmed = 0;
688 0 : fork->voted = 0;
689 0 : fork->replayed_block_id = slot_completed->block_id;
690 0 : fork->bank_idx = slot_completed->bank_idx;
691 0 : fd_forks_replayed( ctx->forks, fork, slot_completed->bank_idx, &slot_completed->block_id );
692 0 : fd_forks_lockouts_clear( ctx->forks, slot_completed->parent_slot );
693 :
694 : /* Insert the just replayed block into ghost. */
695 :
696 0 : fd_hash_t const * parent_block_id = &slot_completed->parent_block_id;
697 0 : if( FD_UNLIKELY( slot_completed->parent_slot==ctx->init_slot ) ) parent_block_id = &manifest_block_id;
698 0 : if( FD_UNLIKELY( slot_completed->slot ==ctx->init_slot ) ) parent_block_id = NULL;
699 :
700 0 : if( FD_UNLIKELY( parent_block_id && !fd_ghost_query( ctx->ghost, parent_block_id ) ) ) {
701 :
702 : /* Rare occurrence where replay executes a block down a minority fork
703 : that we have pruned. Due to a race in reading frags, replay may
704 : believe the minority fork exists and is still executable, and
705 : executes the block and delivers it to tower. Tower should ignore
706 : this block as its parent no longer exists. */
707 :
708 0 : FD_BASE58_ENCODE_32_BYTES( parent_block_id->uc, parent_block_id_cstr );
709 0 : FD_LOG_WARNING(( "replay likely lagging tower publish, executed slot %lu is missing parent block id %s, excluding from ghost", slot_completed->slot, parent_block_id_cstr ));
710 0 : ctx->metrics.replay_slot_ignored_gauge = slot_completed->slot;
711 0 : ctx->metrics.replay_slot_ignored_cnt++;
712 :
713 : /* Still need to return a message to replay so the refcnt on the
714 : bank is decremented. */
715 :
716 0 : fd_tower_slot_ignored_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
717 0 : msg->slot = slot_completed->slot;
718 0 : msg->bank_idx = slot_completed->bank_idx;
719 :
720 0 : fd_stem_publish( stem, OUT_IDX, FD_TOWER_SIG_SLOT_IGNORED, ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
721 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_ignored_t), ctx->out_chunk0, ctx->out_wmark );
722 0 : return;
723 0 : }
724 :
725 0 : fd_ghost_blk_t * ghost_blk = fd_ghost_insert( ctx->ghost, &slot_completed->block_id, parent_block_id, slot_completed->slot );
726 0 : ghost_blk->total_stake = total_stake;
727 :
728 : /* Iterate vote accounts. */
729 :
730 0 : fd_tower_accts_t * tower_accts = ctx->tower_accts;
731 0 : fd_accdb_ro_pipe_t ro_pipe[1];
732 0 : fd_accdb_ro_pipe_init( ro_pipe, ctx->accdb, &xid );
733 0 : fd_tower_accts_iter_t iter_head = fd_tower_accts_iter_init( tower_accts );
734 0 : fd_tower_accts_iter_t iter_tail = fd_tower_accts_iter_init( tower_accts );
735 0 : for(;;) {
736 0 : if( FD_UNLIKELY( fd_tower_accts_iter_done( tower_accts, iter_head ) ) ) {
737 0 : fd_accdb_ro_pipe_flush( ro_pipe );
738 0 : }
739 :
740 0 : fd_accdb_ro_t * ro;
741 0 : while( (ro = fd_accdb_ro_pipe_poll( ro_pipe )) ) {
742 0 : fd_tower_accts_t * acct = fd_tower_accts_iter_ele( tower_accts, iter_tail );
743 0 : if( FD_UNLIKELY( !fd_accdb_ref_lamports( ro ) ) ) {
744 0 : FD_BASE58_ENCODE_32_BYTES( acct->addr.key, pubkey_b58 );
745 0 : FD_LOG_CRIT(( "vote account in bank->vote_states not found. slot %lu address %s", slot_completed->slot, pubkey_b58 ));
746 0 : }
747 0 : ulong data_sz = fd_ulong_min( fd_accdb_ref_data_sz( ro ), FD_VOTE_STATE_DATA_MAX );
748 0 : fd_memcpy( acct->data, fd_accdb_ref_data_const( ro ), data_sz );
749 :
750 : /* 1. Update forks with lockouts. */
751 :
752 0 : fd_forks_lockouts_add( ctx->forks, slot_completed->slot, &acct->addr, acct );
753 :
754 : /* 2. Count the last vote slot in the vote state towards ghost. */
755 :
756 0 : ulong vote_slot = fd_voter_vote_slot( acct->data );
757 0 : if( FD_LIKELY( vote_slot!=ULONG_MAX && /* has voted */
758 0 : vote_slot>=fd_ghost_root( ctx->ghost )->slot ) ) { /* vote not too old */
759 : /* We search up the ghost ancestry to find the ghost block for this
760 : vote slot. In Agave, they look this value up using a hashmap of
761 : slot->block_id ("fork progress"), but that approach only works
762 : because they dump and repair (so there's only ever one canonical
763 : block id). We retain multiple block ids, both the original and
764 : confirmed one. */
765 :
766 0 : fd_ghost_blk_t * ancestor_blk = fd_ghost_slot_ancestor( ctx->ghost, ghost_blk, vote_slot ); /* FIXME potentially slow */
767 :
768 : /* It is impossible for ancestor to be missing, because these are
769 : vote accounts on a given fork, not vote txns across forks. So we
770 : know these towers must contain slots we know about (as long as
771 : they are >= root, which we checked above). */
772 :
773 0 : if( FD_UNLIKELY( !ancestor_blk ) ) {
774 0 : FD_BASE58_ENCODE_32_BYTES( acct->addr.key, pubkey_b58 );
775 0 : FD_LOG_CRIT(( "missing ancestor. replay slot %lu vote slot %lu voter %s", slot_completed->slot, vote_slot, pubkey_b58 ));
776 0 : }
777 :
778 0 : fd_ghost_count_vote( ctx->ghost, ancestor_blk, &acct->addr, acct->stake, vote_slot );
779 0 : }
780 :
781 0 : if( FD_UNLIKELY( fd_tower_accts_iter_done( tower_accts, iter_tail ) ) ) {
782 0 : goto done_vote_iter;
783 0 : }
784 0 : iter_tail = fd_tower_accts_iter_next( tower_accts, iter_tail );
785 0 : }
786 :
787 0 : if( FD_UNLIKELY( fd_tower_accts_iter_done( tower_accts, iter_head ) ) ) break;
788 0 : fd_accdb_ro_pipe_enqueue( ro_pipe, fd_tower_accts_iter_ele( ctx->tower_accts, iter_head )->addr.key );
789 0 : iter_head = fd_tower_accts_iter_next( ctx->tower_accts, iter_head );
790 0 : }
791 :
792 0 : done_vote_iter:
793 0 : fd_accdb_ro_pipe_fini( ro_pipe );
794 :
795 : /* Insert the just replayed block into hard fork detector. */
796 :
797 0 : fd_hfork_record_our_bank_hash( ctx->hfork, &slot_completed->block_id, &slot_completed->bank_hash, fd_ghost_root( ctx->ghost )->total_stake );
798 :
799 : /* fd_notar requires some bookkeeping when there is a new epoch. */
800 :
801 0 : if( FD_UNLIKELY( ctx->notar->epoch==ULONG_MAX || slot_completed->epoch > ctx->notar->epoch ) ) {
802 0 : fd_notar_advance_epoch( ctx->notar, ctx->tower_accts, slot_completed->epoch );
803 0 : }
804 :
805 : /* Check if gossip votes already confirmed the fork's block_id (gossip
806 : can be ahead of replay - this is tracked by fd_notar). */
807 :
808 0 : fd_notar_slot_t * notar_slot = fd_notar_slot_query( ctx->notar->slot_map, slot_completed->slot, NULL );
809 0 : if( FD_UNLIKELY( notar_slot )) { /* optimize for replay keeping up (being ahead of gossip votes) */
810 0 : for( ulong i = 0; i < notar_slot->block_ids_cnt; i++ ) {
811 0 : fd_notar_blk_t * notar_blk = fd_notar_blk_query( ctx->notar->blk_map, notar_slot->block_ids[i], NULL );
812 0 : FD_TEST( notar_blk ); /* block_ids_cnt corrupt */
813 0 : if( FD_LIKELY( notar_blk->dup_conf ) ) {
814 0 : fork->confirmed = 1;
815 0 : fork->confirmed_block_id = notar_blk->block_id;
816 0 : break;
817 0 : }
818 0 : }
819 0 : }
820 :
821 : /* We replayed an unconfirmed duplicate, warn for now. Follow-up PR
822 : will implement eviction and repair of the correct one. */
823 :
824 0 : if( FD_UNLIKELY( fork->confirmed && 0!=memcmp( &fork->confirmed_block_id, &fork->replayed_block_id, sizeof(fd_hash_t) ) ) ) {
825 0 : FD_BASE58_ENCODE_32_BYTES( slot_completed->block_id.key, block_id_b58 );
826 0 : FD_BASE58_ENCODE_32_BYTES( fork->confirmed_block_id.key, confirmed_block_id_b58 );
827 0 : FD_LOG_WARNING(( "replayed an unconfirmed duplicate %lu. ours %s. confirmed %s.", slot_completed->slot, block_id_b58, confirmed_block_id_b58 ));
828 0 : }
829 :
830 : /* Determine reset, vote, and root slots. There may not be a vote or
831 : root slot but there is always a reset slot. */
832 :
833 0 : fd_tower_out_t out = fd_tower_vote_and_reset( ctx->tower, ctx->tower_accts, ctx->slot_stakes, ctx->forks, ctx->ghost, ctx->notar );
834 :
835 : /* Write out metrics for vote / reset reasons. */
836 :
837 0 : ctx->metrics.ancestor_rollback += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_ANCESTOR_ROLLBACK );
838 0 : ctx->metrics.sibling_confirmed += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SIBLING_CONFIRMED );
839 0 : ctx->metrics.same_fork += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SAME_FORK );
840 0 : ctx->metrics.switch_pass += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_PASS );
841 0 : ctx->metrics.switch_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_SWITCH_FAIL );
842 0 : ctx->metrics.lockout_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_LOCKOUT_FAIL );
843 0 : ctx->metrics.threshold_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_THRESHOLD_FAIL );
844 0 : ctx->metrics.propagated_fail += (ulong)fd_uchar_extract_bit( out.flags, FD_TOWER_FLAG_PROPAGATED_FAIL );
845 :
846 : /* Update forks if there is a vote slot. */
847 :
848 0 : if( FD_LIKELY( out.vote_slot!=ULONG_MAX ) ) {
849 0 : ctx->metrics.local_vote_slot = out.vote_slot;
850 0 : fd_tower_forks_t * fork = fd_forks_query( ctx->forks, out.vote_slot );
851 0 : FD_TEST( fork ); /* we must have replayed every slot we voted for */
852 0 : fd_forks_voted( fork, &out.vote_block_id );
853 0 : }
854 :
855 : /* Publish according structures if there is a root */
856 :
857 0 : if( FD_UNLIKELY( out.root_slot!=ULONG_MAX ) ) {
858 0 : ctx->metrics.local_root_slot = out.root_slot;
859 :
860 0 : if( FD_UNLIKELY( 0==memcmp( &out.root_block_id, &hash_null, sizeof(fd_hash_t) ) ) ) {
861 0 : FD_LOG_CRIT(( "invariant violation: root block id is null at slot %lu", out.root_slot ));
862 0 : }
863 :
864 : /* forks */
865 :
866 0 : for(ulong slot = ctx->root_slot; slot < out.root_slot; slot++ ) {
867 0 : fd_tower_forks_t * fork = fd_forks_query ( ctx->forks, slot );
868 0 : if( FD_LIKELY( fork ) ) fd_forks_remove( ctx->forks, slot );
869 0 : fd_epoch_stakes_slot_t * slot_stakes = fd_epoch_stakes_slot_map_query ( ctx->slot_stakes->slot_stakes_map, slot, NULL );
870 0 : if( FD_LIKELY( slot_stakes ) ) fd_epoch_stakes_slot_stakes_remove( ctx->slot_stakes, slot_stakes );
871 0 : }
872 :
873 : /* ghost */
874 :
875 0 : fd_ghost_blk_t * newr = fd_ghost_query( ctx->ghost, &out.root_block_id );
876 0 : if( FD_UNLIKELY( !newr ) ) { /* a block id we rooted is missing from ghost */
877 0 : FD_BASE58_ENCODE_32_BYTES( out.root_block_id.uc, block_id_cstr );
878 0 : FD_LOG_CRIT(( "missing root block id %s at slot %lu", block_id_cstr, out.root_slot ));
879 0 : }
880 0 : fd_ghost_publish( ctx->ghost, newr );
881 :
882 : /* notar */
883 :
884 0 : fd_notar_advance_wmark( ctx->notar, out.root_slot );
885 :
886 : /* Rooting implies optimistic confirmation in the Firedancer API, so
887 : we need to make sure to publish weaker confirmation levels before
888 : publishing stronger ones. In most cases this is a no-op because
889 : gossip votes already triggered optimistic confirmation.
890 :
891 : TODO include replay votes in optimistic conf vote counting. */
892 :
893 0 : contiguous_confirm( ctx, out.root_slot, ctx->conf_slot, FD_TOWER_SLOT_CONFIRMED_OPTIMISTIC );
894 0 : contiguous_confirm( ctx, out.root_slot, ctx->supc_slot, FD_TOWER_SLOT_CONFIRMED_SUPER );
895 0 : contiguous_confirm( ctx, out.root_slot, ctx->root_slot, FD_TOWER_SLOT_CONFIRMED_ROOTED );
896 :
897 : /* Update slot watermarks. */
898 :
899 0 : ctx->root_slot = out.root_slot;
900 0 : ctx->metrics.local_root_slot = out.root_slot;
901 0 : }
902 :
903 : /* There must always be a reset slot. */
904 :
905 0 : ctx->metrics.reset_slot = out.reset_slot;
906 :
907 : /* Publish a slot_done frag to tower_out. */
908 :
909 0 : fd_tower_slot_done_t * msg = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
910 0 : msg->replay_slot = slot_completed->slot;
911 0 : msg->active_fork_cnt = fd_tower_leaves_pool_used( ctx->forks->tower_leaves_pool );
912 0 : msg->vote_slot = out.vote_slot;
913 0 : msg->reset_slot = out.reset_slot;
914 0 : msg->reset_block_id = out.reset_block_id;
915 0 : msg->root_slot = out.root_slot;
916 0 : msg->root_block_id = out.root_block_id;
917 0 : msg->replay_bank_idx = slot_completed->bank_idx;
918 0 : msg->vote_acct_bal = our_vote_acct_bal;
919 :
920 : /* Populate slot_done with a vote txn representing our current tower
921 : (regardless of whether there was a new vote slot or not).
922 :
923 : TODO only do this on refresh_last_vote? */
924 :
925 0 : ulong authority_idx = ULONG_MAX;
926 0 : fd_pubkey_t authority[1];
927 0 : int found_authority = get_authority( ctx, slot_completed->epoch, found, authority, &authority_idx );
928 :
929 0 : if( FD_LIKELY( found_authority ) ) {
930 0 : msg->has_vote_txn = 1;
931 0 : fd_txn_p_t txn[1];
932 0 : fd_tower_to_vote_txn( ctx->tower, ctx->root_slot, &slot_completed->bank_hash, &slot_completed->block_id, &slot_completed->block_hash, ctx->identity_key, authority, ctx->vote_account, txn );
933 0 : FD_TEST( !fd_tower_empty( ctx->tower ) );
934 0 : FD_TEST( txn->payload_sz && txn->payload_sz<=FD_TPU_MTU );
935 0 : fd_memcpy( msg->vote_txn, txn->payload, txn->payload_sz );
936 0 : msg->vote_txn_sz = txn->payload_sz;
937 0 : msg->authority_idx = authority_idx;
938 0 : } else {
939 0 : msg->has_vote_txn = 0;
940 0 : }
941 :
942 0 : msg->tower_cnt = 0UL;
943 0 : if( FD_LIKELY( found ) ) msg->tower_cnt = fd_tower_with_lat_from_vote_acc( msg->tower, ctx->our_vote_acct );
944 :
945 0 : fd_stem_publish( stem, OUT_IDX, FD_TOWER_SIG_SLOT_DONE, ctx->out_chunk, sizeof(fd_tower_slot_done_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
946 0 : ctx->out_chunk = fd_dcache_compact_next( ctx->out_chunk, sizeof(fd_tower_slot_done_t), ctx->out_chunk0, ctx->out_wmark );
947 :
948 0 : if( FD_UNLIKELY( ctx->debug_logging ) ) {
949 0 : fd_ghost_print( ctx->ghost, fd_ghost_root( ctx->ghost ) );
950 0 : fd_tower_print( ctx->tower, ctx->metrics.local_root_slot );
951 0 : }
952 :
953 : // if( FD_UNLIKELY( ctx->debug_fd!=-1 ) ) {
954 : // /* standard buf_sz used by below prints is ~3400 bytes, so buf_max of
955 : // 4096 is sufficient to keep the debug file mostly up to date */
956 : // fd_ghost_print( ctx->ghost, fd_ghost_root( ctx->ghost ), &ctx->debug_ostream );
957 : // fd_tower_print( ctx->tower, fd_ghost_root( ctx->ghost )->slot, &ctx->debug_ostream );
958 : // }
959 0 : }
960 :
961 : static inline void
962 : after_credit( ctx_t * ctx,
963 : fd_stem_context_t * stem,
964 : int * opt_poll_in FD_PARAM_UNUSED,
965 0 : int * charge_busy ) {
966 0 : if( FD_LIKELY( !notif_empty( ctx->notif ) ) ) {
967 :
968 : /* Contiguous confirmations are pushed to tail in order from child
969 : to ancestor, so we pop from tail to publish confirmations in
970 : order from ancestor to child. */
971 :
972 0 : notif_t ancestor = notif_pop_tail( ctx->notif );
973 0 : if( FD_UNLIKELY( ancestor.kind == FD_TOWER_SLOT_CONFIRMED_CLUSTER || ancestor.kind == FD_TOWER_SLOT_CONFIRMED_DUPLICATE ) ) {
974 :
975 : /* Duplicate confirmations and cluster confirmations were sourced
976 : from notar (through gossip txns and replay txns) so we need to
977 : use the block_id from the notif recorded at the time of the
978 : confirmation */
979 :
980 0 : publish_slot_confirmed( ctx, stem, fd_frag_meta_ts_comp( fd_tickcount() ), ancestor.slot, &ancestor.block_id, ULONG_MAX, ancestor.kind );
981 0 : } else {
982 0 : fd_tower_forks_t * fork = fd_tower_forks_query( ctx->forks->tower_forks, ancestor.slot, NULL );
983 0 : if( FD_UNLIKELY( !fork ) ) FD_LOG_CRIT(( "missing fork for ancestor %lu", ancestor.slot ));
984 0 : publish_slot_confirmed( ctx, stem, fd_frag_meta_ts_comp( fd_tickcount() ), ancestor.slot, fd_forks_canonical_block_id( ctx->forks, ancestor.slot ), fork->bank_idx, ancestor.kind );
985 0 : }
986 0 : *opt_poll_in = 0; /* drain the confirmations */
987 0 : *charge_busy = 1;
988 0 : }
989 0 : }
990 :
991 : static inline int
992 0 : verify_chunk_len( ulong chunk_len ) {
993 0 : ulong shred_szs[2] = { FD_SHRED_MIN_SZ, FD_SHRED_MAX_SZ };
994 0 : for( ulong i = 0; i < sizeof(shred_szs) / sizeof(ulong); i++ ) {
995 0 : for( ulong j = 0; j < sizeof(shred_szs) / sizeof(ulong); j++ ) {
996 0 : ulong all_chunks_sz = sizeof(ulong) + shred_szs[0] + sizeof(ulong) + shred_szs[1];
997 0 : ulong last_chunk_sz = all_chunks_sz - 2 * FD_EQVOC_CHUNK_SZ;
998 0 : if( FD_LIKELY( chunk_len==last_chunk_sz ) ) return 1;
999 0 : }
1000 0 : }
1001 0 : return 0;
1002 0 : }
1003 :
1004 : static inline void
1005 : record_eqvoc_metric( ctx_t * ctx,
1006 0 : int eqvoc_err ) {
1007 0 : switch( eqvoc_err ) {
1008 :
1009 0 : case FD_EQVOC_SUCCESS: break;
1010 :
1011 0 : case FD_EQVOC_VERIFIED_MERKLE: ctx->metrics.proof_verified_merkle++; break;
1012 0 : case FD_EQVOC_VERIFIED_META: ctx->metrics.proof_verified_merkle++; break;
1013 0 : case FD_EQVOC_VERIFIED_LAST: ctx->metrics.proof_verified_merkle++; break;
1014 0 : case FD_EQVOC_VERIFIED_OVERLAP: ctx->metrics.proof_verified_merkle++; break;
1015 0 : case FD_EQVOC_VERIFIED_CHAINED: ctx->metrics.proof_verified_merkle++; break;
1016 :
1017 0 : case FD_EQVOC_ERR_SER: ctx->metrics.proof_err_shred_ser++; break;
1018 0 : case FD_EQVOC_ERR_SLOT: ctx->metrics.proof_err_shred_slot++; break;
1019 0 : case FD_EQVOC_ERR_VERSION: ctx->metrics.proof_err_shred_version++; break;
1020 0 : case FD_EQVOC_ERR_TYPE: ctx->metrics.proof_err_shred_type++; break;
1021 0 : case FD_EQVOC_ERR_MERKLE: ctx->metrics.proof_err_shred_merkle++; break;
1022 0 : case FD_EQVOC_ERR_SIG: ctx->metrics.proof_err_shred_signature++; break;
1023 :
1024 0 : default: FD_LOG_ERR(( "unhandled eqvoc_err %d", eqvoc_err ));
1025 0 : }
1026 0 : }
1027 :
1028 :
1029 : static inline int
1030 : returnable_frag( ctx_t * ctx,
1031 : ulong in_idx,
1032 : ulong seq FD_PARAM_UNUSED,
1033 : ulong sig,
1034 : ulong chunk,
1035 : ulong sz,
1036 : ulong ctl FD_PARAM_UNUSED,
1037 : ulong tsorig,
1038 : ulong tspub FD_PARAM_UNUSED,
1039 0 : fd_stem_context_t * stem ) {
1040 :
1041 0 : if( FD_UNLIKELY( !ctx->in[ in_idx ].mcache_only && ( chunk<ctx->in[ in_idx ].chunk0 || chunk>ctx->in[ in_idx ].wmark || sz>ctx->in[ in_idx ].mtu ) ) )
1042 0 : FD_LOG_ERR(( "chunk %lu %lu from in %d corrupt, not in range [%lu,%lu]", chunk, sz, ctx->in_kind[ in_idx ], ctx->in[ in_idx ].chunk0, ctx->in[ in_idx ].wmark ));
1043 :
1044 0 : switch( ctx->in_kind[ in_idx ] ) {
1045 0 : case IN_KIND_DEDUP: {
1046 0 : if( FD_UNLIKELY( ctx->root_slot==ULONG_MAX ) ) return 1;
1047 0 : fd_txn_m_t * txnm = (fd_txn_m_t *)fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
1048 0 : FD_TEST( txnm->payload_sz<=FD_TPU_MTU );
1049 0 : FD_TEST( txnm->txn_t_sz<=FD_TXN_MAX_SZ );
1050 0 : count_vote_txn( ctx, fd_txn_m_txn_t_const( txnm ), fd_txn_m_payload_const( txnm ) );
1051 0 : return 0;
1052 0 : }
1053 0 : case IN_KIND_EPOCH: {
1054 0 : fd_stake_ci_epoch_msg_init( ctx->stake_ci, fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk ) );
1055 0 : fd_stake_ci_epoch_msg_fini( ctx->stake_ci );
1056 0 : return 0;
1057 0 : }
1058 0 : case IN_KIND_EXECRP: {
1059 0 : if( FD_LIKELY( (sig>>32)==FD_EXECRP_TT_TXN_EXEC ) ) {
1060 0 : fd_execrp_txn_exec_msg_t * msg = fd_chunk_to_laddr( ctx->in[in_idx].mem, chunk );
1061 0 : count_vote_txn( ctx, TXN(msg->txn), msg->txn->payload );
1062 0 : }
1063 0 : return 0;
1064 0 : }
1065 0 : case IN_KIND_GOSSIP: {
1066 0 : if( FD_LIKELY( sig==FD_GOSSIP_UPDATE_TAG_DUPLICATE_SHRED ) ) {
1067 0 : fd_gossip_update_message_t const * msg = (fd_gossip_update_message_t const *)fd_type_pun_const( fd_chunk_to_laddr_const( ctx->in[ in_idx ].mem, chunk ) );
1068 0 : fd_gossip_duplicate_shred_t const * duplicate_shred = &msg->duplicate_shred;
1069 0 : fd_pubkey_t const * from = (fd_pubkey_t const *)fd_type_pun_const( msg->origin_pubkey );
1070 :
1071 0 : FD_BASE58_ENCODE_32_BYTES( from->uc, from_b58 );
1072 :
1073 0 : fd_eqvoc_set_leader_schedule( ctx->eqvoc, fd_stake_ci_get_lsched_for_slot( ctx->stake_ci, duplicate_shred->slot ) );
1074 :
1075 : /* Agave drops msgs where num_chunks /= 3. https://github.com/anza-xyz/agave/blob/v3.1/gossip/src/duplicate_shred.rs#L262-L268 */
1076 :
1077 0 : if ( FD_UNLIKELY( duplicate_shred->num_chunks != FD_EQVOC_CHUNK_CNT ) ) ctx->metrics.proof_err_chunk_cnt++;
1078 0 : else if( FD_UNLIKELY( duplicate_shred->chunk_index >= FD_EQVOC_CHUNK_CNT ) ) ctx->metrics.proof_err_chunk_idx++;
1079 0 : else if( FD_UNLIKELY( verify_chunk_len( duplicate_shred->chunk_len ) ) ) ctx->metrics.proof_err_chunk_len++;
1080 0 : else record_eqvoc_metric( ctx, fd_eqvoc_chunk_insert( ctx->eqvoc, from, duplicate_shred ) );
1081 0 : }
1082 0 : return 0;
1083 0 : }
1084 0 : case IN_KIND_IPECHO: {
1085 0 : FD_TEST( sig!=0UL && sig<=USHORT_MAX );
1086 0 : fd_eqvoc_set_shred_version( ctx->eqvoc, (ushort)sig );
1087 0 : return 0;
1088 0 : }
1089 0 : case IN_KIND_REPLAY: {
1090 0 : if( FD_LIKELY( sig==REPLAY_SIG_SLOT_COMPLETED ) ) {
1091 0 : fd_replay_slot_completed_t * slot_completed = (fd_replay_slot_completed_t *)fd_type_pun( fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
1092 0 : replay_slot_completed( ctx, slot_completed, tsorig, stem );
1093 0 : } else if( FD_LIKELY( sig==REPLAY_SIG_SLOT_DEAD ) ) {
1094 0 : fd_replay_slot_dead_t * slot_dead = (fd_replay_slot_dead_t *)fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk );
1095 0 : fd_hfork_record_our_bank_hash( ctx->hfork, &slot_dead->block_id, NULL, fd_ghost_root( ctx->ghost )->total_stake );
1096 0 : }
1097 0 : return 0;
1098 0 : }
1099 0 : case IN_KIND_SHRED: {
1100 0 : if( FD_LIKELY( sz==FD_SHRED_MIN_SZ || sz==FD_SHRED_MAX_SZ ) ) { /* TODO depends on pending shred_out changes */
1101 0 : fd_shred_t * shred = (fd_shred_t *)fd_type_pun( fd_chunk_to_laddr( ctx->in[ in_idx ].mem, chunk ) );
1102 0 : fd_tower_slot_duplicate_t * out = fd_chunk_to_laddr( ctx->out_mem, ctx->out_chunk );
1103 0 : int err = fd_eqvoc_shred_insert( ctx->eqvoc, shred, out->chunks );
1104 0 : if( FD_UNLIKELY( err>0 ) ) {
1105 0 : record_eqvoc_metric( ctx, err );
1106 0 : ctx->metrics.proof_constructed++;
1107 0 : fd_stem_publish( stem, OUT_IDX, FD_TOWER_SIG_SLOT_DUPLICATE, ctx->out_chunk, sizeof(fd_tower_slot_duplicate_t), 0UL, tsorig, fd_frag_meta_ts_comp( fd_tickcount() ) );
1108 0 : }
1109 0 : }
1110 0 : return 0;
1111 0 : }
1112 0 : default: {
1113 0 : FD_LOG_ERR(( "unexpected input kind %d", ctx->in_kind[ in_idx ] ));
1114 0 : }
1115 0 : }
1116 0 : }
1117 :
1118 : static void
1119 : privileged_init( fd_topo_t * topo,
1120 0 : fd_topo_tile_t * tile ) {
1121 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
1122 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
1123 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
1124 0 : void * av_map = FD_SCRATCH_ALLOC_APPEND( l, fd_auth_key_set_align(), fd_auth_key_set_footprint() );
1125 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
1126 :
1127 0 : FD_TEST( fd_rng_secure( &ctx->seed, sizeof(ctx->seed) ) );
1128 :
1129 0 : if( FD_UNLIKELY( !strcmp( tile->tower.identity_key, "" ) ) ) FD_LOG_ERR(( "identity_key_path not set" ));
1130 0 : ctx->identity_key[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->tower.identity_key, /* pubkey only: */ 1 ) );
1131 :
1132 : /* The vote key can be specified either directly as a base58 encoded
1133 : pubkey, or as a file path. We first try to decode as a pubkey. */
1134 :
1135 0 : uchar * vote_key = fd_base58_decode_32( tile->tower.vote_account, ctx->vote_account->uc );
1136 0 : if( FD_UNLIKELY( !vote_key ) ) ctx->vote_account[ 0 ] = *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->tower.vote_account, /* pubkey only: */ 1 ) );
1137 :
1138 0 : ctx->auth_key_set = fd_auth_key_set_join( fd_auth_key_set_new( av_map ) );
1139 0 : for( ulong i=0UL; i<tile->tower.authorized_voter_paths_cnt; i++ ) {
1140 0 : fd_auth_key_t * auth_key = fd_auth_key_set_insert( ctx->auth_key_set, *(fd_pubkey_t const *)fd_type_pun_const( fd_keyload_load( tile->tower.authorized_voter_paths[ i ], /* pubkey only: */ 1 ) ) );
1141 0 : auth_key->idx = i;
1142 0 : }
1143 :
1144 : /* The tower file is used to checkpt and restore the state of the
1145 : local tower. */
1146 :
1147 0 : char path[ PATH_MAX ];
1148 0 : FD_BASE58_ENCODE_32_BYTES( ctx->identity_key->uc, identity_key_b58 );
1149 0 : FD_TEST( fd_cstr_printf_check( path, sizeof(path), NULL, "%s/tower-1_9-%s.bin.new", tile->tower.base_path, identity_key_b58 ) );
1150 0 : ctx->checkpt_fd = open( path, O_WRONLY|O_CREAT|O_TRUNC, 0600 );
1151 0 : if( FD_UNLIKELY( -1==ctx->checkpt_fd ) ) FD_LOG_ERR(( "open(`%s`) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
1152 :
1153 0 : FD_TEST( fd_cstr_printf_check( path, sizeof(path), NULL, "%s/tower-1_9-%s.bin", tile->tower.base_path, identity_key_b58 ) );
1154 0 : ctx->restore_fd = open( path, O_RDONLY );
1155 0 : if( FD_UNLIKELY( -1==ctx->restore_fd && errno!=ENOENT ) ) FD_LOG_ERR(( "open(`%s`) failed (%i-%s)", path, errno, fd_io_strerror( errno ) ));
1156 0 : }
1157 :
1158 : static void
1159 : unprivileged_init( fd_topo_t * topo,
1160 0 : fd_topo_tile_t * tile ) {
1161 0 : ulong slot_max = tile->tower.max_live_slots;
1162 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
1163 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
1164 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
1165 0 : void * av_set = FD_SCRATCH_ALLOC_APPEND( l, fd_auth_key_set_align(), fd_auth_key_set_footprint() ); (void)av_set;
1166 0 : void * eqvoc = FD_SCRATCH_ALLOC_APPEND( l, fd_eqvoc_align(), fd_eqvoc_footprint( slot_max * 1024, slot_max, FD_VOTER_MAX ) );
1167 0 : void * ghost = FD_SCRATCH_ALLOC_APPEND( l, fd_ghost_align(), fd_ghost_footprint( 2*slot_max, FD_VOTER_MAX ) );
1168 0 : void * hfork = FD_SCRATCH_ALLOC_APPEND( l, fd_hfork_align(), fd_hfork_footprint( slot_max, FD_VOTER_MAX ) );
1169 0 : void * notar = FD_SCRATCH_ALLOC_APPEND( l, fd_notar_align(), fd_notar_footprint( tile->tower.max_vote_lookahead ) );
1170 0 : void * tower = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() );
1171 0 : void * accts = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_accts_align(), fd_tower_accts_footprint( FD_VOTER_MAX ) );
1172 0 : void * forks = FD_SCRATCH_ALLOC_APPEND( l, fd_forks_align(), fd_forks_footprint( slot_max, FD_VOTER_MAX ) );
1173 0 : void * spare = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_align(), fd_tower_footprint() );
1174 0 : void * stake = FD_SCRATCH_ALLOC_APPEND( l, fd_epoch_stakes_align(), fd_epoch_stakes_footprint( slot_max ) );
1175 0 : void * notif = FD_SCRATCH_ALLOC_APPEND( l, notif_align(), notif_footprint( slot_max ) );
1176 0 : void * stkci = FD_SCRATCH_ALLOC_APPEND( l, fd_stake_ci_align(), fd_stake_ci_footprint() );
1177 0 : FD_SCRATCH_ALLOC_FINI( l, scratch_align() );
1178 :
1179 0 : ctx->wksp = topo->workspaces[ topo->objs[ tile->tile_obj_id ].wksp_id ].wksp;
1180 0 : ctx->eqvoc = fd_eqvoc_join ( fd_eqvoc_new ( eqvoc, slot_max * 1024, slot_max, FD_VOTER_MAX, ctx->seed ) );
1181 0 : ctx->ghost = fd_ghost_join ( fd_ghost_new ( ghost, 2*slot_max, FD_VOTER_MAX, ctx->seed ) ); /* FIXME seed */
1182 0 : ctx->hfork = fd_hfork_join ( fd_hfork_new ( hfork, slot_max, FD_VOTER_MAX, ctx->seed, tile->tower.hard_fork_fatal ) );
1183 0 : ctx->notar = fd_notar_join ( fd_notar_new ( notar, tile->tower.max_vote_lookahead ) );
1184 0 : ctx->tower = fd_tower_join ( fd_tower_new ( tower ) );
1185 0 : ctx->tower_accts = fd_tower_accts_join ( fd_tower_accts_new ( accts, FD_VOTER_MAX ) );
1186 0 : ctx->forks = fd_forks_join ( fd_forks_new ( forks, slot_max, FD_VOTER_MAX ) );
1187 0 : ctx->tower_spare = fd_tower_join ( fd_tower_new ( spare ) );
1188 0 : ctx->slot_stakes = fd_epoch_stakes_join( fd_epoch_stakes_new( stake, slot_max ) );
1189 0 : ctx->notif = notif_join ( notif_new ( notif, slot_max ) );
1190 0 : ctx->stake_ci = fd_stake_ci_join ( fd_stake_ci_new ( stkci, ctx->identity_key ) );
1191 0 : FD_TEST( ctx->ghost );
1192 0 : FD_TEST( ctx->hfork );
1193 0 : FD_TEST( ctx->notar );
1194 0 : FD_TEST( ctx->tower );
1195 0 : FD_TEST( ctx->forks );
1196 0 : FD_TEST( ctx->tower_spare );
1197 0 : FD_TEST( ctx->tower_accts );
1198 0 : FD_TEST( ctx->slot_stakes );
1199 0 : FD_TEST( ctx->notif );
1200 0 : FD_TEST( ctx->stake_ci );
1201 :
1202 0 : for( ulong i = 0; i<VOTE_TXN_SIG_MAX; i++ ) {
1203 0 : fd_sha512_t * sha = fd_sha512_join( fd_sha512_new( FD_SCRATCH_ALLOC_APPEND( l, alignof(fd_sha512_t), sizeof(fd_sha512_t) ) ) );
1204 0 : FD_TEST( sha );
1205 0 : ctx->vote_sha[i] = sha;
1206 0 : }
1207 :
1208 0 : ctx->init_slot = ULONG_MAX;
1209 0 : ctx->root_slot = ULONG_MAX;
1210 0 : ctx->conf_slot = ULONG_MAX;
1211 0 : ctx->supc_slot = ULONG_MAX;
1212 :
1213 0 : memset( &ctx->metrics, 0, sizeof( struct ctx_metrics_t ) );
1214 :
1215 0 : ulong banks_obj_id = fd_pod_query_ulong( topo->props, "banks", ULONG_MAX );
1216 0 : FD_TEST( banks_obj_id!=ULONG_MAX );
1217 0 : ulong banks_locks_obj_id = fd_pod_query_ulong( topo->props, "banks_locks", ULONG_MAX );
1218 0 : FD_TEST( banks_locks_obj_id!=ULONG_MAX );
1219 0 : FD_TEST( fd_banks_join( ctx->banks, fd_topo_obj_laddr( topo, banks_obj_id ), fd_topo_obj_laddr( topo, banks_locks_obj_id ) ) );
1220 :
1221 0 : fd_accdb_init_from_topo( ctx->accdb, topo, tile );
1222 :
1223 0 : FD_TEST( tile->in_cnt<sizeof(ctx->in_kind)/sizeof(ctx->in_kind[0]) );
1224 0 : for( ulong i=0UL; i<tile->in_cnt; i++ ) {
1225 0 : fd_topo_link_t * link = &topo->links[ tile->in_link_id[ i ] ];
1226 0 : fd_topo_wksp_t * link_wksp = &topo->workspaces[ topo->objs[ link->dcache_obj_id ].wksp_id ];
1227 :
1228 0 : if ( FD_LIKELY( !strcmp( link->name, "dedup_resolv" ) ) ) ctx->in_kind[ i ] = IN_KIND_DEDUP;
1229 0 : else if( FD_LIKELY( !strcmp( link->name, "replay_epoch" ) ) ) ctx->in_kind[ i ] = IN_KIND_EPOCH;
1230 0 : else if( FD_LIKELY( !strcmp( link->name, "replay_execrp" ) ) ) ctx->in_kind[ i ] = IN_KIND_EXECRP;
1231 0 : else if( FD_LIKELY( !strcmp( link->name, "gossip_out" ) ) ) ctx->in_kind[ i ] = IN_KIND_GOSSIP;
1232 0 : else if( FD_LIKELY( !strcmp( link->name, "ipecho_out" ) ) ) ctx->in_kind[ i ] = IN_KIND_IPECHO;
1233 0 : else if( FD_LIKELY( !strcmp( link->name, "replay_out" ) ) ) ctx->in_kind[ i ] = IN_KIND_REPLAY;
1234 0 : else if( FD_LIKELY( !strcmp( link->name, "shred_out" ) ) ) ctx->in_kind[ i ] = IN_KIND_SHRED;
1235 0 : else FD_LOG_ERR(( "tower tile has unexpected input link %lu %s", i, link->name ));
1236 :
1237 0 : ctx->in[ i ].mcache_only = !link->mtu;
1238 0 : if( FD_LIKELY( !ctx->in[ i ].mcache_only ) ) {
1239 0 : ctx->in[ i ].mem = link_wksp->wksp;
1240 0 : ctx->in[ i ].mtu = link->mtu;
1241 0 : ctx->in[ i ].chunk0 = fd_dcache_compact_chunk0( ctx->in[ i ].mem, link->dcache );
1242 0 : ctx->in[ i ].wmark = fd_dcache_compact_wmark ( ctx->in[ i ].mem, link->dcache, link->mtu );
1243 0 : }
1244 0 : }
1245 :
1246 0 : ctx->out_mem = topo->workspaces[ topo->objs[ topo->links[ tile->out_link_id[ 0 ] ].dcache_obj_id ].wksp_id ].wksp;
1247 0 : ctx->out_chunk0 = fd_dcache_compact_chunk0( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache );
1248 0 : ctx->out_wmark = fd_dcache_compact_wmark ( ctx->out_mem, topo->links[ tile->out_link_id[ 0 ] ].dcache, topo->links[ tile->out_link_id[ 0 ] ].mtu );
1249 0 : ctx->out_chunk = ctx->out_chunk0;
1250 :
1251 0 : ctx->debug_logging = tile->tower.debug_logging;
1252 0 : }
1253 :
1254 : static ulong
1255 : populate_allowed_seccomp( fd_topo_t const * topo,
1256 : fd_topo_tile_t const * tile,
1257 : ulong out_cnt,
1258 0 : struct sock_filter * out ) {
1259 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
1260 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
1261 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
1262 :
1263 0 : populate_sock_filter_policy_fd_tower_tile( out_cnt, out, (uint)fd_log_private_logfile_fd(), (uint)ctx->checkpt_fd, (uint)ctx->restore_fd );
1264 0 : return sock_filter_policy_fd_tower_tile_instr_cnt;
1265 0 : }
1266 :
1267 : static ulong
1268 : populate_allowed_fds( fd_topo_t const * topo,
1269 : fd_topo_tile_t const * tile,
1270 : ulong out_fds_cnt,
1271 0 : int * out_fds ) {
1272 0 : void * scratch = fd_topo_obj_laddr( topo, tile->tile_obj_id );
1273 0 : FD_SCRATCH_ALLOC_INIT( l, scratch );
1274 0 : ctx_t * ctx = FD_SCRATCH_ALLOC_APPEND( l, alignof(ctx_t), sizeof(ctx_t) );
1275 :
1276 0 : if( FD_UNLIKELY( out_fds_cnt<4UL ) ) FD_LOG_ERR(( "out_fds_cnt %lu", out_fds_cnt ));
1277 :
1278 0 : ulong out_cnt = 0UL;
1279 0 : out_fds[ out_cnt++ ] = 2; /* stderr */
1280 0 : if( FD_LIKELY( -1!=fd_log_private_logfile_fd() ) )
1281 0 : out_fds[ out_cnt++ ] = fd_log_private_logfile_fd(); /* logfile */
1282 0 : if( FD_LIKELY( ctx->checkpt_fd!=-1 ) ) out_fds[ out_cnt++ ] = ctx->checkpt_fd;
1283 0 : if( FD_LIKELY( ctx->restore_fd!=-1 ) ) out_fds[ out_cnt++ ] = ctx->restore_fd;
1284 0 : return out_cnt;
1285 0 : }
1286 :
1287 0 : #define STEM_BURST (2UL) /* slot_conf AND (slot_done OR slot_ignored) */
1288 : /* See explanation in fd_pack */
1289 0 : #define STEM_LAZY (128L*3000L)
1290 :
1291 0 : #define STEM_CALLBACK_CONTEXT_TYPE ctx_t
1292 0 : #define STEM_CALLBACK_CONTEXT_ALIGN alignof(ctx_t)
1293 0 : #define STEM_CALLBACK_METRICS_WRITE metrics_write
1294 0 : #define STEM_CALLBACK_AFTER_CREDIT after_credit
1295 0 : #define STEM_CALLBACK_RETURNABLE_FRAG returnable_frag
1296 :
1297 : #include "../../disco/stem/fd_stem.c"
1298 :
1299 : fd_topo_run_tile_t fd_tile_tower = {
1300 : .name = "tower",
1301 : .populate_allowed_seccomp = populate_allowed_seccomp,
1302 : .populate_allowed_fds = populate_allowed_fds,
1303 : .scratch_align = scratch_align,
1304 : .scratch_footprint = scratch_footprint,
1305 : .unprivileged_init = unprivileged_init,
1306 : .privileged_init = privileged_init,
1307 : .run = stem_run,
1308 : };
|