Line data Source code
1 : #include <limits.h>
2 : #include <unistd.h>
3 : #include <fcntl.h>
4 : #include <sys/stat.h>
5 :
6 : #include "fd_tower.h"
7 : #include "../voter/fd_voter.h"
8 : #include "fd_tower_forks.h"
9 : #include "../../flamenco/txn/fd_txn_generate.h"
10 : #include "../../flamenco/runtime/fd_system_ids.h"
11 :
12 : #define LOGGING 0
13 :
14 0 : #define THRESHOLD_DEPTH (8)
15 0 : #define THRESHOLD_RATIO (2.0 / 3.0)
16 : #define SWITCH_RATIO (0.38)
17 :
18 : /* expiration calculates the expiration slot of vote given a slot and
19 : confirmation count. */
20 :
21 : static inline ulong
22 42 : expiration_slot( fd_tower_t const * vote ) {
23 42 : ulong lockout = 1UL << vote->conf;
24 42 : return vote->slot + lockout;
25 42 : }
26 :
27 : /* simulate_vote simulates voting for slot, popping all votes from the
28 : top that would be consecutively expired by voting for slot. */
29 :
30 : ulong
31 : simulate_vote( fd_tower_t const * tower,
32 48 : ulong slot ) {
33 48 : ulong cnt = fd_tower_cnt( tower );
34 54 : while( cnt ) {
35 42 : fd_tower_t const * top_vote = fd_tower_peek_index_const( tower, cnt - 1 );
36 42 : if( FD_LIKELY( expiration_slot( top_vote ) >= slot ) ) break; /* expire only if consecutive */
37 6 : cnt--;
38 6 : }
39 48 : return cnt;
40 48 : }
41 :
42 : /* push_vote pushes a new vote for slot onto the tower. Pops and
43 : returns the new root (bottom of the tower) if it reaches max lockout
44 : as a result of the new vote. Otherwise, returns ULONG_MAX.
45 :
46 : Max lockout is equivalent to 1 << FD_TOWER_VOTE_MAX + 1 (which
47 : implies confirmation count is FD_TOWER_VOTE_MAX + 1). As a result,
48 : fd_tower_vote also maintains the invariant that the tower contains at
49 : most FD_TOWER_VOTE_MAX votes, because (in addition to vote expiry)
50 : there will always be a pop before reaching FD_TOWER_VOTE_MAX + 1. */
51 :
52 : ulong
53 : push_vote( fd_tower_t * tower,
54 48 : ulong slot ) {
55 :
56 48 : # if FD_TOWER_PARANOID
57 48 : fd_tower_t const * vote = fd_tower_peek_tail_const( tower );
58 48 : if( FD_UNLIKELY( vote && slot <= vote->slot ) ) FD_LOG_ERR(( "[%s] slot %lu <= vote->slot %lu", __func__, slot, vote->slot )); /* caller error*/
59 48 : # endif
60 :
61 : /* Use simulate_vote to determine how many expired votes to pop. */
62 :
63 48 : ulong cnt = simulate_vote( tower, slot );
64 :
65 : /* Pop everything that got expired. */
66 :
67 54 : while( FD_LIKELY( fd_tower_cnt( tower ) > cnt ) ) {
68 6 : fd_tower_pop_tail( tower );
69 6 : }
70 :
71 : /* If the tower is still full after expiring, then pop and return the
72 : bottom vote slot as the new root because this vote has incremented
73 : it to max lockout. Otherwise this is a no-op and there is no new
74 : root (FD_SLOT_NULL). */
75 :
76 48 : ulong root = FD_SLOT_NULL;
77 48 : if( FD_LIKELY( fd_tower_full( tower ) ) ) { /* optimize for full tower */
78 0 : root = fd_tower_pop_head( tower ).slot;
79 0 : }
80 :
81 : /* Increment confirmations (double lockouts) for consecutive
82 : confirmations in prior votes. */
83 :
84 48 : ulong prev_conf = 0;
85 48 : for( fd_tower_iter_t iter = fd_tower_iter_init_rev( tower );
86 147 : !fd_tower_iter_done_rev( tower, iter );
87 99 : iter = fd_tower_iter_prev ( tower, iter ) ) {
88 99 : fd_tower_t * vote = fd_tower_iter_ele( tower, iter );
89 99 : if( FD_UNLIKELY( vote->conf != ++prev_conf ) ) break;
90 99 : vote->conf++;
91 99 : }
92 :
93 : /* Add the new vote to the tower. */
94 :
95 48 : fd_tower_push_tail( tower, (fd_tower_t){ .slot = slot, .conf = 1 } );
96 :
97 : /* Return the new root (FD_SLOT_NULL if there is none). */
98 :
99 48 : return root;
100 48 : }
101 :
102 : /* lockout_check checks if we are locked out from voting for slot.
103 : Returns 1 if we can vote for slot without violating lockout, 0
104 : otherwise.
105 :
106 : After voting for a slot n, we are locked out for 2^k slots, where k
107 : is the confirmation count of that vote. Once locked out, we cannot
108 : vote for a different fork until that previously-voted fork expires at
109 : slot n+2^k. This implies the earliest slot in which we can switch
110 : from the previously-voted fork is (n+2^k)+1. We use `ghost` to
111 : determine whether `slot` is on the same or different fork as previous
112 : vote slots.
113 :
114 : In the case of the tower, every vote has its own expiration slot
115 : depending on confirmations. The confirmation count is the max number
116 : of consecutive votes that have been pushed on top of the vote, and
117 : not necessarily its current height in the tower.
118 :
119 : For example, the following is a diagram of a tower pushing and
120 : popping with each vote:
121 :
122 :
123 : slot | confirmation count
124 : -----|-------------------
125 : 4 | 1 <- vote
126 : 3 | 2
127 : 2 | 3
128 : 1 | 4
129 :
130 :
131 : slot | confirmation count
132 : -----|-------------------
133 : 9 | 1 <- vote
134 : 2 | 3
135 : 1 | 4
136 :
137 :
138 : slot | confirmation count
139 : -----|-------------------
140 : 10 | 1 <- vote
141 : 9 | 2
142 : 2 | 3
143 : 1 | 4
144 :
145 :
146 : slot | confirmation count
147 : -----|-------------------
148 : 11 | 1 <- vote
149 : 10 | 2
150 : 9 | 3
151 : 2 | 4
152 : 1 | 5
153 :
154 :
155 : slot | confirmation count
156 : -----|-------------------
157 : 18 | 1 <- vote
158 : 2 | 4
159 : 1 | 5
160 :
161 :
162 : In the final tower, note the gap in confirmation counts between slot
163 : 18 and slot 2, even though slot 18 is directly above slot 2. */
164 :
165 : int
166 : lockout_check( fd_tower_t const * tower,
167 : fd_forks_t * forks,
168 0 : ulong slot ) {
169 :
170 0 : if( FD_UNLIKELY( fd_tower_empty( tower ) ) ) return 1; /* always not locked out if we haven't voted. */
171 0 : if( FD_UNLIKELY( slot <= fd_tower_peek_tail_const( tower )->slot ) ) return 0; /* always locked out from voting for slot <= last vote slot */
172 :
173 : /* Simulate a vote to pop off all the votes that would be expired by
174 : voting for slot. Then check if the newly top-of-tower vote is on
175 : the same fork as slot (if so this implies we can vote for it). */
176 :
177 0 : ulong cnt = simulate_vote( tower, slot ); /* pop off votes that would be expired */
178 0 : if( FD_UNLIKELY( !cnt ) ) return 1; /* tower is empty after popping expired votes */
179 :
180 0 : fd_tower_t const * vote = fd_tower_peek_index_const( tower, cnt - 1 ); /* newly top-of-tower */
181 : # if LOGGING
182 : FD_LOG_NOTICE(( "[%s] lockout_check for slot %lu against vote slot %lu", __func__, slot, vote->slot ));
183 : # endif
184 0 : return fd_forks_is_slot_descendant( forks, vote->slot, slot ); /* check if on same fork */
185 0 : }
186 :
187 : /* switch_check checks if we can switch to the fork of `slot`. Returns
188 : 1 if we can switch, 0 otherwise. Assumes tower is non-empty.
189 :
190 : There are two forks of interest: our last vote fork ("vote fork") and
191 : the fork we want to switch to ("switch fork"). The switch fork is on
192 : the fork of `slot`.
193 :
194 : In order to switch, FD_TOWER_SWITCH_PCT of stake must have voted for
195 : a slot that satisfies the following conditions: the
196 : GCA(slot, last_vote) is an ancestor of the switch_slot
197 :
198 : Recall from the lockout check a validator is locked out from voting
199 : for our last vote slot when their last vote slot is on a different
200 : fork, and that vote's expiration slot > our last vote slot.
201 :
202 : The following pseudocode describes the algorithm:
203 :
204 : ```
205 : for every fork f in the fork tree, take the most recently executed
206 : slot `s` (the leaf of the fork).
207 :
208 : Take the greatest common ancestor of the `s` and the our last vote
209 : slot. If the switch_slot is a descendant of this GCA, then votes for
210 : `s` can count towards the switch threshold.
211 :
212 : query banks(`s`) for vote accounts in `s`
213 : for all vote accounts v in `s`
214 : if v's locked out[1] from voting for our latest vote slot
215 : add v's stake to switch stake
216 :
217 : return switch stake >= FD_TOWER_SWITCH_PCT
218 : ```
219 :
220 : The switch check is used to safeguard optimistic confirmation.
221 : Specifically: FD_TOWER_OPT_CONF_PCT + FD_TOWER_SWITCH_PCT >= 1. */
222 :
223 : int
224 : switch_check( fd_tower_t const * tower,
225 : fd_forks_t * forks,
226 : fd_epoch_stakes_t * epoch_stakes,
227 : ulong total_stake,
228 33 : ulong switch_slot ) {
229 33 : ulong switch_stake = 0;
230 33 : ulong last_vote_slot = fd_tower_peek_tail_const( tower )->slot;
231 33 : ulong root_slot = fd_tower_peek_head_const( tower )->slot;
232 33 : for ( fd_tower_leaves_dlist_iter_t iter = fd_tower_leaves_dlist_iter_fwd_init( forks->tower_leaves_dlist, forks->tower_leaves_pool );
233 138 : !fd_tower_leaves_dlist_iter_done( iter, forks->tower_leaves_dlist, forks->tower_leaves_pool );
234 120 : iter = fd_tower_leaves_dlist_iter_fwd_next( iter, forks->tower_leaves_dlist, forks->tower_leaves_pool ) ) {
235 :
236 : /* Iterate over all the leaves of all forks */
237 120 : fd_tower_leaf_t * leaf = fd_tower_leaves_dlist_iter_ele( iter, forks->tower_leaves_dlist, forks->tower_leaves_pool );
238 120 : ulong candidate_slot = leaf->slot;
239 120 : ulong lca = fd_forks_lowest_common_ancestor( forks, candidate_slot, last_vote_slot );
240 :
241 120 : if( lca != ULONG_MAX && fd_forks_is_slot_descendant( forks, lca, switch_slot ) ) {
242 :
243 : /* This candidate slot may be considered for the switch proof, if
244 : it passes the following conditions:
245 :
246 : https://github.com/anza-xyz/agave/blob/c7b97bc77addacf03b229c51b47c18650d909576/core/src/consensus.rs#L1117
247 :
248 : Now for this candidate slot, look at the lockouts that were created at
249 : the time that we processed the bank for this candidate slot. */
250 :
251 78 : for( fd_lockout_slots_t const * slot = fd_lockout_slots_map_ele_query_const( forks->lockout_slots_map, &candidate_slot, NULL, forks->lockout_slots_pool );
252 84 : slot;
253 78 : slot = fd_lockout_slots_map_ele_next_const ( slot, NULL, forks->lockout_slots_pool ) ) {
254 21 : ulong interval_end = slot->interval_end;
255 21 : ulong key = fd_lockout_interval_key( candidate_slot, interval_end );
256 :
257 : /* Intervals are keyed by the end of the interval. If the end of
258 : the interval is < the last vote slot, then these vote
259 : accounts with this particular lockout are NOT locked out from
260 : voting for the last vote slot, which means we can skip this
261 : set of intervals. */
262 :
263 21 : if( interval_end < last_vote_slot ) continue;
264 :
265 : /* At this point we can actually query for the intervals by
266 : end interval to get the vote accounts. */
267 :
268 15 : for( fd_lockout_intervals_t const * interval = fd_lockout_intervals_map_ele_query_const( forks->lockout_intervals_map, &key, NULL, forks->lockout_intervals_pool );
269 24 : interval;
270 24 : interval = fd_lockout_intervals_map_ele_next_const( interval, NULL, forks->lockout_intervals_pool ) ) {
271 24 : ulong vote_slot = interval->interval_start;
272 24 : fd_hash_t const * vote_acc = &interval->vote_account_pubkey;
273 :
274 24 : if( FD_UNLIKELY( !fd_forks_is_slot_descendant( forks, vote_slot, last_vote_slot ) &&
275 24 : vote_slot > root_slot ) ) {
276 24 : fd_voter_stake_key_t key = { .vote_account = *vote_acc, .slot = switch_slot };
277 24 : fd_voter_stake_t const * voter_stake = fd_voter_stake_map_ele_query_const( epoch_stakes->voter_stake_map, &key, NULL, epoch_stakes->voter_stake_pool );
278 24 : if( FD_UNLIKELY( !voter_stake ) ) {
279 0 : FD_BASE58_ENCODE_32_BYTES( vote_acc->key, vote_acc_b58 );
280 0 : FD_LOG_CRIT(( "missing voter stake for vote account %s on slot %lu. Is this an error?", vote_acc_b58, switch_slot ));
281 0 : }
282 24 : ulong voter_idx = fd_voter_stake_pool_idx( epoch_stakes->voter_stake_pool, voter_stake );
283 :
284 : /* Don't count this vote account towards the switch cqheck if it has already been used. */
285 24 : if( FD_UNLIKELY( fd_used_acc_scratch_test( epoch_stakes->used_acc_scratch, voter_idx ) ) ) continue;
286 :
287 24 : fd_used_acc_scratch_insert( epoch_stakes->used_acc_scratch, voter_idx );
288 24 : switch_stake += voter_stake->stake;
289 24 : if( FD_LIKELY( (double)switch_stake >= (double)total_stake * SWITCH_RATIO ) ) {
290 15 : fd_used_acc_scratch_null( epoch_stakes->used_acc_scratch );
291 15 : FD_LOG_INFO(( "[%s] switch check from slot %lu to slot %lu passed with percentage: %.0lf%%", __func__, last_vote_slot, switch_slot, (double)switch_stake / (double)total_stake * 100.0 ));
292 15 : return 1;
293 15 : }
294 24 : }
295 24 : }
296 15 : }
297 78 : }
298 120 : }
299 18 : fd_used_acc_scratch_null( epoch_stakes->used_acc_scratch );
300 18 : FD_LOG_INFO(( "[%s] switch check from slot %lu to slot %lu failed with percentage: %.0lf%%", __func__, last_vote_slot, switch_slot, (double)switch_stake / (double)total_stake * 100.0 ));
301 18 : return 0;
302 33 : }
303 :
304 : /* threshold_check checks if we pass the threshold required to vote for
305 : `slot`. Returns 1 if we pass the threshold check, 0 otherwise.
306 :
307 : The following psuedocode describes the algorithm:
308 :
309 : ```
310 : simulate that we have voted for `slot`
311 :
312 : for all vote accounts in the current epoch
313 :
314 : simulate that the vote account has voted for `slot`
315 :
316 : pop all votes expired by that simulated vote
317 :
318 : if the validator's latest tower vote after expiry >= our threshold
319 : slot ie. our vote from THRESHOLD_DEPTH back also after simulating,
320 : then add validator's stake to threshold_stake.
321 :
322 : return threshold_stake >= FD_TOWER_THRESHOLD_RATIO
323 : ```
324 :
325 : The threshold check simulates voting for the current slot to expire
326 : stale votes. This is to prevent validators that haven't voted in a
327 : long time from counting towards the threshold stake. */
328 :
329 : int
330 : threshold_check( fd_tower_t const * tower,
331 : fd_tower_accts_t const * accts,
332 : ulong total_stake,
333 0 : ulong slot ) {
334 :
335 0 : uchar __attribute__((aligned(FD_TOWER_ALIGN))) scratch[ FD_TOWER_FOOTPRINT ];
336 0 : fd_tower_t * scratch_tower = fd_tower_join( fd_tower_new( scratch ) );
337 :
338 : /* First, simulate a vote on our tower, popping off everything that
339 : would be expired by voting for slot. */
340 :
341 0 : ulong cnt = simulate_vote( tower, slot );
342 :
343 : /* We can always vote if our tower is not at least THRESHOLD_DEPTH
344 : deep after simulating. */
345 :
346 0 : if( FD_UNLIKELY( cnt < THRESHOLD_DEPTH ) ) return 1;
347 :
348 : /* Get the vote slot from THRESHOLD_DEPTH back. Note THRESHOLD_DEPTH
349 : is the 8th index back _including_ the simulated vote at index 0. */
350 :
351 0 : ulong threshold_slot = fd_tower_peek_index_const( tower, cnt - THRESHOLD_DEPTH )->slot;
352 0 : ulong threshold_stake = 0;
353 0 : for( fd_tower_accts_iter_t iter = fd_tower_accts_iter_init( accts );
354 0 : !fd_tower_accts_iter_done( accts, iter );
355 0 : iter = fd_tower_accts_iter_next( accts, iter ) ) {
356 0 : fd_tower_accts_t const * acct = fd_tower_accts_iter_ele_const( accts, iter );
357 0 : fd_tower_remove_all( scratch_tower );
358 0 : fd_tower_from_vote_acc( scratch_tower, acct->data );
359 :
360 0 : ulong cnt = simulate_vote( scratch_tower, slot ); /* expire votes */
361 0 : if( FD_UNLIKELY( !cnt ) ) continue; /* no votes left after expiry */
362 :
363 : /* Count their stake towards the threshold check if their last vote
364 : slot >= our threshold slot.
365 :
366 : We know these votes are for our own fork because towers are
367 : sourced from vote _accounts_, not vote _transactions_.
368 :
369 : Because we are iterating vote accounts on the same fork that we
370 : we want to vote for, we know these slots must all occur along the
371 : same fork ancestry.
372 :
373 : Therefore, if their latest vote slot >= our threshold slot, we
374 : know that vote must be for the threshold slot itself or one of
375 : threshold slot's descendants. */
376 :
377 0 : ulong last_vote = fd_tower_peek_index_const( scratch_tower, cnt - 1 )->slot;
378 0 : if( FD_LIKELY( last_vote >= threshold_slot ) ) threshold_stake += acct->stake;
379 0 : }
380 :
381 0 : double threshold_pct = (double)threshold_stake / (double)total_stake;
382 : # if LOGGING
383 : FD_LOG_NOTICE(( "[%s] ok? %d. top: %lu. threshold: %lu. stake: %.0lf%%.", __func__, threshold_pct > THRESHOLD_RATIO, fd_tower_peek_tail_const( tower )->slot, threshold_slot, threshold_pct * 100.0 ));
384 : # endif
385 0 : return threshold_pct > THRESHOLD_RATIO;
386 0 : }
387 :
388 : int
389 : propagated_check( fd_notar_t * notar,
390 0 : ulong slot ) {
391 :
392 0 : fd_notar_slot_t * notar_slot = fd_notar_slot_query( notar->slot_map, slot, NULL );
393 0 : if( FD_UNLIKELY( !notar_slot ) ) return 1;
394 :
395 0 : if( FD_LIKELY( notar_slot->is_leader ) ) return 1; /* can always vote for slot in which we're leader */
396 0 : if( FD_LIKELY( notar_slot->prev_leader_slot==ULONG_MAX ) ) return 1; /* haven't been leader yet */
397 :
398 0 : fd_notar_slot_t * prev_leader_notar_slot = fd_notar_slot_query( notar->slot_map, notar_slot->prev_leader_slot, NULL );
399 0 : if( FD_LIKELY( !prev_leader_notar_slot ) ) return 1; /* already pruned rooted */
400 :
401 0 : return prev_leader_notar_slot->is_propagated;
402 0 : }
403 :
404 : fd_tower_out_t
405 : fd_tower_vote_and_reset( fd_tower_t * tower,
406 : fd_tower_accts_t * accts,
407 : fd_epoch_stakes_t * epoch_stakes,
408 : fd_forks_t * forks,
409 : fd_ghost_t * ghost,
410 0 : fd_notar_t * notar ) {
411 :
412 0 : uchar flags = 0;
413 0 : fd_ghost_blk_t const * best_blk = fd_ghost_best( ghost, fd_ghost_root( ghost ) );
414 0 : fd_ghost_blk_t const * reset_blk = NULL;
415 0 : fd_ghost_blk_t const * vote_blk = NULL;
416 :
417 : /* Case 0: if we haven't voted yet then we can always vote and reset
418 : to ghost_best.
419 :
420 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L933-L935 */
421 :
422 0 : if( FD_UNLIKELY( fd_tower_empty( tower ) ) ) {
423 0 : fd_tower_forks_t * fork = fd_forks_query( forks, best_blk->slot );
424 0 : fork->voted = 1;
425 0 : fork->voted_block_id = best_blk->id;
426 0 : return (fd_tower_out_t){
427 0 : .flags = flags,
428 0 : .reset_slot = best_blk->slot,
429 0 : .reset_block_id = best_blk->id,
430 0 : .vote_slot = best_blk->slot,
431 0 : .vote_block_id = best_blk->id,
432 0 : .root_slot = push_vote( tower, best_blk->slot )
433 0 : };
434 0 : }
435 :
436 0 : ulong prev_vote_slot = fd_tower_peek_tail_const( tower )->slot;
437 0 : fd_tower_forks_t * prev_vote_fork = fd_forks_query( forks, prev_vote_slot );
438 0 : fd_hash_t * prev_vote_block_id = &prev_vote_fork->voted_block_id;
439 0 : fd_ghost_blk_t * prev_vote_blk = fd_ghost_query( ghost, prev_vote_block_id );
440 :
441 : /* Case 1: if an ancestor of our prev vote (including prev vote
442 : itself) is an unconfirmed duplicate, then our prev vote was on a
443 : duplicate fork.
444 :
445 : There are two subcases to check. */
446 :
447 0 : int invalid_ancestor = !!fd_ghost_invalid_ancestor( ghost, prev_vote_blk );
448 0 : if( FD_UNLIKELY( invalid_ancestor ) ) { /* do we have an invalid ancestor? */
449 :
450 : /* Case 1a: ghost_best is an ancestor of prev vote. This means
451 : ghost_best is rolling back to an ancestor that precedes the
452 : duplicate ancestor on the same fork as our prev vote. In this
453 : case, we can't vote on our ancestor, but we do reset to that
454 : ancestor.
455 :
456 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1016-L1019 */
457 :
458 0 : int ancestor_rollback = prev_vote_blk != best_blk && !!fd_ghost_ancestor( ghost, prev_vote_blk, &best_blk->id );
459 0 : if( FD_LIKELY( ancestor_rollback ) ) {
460 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_ANCESTOR_ROLLBACK );
461 0 : reset_blk = best_blk;
462 0 : }
463 :
464 : /* Case 1b: ghost_best is not an ancestor, but prev_vote is a
465 : duplicate and we've confirmed its duplicate sibling. In this
466 : case, we allow switching to ghost_best without a switch proof.
467 :
468 : Example: slot 5 is a duplicate. We first receive, replay and
469 : vote for block 5, so that is our prev vote. We later receive
470 : block 5' and observe that it is duplicate confirmed. ghost_best
471 : now returns block 5' and we both vote and reset to block 5'
472 : regardless of the switch check.
473 :
474 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1021-L1024 */
475 :
476 0 : int sibling_confirmed = 0!=memcmp( &prev_vote_fork->voted_block_id, &prev_vote_fork->confirmed_block_id, sizeof(fd_hash_t) );
477 0 : if( FD_LIKELY( sibling_confirmed ) ) {
478 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SIBLING_CONFIRMED );
479 0 : reset_blk = best_blk;
480 0 : vote_blk = best_blk;
481 0 : }
482 :
483 : /* At this point our prev vote was on a duplicate fork but didn't
484 : match either of the above subcases.
485 :
486 : In this case, we have to pass the switch check to reset to a
487 : different fork from prev vote (same as non-duplicate case). */
488 0 : }
489 :
490 : /* Case 3: if our prev vote slot is an ancestor of the best slot, then
491 : they are on the same fork and we can both reset to it. We can also
492 : vote for it if we pass the can_vote checks.
493 :
494 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1057 */
495 :
496 0 : else if( FD_LIKELY( fd_forks_is_slot_ancestor( forks, best_blk->slot, prev_vote_slot ) ) ) {
497 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SAME_FORK );
498 0 : reset_blk = best_blk;
499 0 : vote_blk = best_blk;
500 0 : }
501 :
502 : /* Case 4: if our prev vote is not an ancestor of the best block, then
503 : it is on a different fork. If we pass the switch check, we can
504 : reset to it. If we additionally pass the lockout check, we can
505 : also vote for it.
506 :
507 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1208-L1215
508 :
509 : Note also Agave uses the best blk's total stake for checking the
510 : threshold.
511 :
512 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus/fork_choice.rs#L443-L445 */
513 :
514 0 : else if( FD_LIKELY( switch_check( tower, forks, epoch_stakes, best_blk->total_stake, best_blk->slot ) ) ) {
515 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SWITCH_PASS );
516 0 : reset_blk = best_blk;
517 0 : vote_blk = best_blk;
518 0 : }
519 :
520 : /* Case 5: same as case 4 but we didn't pass the switch check. In
521 : this case we reset to either ghost_best or ghost_deepest beginning
522 : from our prev vote blk.
523 :
524 : We must reset to a block beginning from our prev vote fork to
525 : ensure votes get a chance to propagate. Because in order for votes
526 : to land, someone needs to build a block on that fork.
527 :
528 : We reset to ghost_best or ghost_deepest depending on whether our
529 : prev vote is valid. When it's invalid we use ghost_deepest instead
530 : of ghost_best, because ghost_best won't be able to return a valid
531 : block beginning from our prev_vote because by definition the entire
532 : subtree will be invalid.
533 :
534 : When our prev vote fork is not a duplicate, we want to propagate
535 : votes that might allow others to switch to our fork. In addition,
536 : if our prev vote fork is a duplicate, we want to propagate votes
537 : that might "duplicate confirm" that block (reach 52% of stake).
538 :
539 : See top-level documentation in fd_tower.h for more details on vote
540 : propagation. */
541 :
542 0 : else {
543 :
544 : /* Case 5a: failed switch check, duplicate.
545 :
546 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus/heaviest_subtree_fork_choice.rs#L1187 */
547 :
548 0 : if( FD_UNLIKELY( invalid_ancestor ) ) {
549 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SWITCH_FAIL );
550 0 : reset_blk = fd_ghost_deepest( ghost, prev_vote_blk );
551 0 : }
552 :
553 : /* Case 5b: failed switch check, non-duplicate.
554 :
555 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus/fork_choice.rs#L200 */
556 :
557 0 : else {
558 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SWITCH_FAIL );
559 0 : reset_blk = fd_ghost_best( ghost, prev_vote_blk );
560 0 : }
561 0 : }
562 :
563 : /* If there is a block to vote for, there are a few additional checks
564 : to make sure we can actually vote for it.
565 :
566 : Specifically, we need to make sure we're not locked out, pass the
567 : threshold check and that our previous leader block has propagated
568 : (reached the prop threshold according to fd_notar).
569 :
570 : https://github.com/firedancer-io/agave/blob/master/core/src/consensus/fork_choice.rs#L382-L385
571 :
572 : Agave uses the total stake on the fork being threshold checked
573 : (vote_blk) for determining whether it meets the stake threshold. */
574 :
575 0 : if( FD_LIKELY( vote_blk ) ) {
576 0 : if ( FD_UNLIKELY( !lockout_check( tower, forks, vote_blk->slot ) ) ) {
577 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_LOCKOUT_FAIL );
578 0 : vote_blk = NULL;
579 0 : }
580 0 : else if( FD_UNLIKELY( !threshold_check( tower, accts, vote_blk->total_stake, vote_blk->slot ) ) ) {
581 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_THRESHOLD_FAIL );
582 0 : vote_blk = NULL;
583 0 : }
584 0 : else if( FD_UNLIKELY( !propagated_check( notar, vote_blk->slot ) ) ) {
585 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_PROPAGATED_FAIL );
586 0 : vote_blk = NULL;
587 0 : }
588 0 : }
589 :
590 0 : FD_TEST( reset_blk ); /* always a reset_blk */
591 0 : fd_tower_out_t out = {
592 0 : .flags = flags,
593 0 : .reset_slot = reset_blk->slot,
594 0 : .reset_block_id = reset_blk->id,
595 0 : .vote_slot = ULONG_MAX,
596 0 : .root_slot = ULONG_MAX
597 0 : };
598 :
599 : /* Finally, if our vote passed all the checks, we actually push the
600 : vote onto the tower. */
601 :
602 0 : if( FD_LIKELY( vote_blk ) ) {
603 0 : out.vote_slot = vote_blk->slot;
604 0 : out.vote_block_id = vote_blk->id;
605 0 : out.root_slot = push_vote( tower, vote_blk->slot );
606 :
607 : /* Query our tower fork for this slot we're voting for. Note this
608 : can never be NULL because we record tower forks as we replay, and
609 : we should never be voting on something we haven't replayed. */
610 :
611 0 : fd_tower_forks_t * fork = fd_forks_query( forks, vote_blk->slot );
612 0 : fork->voted = 1;
613 0 : fork->voted_block_id = vote_blk->id;
614 :
615 : /* Query the root slot's block id from tower forks. This block id
616 : may not necessarily be confirmed, because confirmation requires
617 : votes on the block itself (vs. block and its descendants).
618 :
619 : So if we have a confirmed block id, we return that. Otherwise
620 : we return our own vote block id for that slot, which we assume
621 : is the cluster converged on by the time we're rooting it.
622 :
623 : The only way it is possible for us to root the wrong version of
624 : a block (ie. not the one the cluster confirmed) is if there is
625 : mass equivocation (>2/3 of threshold check stake has voted for
626 : two versions of a block). This exceeds the equivocation safety
627 : threshold and we would eventually detect this via a bank hash
628 : mismatch and error out. */
629 :
630 0 : if( FD_LIKELY( out.root_slot!=ULONG_MAX ) ) {
631 0 : fd_tower_forks_t * root_fork = fd_forks_query( forks, out.root_slot );
632 0 : out.root_block_id = *fd_ptr_if( root_fork->confirmed, &root_fork->confirmed_block_id, &root_fork->voted_block_id );
633 0 : }
634 0 : }
635 :
636 : # if LOGGING
637 : FD_LOG_NOTICE(( "[%s] code: %d. reset_slot: %lu. vote_slot: %lu. root_slot: %lu.", __func__, out.code, out.reset_slot, out.vote_slot, out.root_slot ));
638 : # endif
639 :
640 0 : return out;
641 0 : }
642 :
643 : void
644 : fd_tower_reconcile( fd_tower_t * tower,
645 : ulong root,
646 0 : uchar const * vote_account_data ) {
647 0 : ulong on_chain_vote = fd_voter_vote_slot( vote_account_data );
648 0 : ulong on_chain_root = fd_voter_root_slot( vote_account_data );
649 :
650 0 : fd_tower_vote_t const * last_vote = fd_tower_peek_tail_const( tower );
651 0 : ulong last_vote_slot = last_vote ? last_vote->slot : ULONG_MAX;
652 :
653 0 : if( FD_UNLIKELY( ( on_chain_vote==ULONG_MAX && last_vote_slot==ULONG_MAX ) ) ) return;
654 0 : if( FD_LIKELY ( ( on_chain_vote!=ULONG_MAX && last_vote_slot!=ULONG_MAX
655 0 : && on_chain_vote <= last_vote_slot ) ) ) return;
656 :
657 : /* At this point our local tower is too old, and we need to replace it
658 : with our on-chain tower. However, it's possible our local root is
659 : newer than the on-chain root (even though the tower is older). The
660 : most likely reason this happens is because we just booted from a
661 : snapshot and the snapshot slot > on-chain root.
662 :
663 : So we need to filter out the stale votes < snapshot slot. This
664 : mirrors the Agave logic:
665 : https://github.com/firedancer-io/agave/blob/master/core/src/replay_stage.rs#L3690-L3719 */
666 :
667 0 : if( FD_LIKELY( on_chain_root == ULONG_MAX || root > on_chain_root ) ) {
668 0 : fd_tower_remove_all( tower );
669 0 : fd_voter_t const * voter = (fd_voter_t const *)fd_type_pun_const( vote_account_data );
670 0 : uint kind = fd_uint_load_4_fast( vote_account_data ); /* skip node_pubkey */
671 0 : for( ulong i=0; i<voter->votes_cnt; i++ ) {
672 0 : if( FD_LIKELY( kind==FD_VOTER_V3 ) ) fd_tower_push_tail( tower, (fd_tower_t){ .slot = voter->votes_v3[i].slot, .conf = voter->votes_v3[i].conf } );
673 0 : if( FD_LIKELY( kind==FD_VOTER_V2 ) ) fd_tower_push_tail( tower, (fd_tower_t){ .slot = voter->votes_v2[i].slot, .conf = voter->votes_v2[i].conf } );
674 0 : }
675 :
676 : /* Fast forward our tower to tower_root by retaining only votes >
677 : local tower root. */
678 :
679 0 : while( FD_LIKELY( !fd_tower_empty( tower ) ) ) {
680 0 : fd_tower_t const * vote = fd_tower_peek_head_const( tower );
681 0 : if( FD_LIKELY( vote->slot > root ) ) break;
682 0 : fd_tower_pop_head( tower );
683 0 : }
684 0 : }
685 0 : }
686 :
687 : void
688 : fd_tower_from_vote_acc( fd_tower_t * tower,
689 39 : uchar const * vote_acc ) {
690 39 : fd_voter_t const * voter = (fd_voter_t const *)fd_type_pun_const( vote_acc );
691 39 : uint kind = fd_uint_load_4_fast( vote_acc ); /* skip node_pubkey */
692 78 : for( ulong i=0; i<voter->votes_cnt; i++ ) {
693 39 : if( FD_LIKELY( kind==FD_VOTER_V3 ) ) fd_tower_push_tail( tower, (fd_tower_t){ .slot = voter->votes_v3[i].slot, .conf = voter->votes_v3[i].conf } );
694 39 : if( FD_LIKELY( kind==FD_VOTER_V2 ) ) fd_tower_push_tail( tower, (fd_tower_t){ .slot = voter->votes_v2[i].slot, .conf = voter->votes_v2[i].conf } );
695 39 : }
696 39 : }
697 :
698 : void
699 : fd_tower_to_vote_txn( fd_tower_t const * tower,
700 : ulong root,
701 : fd_lockout_offset_t * lockouts_scratch,
702 : fd_hash_t const * bank_hash,
703 : fd_hash_t const * recent_blockhash,
704 : fd_pubkey_t const * validator_identity,
705 : fd_pubkey_t const * vote_authority,
706 : fd_pubkey_t const * vote_acc,
707 0 : fd_txn_p_t * vote_txn ) {
708 :
709 0 : fd_compact_vote_state_update_t tower_sync;
710 0 : tower_sync.root = fd_ulong_if( root == ULONG_MAX, 0UL, root );
711 0 : tower_sync.lockouts_len = (ushort)fd_tower_cnt( tower );
712 0 : tower_sync.lockouts = lockouts_scratch;
713 0 : tower_sync.timestamp = fd_log_wallclock() / (long)1e9; /* seconds */
714 0 : tower_sync.has_timestamp = 1;
715 :
716 0 : ulong prev = tower_sync.root;
717 0 : ulong i = 0UL;
718 0 : for( fd_tower_iter_t iter = fd_tower_iter_init( tower );
719 0 : !fd_tower_iter_done( tower, iter );
720 0 : iter = fd_tower_iter_next( tower, iter ) ) {
721 0 : fd_tower_t const * vote = fd_tower_iter_ele_const( tower, iter );
722 0 : tower_sync.lockouts[i].offset = vote->slot - prev;
723 0 : tower_sync.lockouts[i].confirmation_count = (uchar)vote->conf;
724 0 : prev = vote->slot;
725 0 : i++;
726 0 : }
727 0 : memcpy( tower_sync.hash.uc, bank_hash, sizeof(fd_hash_t) );
728 :
729 0 : uchar * txn_out = vote_txn->payload;
730 0 : uchar * txn_meta_out = vote_txn->_;
731 :
732 0 : int same_addr = !memcmp( validator_identity, vote_authority, sizeof(fd_pubkey_t) );
733 0 : if( FD_LIKELY( same_addr ) ) {
734 :
735 : /* 0: validator identity
736 : 1: vote account address
737 : 2: vote program */
738 :
739 0 : fd_txn_accounts_t votes;
740 0 : votes.signature_cnt = 1;
741 0 : votes.readonly_signed_cnt = 0;
742 0 : votes.readonly_unsigned_cnt = 1;
743 0 : votes.acct_cnt = 3;
744 0 : votes.signers_w = validator_identity;
745 0 : votes.signers_r = NULL;
746 0 : votes.non_signers_w = vote_acc;
747 0 : votes.non_signers_r = &fd_solana_vote_program_id;
748 0 : FD_TEST( fd_txn_base_generate( txn_meta_out, txn_out, votes.signature_cnt, &votes, recent_blockhash->uc ) );
749 :
750 0 : } else {
751 :
752 : /* 0: validator identity
753 : 1: vote authority
754 : 2: vote account address
755 : 3: vote program */
756 :
757 0 : fd_txn_accounts_t votes;
758 0 : votes.signature_cnt = 2;
759 0 : votes.readonly_signed_cnt = 1;
760 0 : votes.readonly_unsigned_cnt = 1;
761 0 : votes.acct_cnt = 4;
762 0 : votes.signers_w = validator_identity;
763 0 : votes.signers_r = vote_authority;
764 0 : votes.non_signers_w = vote_acc;
765 0 : votes.non_signers_r = &fd_solana_vote_program_id;
766 0 : FD_TEST( fd_txn_base_generate( txn_meta_out, txn_out, votes.signature_cnt, &votes, recent_blockhash->uc ) );
767 0 : }
768 :
769 : /* Add the vote instruction to the transaction. */
770 :
771 0 : fd_vote_instruction_t vote_ix;
772 0 : uchar vote_ix_buf[FD_TXN_MTU];
773 0 : vote_ix.discriminant = fd_vote_instruction_enum_compact_update_vote_state;
774 0 : vote_ix.inner.compact_update_vote_state = tower_sync;
775 0 : fd_bincode_encode_ctx_t encode = { .data = vote_ix_buf, .dataend = ( vote_ix_buf + FD_TXN_MTU ) };
776 0 : fd_vote_instruction_encode( &vote_ix, &encode );
777 0 : uchar program_id;
778 0 : uchar ix_accs[2];
779 0 : if( FD_LIKELY( same_addr ) ) {
780 0 : ix_accs[0] = 1; /* vote account address */
781 0 : ix_accs[1] = 0; /* vote authority */
782 0 : program_id = 2; /* vote program */
783 0 : } else {
784 0 : ix_accs[0] = 2; /* vote account address */
785 0 : ix_accs[1] = 1; /* vote authority */
786 0 : program_id = 3; /* vote program */
787 0 : }
788 0 : ushort vote_ix_sz = (ushort)fd_vote_instruction_size( &vote_ix );
789 0 : vote_txn->payload_sz = fd_txn_add_instr( txn_meta_out, txn_out, program_id, ix_accs, 2, vote_ix_buf, vote_ix_sz );
790 0 : }
791 :
792 : int
793 0 : fd_tower_verify( fd_tower_t const * tower ) {
794 0 : if( FD_UNLIKELY( fd_tower_cnt( tower )>=FD_TOWER_VOTE_MAX ) ) {
795 0 : FD_LOG_WARNING(( "[%s] invariant violation: cnt %lu >= FD_TOWER_VOTE_MAX %lu", __func__, fd_tower_cnt( tower ), (ulong)FD_TOWER_VOTE_MAX ));
796 0 : return -1;
797 0 : }
798 :
799 0 : fd_tower_t const * prev = NULL;
800 0 : for( fd_tower_iter_t iter = fd_tower_iter_init( tower );
801 0 : !fd_tower_iter_done( tower, iter );
802 0 : iter = fd_tower_iter_next( tower, iter ) ) {
803 0 : fd_tower_t const * vote = fd_tower_iter_ele_const( tower, iter );
804 0 : if( FD_UNLIKELY( prev && ( vote->slot < prev->slot || vote->conf < prev->conf ) ) ) {
805 0 : FD_LOG_WARNING(( "[%s] invariant violation: vote (slot:%lu conf:%lu) prev (slot:%lu conf:%lu)", __func__, vote->slot, vote->conf, prev->slot, prev->conf ));
806 0 : return -1;
807 0 : }
808 0 : prev = vote;
809 0 : }
810 0 : return 0;
811 0 : }
812 :
813 : #include <stdio.h>
814 :
815 : void
816 0 : fd_tower_print( fd_tower_t const * tower, ulong root ) {
817 0 : FD_LOG_NOTICE( ( "\n\n[Tower]" ) );
818 :
819 0 : if( FD_UNLIKELY( fd_tower_empty( tower ) ) ) return;
820 :
821 0 : ulong max_slot = 0;
822 :
823 : /* Determine spacing. */
824 :
825 0 : for( fd_tower_iter_t iter = fd_tower_iter_init_rev( tower );
826 0 : !fd_tower_iter_done_rev( tower, iter );
827 0 : iter = fd_tower_iter_prev ( tower, iter ) ) {
828 :
829 0 : max_slot = fd_ulong_max( max_slot, fd_tower_iter_ele_const( tower, iter )->slot );
830 0 : }
831 :
832 : /* Calculate the number of digits in the maximum slot value. */
833 :
834 0 : int digit_cnt = 0;
835 0 : unsigned long rem = max_slot;
836 0 : do {
837 0 : rem /= 10;
838 0 : ++digit_cnt;
839 0 : } while( rem > 0 );
840 :
841 : /* Print the column headers. */
842 :
843 0 : printf( "slot%*s | %s\n", digit_cnt - (int)strlen("slot"), "", "confirmation count" );
844 :
845 : /* Print the divider line. */
846 :
847 0 : for( int i = 0; i < digit_cnt; i++ ) {
848 0 : printf( "-" );
849 0 : }
850 0 : printf( " | " );
851 0 : for( ulong i = 0; i < strlen( "confirmation count" ); i++ ) {
852 0 : printf( "-" );
853 0 : }
854 0 : printf( "\n" );
855 :
856 : /* Print each vote as a table. */
857 :
858 0 : for( fd_tower_iter_t iter = fd_tower_iter_init_rev( tower );
859 0 : !fd_tower_iter_done_rev( tower, iter );
860 0 : iter = fd_tower_iter_prev ( tower, iter ) ) {
861 :
862 0 : fd_tower_t const * vote = fd_tower_iter_ele_const( tower, iter );
863 0 : printf( "%*lu | %lu\n", digit_cnt, vote->slot, vote->conf );
864 0 : max_slot = fd_ulong_max( max_slot, fd_tower_iter_ele_const( tower, iter )->slot );
865 0 : }
866 0 : if( FD_UNLIKELY( root==ULONG_MAX ) ) {
867 0 : printf( "%*s | root\n", digit_cnt, "NULL" );
868 0 : } else {
869 0 : printf( "%*lu | root\n", digit_cnt, root );
870 0 : }
871 : printf( "\n" );
872 0 : }
|