Line data Source code
1 : #include <limits.h>
2 : #include <unistd.h>
3 : #include <fcntl.h>
4 : #include <sys/stat.h>
5 :
6 : #include "fd_tower.h"
7 : #include "../voter/fd_voter.h"
8 : #include "fd_tower_forks.h"
9 : #include "../../flamenco/txn/fd_txn_generate.h"
10 : #include "../../flamenco/runtime/fd_system_ids.h"
11 :
12 : #define LOGGING 0
13 :
14 0 : #define THRESHOLD_DEPTH (8)
15 0 : #define THRESHOLD_RATIO (2.0 / 3.0)
16 : #define SWITCH_RATIO (0.38)
17 :
18 : /* expiration calculates the expiration slot of vote given a slot and
19 : confirmation count. */
20 :
21 : static inline ulong
22 42 : expiration_slot( fd_tower_t const * vote ) {
23 42 : ulong lockout = 1UL << vote->conf;
24 42 : return vote->slot + lockout;
25 42 : }
26 :
27 : /* simulate_vote simulates voting for slot, popping all votes from the
28 : top that would be consecutively expired by voting for slot. */
29 :
30 : ulong
31 : simulate_vote( fd_tower_t const * tower,
32 48 : ulong slot ) {
33 48 : ulong cnt = fd_tower_cnt( tower );
34 54 : while( cnt ) {
35 42 : fd_tower_t const * top_vote = fd_tower_peek_index_const( tower, cnt - 1 );
36 42 : if( FD_LIKELY( expiration_slot( top_vote ) >= slot ) ) break; /* expire only if consecutive */
37 6 : cnt--;
38 6 : }
39 48 : return cnt;
40 48 : }
41 :
42 : /* push_vote pushes a new vote for slot onto the tower. Pops and
43 : returns the new root (bottom of the tower) if it reaches max lockout
44 : as a result of the new vote. Otherwise, returns ULONG_MAX.
45 :
46 : Max lockout is equivalent to 1 << FD_TOWER_VOTE_MAX + 1 (which
47 : implies confirmation count is FD_TOWER_VOTE_MAX + 1). As a result,
48 : fd_tower_vote also maintains the invariant that the tower contains at
49 : most FD_TOWER_VOTE_MAX votes, because (in addition to vote expiry)
50 : there will always be a pop before reaching FD_TOWER_VOTE_MAX + 1. */
51 :
52 : ulong
53 : push_vote( fd_tower_t * tower,
54 48 : ulong slot ) {
55 :
56 48 : # if FD_TOWER_PARANOID
57 48 : fd_tower_t const * vote = fd_tower_peek_tail_const( tower );
58 48 : if( FD_UNLIKELY( vote && slot <= vote->slot ) ) FD_LOG_ERR(( "[%s] slot %lu <= vote->slot %lu", __func__, slot, vote->slot )); /* caller error*/
59 48 : # endif
60 :
61 : /* Use simulate_vote to determine how many expired votes to pop. */
62 :
63 48 : ulong cnt = simulate_vote( tower, slot );
64 :
65 : /* Pop everything that got expired. */
66 :
67 54 : while( FD_LIKELY( fd_tower_cnt( tower ) > cnt ) ) {
68 6 : fd_tower_pop_tail( tower );
69 6 : }
70 :
71 : /* If the tower is still full after expiring, then pop and return the
72 : bottom vote slot as the new root because this vote has incremented
73 : it to max lockout. Otherwise this is a no-op and there is no new
74 : root (FD_SLOT_NULL). */
75 :
76 48 : ulong root = FD_SLOT_NULL;
77 48 : if( FD_LIKELY( fd_tower_full( tower ) ) ) { /* optimize for full tower */
78 0 : root = fd_tower_pop_head( tower ).slot;
79 0 : }
80 :
81 : /* Increment confirmations (double lockouts) for consecutive
82 : confirmations in prior votes. */
83 :
84 48 : ulong prev_conf = 0;
85 48 : for( fd_tower_iter_t iter = fd_tower_iter_init_rev( tower );
86 147 : !fd_tower_iter_done_rev( tower, iter );
87 99 : iter = fd_tower_iter_prev ( tower, iter ) ) {
88 99 : fd_tower_t * vote = fd_tower_iter_ele( tower, iter );
89 99 : if( FD_UNLIKELY( vote->conf != ++prev_conf ) ) break;
90 99 : vote->conf++;
91 99 : }
92 :
93 : /* Add the new vote to the tower. */
94 :
95 48 : fd_tower_push_tail( tower, (fd_tower_t){ .slot = slot, .conf = 1 } );
96 :
97 : /* Return the new root (FD_SLOT_NULL if there is none). */
98 :
99 48 : return root;
100 48 : }
101 :
102 : /* lockout_check checks if we are locked out from voting for slot.
103 : Returns 1 if we can vote for slot without violating lockout, 0
104 : otherwise.
105 :
106 : After voting for a slot n, we are locked out for 2^k slots, where k
107 : is the confirmation count of that vote. Once locked out, we cannot
108 : vote for a different fork until that previously-voted fork expires at
109 : slot n+2^k. This implies the earliest slot in which we can switch
110 : from the previously-voted fork is (n+2^k)+1. We use `ghost` to
111 : determine whether `slot` is on the same or different fork as previous
112 : vote slots.
113 :
114 : In the case of the tower, every vote has its own expiration slot
115 : depending on confirmations. The confirmation count is the max number
116 : of consecutive votes that have been pushed on top of the vote, and
117 : not necessarily its current height in the tower.
118 :
119 : For example, the following is a diagram of a tower pushing and
120 : popping with each vote:
121 :
122 :
123 : slot | confirmation count
124 : -----|-------------------
125 : 4 | 1 <- vote
126 : 3 | 2
127 : 2 | 3
128 : 1 | 4
129 :
130 :
131 : slot | confirmation count
132 : -----|-------------------
133 : 9 | 1 <- vote
134 : 2 | 3
135 : 1 | 4
136 :
137 :
138 : slot | confirmation count
139 : -----|-------------------
140 : 10 | 1 <- vote
141 : 9 | 2
142 : 2 | 3
143 : 1 | 4
144 :
145 :
146 : slot | confirmation count
147 : -----|-------------------
148 : 11 | 1 <- vote
149 : 10 | 2
150 : 9 | 3
151 : 2 | 4
152 : 1 | 5
153 :
154 :
155 : slot | confirmation count
156 : -----|-------------------
157 : 18 | 1 <- vote
158 : 2 | 4
159 : 1 | 5
160 :
161 :
162 : In the final tower, note the gap in confirmation counts between slot
163 : 18 and slot 2, even though slot 18 is directly above slot 2. */
164 :
165 : int
166 : lockout_check( fd_tower_t const * tower,
167 : fd_forks_t * forks,
168 0 : ulong slot ) {
169 :
170 0 : if( FD_UNLIKELY( fd_tower_empty( tower ) ) ) return 1; /* always not locked out if we haven't voted. */
171 0 : if( FD_UNLIKELY( slot <= fd_tower_peek_tail_const( tower )->slot ) ) return 0; /* always locked out from voting for slot <= last vote slot */
172 :
173 : /* Simulate a vote to pop off all the votes that would be expired by
174 : voting for slot. Then check if the newly top-of-tower vote is on
175 : the same fork as slot (if so this implies we can vote for it). */
176 :
177 0 : ulong cnt = simulate_vote( tower, slot ); /* pop off votes that would be expired */
178 0 : if( FD_UNLIKELY( !cnt ) ) return 1; /* tower is empty after popping expired votes */
179 :
180 0 : fd_tower_t const * vote = fd_tower_peek_index_const( tower, cnt - 1 ); /* newly top-of-tower */
181 : # if LOGGING
182 : FD_LOG_NOTICE(( "[%s] lockout_check for slot %lu against vote slot %lu", __func__, slot, vote->slot ));
183 : # endif
184 0 : return fd_forks_is_slot_descendant( forks, vote->slot, slot ); /* check if on same fork */
185 0 : }
186 :
187 : /* switch_check checks if we can switch to the fork of `slot`. Returns
188 : 1 if we can switch, 0 otherwise. Assumes tower is non-empty.
189 :
190 : There are two forks of interest: our last vote fork ("vote fork") and
191 : the fork we want to switch to ("switch fork"). The switch fork is on
192 : the fork of `slot`.
193 :
194 : In order to switch, FD_TOWER_SWITCH_PCT of stake must have voted for
195 : a slot that satisfies the following conditions: the
196 : GCA(slot, last_vote) is an ancestor of the switch_slot
197 :
198 : Recall from the lockout check a validator is locked out from voting
199 : for our last vote slot when their last vote slot is on a different
200 : fork, and that vote's expiration slot > our last vote slot.
201 :
202 : The following pseudocode describes the algorithm:
203 :
204 : ```
205 : for every fork f in the fork tree, take the most recently executed
206 : slot `s` (the leaf of the fork).
207 :
208 : Take the greatest common ancestor of the `s` and the our last vote
209 : slot. If the switch_slot is a descendant of this GCA, then votes for
210 : `s` can count towards the switch threshold.
211 :
212 : query banks(`s`) for vote accounts in `s`
213 : for all vote accounts v in `s`
214 : if v's locked out[1] from voting for our latest vote slot
215 : add v's stake to switch stake
216 :
217 : return switch stake >= FD_TOWER_SWITCH_PCT
218 : ```
219 :
220 : The switch check is used to safeguard optimistic confirmation.
221 : Specifically: FD_TOWER_OPT_CONF_PCT + FD_TOWER_SWITCH_PCT >= 1. */
222 :
223 : int
224 : switch_check( fd_tower_t const * tower,
225 : fd_forks_t * forks,
226 : fd_epoch_stakes_t * epoch_stakes,
227 : ulong total_stake,
228 33 : ulong switch_slot ) {
229 33 : ulong switch_stake = 0;
230 33 : ulong last_vote_slot = fd_tower_peek_tail_const( tower )->slot;
231 33 : ulong root_slot = fd_tower_peek_head_const( tower )->slot;
232 33 : for ( fd_tower_leaves_dlist_iter_t iter = fd_tower_leaves_dlist_iter_fwd_init( forks->tower_leaves_dlist, forks->tower_leaves_pool );
233 138 : !fd_tower_leaves_dlist_iter_done( iter, forks->tower_leaves_dlist, forks->tower_leaves_pool );
234 120 : iter = fd_tower_leaves_dlist_iter_fwd_next( iter, forks->tower_leaves_dlist, forks->tower_leaves_pool ) ) {
235 :
236 : /* Iterate over all the leaves of all forks */
237 120 : fd_tower_leaf_t * leaf = fd_tower_leaves_dlist_iter_ele( iter, forks->tower_leaves_dlist, forks->tower_leaves_pool );
238 120 : ulong candidate_slot = leaf->slot;
239 120 : ulong lca = fd_forks_lowest_common_ancestor( forks, candidate_slot, last_vote_slot );
240 :
241 120 : if( lca != ULONG_MAX && fd_forks_is_slot_descendant( forks, lca, switch_slot ) ) {
242 :
243 : /* This candidate slot may be considered for the switch proof, if
244 : it passes the following conditions:
245 :
246 : https://github.com/anza-xyz/agave/blob/c7b97bc77addacf03b229c51b47c18650d909576/core/src/consensus.rs#L1117
247 :
248 : Now for this candidate slot, look at the lockouts that were created at
249 : the time that we processed the bank for this candidate slot. */
250 :
251 78 : for( fd_lockout_slots_t const * slot = fd_lockout_slots_map_ele_query_const( forks->lockout_slots_map, &candidate_slot, NULL, forks->lockout_slots_pool );
252 84 : slot;
253 78 : slot = fd_lockout_slots_map_ele_next_const ( slot, NULL, forks->lockout_slots_pool ) ) {
254 21 : ulong interval_end = slot->interval_end;
255 21 : ulong key = fd_lockout_interval_key( candidate_slot, interval_end );
256 :
257 : /* Intervals are keyed by the end of the interval. If the end of
258 : the interval is < the last vote slot, then these vote
259 : accounts with this particular lockout are NOT locked out from
260 : voting for the last vote slot, which means we can skip this
261 : set of intervals. */
262 :
263 21 : if( interval_end < last_vote_slot ) continue;
264 :
265 : /* At this point we can actually query for the intervals by
266 : end interval to get the vote accounts. */
267 :
268 15 : for( fd_lockout_intervals_t const * interval = fd_lockout_intervals_map_ele_query_const( forks->lockout_intervals_map, &key, NULL, forks->lockout_intervals_pool );
269 24 : interval;
270 24 : interval = fd_lockout_intervals_map_ele_next_const( interval, NULL, forks->lockout_intervals_pool ) ) {
271 24 : ulong vote_slot = interval->interval_start;
272 24 : fd_hash_t const * vote_acc = &interval->vote_account_pubkey;
273 :
274 24 : if( FD_UNLIKELY( !fd_forks_is_slot_descendant( forks, vote_slot, last_vote_slot ) &&
275 24 : vote_slot > root_slot ) ) {
276 24 : fd_voter_stake_key_t key = { .vote_account = *vote_acc, .slot = switch_slot };
277 24 : fd_voter_stake_t const * voter_stake = fd_voter_stake_map_ele_query_const( epoch_stakes->voter_stake_map, &key, NULL, epoch_stakes->voter_stake_pool );
278 24 : if( FD_UNLIKELY( !voter_stake ) ) FD_LOG_CRIT(( "missing voter stake for vote account %s on slot %lu. Is this an error?", FD_BASE58_ENC_32_ALLOCA( vote_acc ), switch_slot ));
279 24 : ulong voter_idx = fd_voter_stake_pool_idx( epoch_stakes->voter_stake_pool, voter_stake );
280 :
281 : /* Don't count this vote account towards the switch cqheck if it has already been used. */
282 24 : if( FD_UNLIKELY( fd_used_acc_scratch_test( epoch_stakes->used_acc_scratch, voter_idx ) ) ) continue;
283 :
284 24 : fd_used_acc_scratch_insert( epoch_stakes->used_acc_scratch, voter_idx );
285 24 : switch_stake += voter_stake->stake;
286 24 : if( FD_LIKELY( (double)switch_stake >= (double)total_stake * SWITCH_RATIO ) ) {
287 15 : fd_used_acc_scratch_null( epoch_stakes->used_acc_scratch );
288 15 : FD_LOG_INFO(( "[%s] switch check from slot %lu to slot %lu passed with percentage: %.0lf%%", __func__, last_vote_slot, switch_slot, (double)switch_stake / (double)total_stake * 100.0 ));
289 15 : return 1;
290 15 : }
291 24 : }
292 24 : }
293 15 : }
294 78 : }
295 120 : }
296 18 : fd_used_acc_scratch_null( epoch_stakes->used_acc_scratch );
297 18 : FD_LOG_INFO(( "[%s] switch check from slot %lu to slot %lu failed with percentage: %.0lf%%", __func__, last_vote_slot, switch_slot, (double)switch_stake / (double)total_stake * 100.0 ));
298 18 : return 0;
299 33 : }
300 :
301 : /* threshold_check checks if we pass the threshold required to vote for
302 : `slot`. Returns 1 if we pass the threshold check, 0 otherwise.
303 :
304 : The following psuedocode describes the algorithm:
305 :
306 : ```
307 : simulate that we have voted for `slot`
308 :
309 : for all vote accounts in the current epoch
310 :
311 : simulate that the vote account has voted for `slot`
312 :
313 : pop all votes expired by that simulated vote
314 :
315 : if the validator's latest tower vote after expiry >= our threshold
316 : slot ie. our vote from THRESHOLD_DEPTH back also after simulating,
317 : then add validator's stake to threshold_stake.
318 :
319 : return threshold_stake >= FD_TOWER_THRESHOLD_RATIO
320 : ```
321 :
322 : The threshold check simulates voting for the current slot to expire
323 : stale votes. This is to prevent validators that haven't voted in a
324 : long time from counting towards the threshold stake. */
325 :
326 : int
327 : threshold_check( fd_tower_t const * tower,
328 : fd_tower_accts_t const * accts,
329 : ulong total_stake,
330 0 : ulong slot ) {
331 :
332 0 : uchar __attribute__((aligned(FD_TOWER_ALIGN))) scratch[ FD_TOWER_FOOTPRINT ];
333 0 : fd_tower_t * scratch_tower = fd_tower_join( fd_tower_new( scratch ) );
334 :
335 : /* First, simulate a vote on our tower, popping off everything that
336 : would be expired by voting for slot. */
337 :
338 0 : ulong cnt = simulate_vote( tower, slot );
339 :
340 : /* We can always vote if our tower is not at least THRESHOLD_DEPTH
341 : deep after simulating. */
342 :
343 0 : if( FD_UNLIKELY( cnt < THRESHOLD_DEPTH ) ) return 1;
344 :
345 : /* Get the vote slot from THRESHOLD_DEPTH back. Note THRESHOLD_DEPTH
346 : is the 8th index back _including_ the simulated vote at index 0. */
347 :
348 0 : ulong threshold_slot = fd_tower_peek_index_const( tower, cnt - THRESHOLD_DEPTH )->slot;
349 0 : ulong threshold_stake = 0;
350 0 : for( fd_tower_accts_iter_t iter = fd_tower_accts_iter_init( accts );
351 0 : !fd_tower_accts_iter_done( accts, iter );
352 0 : iter = fd_tower_accts_iter_next( accts, iter ) ) {
353 0 : fd_tower_accts_t const * acct = fd_tower_accts_iter_ele_const( accts, iter );
354 0 : fd_tower_remove_all( scratch_tower );
355 0 : fd_tower_from_vote_acc( scratch_tower, acct->data );
356 :
357 0 : ulong cnt = simulate_vote( scratch_tower, slot ); /* expire votes */
358 0 : if( FD_UNLIKELY( !cnt ) ) continue; /* no votes left after expiry */
359 :
360 : /* Count their stake towards the threshold check if their last vote
361 : slot >= our threshold slot.
362 :
363 : We know these votes are for our own fork because towers are
364 : sourced from vote _accounts_, not vote _transactions_.
365 :
366 : Because we are iterating vote accounts on the same fork that we
367 : we want to vote for, we know these slots must all occur along the
368 : same fork ancestry.
369 :
370 : Therefore, if their latest vote slot >= our threshold slot, we
371 : know that vote must be for the threshold slot itself or one of
372 : threshold slot's descendants. */
373 :
374 0 : ulong last_vote = fd_tower_peek_index_const( scratch_tower, cnt - 1 )->slot;
375 0 : if( FD_LIKELY( last_vote >= threshold_slot ) ) threshold_stake += acct->stake;
376 0 : }
377 :
378 0 : double threshold_pct = (double)threshold_stake / (double)total_stake;
379 : # if LOGGING
380 : FD_LOG_NOTICE(( "[%s] ok? %d. top: %lu. threshold: %lu. stake: %.0lf%%.", __func__, threshold_pct > THRESHOLD_RATIO, fd_tower_peek_tail_const( tower )->slot, threshold_slot, threshold_pct * 100.0 ));
381 : # endif
382 0 : return threshold_pct > THRESHOLD_RATIO;
383 0 : }
384 :
385 : int
386 : propagated_check( fd_notar_t * notar,
387 0 : ulong slot ) {
388 :
389 0 : fd_notar_slot_t * notar_slot = fd_notar_slot_query( notar->slot_map, slot, NULL );
390 0 : if( FD_UNLIKELY( !notar_slot ) ) return 1;
391 :
392 0 : if( FD_LIKELY( notar_slot->is_leader ) ) return 1; /* can always vote for slot in which we're leader */
393 0 : if( FD_LIKELY( notar_slot->prev_leader_slot==ULONG_MAX ) ) return 1; /* haven't been leader yet */
394 :
395 0 : fd_notar_slot_t * prev_leader_notar_slot = fd_notar_slot_query( notar->slot_map, notar_slot->prev_leader_slot, NULL );
396 0 : if( FD_LIKELY( !prev_leader_notar_slot ) ) return 1; /* already pruned rooted */
397 :
398 0 : return prev_leader_notar_slot->is_propagated;
399 0 : }
400 :
401 : fd_tower_out_t
402 : fd_tower_vote_and_reset( fd_tower_t * tower,
403 : fd_tower_accts_t * accts,
404 : fd_epoch_stakes_t * epoch_stakes,
405 : fd_forks_t * forks,
406 : fd_ghost_t * ghost,
407 0 : fd_notar_t * notar ) {
408 :
409 0 : uchar flags = 0;
410 0 : fd_ghost_blk_t const * best_blk = fd_ghost_best( ghost, fd_ghost_root( ghost ) );
411 0 : fd_ghost_blk_t const * reset_blk = NULL;
412 0 : fd_ghost_blk_t const * vote_blk = NULL;
413 :
414 : /* Case 0: if we haven't voted yet then we can always vote and reset
415 : to ghost_best.
416 :
417 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L933-L935 */
418 :
419 0 : if( FD_UNLIKELY( fd_tower_empty( tower ) ) ) {
420 0 : fd_tower_forks_t * fork = fd_forks_query( forks, best_blk->slot );
421 0 : fork->voted = 1;
422 0 : fork->voted_block_id = best_blk->id;
423 0 : return (fd_tower_out_t){
424 0 : .flags = flags,
425 0 : .reset_slot = best_blk->slot,
426 0 : .reset_block_id = best_blk->id,
427 0 : .vote_slot = best_blk->slot,
428 0 : .vote_block_id = best_blk->id,
429 0 : .root_slot = push_vote( tower, best_blk->slot )
430 0 : };
431 0 : }
432 :
433 0 : ulong prev_vote_slot = fd_tower_peek_tail_const( tower )->slot;
434 0 : fd_tower_forks_t * prev_vote_fork = fd_forks_query( forks, prev_vote_slot );
435 0 : fd_hash_t * prev_vote_block_id = &prev_vote_fork->voted_block_id;
436 0 : fd_ghost_blk_t * prev_vote_blk = fd_ghost_query( ghost, prev_vote_block_id );
437 :
438 : /* Case 1: if an ancestor of our prev vote (including prev vote
439 : itself) is an unconfirmed duplicate, then our prev vote was on a
440 : duplicate fork.
441 :
442 : There are two subcases to check. */
443 :
444 0 : int invalid_ancestor = !!fd_ghost_invalid_ancestor( ghost, prev_vote_blk );
445 0 : if( FD_UNLIKELY( invalid_ancestor ) ) { /* do we have an invalid ancestor? */
446 :
447 : /* Case 1a: ghost_best is an ancestor of prev vote. This means
448 : ghost_best is rolling back to an ancestor that precedes the
449 : duplicate ancestor on the same fork as our prev vote. In this
450 : case, we can't vote on our ancestor, but we do reset to that
451 : ancestor.
452 :
453 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1016-L1019 */
454 :
455 0 : int ancestor_rollback = prev_vote_blk != best_blk && !!fd_ghost_ancestor( ghost, prev_vote_blk, &best_blk->id );
456 0 : if( FD_LIKELY( ancestor_rollback ) ) {
457 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_ANCESTOR_ROLLBACK );
458 0 : reset_blk = best_blk;
459 0 : }
460 :
461 : /* Case 1b: ghost_best is not an ancestor, but prev_vote is a
462 : duplicate and we've confirmed its duplicate sibling. In this
463 : case, we allow switching to ghost_best without a switch proof.
464 :
465 : Example: slot 5 is a duplicate. We first receive, replay and
466 : vote for block 5, so that is our prev vote. We later receive
467 : block 5' and observe that it is duplicate confirmed. ghost_best
468 : now returns block 5' and we both vote and reset to block 5'
469 : regardless of the switch check.
470 :
471 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1021-L1024 */
472 :
473 0 : int sibling_confirmed = 0!=memcmp( &prev_vote_fork->voted_block_id, &prev_vote_fork->confirmed_block_id, sizeof(fd_hash_t) );
474 0 : if( FD_LIKELY( sibling_confirmed ) ) {
475 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SIBLING_CONFIRMED );
476 0 : reset_blk = best_blk;
477 0 : vote_blk = best_blk;
478 0 : }
479 :
480 : /* At this point our prev vote was on a duplicate fork but didn't
481 : match either of the above subcases.
482 :
483 : In this case, we have to pass the switch check to reset to a
484 : different fork from prev vote (same as non-duplicate case). */
485 0 : }
486 :
487 : /* Case 3: if our prev vote slot is an ancestor of the best slot, then
488 : they are on the same fork and we can both reset to it. We can also
489 : vote for it if we pass the can_vote checks.
490 :
491 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1057 */
492 :
493 0 : else if( FD_LIKELY( fd_forks_is_slot_ancestor( forks, best_blk->slot, prev_vote_slot ) ) ) {
494 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SAME_FORK );
495 0 : reset_blk = best_blk;
496 0 : vote_blk = best_blk;
497 0 : }
498 :
499 : /* Case 4: if our prev vote is not an ancestor of the best block, then
500 : it is on a different fork. If we pass the switch check, we can
501 : reset to it. If we additionally pass the lockout check, we can
502 : also vote for it.
503 :
504 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1208-L1215
505 :
506 : Note also Agave uses the best blk's total stake for checking the
507 : threshold.
508 :
509 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus/fork_choice.rs#L443-L445 */
510 :
511 0 : else if( FD_LIKELY( switch_check( tower, forks, epoch_stakes, best_blk->total_stake, best_blk->slot ) ) ) {
512 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SWITCH_PASS );
513 0 : reset_blk = best_blk;
514 0 : vote_blk = best_blk;
515 0 : }
516 :
517 : /* Case 5: same as case 4 but we didn't pass the switch check. In
518 : this case we reset to either ghost_best or ghost_deepest beginning
519 : from our prev vote blk.
520 :
521 : We must reset to a block beginning from our prev vote fork to
522 : ensure votes get a chance to propagate. Because in order for votes
523 : to land, someone needs to build a block on that fork.
524 :
525 : We reset to ghost_best or ghost_deepest depending on whether our
526 : prev vote is valid. When it's invalid we use ghost_deepest instead
527 : of ghost_best, because ghost_best won't be able to return a valid
528 : block beginning from our prev_vote because by definition the entire
529 : subtree will be invalid.
530 :
531 : When our prev vote fork is not a duplicate, we want to propagate
532 : votes that might allow others to switch to our fork. In addition,
533 : if our prev vote fork is a duplicate, we want to propagate votes
534 : that might "duplicate confirm" that block (reach 52% of stake).
535 :
536 : See top-level documentation in fd_tower.h for more details on vote
537 : propagation. */
538 :
539 0 : else {
540 :
541 : /* Case 5a: failed switch check, duplicate.
542 :
543 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus/heaviest_subtree_fork_choice.rs#L1187 */
544 :
545 0 : if( FD_UNLIKELY( invalid_ancestor ) ) {
546 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SWITCH_FAIL );
547 0 : reset_blk = fd_ghost_deepest( ghost, prev_vote_blk );
548 0 : }
549 :
550 : /* Case 5b: failed switch check, non-duplicate.
551 :
552 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus/fork_choice.rs#L200 */
553 :
554 0 : else {
555 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SWITCH_FAIL );
556 0 : reset_blk = fd_ghost_best( ghost, prev_vote_blk );
557 0 : }
558 0 : }
559 :
560 : /* If there is a block to vote for, there are a few additional checks
561 : to make sure we can actually vote for it.
562 :
563 : Specifically, we need to make sure we're not locked out, pass the
564 : threshold check and that our previous leader block has propagated
565 : (reached the prop threshold according to fd_notar).
566 :
567 : https://github.com/firedancer-io/agave/blob/master/core/src/consensus/fork_choice.rs#L382-L385
568 :
569 : Agave uses the total stake on the fork being threshold checked
570 : (vote_blk) for determining whether it meets the stake threshold. */
571 :
572 0 : if( FD_LIKELY( vote_blk ) ) {
573 0 : if ( FD_UNLIKELY( !lockout_check( tower, forks, vote_blk->slot ) ) ) {
574 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_LOCKOUT_FAIL );
575 0 : vote_blk = NULL;
576 0 : }
577 0 : else if( FD_UNLIKELY( !threshold_check( tower, accts, vote_blk->total_stake, vote_blk->slot ) ) ) {
578 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_THRESHOLD_FAIL );
579 0 : vote_blk = NULL;
580 0 : }
581 0 : else if( FD_UNLIKELY( !propagated_check( notar, vote_blk->slot ) ) ) {
582 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_PROPAGATED_FAIL );
583 0 : vote_blk = NULL;
584 0 : }
585 0 : }
586 :
587 0 : FD_TEST( reset_blk ); /* always a reset_blk */
588 0 : fd_tower_out_t out = {
589 0 : .flags = flags,
590 0 : .reset_slot = reset_blk->slot,
591 0 : .reset_block_id = reset_blk->id,
592 0 : .vote_slot = ULONG_MAX,
593 0 : .root_slot = ULONG_MAX
594 0 : };
595 :
596 : /* Finally, if our vote passed all the checks, we actually push the
597 : vote onto the tower. */
598 :
599 0 : if( FD_LIKELY( vote_blk ) ) {
600 0 : out.vote_slot = vote_blk->slot;
601 0 : out.vote_block_id = vote_blk->id;
602 0 : out.root_slot = push_vote( tower, vote_blk->slot );
603 :
604 : /* Query our tower fork for this slot we're voting for. Note this
605 : can never be NULL because we record tower forks as we replay, and
606 : we should never be voting on something we haven't replayed. */
607 :
608 0 : fd_tower_forks_t * fork = fd_forks_query( forks, vote_blk->slot );
609 0 : fork->voted = 1;
610 0 : fork->voted_block_id = vote_blk->id;
611 :
612 : /* Query the root slot's block id from tower forks. This block id
613 : may not necessarily be confirmed, because confirmation requires
614 : votes on the block itself (vs. block and its descendants).
615 :
616 : So if we have a confirmed block id, we return that. Otherwise
617 : we return our own vote block id for that slot, which we assume
618 : is the cluster converged on by the time we're rooting it.
619 :
620 : The only way it is possible for us to root the wrong version of
621 : a block (ie. not the one the cluster confirmed) is if there is
622 : mass equivocation (>2/3 of threshold check stake has voted for
623 : two versions of a block). This exceeds the equivocation safety
624 : threshold and we would eventually detect this via a bank hash
625 : mismatch and error out. */
626 :
627 0 : if( FD_LIKELY( out.root_slot!=ULONG_MAX ) ) {
628 0 : fd_tower_forks_t * root_fork = fd_forks_query( forks, out.root_slot );
629 0 : out.root_block_id = *fd_ptr_if( root_fork->confirmed, &root_fork->confirmed_block_id, &root_fork->voted_block_id );
630 0 : }
631 0 : }
632 :
633 : # if LOGGING
634 : FD_LOG_NOTICE(( "[%s] code: %d. reset_slot: %lu. vote_slot: %lu. root_slot: %lu.", __func__, out.code, out.reset_slot, out.vote_slot, out.root_slot ));
635 : # endif
636 :
637 0 : return out;
638 0 : }
639 :
640 : void
641 : fd_tower_reconcile( fd_tower_t * tower,
642 : ulong root,
643 0 : uchar const * vote_account_data ) {
644 0 : ulong on_chain_vote = fd_voter_vote_slot( vote_account_data );
645 0 : ulong on_chain_root = fd_voter_root_slot( vote_account_data );
646 :
647 0 : fd_tower_vote_t const * last_vote = fd_tower_peek_tail_const( tower );
648 0 : ulong last_vote_slot = last_vote ? last_vote->slot : ULONG_MAX;
649 :
650 0 : if( FD_UNLIKELY( ( on_chain_vote==ULONG_MAX && last_vote_slot==ULONG_MAX ) ) ) return;
651 0 : if( FD_LIKELY ( ( on_chain_vote!=ULONG_MAX && last_vote_slot!=ULONG_MAX
652 0 : && on_chain_vote <= last_vote_slot ) ) ) return;
653 :
654 : /* At this point our local tower is too old, and we need to replace it
655 : with our on-chain tower. However, it's possible our local root is
656 : newer than the on-chain root (even though the tower is older). The
657 : most likely reason this happens is because we just booted from a
658 : snapshot and the snapshot slot > on-chain root.
659 :
660 : So we need to filter out the stale votes < snapshot slot. This
661 : mirrors the Agave logic:
662 : https://github.com/firedancer-io/agave/blob/master/core/src/replay_stage.rs#L3690-L3719 */
663 :
664 0 : if( FD_LIKELY( on_chain_root == ULONG_MAX || root > on_chain_root ) ) {
665 0 : fd_tower_remove_all( tower );
666 0 : fd_voter_t const * voter = (fd_voter_t const *)fd_type_pun_const( vote_account_data );
667 0 : uint kind = fd_uint_load_4_fast( vote_account_data ); /* skip node_pubkey */
668 0 : for( ulong i=0; i<voter->votes_cnt; i++ ) {
669 0 : if( FD_LIKELY( kind==FD_VOTER_V3 ) ) fd_tower_push_tail( tower, (fd_tower_t){ .slot = voter->votes_v3[i].slot, .conf = voter->votes_v3[i].conf } );
670 0 : if( FD_LIKELY( kind==FD_VOTER_V2 ) ) fd_tower_push_tail( tower, (fd_tower_t){ .slot = voter->votes_v2[i].slot, .conf = voter->votes_v2[i].conf } );
671 0 : }
672 :
673 : /* Fast forward our tower to tower_root by retaining only votes >
674 : local tower root. */
675 :
676 0 : while( FD_LIKELY( !fd_tower_empty( tower ) ) ) {
677 0 : fd_tower_t const * vote = fd_tower_peek_head_const( tower );
678 0 : if( FD_LIKELY( vote->slot > root ) ) break;
679 0 : fd_tower_pop_head( tower );
680 0 : }
681 0 : }
682 0 : }
683 :
684 : void
685 : fd_tower_from_vote_acc( fd_tower_t * tower,
686 39 : uchar const * vote_acc ) {
687 39 : fd_voter_t const * voter = (fd_voter_t const *)fd_type_pun_const( vote_acc );
688 39 : uint kind = fd_uint_load_4_fast( vote_acc ); /* skip node_pubkey */
689 78 : for( ulong i=0; i<voter->votes_cnt; i++ ) {
690 39 : if( FD_LIKELY( kind==FD_VOTER_V3 ) ) fd_tower_push_tail( tower, (fd_tower_t){ .slot = voter->votes_v3[i].slot, .conf = voter->votes_v3[i].conf } );
691 39 : if( FD_LIKELY( kind==FD_VOTER_V2 ) ) fd_tower_push_tail( tower, (fd_tower_t){ .slot = voter->votes_v2[i].slot, .conf = voter->votes_v2[i].conf } );
692 39 : }
693 39 : }
694 :
695 : void
696 : fd_tower_to_vote_txn( fd_tower_t const * tower,
697 : ulong root,
698 : fd_lockout_offset_t * lockouts_scratch,
699 : fd_hash_t const * bank_hash,
700 : fd_hash_t const * recent_blockhash,
701 : fd_pubkey_t const * validator_identity,
702 : fd_pubkey_t const * vote_authority,
703 : fd_pubkey_t const * vote_acc,
704 0 : fd_txn_p_t * vote_txn ) {
705 :
706 0 : fd_compact_vote_state_update_t tower_sync;
707 0 : tower_sync.root = fd_ulong_if( root == ULONG_MAX, 0UL, root );
708 0 : tower_sync.lockouts_len = (ushort)fd_tower_cnt( tower );
709 0 : tower_sync.lockouts = lockouts_scratch;
710 0 : tower_sync.timestamp = fd_log_wallclock() / (long)1e9; /* seconds */
711 0 : tower_sync.has_timestamp = 1;
712 :
713 0 : ulong prev = tower_sync.root;
714 0 : ulong i = 0UL;
715 0 : for( fd_tower_iter_t iter = fd_tower_iter_init( tower );
716 0 : !fd_tower_iter_done( tower, iter );
717 0 : iter = fd_tower_iter_next( tower, iter ) ) {
718 0 : fd_tower_t const * vote = fd_tower_iter_ele_const( tower, iter );
719 0 : tower_sync.lockouts[i].offset = vote->slot - prev;
720 0 : tower_sync.lockouts[i].confirmation_count = (uchar)vote->conf;
721 0 : prev = vote->slot;
722 0 : i++;
723 0 : }
724 0 : memcpy( tower_sync.hash.uc, bank_hash, sizeof(fd_hash_t) );
725 :
726 0 : uchar * txn_out = vote_txn->payload;
727 0 : uchar * txn_meta_out = vote_txn->_;
728 :
729 0 : int same_addr = !memcmp( validator_identity, vote_authority, sizeof(fd_pubkey_t) );
730 0 : if( FD_LIKELY( same_addr ) ) {
731 :
732 : /* 0: validator identity
733 : 1: vote account address
734 : 2: vote program */
735 :
736 0 : fd_txn_accounts_t votes;
737 0 : votes.signature_cnt = 1;
738 0 : votes.readonly_signed_cnt = 0;
739 0 : votes.readonly_unsigned_cnt = 1;
740 0 : votes.acct_cnt = 3;
741 0 : votes.signers_w = validator_identity;
742 0 : votes.signers_r = NULL;
743 0 : votes.non_signers_w = vote_acc;
744 0 : votes.non_signers_r = &fd_solana_vote_program_id;
745 0 : FD_TEST( fd_txn_base_generate( txn_meta_out, txn_out, votes.signature_cnt, &votes, recent_blockhash->uc ) );
746 :
747 0 : } else {
748 :
749 : /* 0: validator identity
750 : 1: vote authority
751 : 2: vote account address
752 : 3: vote program */
753 :
754 0 : fd_txn_accounts_t votes;
755 0 : votes.signature_cnt = 2;
756 0 : votes.readonly_signed_cnt = 1;
757 0 : votes.readonly_unsigned_cnt = 1;
758 0 : votes.acct_cnt = 4;
759 0 : votes.signers_w = validator_identity;
760 0 : votes.signers_r = vote_authority;
761 0 : votes.non_signers_w = vote_acc;
762 0 : votes.non_signers_r = &fd_solana_vote_program_id;
763 0 : FD_TEST( fd_txn_base_generate( txn_meta_out, txn_out, votes.signature_cnt, &votes, recent_blockhash->uc ) );
764 0 : }
765 :
766 : /* Add the vote instruction to the transaction. */
767 :
768 0 : fd_vote_instruction_t vote_ix;
769 0 : uchar vote_ix_buf[FD_TXN_MTU];
770 0 : vote_ix.discriminant = fd_vote_instruction_enum_compact_update_vote_state;
771 0 : vote_ix.inner.compact_update_vote_state = tower_sync;
772 0 : fd_bincode_encode_ctx_t encode = { .data = vote_ix_buf, .dataend = ( vote_ix_buf + FD_TXN_MTU ) };
773 0 : fd_vote_instruction_encode( &vote_ix, &encode );
774 0 : uchar program_id;
775 0 : uchar ix_accs[2];
776 0 : if( FD_LIKELY( same_addr ) ) {
777 0 : ix_accs[0] = 1; /* vote account address */
778 0 : ix_accs[1] = 0; /* vote authority */
779 0 : program_id = 2; /* vote program */
780 0 : } else {
781 0 : ix_accs[0] = 2; /* vote account address */
782 0 : ix_accs[1] = 1; /* vote authority */
783 0 : program_id = 3; /* vote program */
784 0 : }
785 0 : ushort vote_ix_sz = (ushort)fd_vote_instruction_size( &vote_ix );
786 0 : vote_txn->payload_sz = fd_txn_add_instr( txn_meta_out, txn_out, program_id, ix_accs, 2, vote_ix_buf, vote_ix_sz );
787 0 : }
788 :
789 : int
790 0 : fd_tower_verify( fd_tower_t const * tower ) {
791 0 : if( FD_UNLIKELY( fd_tower_cnt( tower )>=FD_TOWER_VOTE_MAX ) ) {
792 0 : FD_LOG_WARNING(( "[%s] invariant violation: cnt %lu >= FD_TOWER_VOTE_MAX %lu", __func__, fd_tower_cnt( tower ), (ulong)FD_TOWER_VOTE_MAX ));
793 0 : return -1;
794 0 : }
795 :
796 0 : fd_tower_t const * prev = NULL;
797 0 : for( fd_tower_iter_t iter = fd_tower_iter_init( tower );
798 0 : !fd_tower_iter_done( tower, iter );
799 0 : iter = fd_tower_iter_next( tower, iter ) ) {
800 0 : fd_tower_t const * vote = fd_tower_iter_ele_const( tower, iter );
801 0 : if( FD_UNLIKELY( prev && ( vote->slot < prev->slot || vote->conf < prev->conf ) ) ) {
802 0 : FD_LOG_WARNING(( "[%s] invariant violation: vote (slot:%lu conf:%lu) prev (slot:%lu conf:%lu)", __func__, vote->slot, vote->conf, prev->slot, prev->conf ));
803 0 : return -1;
804 0 : }
805 0 : prev = vote;
806 0 : }
807 0 : return 0;
808 0 : }
809 :
810 : #include <stdio.h>
811 :
812 : void
813 0 : fd_tower_print( fd_tower_t const * tower, ulong root ) {
814 0 : FD_LOG_NOTICE( ( "\n\n[Tower]" ) );
815 :
816 0 : if( FD_UNLIKELY( fd_tower_empty( tower ) ) ) return;
817 :
818 0 : ulong max_slot = 0;
819 :
820 : /* Determine spacing. */
821 :
822 0 : for( fd_tower_iter_t iter = fd_tower_iter_init_rev( tower );
823 0 : !fd_tower_iter_done_rev( tower, iter );
824 0 : iter = fd_tower_iter_prev ( tower, iter ) ) {
825 :
826 0 : max_slot = fd_ulong_max( max_slot, fd_tower_iter_ele_const( tower, iter )->slot );
827 0 : }
828 :
829 : /* Calculate the number of digits in the maximum slot value. */
830 :
831 0 : int digit_cnt = 0;
832 0 : unsigned long rem = max_slot;
833 0 : do {
834 0 : rem /= 10;
835 0 : ++digit_cnt;
836 0 : } while( rem > 0 );
837 :
838 : /* Print the column headers. */
839 :
840 0 : printf( "slot%*s | %s\n", digit_cnt - (int)strlen("slot"), "", "confirmation count" );
841 :
842 : /* Print the divider line. */
843 :
844 0 : for( int i = 0; i < digit_cnt; i++ ) {
845 0 : printf( "-" );
846 0 : }
847 0 : printf( " | " );
848 0 : for( ulong i = 0; i < strlen( "confirmation count" ); i++ ) {
849 0 : printf( "-" );
850 0 : }
851 0 : printf( "\n" );
852 :
853 : /* Print each vote as a table. */
854 :
855 0 : for( fd_tower_iter_t iter = fd_tower_iter_init_rev( tower );
856 0 : !fd_tower_iter_done_rev( tower, iter );
857 0 : iter = fd_tower_iter_prev ( tower, iter ) ) {
858 :
859 0 : fd_tower_t const * vote = fd_tower_iter_ele_const( tower, iter );
860 0 : printf( "%*lu | %lu\n", digit_cnt, vote->slot, vote->conf );
861 0 : max_slot = fd_ulong_max( max_slot, fd_tower_iter_ele_const( tower, iter )->slot );
862 0 : }
863 0 : if( FD_UNLIKELY( root==ULONG_MAX ) ) {
864 0 : printf( "%*s | root\n", digit_cnt, "NULL" );
865 0 : } else {
866 0 : printf( "%*lu | root\n", digit_cnt, root );
867 0 : }
868 : printf( "\n" );
869 0 : }
|