Line data Source code
1 : #include <assert.h>
2 : #include <stdio.h>
3 : #include <string.h>
4 :
5 : #include "fd_tower.h"
6 : #include "../../flamenco/txn/fd_txn_generate.h"
7 : #include "../../flamenco/runtime/fd_system_ids.h"
8 : #include "../../flamenco/runtime/program/vote/fd_vote_state_versioned.h"
9 :
10 : /* Pool and map_chain for fd_tower_blk_t. */
11 :
12 : #define POOL_NAME blk_pool
13 108 : #define POOL_T fd_tower_blk_t
14 : #include "../../util/tmpl/fd_pool.c"
15 :
16 : #define MAP_NAME blk_map
17 3 : #define MAP_ELE_T fd_tower_blk_t
18 : #define MAP_KEY_T ulong
19 279 : #define MAP_KEY slot
20 2928 : #define MAP_KEY_EQ(k0,k1) (*(k0)==*(k1))
21 2571 : #define MAP_KEY_HASH(key,seed) ((*(key))^(seed))
22 : #define MAP_OPTIMIZE_RANDOM_ACCESS_REMOVAL 1
23 : #include "../../util/tmpl/fd_map_chain.c"
24 :
25 : /* lockout_interval tracks a map of lockout intervals.
26 :
27 : We need to track a list of lockout intervals per validator per slot.
28 : Intervals are inclusive. Example:
29 :
30 : After executing slot 33, validator A votes for slot 32, has a tower
31 :
32 : vote | confirmation count | lockout interval
33 : ----- | -------------------|------------------
34 : 32 | 1 | [32, 33]
35 : 2 | 3 | [2, 6]
36 : 1 | 4 | [1, 9]
37 :
38 : The lockout interval is the interval of slots that the validator is
39 : locked out from voting for if they want to switch off that vote. For
40 : example if validator A wants to switch off fork 1, they have to wait
41 : until slot 9.
42 :
43 : Agave tracks a similar structure.
44 :
45 : key: for an interval [vote, vote+lockout] for validator A,
46 : it is stored like:
47 : vote+lockout -> (vote, validator A) -> (2, validator B) -> (any other vote, any other validator)
48 :
49 : Since a validator can have up to 31 entries in the tower, and we have
50 : a max_vote_accounts, we can pool the interval objects to be
51 : 31*max_vote_accounts entries PER bank / executed slot. We can also
52 : string all the intervals of the same bank together as a linkedlist. */
53 :
54 : struct lockout_interval {
55 : ulong key; /* vote_slot (32 bits) | expiration_slot (32 bits) ie. vote_slot + (1 << confirmation count) */
56 : ulong next; /* reserved for fd_map_chain and fd_pool */
57 : fd_hash_t addr; /* vote account address */
58 : ulong start; /* For normal entries: start of interval (vote slot).
59 : For sentinel entries (key has expiration_slot==0):
60 : the interval_end value this sentinel indexes.
61 : Multiple sentinels can exist per slot (one per
62 : unique interval_end), all sharing key (slot, 0)
63 : via MAP_MULTI. */
64 : };
65 : typedef struct lockout_interval lockout_interval_t;
66 :
67 : #define MAP_NAME lockout_interval_map
68 207 : #define MAP_ELE_T lockout_interval_t
69 : #define MAP_MULTI 1
70 279 : #define MAP_KEY key
71 690 : #define MAP_NEXT next
72 : #include "../../util/tmpl/fd_map_chain.c"
73 :
74 : #define POOL_NAME lockout_interval_pool
75 108 : #define POOL_T lockout_interval_t
76 1700835 : #define POOL_NEXT next
77 : #include "../../util/tmpl/fd_pool.c"
78 :
79 : FD_FN_PURE static inline ulong
80 825 : lockout_interval_key( ulong fork_slot, ulong end_interval ) {
81 825 : return (fork_slot << 32) | end_interval;
82 825 : }
83 :
84 0 : #define THRESHOLD_DEPTH (8)
85 0 : #define THRESHOLD_RATIO (2.0 / 3.0)
86 : #define SWITCH_RATIO (0.38)
87 :
88 : ulong
89 477 : fd_tower_align( void ) {
90 477 : return 128UL;
91 477 : }
92 :
93 : ulong
94 : fd_tower_footprint( ulong blk_max,
95 99 : ulong vtr_max ) {
96 99 : ulong lck_interval_max = fd_ulong_pow2_up( FD_TOWER_LOCKOS_MAX*blk_max*vtr_max );
97 99 : ulong lck_pool_max = fd_ulong_pow2_up( 2UL * lck_interval_max );
98 :
99 99 : ulong stk_vtr_chain_cnt = fd_tower_stakes_vtr_map_chain_cnt_est( vtr_max * blk_max );
100 99 : int stk_lg_slot_cnt = fd_ulong_find_msb( fd_ulong_pow2_up( blk_max ) ) + 1;
101 :
102 99 : ulong l = FD_LAYOUT_INIT;
103 99 : l = FD_LAYOUT_APPEND( l, 128UL, sizeof(fd_tower_t) );
104 99 : l = FD_LAYOUT_APPEND( l, fd_tower_vote_align(), fd_tower_vote_footprint() );
105 99 : l = FD_LAYOUT_APPEND( l, blk_pool_align(), blk_pool_footprint ( blk_max ) );
106 99 : l = FD_LAYOUT_APPEND( l, blk_map_align(), blk_map_footprint ( blk_map_chain_cnt_est( blk_max ) ) );
107 99 : l = FD_LAYOUT_APPEND( l, fd_tower_vtr_align(), fd_tower_vtr_footprint ( vtr_max ) );
108 981 : for( ulong i = 0; i < vtr_max; i++ ) {
109 882 : l = FD_LAYOUT_APPEND( l, fd_tower_vote_align(), fd_tower_vote_footprint() );
110 882 : }
111 : /* lockos */
112 99 : l = FD_LAYOUT_APPEND( l, lockout_interval_pool_align(), lockout_interval_pool_footprint( lck_pool_max ) );
113 99 : l = FD_LAYOUT_APPEND( l, lockout_interval_map_align(), lockout_interval_map_footprint ( lck_pool_max ) );
114 : /* stakes */
115 99 : l = FD_LAYOUT_APPEND( l, fd_tower_stakes_vtr_map_align(), fd_tower_stakes_vtr_map_footprint ( stk_vtr_chain_cnt ) );
116 99 : l = FD_LAYOUT_APPEND( l, fd_tower_stakes_vtr_pool_align(), fd_tower_stakes_vtr_pool_footprint( vtr_max * blk_max ) );
117 99 : l = FD_LAYOUT_APPEND( l, fd_tower_stakes_slot_align(), fd_tower_stakes_slot_footprint( stk_lg_slot_cnt ) );
118 99 : l = FD_LAYOUT_APPEND( l, fd_used_acc_scratch_align(), fd_used_acc_scratch_footprint( vtr_max * blk_max ) );
119 99 : return FD_LAYOUT_FINI( l, fd_tower_align() );
120 99 : }
121 :
122 : void *
123 : fd_tower_new( void * shmem,
124 : ulong blk_max,
125 : ulong vtr_max,
126 54 : ulong seed ) {
127 :
128 54 : if( FD_UNLIKELY( !shmem ) ) {
129 0 : FD_LOG_WARNING(( "NULL mem" ));
130 0 : return NULL;
131 0 : }
132 :
133 54 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_tower_align() ) ) ) {
134 0 : FD_LOG_WARNING(( "misaligned mem" ));
135 0 : return NULL;
136 0 : }
137 :
138 54 : ulong footprint = fd_tower_footprint( blk_max, vtr_max );
139 54 : if( FD_UNLIKELY( !footprint ) ) {
140 0 : FD_LOG_WARNING(( "bad blk_max (%lu) or vtr_max (%lu)", blk_max, vtr_max ));
141 0 : return NULL;
142 0 : }
143 :
144 54 : fd_memset( shmem, 0, footprint );
145 :
146 54 : ulong lck_interval_max = fd_ulong_pow2_up( FD_TOWER_LOCKOS_MAX*blk_max*vtr_max );
147 54 : ulong lck_pool_max = fd_ulong_pow2_up( 2UL * lck_interval_max );
148 :
149 54 : ulong stk_vtr_chain_cnt = fd_tower_stakes_vtr_map_chain_cnt_est( vtr_max * blk_max );
150 54 : int stk_lg_slot_cnt = fd_ulong_find_msb( fd_ulong_pow2_up( blk_max ) ) + 1;
151 :
152 54 : FD_SCRATCH_ALLOC_INIT( l, shmem );
153 54 : fd_tower_t * tower = FD_SCRATCH_ALLOC_APPEND( l, 128UL, sizeof(fd_tower_t) );
154 54 : void * votes = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_vote_align(), fd_tower_vote_footprint() );
155 54 : void * blk_pool = FD_SCRATCH_ALLOC_APPEND( l, blk_pool_align(), blk_pool_footprint ( blk_max ) );
156 54 : void * blk_map = FD_SCRATCH_ALLOC_APPEND( l, blk_map_align(), blk_map_footprint ( blk_map_chain_cnt_est( blk_max ) ) );
157 54 : void * vtrs = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_vtr_align(), fd_tower_vtr_footprint ( vtr_max ) );
158 54 : void * towers[ vtr_max ];
159 504 : for( ulong i = 0; i < vtr_max; i++ ) {
160 450 : towers[i] = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_vote_align(), fd_tower_vote_footprint() );
161 450 : }
162 54 : void * lck_pool_mem = FD_SCRATCH_ALLOC_APPEND( l, lockout_interval_pool_align(), lockout_interval_pool_footprint( lck_pool_max ) );
163 54 : void * lck_map_mem = FD_SCRATCH_ALLOC_APPEND( l, lockout_interval_map_align(), lockout_interval_map_footprint ( lck_pool_max ) );
164 54 : void * stk_vtr_map = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_stakes_vtr_map_align(), fd_tower_stakes_vtr_map_footprint ( stk_vtr_chain_cnt ) );
165 54 : void * stk_vtr_pool = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_stakes_vtr_pool_align(), fd_tower_stakes_vtr_pool_footprint( vtr_max * blk_max ) );
166 54 : void * stk_slot_map = FD_SCRATCH_ALLOC_APPEND( l, fd_tower_stakes_slot_align(), fd_tower_stakes_slot_footprint( stk_lg_slot_cnt ) );
167 54 : void * stk_used_acc = FD_SCRATCH_ALLOC_APPEND( l, fd_used_acc_scratch_align(), fd_used_acc_scratch_footprint( vtr_max * blk_max ) );
168 54 : FD_TEST( FD_SCRATCH_ALLOC_FINI( l, fd_tower_align() ) == (ulong)shmem + footprint );
169 :
170 54 : tower->root = ULONG_MAX;
171 54 : tower->blk_max = blk_max;
172 54 : tower->vtr_max = vtr_max;
173 54 : tower->votes = fd_tower_vote_new( votes );
174 54 : tower->blk_pool = blk_pool_new( blk_pool, blk_max );
175 54 : tower->blk_map = blk_map_new( blk_map, blk_map_chain_cnt_est( blk_max ), seed );
176 54 : tower->vtrs = fd_tower_vtr_new( vtrs, vtr_max );
177 504 : for( ulong i = 0; i < vtr_max; i++ ) {
178 450 : fd_tower_vtr_join( tower->vtrs )[i].votes = fd_tower_vote_new( towers[i] );
179 450 : }
180 :
181 54 : tower->lck_pool = lockout_interval_pool_new( lck_pool_mem, lck_pool_max );
182 54 : tower->lck_map = lockout_interval_map_new ( lck_map_mem, lck_pool_max, seed );
183 54 : tower->stk_vtr_map = fd_tower_stakes_vtr_map_new ( stk_vtr_map, stk_vtr_chain_cnt, seed );
184 54 : tower->stk_vtr_pool = fd_tower_stakes_vtr_pool_new( stk_vtr_pool, vtr_max * blk_max );
185 54 : tower->stk_slot_map = fd_tower_stakes_slot_new ( stk_slot_map, stk_lg_slot_cnt, seed );
186 54 : tower->stk_used_acc = fd_used_acc_scratch_new ( stk_used_acc, vtr_max * blk_max );
187 :
188 54 : return shmem;
189 54 : }
190 :
191 : fd_tower_t *
192 54 : fd_tower_join( void * shtower ) {
193 54 : fd_tower_t * tower = (fd_tower_t *)shtower;
194 :
195 54 : if( FD_UNLIKELY( !tower ) ) {
196 0 : FD_LOG_WARNING(( "NULL tower" ));
197 0 : return NULL;
198 0 : }
199 :
200 54 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)tower, fd_tower_align() ) ) ) {
201 0 : FD_LOG_WARNING(( "misaligned tower" ));
202 0 : return NULL;
203 0 : }
204 :
205 54 : tower->votes = fd_tower_vote_join( tower->votes );
206 54 : tower->blk_pool = blk_pool_join ( tower->blk_pool );
207 54 : tower->blk_map = blk_map_join ( tower->blk_map );
208 54 : tower->vtrs = fd_tower_vtr_join ( tower->vtrs );
209 504 : for( ulong i = 0; i < tower->vtr_max; i++ ) {
210 450 : tower->vtrs[i].votes = fd_tower_vote_join( tower->vtrs[i].votes );
211 450 : }
212 54 : tower->lck_pool = lockout_interval_pool_join( tower->lck_pool );
213 54 : tower->lck_map = lockout_interval_map_join ( tower->lck_map );
214 54 : tower->stk_vtr_map = fd_tower_stakes_vtr_map_join ( tower->stk_vtr_map );
215 54 : tower->stk_vtr_pool = fd_tower_stakes_vtr_pool_join( tower->stk_vtr_pool );
216 54 : tower->stk_slot_map = fd_tower_stakes_slot_join ( tower->stk_slot_map );
217 54 : tower->stk_used_acc = fd_used_acc_scratch_join ( tower->stk_used_acc );
218 :
219 54 : return tower;
220 54 : }
221 :
222 : void *
223 18 : fd_tower_leave( fd_tower_t const * tower ) {
224 :
225 18 : if( FD_UNLIKELY( !tower ) ) {
226 0 : FD_LOG_WARNING(( "NULL tower" ));
227 0 : return NULL;
228 0 : }
229 :
230 18 : return (void *)tower;
231 18 : }
232 :
233 : void *
234 18 : fd_tower_delete( void * shtower ) {
235 :
236 18 : if( FD_UNLIKELY( !shtower ) ) {
237 0 : FD_LOG_WARNING(( "NULL tower" ));
238 0 : return NULL;
239 0 : }
240 :
241 18 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shtower, fd_tower_align() ) ) ) {
242 0 : FD_LOG_WARNING(( "misaligned tower" ));
243 0 : return NULL;
244 0 : }
245 :
246 18 : return shtower;
247 18 : }
248 :
249 : /* expiration calculates the expiration slot of vote given a slot and
250 : confirmation count. */
251 :
252 : static inline ulong
253 270 : expiration_slot( fd_tower_vote_t const * vote ) {
254 270 : ulong lockout = 1UL << vote->conf;
255 270 : return vote->slot + lockout;
256 270 : }
257 :
258 : /* simulate_vote simulates voting for slot, popping all votes from the
259 : top that would be consecutively expired by voting for slot. */
260 :
261 : static ulong
262 : simulate_vote( fd_tower_vote_t const * votes,
263 297 : ulong slot ) {
264 297 : ulong cnt = fd_tower_vote_cnt( votes );
265 315 : while( cnt ) {
266 270 : fd_tower_vote_t const * top_vote = fd_tower_vote_peek_index_const( votes, cnt - 1 );
267 270 : if( FD_LIKELY( expiration_slot( top_vote ) >= slot ) ) break; /* expire only if consecutive */
268 18 : cnt--;
269 18 : }
270 297 : return cnt;
271 297 : }
272 :
273 : /* push_vote pushes a new vote for slot onto the tower. Pops and
274 : returns the new root (bottom of the tower) if it reaches max lockout
275 : as a result of the new vote. Otherwise, returns ULONG_MAX.
276 :
277 : Max lockout is equivalent to 1 << FD_TOWER_VOTE_MAX + 1 (which
278 : implies confirmation count is FD_TOWER_VOTE_MAX + 1). As a result,
279 : fd_tower_vote also maintains the invariant that the tower contains at
280 : most FD_TOWER_VOTE_MAX votes, because (in addition to vote expiry)
281 : there will always be a pop before reaching FD_TOWER_VOTE_MAX + 1. */
282 :
283 : static ulong
284 : push_vote( fd_tower_t * tower,
285 291 : ulong slot ) {
286 :
287 : /* Sanity check: slot should always be greater than previous vote slot in tower. */
288 :
289 291 : fd_tower_vote_t const * vote = fd_tower_vote_peek_tail_const( tower->votes );
290 291 : if( FD_UNLIKELY( vote && slot <= vote->slot ) ) FD_LOG_CRIT(( "[%s] slot %lu <= vote->slot %lu", __func__, slot, vote->slot ));
291 :
292 : /* Use simulate_vote to determine how many expired votes to pop. */
293 :
294 291 : ulong cnt = simulate_vote( tower->votes, slot );
295 :
296 : /* Pop everything that got expired. */
297 :
298 306 : while( FD_LIKELY( fd_tower_vote_cnt( tower->votes ) > cnt ) ) {
299 15 : fd_tower_vote_pop_tail( tower->votes );
300 15 : }
301 :
302 : /* If the tower is still full after expiring, then pop and return the
303 : bottom vote slot as the new root because this vote has incremented
304 : it to max lockout. Otherwise this is a no-op and there is no new
305 : root (ULONG_MAX). */
306 :
307 291 : ulong root = ULONG_MAX;
308 291 : if( FD_LIKELY( fd_tower_vote_full( tower->votes ) ) ) { /* optimize for full tower */
309 3 : root = fd_tower_vote_pop_head( tower->votes ).slot;
310 3 : }
311 :
312 : /* Increment confirmations (double lockouts) for consecutive
313 : confirmations in prior votes. */
314 :
315 291 : ulong prev_conf = 0;
316 291 : for( fd_tower_vote_iter_t iter = fd_tower_vote_iter_init_rev( tower->votes );
317 3321 : !fd_tower_vote_iter_done_rev( tower->votes, iter );
318 3033 : iter = fd_tower_vote_iter_prev ( tower->votes, iter ) ) {
319 3033 : fd_tower_vote_t * vote = fd_tower_vote_iter_ele( tower->votes, iter );
320 3033 : if( FD_UNLIKELY( vote->conf != ++prev_conf ) ) break;
321 3030 : vote->conf++;
322 3030 : }
323 :
324 : /* Add the new vote to the tower. */
325 :
326 291 : fd_tower_vote_push_tail( tower->votes, (fd_tower_vote_t){ .slot = slot, .conf = 1 } );
327 :
328 : /* Return the new root (FD_SLOT_NULL if there is none). */
329 :
330 291 : return root;
331 291 : }
332 :
333 : /* lockout_check checks if we are locked out from voting for slot.
334 : Returns 1 if we can vote for slot without violating lockout, 0
335 : otherwise.
336 :
337 : After voting for a slot n, we are locked out for 2^k slots, where k
338 : is the confirmation count of that vote. Once locked out, we cannot
339 : vote for a different fork until that previously-voted fork expires at
340 : slot n+2^k. This implies the earliest slot in which we can switch
341 : from the previously-voted fork is (n+2^k)+1. We use `ghost` to
342 : determine whether `slot` is on the same or different fork as previous
343 : vote slots.
344 :
345 : In the case of the tower, every vote has its own expiration slot
346 : depending on confirmations. The confirmation count is the max number
347 : of consecutive votes that have been pushed on top of the vote, and
348 : not necessarily its current height in the tower.
349 :
350 : For example, the following is a diagram of a tower pushing and
351 : popping with each vote:
352 :
353 :
354 : slot | confirmation count
355 : -----|-------------------
356 : 4 | 1 <- vote
357 : 3 | 2
358 : 2 | 3
359 : 1 | 4
360 :
361 :
362 : slot | confirmation count
363 : -----|-------------------
364 : 9 | 1 <- vote
365 : 2 | 3
366 : 1 | 4
367 :
368 :
369 : slot | confirmation count
370 : -----|-------------------
371 : 10 | 1 <- vote
372 : 9 | 2
373 : 2 | 3
374 : 1 | 4
375 :
376 :
377 : slot | confirmation count
378 : -----|-------------------
379 : 11 | 1 <- vote
380 : 10 | 2
381 : 9 | 3
382 : 2 | 4
383 : 1 | 5
384 :
385 :
386 : slot | confirmation count
387 : -----|-------------------
388 : 18 | 1 <- vote
389 : 2 | 4
390 : 1 | 5
391 :
392 :
393 : In the final tower, note the gap in confirmation counts between slot
394 : 18 and slot 2, even though slot 18 is directly above slot 2. */
395 :
396 : static int
397 : lockout_check( fd_tower_t * tower,
398 3 : ulong slot ) {
399 :
400 : /* Mirrors Agave's Tower::is_recent(): reject slot if it is not strictly
401 : newer than our last vote (non-empty tower) or our root (empty tower,
402 : e.g. snapshot boot).
403 : https://github.com/anza-xyz/agave/blob/v4.0.0-alpha.0/core/src/consensus.rs#L825-L836 */
404 3 : if( FD_UNLIKELY( fd_tower_vote_empty( tower->votes ) ) )
405 0 : return tower->root==ULONG_MAX || slot>tower->root;
406 3 : if( FD_UNLIKELY( slot<=fd_tower_vote_peek_tail_const( tower->votes )->slot ) ) return 0;
407 :
408 : /* Simulate a vote to pop off all the votes that would be expired by
409 : voting for slot. Then check if the newly top-of-tower vote is on
410 : the same fork as slot (if so this implies we can vote for it). */
411 :
412 3 : ulong cnt = simulate_vote( tower->votes, slot ); /* pop off votes that would be expired */
413 3 : if( FD_UNLIKELY( !cnt ) ) return 1; /* tower is empty after popping expired votes */
414 :
415 3 : fd_tower_vote_t const * vote = fd_tower_vote_peek_index_const( tower->votes, cnt - 1 ); /* newly top-of-tower */
416 3 : int lockout = fd_tower_blocks_is_slot_descendant( tower, vote->slot, slot ); /* check if on same fork */
417 3 : return lockout;
418 3 : }
419 :
420 : /* switch_check checks if we can switch to the fork of `slot`. Returns
421 : 1 if we can switch, 0 otherwise. Assumes tower is non-empty.
422 :
423 : There are two forks of interest: our last vote fork ("vote fork") and
424 : the fork we want to switch to ("switch fork"). The switch fork is on
425 : the fork of `slot`.
426 :
427 : In order to switch, SWITCH_RATIO of stake must have voted for
428 : a slot that satisfies the following conditions: the
429 : GCA(slot, last_vote) is an ancestor of the switch_slot
430 :
431 : Recall from the lockout check a validator is locked out from voting
432 : for our last vote slot when their last vote slot is on a different
433 : fork, and that vote's expiration slot > our last vote slot.
434 :
435 : The following pseudocode describes the algorithm:
436 :
437 : ```
438 : for every fork f in the fork tree, take the most recently executed
439 : slot `s` (the leaf of the fork).
440 :
441 : Take the greatest common ancestor of the `s` and the our last vote
442 : slot. If the switch_slot is a descendant of this GCA, then votes for
443 : `s` can count towards the switch threshold.
444 :
445 : query banks(`s`) for vote accounts in `s`
446 : for all vote accounts v in `s`
447 : if v's locked out[1] from voting for our latest vote slot
448 : add v's stake to switch stake
449 :
450 : return switch stake >= total_stake * SWITCH_RATIO
451 : ```
452 :
453 : The switch check is used to safeguard optimistic confirmation.
454 : Specifically: optimistic confirmation pct + SWITCH_RATIO >= 1. */
455 :
456 : static int
457 : is_purged( fd_tower_t * tower,
458 543 : fd_ghost_blk_t * blk ) {
459 543 : fd_tower_blk_t * tower_blk = fd_tower_blocks_query( tower, blk->slot );
460 543 : return tower_blk->confirmed && memcmp( &tower_blk->confirmed_block_id, &blk->id, sizeof(fd_hash_t) );
461 543 : }
462 :
463 : static int
464 : switch_check( fd_tower_t * tower,
465 : fd_ghost_t * ghost,
466 : ulong total_stake,
467 51 : ulong switch_slot ) {
468 :
469 51 : lockout_interval_map_t * lck_map = tower->lck_map;
470 51 : lockout_interval_t * lck_pool = tower->lck_pool;
471 :
472 51 : ulong switch_stake = 0;
473 51 : ulong vote_slot = fd_tower_vote_peek_tail_const( tower->votes )->slot;
474 51 : ulong root_slot = tower->root;
475 :
476 51 : ulong null = fd_ghost_blk_idx_null( ghost );
477 51 : fd_ghost_blk_t * head = fd_ghost_blk_map_remove( ghost, fd_ghost_root( ghost ) );
478 51 : fd_ghost_blk_t * tail = head;
479 51 : head->next = null;
480 :
481 591 : while( FD_LIKELY( head ) ) {
482 564 : fd_ghost_blk_t * blk = head; /* guaranteed to not be purged */
483 :
484 : /* Because agave has particular behavior where if they replay a
485 : equivocating version of a slot and then the correct version, the
486 : original version and all of it's children get purged from all
487 : structures. None of the nodes on this subtree can be considered
488 : for the switch proof. Note that this means as we BFS, a node
489 : can be considered a "valid leaf" if either it has no children,
490 : or if all of it's children are purged/superseded slots. We
491 : detect this by comparing against tower_blocks confirmed. */
492 :
493 564 : int is_valid_leaf = 1;
494 564 : fd_ghost_blk_t * child = fd_ghost_blk_child( ghost, head );
495 1107 : while( FD_LIKELY( child ) ) {
496 543 : if( FD_LIKELY( !is_purged( tower, child ) ) ) {
497 537 : fd_ghost_blk_map_remove( ghost, child );
498 537 : tail->next = fd_ghost_blk_idx( ghost, child );
499 537 : tail = child;
500 537 : tail->next = null;
501 537 : is_valid_leaf = 0;
502 537 : }
503 543 : child = fd_ghost_blk_sibling( ghost, child );
504 543 : }
505 :
506 564 : head = fd_ghost_blk_next( ghost, blk ); /* pop queue head */
507 564 : fd_ghost_blk_map_insert( ghost, blk ); /* re-insert into map */
508 :
509 564 : if( FD_UNLIKELY( !is_valid_leaf ) ) continue; /* not a real candidate */
510 :
511 147 : ulong candidate_slot = blk->slot;
512 147 : ulong lca = fd_tower_blocks_lowest_common_ancestor( tower, candidate_slot, vote_slot );
513 147 : if( FD_UNLIKELY( candidate_slot == vote_slot ) ) continue;
514 132 : if( FD_UNLIKELY( lca==ULONG_MAX ) ) continue; /* unlikely but this leaf is an already pruned minority fork */
515 :
516 132 : if( FD_UNLIKELY( fd_tower_blocks_is_slot_descendant( tower, lca, switch_slot ) ) ) {
517 :
518 : /* This candidate slot may be considered for the switch proof, if
519 : it passes the following conditions:
520 :
521 : https://github.com/anza-xyz/agave/blob/c7b97bc77addacf03b229c51b47c18650d909576/core/src/consensus.rs#L1117
522 :
523 : Now for this candidate slot, look at the lockouts that were
524 : created at the time that we processed the bank for this
525 : candidate slot. */
526 :
527 111 : ulong sentinel_key = lockout_interval_key( candidate_slot, 0 );
528 111 : for( lockout_interval_t const * sentinel = lockout_interval_map_ele_query_const( lck_map, &sentinel_key, NULL, lck_pool );
529 120 : sentinel;
530 111 : sentinel = lockout_interval_map_ele_next_const( sentinel, NULL, lck_pool ) ) {
531 33 : ulong interval_end = sentinel->start;
532 33 : ulong key = lockout_interval_key( candidate_slot, interval_end );
533 :
534 : /* Intervals are keyed by the end of the interval. If the end of
535 : the interval is < the last vote slot, then these vote
536 : accounts with this particular lockout are NOT locked out from
537 : voting for the last vote slot, which means we can skip this
538 : set of intervals. */
539 :
540 33 : if( FD_LIKELY( interval_end < vote_slot ) ) continue;
541 :
542 : /* At this point we can actually query for the intervals by
543 : end interval to get the vote accounts. */
544 :
545 27 : for( lockout_interval_t const * interval = lockout_interval_map_ele_query_const( lck_map, &key, NULL, lck_pool );
546 39 : interval;
547 36 : interval = lockout_interval_map_ele_next_const( interval, NULL, lck_pool ) ) {
548 36 : ulong interval_slot = interval->start;
549 36 : fd_hash_t const * vote_acc = &interval->addr;
550 :
551 36 : if( FD_UNLIKELY( !fd_tower_blocks_is_slot_descendant( tower, interval_slot, vote_slot ) && interval_slot > root_slot ) ) {
552 33 : fd_tower_stakes_vtr_xid_t key = { .addr = *vote_acc, .slot = switch_slot };
553 33 : fd_tower_stakes_vtr_t const * voter_stake = fd_tower_stakes_vtr_map_ele_query_const( tower->stk_vtr_map, &key, NULL, tower->stk_vtr_pool );
554 :
555 : /* Vote account could have been closed on the switch fork,
556 : and therefore not in the tower stakes map. In this case
557 : just count the vote stake as 0 and skip this voter.
558 : matches Agave. */
559 33 : if( FD_UNLIKELY( !voter_stake ) ) continue;
560 33 : ulong voter_idx = fd_tower_stakes_vtr_pool_idx( tower->stk_vtr_pool, voter_stake );
561 33 : if( FD_UNLIKELY( fd_used_acc_scratch_test( tower->stk_used_acc, voter_idx ) ) ) continue; /* exclude already counted voters */
562 33 : fd_used_acc_scratch_insert( tower->stk_used_acc, voter_idx );
563 33 : switch_stake += voter_stake->stake;
564 33 : if( FD_LIKELY( (double)switch_stake / (double)total_stake > SWITCH_RATIO ) ) {
565 24 : fd_used_acc_scratch_null( tower->stk_used_acc );
566 24 : FD_LOG_DEBUG(( "[%s] vote_slot: %lu. switch_slot: %lu. pct: %.0lf%%", __func__, vote_slot, switch_slot, (double)switch_stake / (double)total_stake * 100.0 ));
567 48 : while( FD_LIKELY( head ) ) { /* cleanup: re-insert remaining BFS queue into map */
568 24 : fd_ghost_blk_t * next = fd_ghost_blk_next( ghost, head );
569 24 : fd_ghost_blk_map_insert( ghost, head );
570 24 : head = next;
571 24 : }
572 24 : return 1;
573 24 : }
574 33 : }
575 36 : }
576 27 : }
577 111 : }
578 132 : }
579 27 : fd_used_acc_scratch_null( tower->stk_used_acc );
580 27 : FD_LOG_DEBUG(( "[%s] vote_slot: %lu. switch_slot: %lu. pct: %.0lf%%", __func__, vote_slot, switch_slot, (double)switch_stake / (double)total_stake * 100.0 ));
581 27 : return 0;
582 51 : }
583 :
584 : /* threshold_check checks if we pass the threshold required to vote for
585 : `slot`. Returns 1 if we pass the threshold check, 0 otherwise.
586 :
587 : The following psuedocode describes the algorithm:
588 :
589 : ```
590 : simulate that we have voted for `slot`
591 :
592 : for all vote accounts in the current epoch
593 :
594 : simulate that the vote account has voted for `slot`
595 :
596 : pop all votes expired by that simulated vote
597 :
598 : if the validator's latest tower vote after expiry >= our threshold
599 : slot ie. our vote from THRESHOLD_DEPTH back also after simulating,
600 : then add validator's stake to threshold_stake.
601 :
602 : return threshold_stake >= FD_TOWER_THRESHOLD_RATIO
603 : ```
604 :
605 : The threshold check simulates voting for the current slot to expire
606 : stale votes. This is to prevent validators that haven't voted in a
607 : long time from counting towards the threshold stake. */
608 :
609 : static int
610 : threshold_check( fd_tower_t const * tower,
611 : fd_tower_vtr_t const * accts,
612 : ulong total_stake,
613 0 : ulong slot ) {
614 :
615 : /* First, simulate a vote on our tower, popping off everything that
616 : would be expired by voting for slot. */
617 :
618 0 : ulong cnt = simulate_vote( tower->votes, slot );
619 :
620 : /* We can always vote if our tower is not at least THRESHOLD_DEPTH
621 : deep after simulating. */
622 :
623 0 : if( FD_UNLIKELY( cnt < THRESHOLD_DEPTH ) ) return 1;
624 :
625 : /* Get the vote slot from THRESHOLD_DEPTH back. Note THRESHOLD_DEPTH
626 : is the 8th index back _including_ the simulated vote at index 0. */
627 :
628 0 : ulong threshold_slot = fd_tower_vote_peek_index_const( tower->votes, cnt - THRESHOLD_DEPTH )->slot;
629 0 : ulong threshold_stake = 0;
630 0 : for( fd_tower_vtr_iter_t iter = fd_tower_vtr_iter_init( accts );
631 0 : !fd_tower_vtr_iter_done( accts, iter );
632 0 : iter = fd_tower_vtr_iter_next( accts, iter ) ) {
633 0 : fd_tower_vtr_t const * acct = fd_tower_vtr_iter_ele_const( accts, iter );
634 :
635 0 : ulong cnt = simulate_vote( acct->votes, slot ); /* expire votes */
636 0 : if( FD_UNLIKELY( !cnt ) ) continue; /* no votes left after expiry */
637 :
638 : /* Count their stake towards the threshold check if their prev vote
639 : slot >= our threshold slot.
640 :
641 : We know their prev vote slot is definitely on the same fork as
642 : our threshold slot, because these towers are sourced from vote
643 : _accounts_, not vote _transactions_ and the Vote Program
644 : validates that all slots in the vote account's tower exist on the
645 : current fork.
646 :
647 : Therefore, if their prev vote slot >= our threshold slot, we know
648 : that vote must be for the threshold slot itself or one of
649 : threshold slot's descendants. */
650 :
651 0 : ulong vote_slot = fd_tower_vote_peek_index_const( acct->votes, cnt - 1 )->slot;
652 0 : if( FD_LIKELY( vote_slot >= threshold_slot ) ) threshold_stake += acct->stake;
653 0 : }
654 :
655 0 : double threshold_pct = (double)threshold_stake / (double)total_stake;
656 0 : int threshold = threshold_pct > THRESHOLD_RATIO;
657 0 : if( FD_UNLIKELY( !threshold ) ) FD_LOG_DEBUG(( "[%s] vote_slot: %lu. threshold_slot: %lu. pct: %.0lf%%.", __func__, fd_tower_vote_peek_tail_const( tower->votes )->slot, threshold_slot, threshold_pct * 100.0 ));
658 0 : return threshold;
659 0 : }
660 :
661 : static int
662 : propagated_check( fd_tower_t * tower,
663 0 : ulong slot ) {
664 :
665 0 : fd_tower_blk_t * blk = fd_tower_blocks_query( tower, slot );
666 0 : FD_TEST( blk );
667 :
668 0 : if( FD_LIKELY( blk->leader ) ) return 1; /* can always vote for slot in which we're leader */
669 0 : if( FD_LIKELY( blk->prev_leader_slot==ULONG_MAX ) ) return 1; /* haven't been leader yet */
670 :
671 0 : fd_tower_blk_t * prev_leader_blk = fd_tower_blocks_query( tower, blk->prev_leader_slot );
672 0 : if( FD_LIKELY( !prev_leader_blk ) ) return 1; /* already pruned / rooted */
673 :
674 0 : return prev_leader_blk->propagated;
675 0 : }
676 :
677 : uchar
678 : fd_tower_vote_and_reset( fd_tower_t * tower,
679 : fd_ghost_t * ghost,
680 : fd_votes_t * votes FD_PARAM_UNUSED,
681 : ulong * reset_slot,
682 : fd_hash_t * reset_block_id,
683 : ulong * vote_slot,
684 : fd_hash_t * vote_block_id,
685 : fd_hash_t * vote_bank_hash,
686 : fd_hash_t * vote_block_hash,
687 : ulong * root_slot,
688 6 : fd_hash_t * root_block_id ) {
689 :
690 6 : uchar flags = 0;
691 6 : fd_ghost_blk_t const * best_blk = fd_ghost_best( ghost, fd_ghost_root( ghost ) );
692 6 : fd_ghost_blk_t const * reset_blk = NULL;
693 6 : fd_ghost_blk_t const * vote_blk = NULL;
694 :
695 : /* Case 0: if we haven't voted yet then there are two subcases where
696 : we short-circuit. */
697 :
698 : /* Case 0a: on boot, tower->root is set to the snapshot slot before
699 : any votes are recorded. In this case, lockout_check returns 0 for
700 : slot <= root, preventing a vote on the snapshot slot itself. */
701 :
702 : /* TODO refactor: 0a is a tile-concern not logic-concern */
703 :
704 6 : if( FD_UNLIKELY( fd_tower_vote_empty( tower->votes ) && !lockout_check( tower, best_blk->slot ) ) ) {
705 0 : FD_BASE58_ENCODE_32_BYTES( best_blk->id.uc, best_blk_id );
706 0 : FD_LOG_DEBUG(( "[%s] case 0a: not recent (slot %lu <= root %lu). reset_blk: (%lu, %s). vote_blk: (NULL)", __func__, best_blk->slot, tower->root, best_blk->slot, best_blk_id ));
707 0 : *reset_slot = best_blk->slot;
708 0 : *reset_block_id = best_blk->id;
709 0 : *vote_slot = ULONG_MAX;
710 0 : *vote_block_id = (fd_hash_t){0};
711 0 : *root_slot = ULONG_MAX;
712 0 : *root_block_id = (fd_hash_t){0};
713 0 : return flags;
714 0 : }
715 :
716 : /* Case 0b: if we haven't voted yet then we can always vote and reset
717 : to ghost_best. */
718 :
719 6 : if( FD_UNLIKELY( fd_tower_vote_empty( tower->votes ) ) ) {
720 0 : FD_BASE58_ENCODE_32_BYTES( best_blk->id.uc, best_blk_id );
721 0 : FD_LOG_DEBUG(( "[%s] case 0b: empty tower. reset_blk: (%lu, %s). vote_blk: (%lu, %s)", __func__, best_blk->slot, best_blk_id, best_blk->slot, best_blk_id ));
722 0 : fd_tower_blk_t * tower_blk = fd_tower_blocks_query( tower, best_blk->slot );
723 0 : tower_blk->voted = 1;
724 0 : tower_blk->voted_block_id = best_blk->id;
725 0 : *reset_slot = best_blk->slot;
726 0 : *reset_block_id = best_blk->id;
727 0 : *vote_slot = best_blk->slot;
728 0 : *vote_block_id = best_blk->id;
729 0 : *vote_bank_hash = tower_blk->bank_hash;
730 0 : *vote_block_hash = tower_blk->block_hash;
731 0 : *root_slot = push_vote( tower, best_blk->slot );
732 0 : *root_block_id = ( fd_hash_t ){ 0 };
733 0 : return flags;
734 0 : }
735 :
736 6 : ulong prev_vote_slot = fd_tower_vote_peek_tail_const( tower->votes )->slot;
737 6 : fd_tower_blk_t * prev_vote_fork = fd_tower_blocks_query( tower, prev_vote_slot ); /* must exist */
738 :
739 6 : fd_hash_t * prev_vote_block_id = &prev_vote_fork->voted_block_id;
740 6 : fd_ghost_blk_t * prev_vote_blk = fd_ghost_query( ghost, prev_vote_block_id );
741 :
742 : /* Case 1: if any ancestor of our prev vote (including prev vote
743 : itself) is an unconfirmed duplicate, then our prev vote was on a
744 : duplicate fork.
745 :
746 : There are three subcases to check. */
747 :
748 6 : int invalid_ancestor = !!fd_ghost_invalid_ancestor( ghost, prev_vote_blk );
749 :
750 : /* Case 1a: ghost_best is an ancestor of prev vote. This means
751 : ghost_best is rolling back to an ancestor that precedes the
752 : duplicate ancestor on the same fork as our prev vote. In this
753 : case, we can't vote on our ancestor, but we do reset to that
754 : ancestor.
755 :
756 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1016-L1019 */
757 :
758 6 : int ancestor_rollback = prev_vote_blk != best_blk && !!fd_ghost_ancestor( ghost, prev_vote_blk, &best_blk->id );
759 :
760 : /* Case 1b: ghost_best is not an ancestor, but prev_vote is a
761 : duplicate and we've confirmed its duplicate sibling. In this
762 : case, we allow switching to ghost_best without a switch proof.
763 :
764 : Example: slot 5 is a duplicate. We first receive, replay and
765 : vote for block 5, so that is our prev vote. We later receive
766 : block 5' and observe that it is duplicate confirmed. ghost_best
767 : now returns block 5' and we both vote and reset to block 5'
768 : regardless of the switch check.
769 :
770 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1021-L1024 */
771 :
772 6 : int sibling_confirmed = prev_vote_fork->confirmed && 0!=memcmp( &prev_vote_fork->voted_block_id, &prev_vote_fork->confirmed_block_id, sizeof(fd_hash_t) );
773 :
774 6 : if( FD_UNLIKELY( invalid_ancestor && ancestor_rollback ) ) {
775 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_ANCESTOR_ROLLBACK );
776 0 : reset_blk = best_blk;
777 0 : FD_BASE58_ENCODE_32_BYTES( reset_blk->id.uc, reset_blk_id );
778 0 : FD_LOG_DEBUG(( "[%s] case 1a: ancestor rollback. prev_vote_slot: %lu. reset_blk: (%lu, %s). vote_blk: (NULL)", __func__, prev_vote_slot, reset_blk->slot, reset_blk_id ));
779 :
780 6 : } else if( FD_UNLIKELY( invalid_ancestor && sibling_confirmed ) ) {
781 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SIBLING_CONFIRMED );
782 0 : reset_blk = best_blk;
783 0 : vote_blk = best_blk;
784 0 : FD_BASE58_ENCODE_32_BYTES( reset_blk->id.uc, reset_blk_id );
785 0 : FD_BASE58_ENCODE_32_BYTES( vote_blk->id.uc, vote_blk_id );
786 0 : FD_LOG_DEBUG(( "[%s] case 1b: sibling confirmed. prev_vote_slot: %lu. reset_blk: (%lu, %s). vote_blk: (%lu, %s)", __func__, prev_vote_slot, reset_blk->slot, reset_blk_id, vote_blk->slot, vote_blk_id ));
787 0 : }
788 :
789 : /* Case 2: if our prev vote slot is an ancestor of the best slot, then
790 : they are on the same fork and we can both reset to it. We can also
791 : vote for it if we pass the can_vote checks.
792 :
793 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1057 */
794 :
795 6 : else if( FD_LIKELY( best_blk->slot == prev_vote_slot || fd_tower_blocks_is_slot_ancestor( tower, best_blk->slot, prev_vote_slot ) ) ) {
796 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SAME_FORK );
797 0 : reset_blk = best_blk;
798 0 : vote_blk = best_blk;
799 0 : FD_BASE58_ENCODE_32_BYTES( reset_blk->id.uc, reset_blk_id );
800 0 : FD_BASE58_ENCODE_32_BYTES( vote_blk->id.uc, vote_blk_id );
801 0 : FD_LOG_DEBUG(( "[%s] case 2: same fork. prev_vote_slot: %lu. reset_blk: (%lu, %s). vote_blk: (%lu, %s)", __func__, prev_vote_slot, reset_blk->slot, reset_blk_id, vote_blk->slot, vote_blk_id ));
802 0 : }
803 :
804 : /* Case 3: if our prev vote is not an ancestor of the best block, then
805 : it is on a different fork. If we pass the switch check, we can
806 : reset to it. If we additionally pass the lockout check, we can
807 : also vote for it.
808 :
809 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus.rs#L1208-L1215
810 :
811 : Note also Agave uses the best blk's total stake for checking the
812 : threshold.
813 :
814 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus/fork_choice.rs#L443-L445 */
815 :
816 6 : else if( FD_LIKELY( switch_check( tower, ghost, best_blk->total_stake, best_blk->slot ) ) ) {
817 3 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SWITCH_PASS );
818 3 : reset_blk = best_blk;
819 3 : vote_blk = best_blk;
820 3 : FD_BASE58_ENCODE_32_BYTES( reset_blk->id.uc, reset_blk_id );
821 3 : FD_BASE58_ENCODE_32_BYTES( vote_blk->id.uc, vote_blk_id );
822 3 : FD_LOG_DEBUG(( "[%s] case 3: switch pass. prev_vote_slot: %lu. reset_blk: (%lu, %s). vote_blk: (%lu, %s)", __func__, prev_vote_slot, reset_blk->slot, reset_blk_id, vote_blk->slot, vote_blk_id ));
823 3 : }
824 :
825 : /* Case 4: same as case 3 but we didn't pass the switch check. In
826 : this case we reset to either ghost_best or ghost_deepest beginning
827 : from our prev vote blk.
828 :
829 : We must reset to a block beginning from our prev vote fork to
830 : ensure votes get a chance to propagate. Because in order for votes
831 : to land, someone needs to build a block on that fork.
832 :
833 : We reset to ghost_best or ghost_deepest depending on whether our
834 : prev vote is valid. When it's invalid we use ghost_deepest instead
835 : of ghost_best, because ghost_best won't be able to return a valid
836 : block beginning from our prev_vote because by definition the entire
837 : subtree will be invalid.
838 :
839 : When our prev vote fork is not a duplicate, we want to propagate
840 : votes that might allow others to switch to our fork. In addition,
841 : if our prev vote fork is a duplicate, we want to propagate votes
842 : that might "duplicate confirm" that block (reach 52% of stake).
843 :
844 : See top-level documentation in fd_tower.h for more details on vote
845 : propagation. */
846 :
847 3 : else {
848 :
849 : /* Case 4a: failed switch check and last vote slot has an invalid
850 : ancestor.
851 :
852 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus/heaviest_subtree_fork_choice.rs#L1187 */
853 :
854 3 : if( FD_UNLIKELY( invalid_ancestor ) ) {
855 3 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SWITCH_FAIL );
856 3 : reset_blk = fd_ghost_deepest( ghost, prev_vote_blk );
857 3 : FD_BASE58_ENCODE_32_BYTES( reset_blk->id.uc, reset_blk_id );
858 3 : FD_LOG_DEBUG(( "[%s] case 4a: switch fail, invalid ancestor. prev_vote_slot: %lu. reset_blk: (%lu, %s). vote_blk: (NULL)", __func__, prev_vote_slot, reset_blk->slot, reset_blk_id ));
859 3 : }
860 :
861 : /* Case 4b: failed switch check (no invalid ancestor).
862 :
863 : https://github.com/anza-xyz/agave/blob/v2.3.7/core/src/consensus/fork_choice.rs#L200 */
864 :
865 0 : else {
866 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_SWITCH_FAIL );
867 0 : reset_blk = fd_ghost_best( ghost, prev_vote_blk );
868 0 : FD_BASE58_ENCODE_32_BYTES( reset_blk->id.uc, reset_blk_id );
869 0 : FD_LOG_DEBUG(( "[%s] case 4b: switch fail, no invalid ancestor. prev_vote_slot: %lu. reset_blk: (%lu, %s). vote_blk: (NULL)", __func__, prev_vote_slot, reset_blk->slot, reset_blk_id ));
870 0 : }
871 3 : }
872 :
873 : /* If there is a block to vote for, there are a few additional checks
874 : to make sure we can actually vote for it.
875 :
876 : Specifically, we need to make sure we're not locked out, pass the
877 : threshold check and that our previous leader block has propagated
878 : (reached the prop threshold according to fd_votes).
879 :
880 : https://github.com/firedancer-io/agave/blob/master/core/src/consensus/fork_choice.rs#L382-L385
881 :
882 : Agave uses the total stake on the fork being threshold checked
883 : (vote_blk) for determining whether it meets the stake threshold. */
884 :
885 6 : if( FD_LIKELY( vote_blk ) ) {
886 3 : if ( FD_UNLIKELY( !lockout_check( tower, vote_blk->slot ) ) ) {
887 3 : FD_BASE58_ENCODE_32_BYTES( vote_blk->id.uc, vote_blk_id );
888 3 : FD_LOG_DEBUG(( "[%s] lockout check failed. prev_vote_slot: %lu. vote_blk: (%lu, %s)", __func__, prev_vote_slot, vote_blk->slot, vote_blk_id ));
889 3 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_LOCKOUT_FAIL );
890 3 : vote_blk = NULL;
891 3 : }
892 0 : else if( FD_UNLIKELY( !threshold_check( tower, tower->vtrs, vote_blk->total_stake, vote_blk->slot ) ) ) {
893 0 : FD_BASE58_ENCODE_32_BYTES( vote_blk->id.uc, vote_blk_id );
894 0 : FD_LOG_DEBUG(( "[%s] threshold check failed. prev_vote_slot: %lu. vote_blk: (%lu, %s)", __func__, prev_vote_slot, vote_blk->slot, vote_blk_id ));
895 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_THRESHOLD_FAIL );
896 0 : vote_blk = NULL;
897 0 : }
898 0 : else if( FD_UNLIKELY( !propagated_check( tower, vote_blk->slot ) ) ) {
899 0 : FD_BASE58_ENCODE_32_BYTES( vote_blk->id.uc, vote_blk_id );
900 0 : FD_LOG_DEBUG(( "[%s] propagated check failed. prev_vote_slot: %lu. vote_blk: (%lu, %s)", __func__, prev_vote_slot, vote_blk->slot, vote_blk_id ));
901 0 : flags = fd_uchar_set_bit( flags, FD_TOWER_FLAG_PROPAGATED_FAIL );
902 0 : vote_blk = NULL;
903 0 : }
904 3 : }
905 :
906 6 : FD_TEST( reset_blk ); /* always a reset_blk */
907 6 : *reset_slot = reset_blk->slot;
908 6 : *reset_block_id = reset_blk->id;
909 6 : *vote_slot = ULONG_MAX;
910 6 : *vote_block_id = (fd_hash_t){0};
911 6 : *vote_bank_hash = (fd_hash_t){0};
912 6 : *vote_block_hash = (fd_hash_t){0};
913 6 : *root_slot = ULONG_MAX;
914 6 : *root_block_id = (fd_hash_t){0};
915 :
916 : /* Finally, if our vote passed all the checks, we actually push the
917 : vote onto the tower. */
918 :
919 6 : if( FD_LIKELY( vote_blk ) ) {
920 0 : *vote_slot = vote_blk->slot;
921 0 : *vote_block_id = vote_blk->id;
922 0 : *root_slot = push_vote( tower, vote_blk->slot );
923 :
924 : /* Query our tower fork for this slot we're voting for. Note this
925 : can never be NULL because we record tower forks as we replay, and
926 : we should never be voting on something we haven't replayed. */
927 :
928 0 : fd_tower_blk_t * fork = fd_tower_blocks_query( tower, vote_blk->slot );
929 0 : fork->voted = 1;
930 0 : fork->voted_block_id = vote_blk->id;
931 0 : *vote_bank_hash = fork->bank_hash;
932 0 : *vote_block_hash = fork->block_hash;
933 :
934 : /* Query the root slot's block id from tower forks. This block id
935 : may not necessarily be confirmed, because confirmation requires
936 : votes on the block itself (vs. block and its descendants).
937 :
938 : So if we have a confirmed block id, we return that. Otherwise
939 : we return our own vote block id for that slot, which we assume
940 : is the cluster converged on by the time we're rooting it.
941 :
942 : The only way it is possible for us to root the wrong version of
943 : a block (ie. not the one the cluster confirmed) is if there is
944 : mass equivocation (>2/3 of threshold check stake has voted for
945 : two versions of a block). This exceeds the equivocation safety
946 : threshold and we would eventually detect this via a bank hash
947 : mismatch and error out. */
948 :
949 0 : if( FD_LIKELY( *root_slot!=ULONG_MAX ) ) {
950 0 : fd_tower_blk_t * root_fork = fd_tower_blocks_query( tower, *root_slot );
951 0 : *root_block_id = *fd_ptr_if( root_fork->confirmed, &root_fork->confirmed_block_id, &root_fork->voted_block_id );
952 0 : }
953 0 : }
954 :
955 6 : FD_BASE58_ENCODE_32_BYTES( reset_block_id->uc, reset_block_id_b58 );
956 6 : FD_BASE58_ENCODE_32_BYTES( vote_block_id->uc, vote_block_id_b58 );
957 6 : FD_BASE58_ENCODE_32_BYTES( root_block_id->uc, root_block_id_b58 );
958 6 : FD_LOG_DEBUG(( "[%s] flags: %d. reset_slot: %lu (%s). vote_slot: %lu (%s). root_slot: %lu (%s).", __func__, flags, *reset_slot, reset_block_id_b58, *vote_slot, vote_block_id_b58, *root_slot, root_block_id_b58 ));
959 6 : return flags;
960 6 : }
961 :
962 : /* fd_tower_reconcile reconciles our local tower with our on-chain tower
963 : (stored inside our vote account). This function is important in two
964 : contexts:
965 :
966 : ON BOOT
967 :
968 : When Firedancer boots up its local tower contains no votes, only a
969 : root slot set to the snapshot slot. It needs to restore its "latest"
970 : tower votes and root as of its previous run. This information is
971 : stored on-chain itself, in a vote account, and Firedancer updates
972 : vote account states during catchup by replaying blocks since the
973 : snapshot. Firedancer reconciles its local tower with the on-chain
974 : one every time it replays a block, and will by definition have its
975 : "latest" tower once it has caught up.
976 :
977 : Note that it is possible Firedancer had voted for a minority fork in
978 : the previous run. In this case, its true "latest" tower contains
979 : votes for slots that were pruned by the time of this boot. In theory
980 : TowerBFT stipulates that lockout can be up to 2^32 slots, but in
981 : practice slots are pruned once they fall out of the slot hash history
982 : limit, because they can no longer be canonically verified on-chain.
983 : Therefore, Firedancer can safely ignore slots that are pruned and
984 : restore its latest tower on the majority fork as of boot time.
985 :
986 : HIGH-AVAILABILITY SETUP
987 :
988 : A typical validator setup involves two nodes, a primary and a backup.
989 : The primary is a valid fee payer, and the one landing votes recording
990 : the latest state of its tower on-chain. The two nodes' towers will
991 : usually be identical but occassionally diverge when one node votes
992 : for slots that the other one doesn't. This usually happens when
993 : there are multiple forks.
994 :
995 : This becomes a problem, because the primary's tower may contain votes
996 : the backup doesn't have and/or vice versa. The primary's tower is
997 : the canonical one, since it's the one recorded on-chain, so reconcile
998 : is a no-op on the primary.
999 :
1000 : On the backup, reconcile is more involved. Because what's on-chain
1001 : is the primary's tower, there may be slots the backup never actually
1002 : voted for. When the backup node reads back the on-chain tower, some
1003 : metadata, namely `voted` and `voted_block_id`, will be missing from
1004 : its fd_tower instance.
1005 :
1006 : fd_tower_reconcile assumes that if a tower has been recorded on-chain
1007 : then it is safe to assume the vote account registered with the
1008 : currently running Firedancer has in fact at some point voted for the
1009 : slots in that tower.
1010 :
1011 : In case the instance is the backup, it updates the local tower votes,
1012 : root, and metadata structures accordingly with this assumption namely
1013 : by inserting voted_block_id for votes that the backup didn't actually
1014 : vote for but can safely assume the primary did.
1015 :
1016 : This affects the Tower voting rules (see fd_tower_vote_and_reset) in
1017 : that the voted_block_id is used for certain vote and reset decisions.
1018 :
1019 : There are some corner cases to consider related to equivocation:
1020 :
1021 : 2
1022 : / \
1023 : 3 3' (confirmed)
1024 :
1025 : Assume 3 and 3' are alternate blocks for the same slot (3) and have
1026 : different block ids. 3' is the block that eventually gets confirmed.
1027 : Let's consider a scenario in which the primary votes for "3" and the
1028 : backup misses the vote for "3". fd_tower_reconcile needs to backfill
1029 : the voted_block_id for "3" on the backup. However, it's unclear
1030 : whether that vote is for 3 (unconfirmed) or 3' (confirmed), because
1031 : all the on-chain tower contains is the slot "3" (with no block_id).
1032 : How does the backup figure out the voted_block_id?
1033 :
1034 : It turns out it doesn't really matter either way, the backup can just
1035 : backfill with whichever block_id it happened to replay (we know the
1036 : backup has to have replayed either 3 or 3' in order to observe an
1037 : on-chain tower containing 3 in the first place):
1038 :
1039 : If the primary voted for 3 and the backup backfills with 3', we know
1040 : the primary will eventually switch to the DC block (3') via repair.
1041 : So backfilling with 3' is ok because the primary will converge to it.
1042 :
1043 : If the primary voted for 3' and the backup backfills with 3, then the
1044 : backup will similarly eventually switch to the DC block via repair.
1045 : Indeed, it will "freebie" switch in fd_tower_vote_and_reset ie. case
1046 : 1b: "sibling confirmed". Thus, the backup will converge to 3'. */
1047 :
1048 : void
1049 : fd_tower_reconcile( fd_tower_t * tower,
1050 : fd_tower_vote_t * onchain_votes,
1051 30 : ulong onchain_root ) {
1052 :
1053 30 : fd_tower_vote_t * local_votes = tower->votes;
1054 30 : ulong local_root = tower->root;
1055 :
1056 30 : ulong local_vote = fd_tower_vote_empty( local_votes ) ? ULONG_MAX : fd_tower_vote_peek_tail_const( local_votes )->slot;
1057 30 : ulong onchain_vote = fd_tower_vote_empty( onchain_votes ) ? ULONG_MAX : fd_tower_vote_peek_tail_const( onchain_votes )->slot;
1058 :
1059 : /* Cases:
1060 :
1061 : Agave checks Option<onchain_vote> <= Option<local_vote>. Breakdown of Ord<Option<Slot>>:
1062 :
1063 : None, None => True
1064 : None, Some => True
1065 : Some, None => False
1066 : Some, Some => onchain_vote <= local_vote */
1067 :
1068 30 : if( FD_LIKELY( onchain_vote==ULONG_MAX || /* None, None or None, Some */
1069 30 : ( local_vote !=ULONG_MAX && onchain_vote<=local_vote ) /* Some, Some */ ) ) return;
1070 :
1071 : /* On-chain tower is newer, so sync our local tower to the on-chain tower. */
1072 :
1073 24 : FD_LOG_NOTICE(( "[%s] overwriting local tower (last: %lu, root: %lu) with onchain tower (last: %lu, root: %lu)", __func__, local_vote, local_root, onchain_vote, onchain_root ));
1074 :
1075 24 : FD_TEST( local_root!=ULONG_MAX ); /* local root should always be set before fd_tower_reconcile */
1076 24 : if( FD_LIKELY( onchain_root==ULONG_MAX || local_root > onchain_root ) ) {
1077 :
1078 : /* Local root is larger than on-chain root. Overwrite on-chain root
1079 : with local root (this is just a copy, not writing to accdb). */
1080 :
1081 3 : FD_LOG_DEBUG(( "[%s] local_root %lu > onchain_root %lu", __func__, local_root, onchain_root ));
1082 3 : onchain_root = local_root;
1083 :
1084 : /* Drop on-chain votes <= local root. */
1085 :
1086 12 : while( FD_LIKELY( !fd_tower_vote_empty( onchain_votes ) ) ) {
1087 12 : fd_tower_vote_t const * vote = fd_tower_vote_peek_head_const( onchain_votes );
1088 12 : if( FD_LIKELY( vote->slot > local_root ) ) break;
1089 9 : FD_LOG_DEBUG(( "[%s] dropping on-chain vote for slot %lu since it's <= local root %lu", __func__, vote->slot, local_root ));
1090 9 : fd_tower_vote_pop_head( onchain_votes );
1091 9 : }
1092 :
1093 : /* TODO add sanity-check that onchain_root is an ancestor of the
1094 : first vote's ancestor at this point. */
1095 3 : }
1096 :
1097 24 : for( fd_tower_vote_iter_t iter = fd_tower_vote_iter_init( tower->votes );
1098 66 : !fd_tower_vote_iter_done( tower->votes, iter );
1099 42 : iter = fd_tower_vote_iter_next( tower->votes, iter ) ) {
1100 42 : fd_tower_vote_t const * vote = fd_tower_vote_iter_ele_const( tower->votes, iter );
1101 42 : fd_tower_blk_t * tower_blk = fd_tower_blocks_query( tower, vote->slot );
1102 42 : FD_TEST( tower_blk ); /* must exist if it's in our tower */
1103 42 : tower_blk->voted = 0;
1104 42 : }
1105 :
1106 : /* Need to overwrite tower->root with onchain_root, so first clear out
1107 : any intermediate slots between them. */
1108 :
1109 30 : for( ulong slot = tower->root; slot < onchain_root; slot++ ) {
1110 6 : fd_tower_blocks_remove( tower, slot );
1111 6 : fd_tower_lockos_remove( tower, slot );
1112 6 : fd_tower_stakes_remove( tower, slot );
1113 6 : }
1114 :
1115 : /* Overwrite the root. No-op if local_root > onchain_root. */
1116 :
1117 24 : tower->root = onchain_root;
1118 :
1119 : /* Clear out all local_votes. */
1120 :
1121 24 : fd_tower_vote_remove_all( tower->votes );
1122 :
1123 : /* Replace them with onchain_votes. */
1124 :
1125 24 : for( fd_tower_vote_iter_t iter = fd_tower_vote_iter_init( onchain_votes );
1126 96 : !fd_tower_vote_iter_done( onchain_votes, iter );
1127 72 : iter = fd_tower_vote_iter_next( onchain_votes, iter ) ) {
1128 72 : fd_tower_vote_t const * vote = fd_tower_vote_iter_ele_const( onchain_votes, iter );
1129 72 : fd_tower_vote_push_tail( tower->votes, *vote );
1130 :
1131 : /* Additionally, backfill voted_block_id for the slots we didn't
1132 : actually vote for. This is intentionally always using the latest
1133 : replayed_block_id if we overwrote it with a second replay. */
1134 :
1135 72 : fd_tower_blk_t * tower_blk = fd_tower_blocks_query( tower, vote->slot );
1136 72 : FD_TEST( tower_blk ); /* must exist because
1137 : 1. all on-chain votes > root slot
1138 : 2. all on-chain votes <= replay slot */
1139 72 : if( FD_UNLIKELY( !tower_blk->voted ) ) {
1140 72 : tower_blk->voted = 1;
1141 72 : tower_blk->voted_block_id = tower_blk->replayed_block_id;
1142 72 : }
1143 72 : }
1144 24 : }
1145 :
1146 : void
1147 : fd_tower_from_vote_acc( fd_tower_vote_t * votes,
1148 : ulong * root,
1149 : uchar const * data,
1150 147 : ulong data_sz ) {
1151 147 : fd_vote_acc_desc_t desc[1];
1152 147 : if( FD_UNLIKELY( !fd_vote_acc_desc( desc, data, data_sz ) ) ) {
1153 0 : *root = ULONG_MAX;
1154 0 : return;
1155 0 : }
1156 147 : *root = desc->root_slot;
1157 474 : for( ulong i=0UL; i<desc->vote_cnt; i++ ) {
1158 327 : fd_tower_vote_t vote = {0};
1159 327 : switch( desc->kind ) {
1160 93 : case FD_VOTE_ACC_V2: {
1161 93 : fd_vote_acc_vote_v2_t const * v = fd_vote_acc_desc_vote( desc, data, i );
1162 93 : vote.slot = v->slot;
1163 93 : vote.conf = v->conf;
1164 93 : break;
1165 0 : }
1166 234 : case FD_VOTE_ACC_V3:
1167 234 : case FD_VOTE_ACC_V4: {
1168 234 : fd_vote_acc_vote_t const * v = fd_vote_acc_desc_vote( desc, data, i );
1169 234 : vote.slot = v->slot;
1170 234 : vote.conf = v->conf;
1171 234 : break;
1172 234 : }
1173 327 : }
1174 327 : fd_tower_vote_push_tail( votes, vote );
1175 327 : }
1176 147 : }
1177 :
1178 : ulong
1179 : fd_tower_with_lat_from_vote_acc( fd_vote_acc_vote_t tower[ static FD_TOWER_VOTE_MAX ],
1180 : uchar const * data,
1181 0 : ulong data_sz ) {
1182 0 : fd_vote_acc_desc_t desc[1];
1183 0 : if( FD_UNLIKELY( !fd_vote_acc_desc( desc, data, data_sz ) ) ) return 0UL;
1184 0 : assert( desc->vote_cnt <= FD_TOWER_VOTE_MAX );
1185 0 : switch( desc->kind ) {
1186 0 : case FD_VOTE_ACC_V2: {
1187 0 : for( ulong i=0UL; i<desc->vote_cnt; i++ ) {
1188 0 : fd_vote_acc_vote_v2_t const * v = fd_vote_acc_desc_vote( desc, data, i );
1189 0 : tower[ i ] = (fd_vote_acc_vote_t){
1190 0 : .slot = v->slot,
1191 0 : .conf = v->conf,
1192 0 : .latency = UCHAR_MAX
1193 0 : };
1194 0 : }
1195 0 : break;
1196 0 : }
1197 0 : case FD_VOTE_ACC_V3:
1198 0 : case FD_VOTE_ACC_V4:
1199 0 : fd_memcpy( tower, fd_vote_acc_desc_vote( desc, data, 0UL ), desc->vote_cnt * sizeof(fd_vote_acc_vote_t) );
1200 0 : break;
1201 0 : }
1202 0 : return desc->vote_cnt;
1203 0 : }
1204 :
1205 : void
1206 : fd_tower_to_vote_txn( fd_tower_t const * tower,
1207 : fd_hash_t const * bank_hash,
1208 : fd_hash_t const * block_id,
1209 : fd_hash_t const * recent_blockhash,
1210 : fd_pubkey_t const * validator_identity,
1211 : fd_pubkey_t const * vote_authority,
1212 : fd_pubkey_t const * vote_acc,
1213 3 : fd_txn_p_t * vote_txn ) {
1214 :
1215 3 : FD_TEST( fd_tower_vote_cnt( tower->votes )<=FD_TOWER_VOTE_MAX );
1216 3 : fd_compact_tower_sync_serde_t tower_sync_serde = {
1217 3 : .root = fd_ulong_if( tower->root == ULONG_MAX, 0UL, tower->root ),
1218 3 : .lockouts_cnt = (ushort)fd_tower_vote_cnt( tower->votes ),
1219 : /* .lockouts populated below */
1220 3 : .hash = *bank_hash,
1221 3 : .timestamp_option = 1,
1222 3 : .timestamp = fd_log_wallclock() / (long)1e9, /* seconds */
1223 3 : .block_id = *block_id
1224 3 : };
1225 :
1226 3 : ulong i = 0UL;
1227 3 : ulong prev = tower_sync_serde.root;
1228 3 : for( fd_tower_vote_iter_t iter = fd_tower_vote_iter_init( tower->votes );
1229 96 : !fd_tower_vote_iter_done( tower->votes, iter );
1230 93 : iter = fd_tower_vote_iter_next( tower->votes, iter ) ) {
1231 93 : fd_tower_vote_t const * vote = fd_tower_vote_iter_ele_const( tower->votes, iter );
1232 93 : tower_sync_serde.lockouts[i].offset = vote->slot - prev;
1233 93 : tower_sync_serde.lockouts[i].confirmation_count = (uchar)vote->conf;
1234 93 : prev = vote->slot;
1235 93 : i++;
1236 93 : }
1237 :
1238 3 : uchar * txn_out = vote_txn->payload;
1239 3 : uchar * txn_meta_out = vote_txn->_;
1240 :
1241 3 : int same_addr = !memcmp( validator_identity, vote_authority, sizeof(fd_pubkey_t) );
1242 3 : if( FD_LIKELY( same_addr ) ) {
1243 :
1244 : /* 0: validator identity
1245 : 1: vote account address
1246 : 2: vote program */
1247 :
1248 3 : fd_txn_accounts_t votes;
1249 3 : votes.signature_cnt = 1;
1250 3 : votes.readonly_signed_cnt = 0;
1251 3 : votes.readonly_unsigned_cnt = 1;
1252 3 : votes.acct_cnt = 3;
1253 3 : votes.signers_w = validator_identity;
1254 3 : votes.signers_r = NULL;
1255 3 : votes.non_signers_w = vote_acc;
1256 3 : votes.non_signers_r = &fd_solana_vote_program_id;
1257 3 : FD_TEST( fd_txn_base_generate( txn_meta_out, txn_out, votes.signature_cnt, &votes, recent_blockhash->uc ) );
1258 :
1259 3 : } else {
1260 :
1261 : /* 0: validator identity
1262 : 1: vote authority
1263 : 2: vote account address
1264 : 3: vote program */
1265 :
1266 0 : fd_txn_accounts_t votes;
1267 0 : votes.signature_cnt = 2;
1268 0 : votes.readonly_signed_cnt = 1;
1269 0 : votes.readonly_unsigned_cnt = 1;
1270 0 : votes.acct_cnt = 4;
1271 0 : votes.signers_w = validator_identity;
1272 0 : votes.signers_r = vote_authority;
1273 0 : votes.non_signers_w = vote_acc;
1274 0 : votes.non_signers_r = &fd_solana_vote_program_id;
1275 0 : FD_TEST( fd_txn_base_generate( txn_meta_out, txn_out, votes.signature_cnt, &votes, recent_blockhash->uc ) );
1276 0 : }
1277 :
1278 : /* Add the vote instruction to the transaction. */
1279 :
1280 3 : uchar vote_ix_buf[FD_TXN_MTU];
1281 3 : ulong vote_ix_sz = 0;
1282 3 : FD_STORE( uint, vote_ix_buf, FD_VOTE_IX_KIND_TOWER_SYNC );
1283 3 : FD_TEST( 0==fd_compact_tower_sync_ser( &tower_sync_serde, vote_ix_buf + sizeof(uint), FD_TXN_MTU - sizeof(uint), &vote_ix_sz ) ); // cannot fail if fd_tower_vote_cnt( tower->votes ) <= FD_TOWER_VOTE_MAX
1284 3 : vote_ix_sz += sizeof(uint);
1285 3 : uchar program_id;
1286 3 : uchar ix_accs[2];
1287 3 : if( FD_LIKELY( same_addr ) ) {
1288 3 : ix_accs[0] = 1; /* vote account address */
1289 3 : ix_accs[1] = 0; /* vote authority */
1290 3 : program_id = 2; /* vote program */
1291 3 : } else {
1292 0 : ix_accs[0] = 2; /* vote account address */
1293 0 : ix_accs[1] = 1; /* vote authority */
1294 0 : program_id = 3; /* vote program */
1295 0 : }
1296 3 : vote_txn->payload_sz = fd_txn_add_instr( txn_meta_out, txn_out, program_id, ix_accs, 2, vote_ix_buf, vote_ix_sz );
1297 3 : }
1298 :
1299 : int
1300 0 : fd_tower_verify( fd_tower_t const * tower ) {
1301 0 : if( FD_UNLIKELY( fd_tower_vote_cnt( tower->votes )>=FD_TOWER_VOTE_MAX ) ) {
1302 0 : FD_LOG_WARNING(( "[%s] invariant violation: cnt %lu >= FD_TOWER_VOTE_MAX %lu", __func__, fd_tower_vote_cnt( tower->votes ), (ulong)FD_TOWER_VOTE_MAX ));
1303 0 : return -1;
1304 0 : }
1305 :
1306 0 : fd_tower_vote_t const * prev = NULL;
1307 0 : for( fd_tower_vote_iter_t iter = fd_tower_vote_iter_init( tower->votes );
1308 0 : !fd_tower_vote_iter_done( tower->votes, iter );
1309 0 : iter = fd_tower_vote_iter_next( tower->votes, iter ) ) {
1310 0 : fd_tower_vote_t const * vote = fd_tower_vote_iter_ele_const( tower->votes, iter );
1311 0 : if( FD_UNLIKELY( prev && ( vote->slot < prev->slot || vote->conf < prev->conf ) ) ) {
1312 0 : FD_LOG_WARNING(( "[%s] invariant violation: vote (slot:%lu conf:%lu) prev (slot:%lu conf:%lu)", __func__, vote->slot, vote->conf, prev->slot, prev->conf ));
1313 0 : return -1;
1314 0 : }
1315 0 : prev = vote;
1316 0 : }
1317 0 : return 0;
1318 0 : }
1319 :
1320 : static void
1321 0 : to_cstr( fd_tower_t const * tower, char * s, ulong len ) {
1322 0 : ulong root = tower->root;
1323 0 : ulong off = 0;
1324 0 : int n;
1325 :
1326 0 : n = snprintf( s + off, len - off, "[Tower]\n\n" );
1327 0 : if( FD_UNLIKELY( n < 0 )) FD_LOG_CRIT(( "snprintf: %d", n ));
1328 0 : off += (ulong)n;
1329 :
1330 0 : if( FD_UNLIKELY( fd_tower_vote_empty( tower->votes ) ) ) return;
1331 :
1332 0 : ulong max_slot = 0;
1333 :
1334 : /* Determine spacing. */
1335 :
1336 0 : for( fd_tower_vote_iter_t iter = fd_tower_vote_iter_init_rev( tower->votes );
1337 0 : !fd_tower_vote_iter_done_rev( tower->votes, iter );
1338 0 : iter = fd_tower_vote_iter_prev ( tower->votes, iter ) ) {
1339 0 : max_slot = fd_ulong_max( max_slot, fd_tower_vote_iter_ele_const( tower->votes, iter )->slot );
1340 0 : }
1341 :
1342 : /* Calculate the number of digits in the maximum slot value. */
1343 :
1344 :
1345 0 : int digit_cnt = (int)fd_ulong_base10_dig_cnt( max_slot );
1346 :
1347 : /* Print the column headers. */
1348 :
1349 0 : if( off < len ) {
1350 0 : n = snprintf( s + off, len - off, "slot%*s | %s\n", digit_cnt - (int)strlen("slot"), "", "confirmation count" );
1351 0 : if( FD_UNLIKELY( n < 0 )) FD_LOG_CRIT(( "snprintf: %d", n ));
1352 0 : off += (ulong)n;
1353 0 : }
1354 :
1355 : /* Print the divider line. */
1356 :
1357 0 : for( int i = 0; i < digit_cnt && off < len; i++ ) {
1358 0 : s[off++] = '-';
1359 0 : }
1360 0 : if( off < len ) {
1361 0 : n = snprintf( s + off, len - off, " | " );
1362 0 : if( FD_UNLIKELY( n < 0 )) FD_LOG_CRIT(( "snprintf: %d", n ));
1363 0 : off += (ulong)n;
1364 0 : }
1365 0 : for( ulong i = 0; i < strlen( "confirmation count" ) && off < len; i++ ) {
1366 0 : s[off++] = '-';
1367 0 : }
1368 0 : if( off < len ) {
1369 0 : s[off++] = '\n';
1370 0 : }
1371 :
1372 : /* Print each vote as a table. */
1373 :
1374 0 : for( fd_tower_vote_iter_t iter = fd_tower_vote_iter_init_rev( tower->votes );
1375 0 : !fd_tower_vote_iter_done_rev( tower->votes, iter );
1376 0 : iter = fd_tower_vote_iter_prev ( tower->votes, iter ) ) {
1377 0 : fd_tower_vote_t const * vote = fd_tower_vote_iter_ele_const( tower->votes, iter );
1378 0 : if( off < len ) {
1379 0 : n = snprintf( s + off, len - off, "%*lu | %lu\n", digit_cnt, vote->slot, vote->conf );
1380 0 : if( FD_UNLIKELY( n < 0 )) FD_LOG_CRIT(( "snprintf: %d", n ));
1381 0 : off += (ulong)n;
1382 0 : }
1383 0 : }
1384 :
1385 0 : if( FD_UNLIKELY( root == ULONG_MAX ) ) {
1386 0 : if( off < len ) {
1387 0 : n = snprintf( s + off, len - off, "%*s | root\n", digit_cnt, "NULL" );
1388 0 : if( FD_UNLIKELY( n < 0 )) FD_LOG_CRIT(( "snprintf: %d", n ));
1389 0 : off += (ulong)n;
1390 0 : }
1391 0 : } else {
1392 0 : if( off < len ) {
1393 0 : n = snprintf( s + off, len - off, "%*lu | root\n", digit_cnt, root );
1394 0 : if( FD_UNLIKELY( n < 0 )) FD_LOG_CRIT(( "snprintf: %d", n ));
1395 0 : off += (ulong)n;
1396 0 : }
1397 0 : }
1398 :
1399 : /* Ensure null termination */
1400 0 : if( off < len ) {
1401 0 : s[off] = '\0';
1402 0 : } else {
1403 0 : s[len - 1] = '\0';
1404 0 : }
1405 0 : }
1406 :
1407 : char *
1408 : fd_tower_to_cstr( fd_tower_t const * tower,
1409 0 : char * cstr ) {
1410 0 : to_cstr( tower, cstr, FD_TOWER_CSTR_MIN );
1411 0 : return cstr;
1412 0 : }
1413 :
1414 : void
1415 : fd_tower_count_vote( fd_tower_t * tower,
1416 : fd_pubkey_t const * vote_acc,
1417 : ulong stake,
1418 : uchar const * data,
1419 9 : ulong data_sz ) {
1420 9 : fd_tower_vtr_t * vtr = fd_tower_vtr_push_tail_nocopy( tower->vtrs );
1421 9 : vtr->vote_acc = *vote_acc;
1422 9 : vtr->stake = stake;
1423 9 : fd_tower_vote_remove_all( vtr->votes );
1424 9 : fd_tower_from_vote_acc( vtr->votes, &vtr->root, data, data_sz );
1425 9 : }
1426 :
1427 : /* Block functions ********************************************************/
1428 :
1429 : static int
1430 : is_ancestor( fd_tower_t * tower,
1431 : ulong slot,
1432 177 : ulong ancestor_slot ) {
1433 177 : fd_tower_blk_t * anc = blk_map_ele_query( tower->blk_map, &slot, NULL, tower->blk_pool );
1434 639 : while( FD_LIKELY( anc ) ) {
1435 573 : if( FD_LIKELY( anc->parent_slot == ancestor_slot ) ) return 1;
1436 462 : anc = anc->parent_slot == ULONG_MAX ? NULL : blk_map_ele_query( tower->blk_map, &anc->parent_slot, NULL, tower->blk_pool );
1437 462 : }
1438 66 : return 0;
1439 177 : }
1440 :
1441 : int
1442 : fd_tower_blocks_is_slot_ancestor( fd_tower_t * tower,
1443 : ulong descendant_slot,
1444 6 : ulong ancestor_slot ) {
1445 6 : return is_ancestor( tower, descendant_slot, ancestor_slot );
1446 6 : }
1447 :
1448 : int
1449 : fd_tower_blocks_is_slot_descendant( fd_tower_t * tower,
1450 : ulong ancestor_slot,
1451 171 : ulong descendant_slot ) {
1452 171 : return is_ancestor( tower, descendant_slot, ancestor_slot );
1453 171 : }
1454 :
1455 : ulong
1456 : fd_tower_blocks_lowest_common_ancestor( fd_tower_t * tower,
1457 : ulong slot1,
1458 147 : ulong slot2 ) {
1459 :
1460 147 : fd_tower_blk_t * fork1 = blk_map_ele_query( tower->blk_map, &slot1, NULL, tower->blk_pool );
1461 147 : fd_tower_blk_t * fork2 = blk_map_ele_query( tower->blk_map, &slot2, NULL, tower->blk_pool );
1462 :
1463 147 : if( FD_UNLIKELY( !fork1 )) FD_LOG_CRIT(( "slot1 %lu not found", slot1 ));
1464 147 : if( FD_UNLIKELY( !fork2 )) FD_LOG_CRIT(( "slot2 %lu not found", slot2 ));
1465 :
1466 864 : while( FD_LIKELY( fork1 && fork2 ) ) {
1467 864 : if( FD_UNLIKELY( fork1->slot == fork2->slot ) ) return fork1->slot;
1468 717 : if( fork1->slot > fork2->slot ) fork1 = blk_map_ele_query( tower->blk_map, &fork1->parent_slot, NULL, tower->blk_pool );
1469 453 : else fork2 = blk_map_ele_query( tower->blk_map, &fork2->parent_slot, NULL, tower->blk_pool );
1470 717 : }
1471 :
1472 0 : return ULONG_MAX;
1473 147 : }
1474 :
1475 : fd_hash_t const *
1476 : fd_tower_blocks_canonical_block_id( fd_tower_t * tower,
1477 0 : ulong slot ) {
1478 0 : fd_tower_blk_t * blk = blk_map_ele_query( tower->blk_map, &slot, NULL, tower->blk_pool );
1479 0 : if( FD_UNLIKELY( !blk ) ) return NULL;
1480 0 : if ( FD_LIKELY( blk->confirmed ) ) return &blk->confirmed_block_id;
1481 0 : else if( FD_LIKELY( blk->voted ) ) return &blk->voted_block_id;
1482 0 : else return &blk->replayed_block_id;
1483 0 : }
1484 :
1485 : fd_tower_blk_t *
1486 702 : fd_tower_blocks_query( fd_tower_t * tower, ulong slot ) {
1487 702 : return blk_map_ele_query( tower->blk_map, &slot, NULL, tower->blk_pool );
1488 702 : }
1489 :
1490 : fd_tower_blk_t *
1491 : fd_tower_blocks_insert( fd_tower_t * tower,
1492 : ulong slot,
1493 276 : ulong parent_slot ) {
1494 276 : fd_tower_blk_t * blk = blk_pool_ele_acquire( tower->blk_pool );
1495 276 : if( FD_UNLIKELY( !blk ) ) return NULL;
1496 :
1497 276 : memset( blk, 0, sizeof(fd_tower_blk_t) );
1498 276 : blk->parent_slot = parent_slot;
1499 276 : blk->slot = slot;
1500 276 : blk->prev_leader_slot = ULONG_MAX;
1501 276 : blk_map_ele_insert( tower->blk_map, blk, tower->blk_pool );
1502 276 : return blk;
1503 276 : }
1504 :
1505 : void
1506 : fd_tower_blocks_remove( fd_tower_t * tower,
1507 6 : ulong slot ) {
1508 6 : fd_tower_blk_t * blk = blk_map_ele_query( tower->blk_map, &slot, NULL, tower->blk_pool );
1509 6 : if( FD_LIKELY( blk ) ) {
1510 3 : blk_map_ele_remove_fast( tower->blk_map, blk, tower->blk_pool );
1511 3 : blk_pool_ele_release( tower->blk_pool, blk );
1512 3 : }
1513 6 : }
1514 :
1515 : /* Lockos implementation */
1516 :
1517 : void
1518 : fd_tower_lockos_insert( fd_tower_t * tower,
1519 : ulong slot,
1520 : fd_hash_t const * addr,
1521 144 : fd_tower_vote_t * votes ) {
1522 :
1523 144 : lockout_interval_map_t * lck_map = tower->lck_map;
1524 144 : lockout_interval_t * lck_pool = tower->lck_pool;
1525 :
1526 144 : for( fd_tower_vote_iter_t iter = fd_tower_vote_iter_init( votes );
1527 288 : !fd_tower_vote_iter_done( votes, iter );
1528 144 : iter = fd_tower_vote_iter_next( votes, iter ) ) {
1529 144 : fd_tower_vote_t const * vote = fd_tower_vote_iter_ele_const( votes, iter );
1530 144 : ulong interval_start = vote->slot;
1531 144 : ulong interval_end = vote->slot + ( 1UL << vote->conf );
1532 144 : ulong key = lockout_interval_key( slot, interval_end );
1533 :
1534 144 : if( !lockout_interval_map_ele_query( lck_map, &key, NULL, lck_pool ) ) {
1535 : /* Insert sentinel for pruning. key = fork_slot | 0, start = interval_end. */
1536 135 : ulong sentinel_key = lockout_interval_key( slot, 0 );
1537 135 : FD_TEST( lockout_interval_pool_free( lck_pool ) );
1538 135 : lockout_interval_t * sentinel = lockout_interval_pool_ele_acquire( lck_pool );
1539 135 : sentinel->key = sentinel_key;
1540 135 : sentinel->start = interval_end;
1541 135 : lockout_interval_map_ele_insert( lck_map, sentinel, lck_pool );
1542 135 : }
1543 :
1544 144 : FD_TEST( lockout_interval_pool_free( lck_pool ) );
1545 144 : lockout_interval_t * interval = lockout_interval_pool_ele_acquire( lck_pool );
1546 144 : interval->key = key;
1547 144 : interval->addr = *addr;
1548 144 : interval->start = interval_start;
1549 144 : FD_TEST( lockout_interval_map_ele_insert( lck_map, interval, lck_pool ) );
1550 144 : }
1551 144 : }
1552 :
1553 : void
1554 : fd_tower_lockos_remove( fd_tower_t * tower,
1555 18 : ulong slot ) {
1556 :
1557 18 : lockout_interval_map_t * lck_map = tower->lck_map;
1558 18 : lockout_interval_t * lck_pool = tower->lck_pool;
1559 :
1560 18 : ulong sentinel_key = lockout_interval_key( slot, 0 );
1561 18 : for( lockout_interval_t * sentinel = lockout_interval_map_ele_remove( lck_map, &sentinel_key, NULL, lck_pool );
1562 120 : sentinel;
1563 102 : sentinel = lockout_interval_map_ele_remove( lck_map, &sentinel_key, NULL, lck_pool ) ) {
1564 102 : ulong interval_end = sentinel->start;
1565 102 : lockout_interval_pool_ele_release( lck_pool, sentinel );
1566 :
1567 102 : ulong key = lockout_interval_key( slot, interval_end );
1568 102 : for( lockout_interval_t * itrvl = lockout_interval_map_ele_remove( lck_map, &key, NULL, lck_pool );
1569 204 : itrvl;
1570 102 : itrvl = lockout_interval_map_ele_remove( lck_map, &key, NULL, lck_pool ) ) {
1571 102 : lockout_interval_pool_ele_release( lck_pool, itrvl );
1572 102 : }
1573 102 : }
1574 18 : }
|