Line data Source code
1 : #ifndef HEADER_fd_src_flamenco_leaders_fd_leaders_base_h 2 : #define HEADER_fd_src_flamenco_leaders_fd_leaders_base_h 3 : 4 : #include "../stakes/fd_stake_weight.h" 5 : #include "../runtime/sysvar/fd_sysvar_base.h" 6 : #include "../features/fd_features.h" 7 : 8 242412 : #define MAX_SHRED_DESTS 40200UL /* 200 * 201 - 1 (exclude self) */ 9 0 : #define MAX_SLOTS_PER_EPOCH 432000UL 10 168 : #define MAX_STAKED_LEADERS 108000UL 11 147 : #define MAX_COMPRESSED_STAKE_WEIGHTS (MAX_STAKED_LEADERS*2UL+1UL) 12 : 13 : /* Follows message structure in fd_stake_ci_stake_msg_init. 14 : Frankendancer only */ 15 : struct fd_stake_weight_msg_t { 16 : ulong epoch; /* Epoch for which the stake weights are valid */ 17 : ulong staked_vote_cnt; /* Number of staked nodes */ 18 : ulong staked_id_cnt; /* Number of staked nodes */ 19 : ulong start_slot; /* Start slot of the epoch */ 20 : ulong slot_cnt; /* Number of slots in the epoch */ 21 : ulong excluded_id_stake; /* Total stake that is excluded for shred dests */ 22 : }; 23 : typedef struct fd_stake_weight_msg_t fd_stake_weight_msg_t; 24 : 25 621 : #define FD_STAKE_CI_STAKE_MSG_HEADER_SZ (sizeof(fd_stake_weight_msg_t)) 26 273 : #define FD_STAKE_CI_STAKE_MSG_RECORD_SZ (sizeof(fd_vote_stake_weight_t)) 27 0 : #define FD_STAKE_CI_ID_WEIGHT_RECORD_SZ (sizeof(fd_stake_weight_t)) 28 0 : #define FD_STAKE_CI_STAKE_MSG_SZ (FD_STAKE_CI_STAKE_MSG_HEADER_SZ + MAX_COMPRESSED_STAKE_WEIGHTS * FD_STAKE_CI_STAKE_MSG_RECORD_SZ + MAX_SHRED_DESTS * FD_STAKE_CI_ID_WEIGHT_RECORD_SZ) 29 : 30 0 : #define FD_STAKE_OUT_MTU FD_STAKE_CI_STAKE_MSG_SZ 31 : 32 : static inline ulong fd_stake_weight_msg_sz( ulong staked_vote_cnt, 33 0 : ulong staked_id_cnt ) { 34 0 : return FD_STAKE_CI_STAKE_MSG_HEADER_SZ + staked_vote_cnt * FD_STAKE_CI_STAKE_MSG_RECORD_SZ + staked_id_cnt * FD_STAKE_CI_ID_WEIGHT_RECORD_SZ; 35 0 : } 36 : 37 : static inline fd_vote_stake_weight_t * 38 348 : fd_stake_weight_msg_stake_weights( fd_stake_weight_msg_t const * stake_weight_msg ) { 39 348 : return (fd_vote_stake_weight_t *)fd_type_pun( (uchar *)stake_weight_msg + FD_STAKE_CI_STAKE_MSG_HEADER_SZ ); 40 348 : } 41 : 42 : static inline fd_stake_weight_t * 43 273 : fd_stake_weight_msg_id_weights( fd_stake_weight_msg_t const * stake_weight_msg ) { 44 273 : return (fd_stake_weight_t *)fd_type_pun( (uchar *)stake_weight_msg + FD_STAKE_CI_STAKE_MSG_HEADER_SZ + stake_weight_msg->staked_vote_cnt * FD_STAKE_CI_STAKE_MSG_RECORD_SZ ); 45 273 : } 46 : 47 : /* Firedancer only */ 48 : struct fd_epoch_info_msg_t { 49 : ulong epoch; /* Epoch for which the info is valid */ 50 : ulong staked_vote_cnt; /* Number of staked nodes */ 51 : ulong staked_id_cnt; /* Number of staked nodes */ 52 : ulong start_slot; /* Start slot of the epoch */ 53 : ulong slot_cnt; /* Number of slots in the epoch */ 54 : ulong excluded_id_stake; /* Total stake that is excluded for shred dests */ 55 : fd_epoch_schedule_t epoch_schedule; /* Epoch schedule */ 56 : fd_features_t features; /* Feature activation slots */ 57 : }; 58 : typedef struct fd_epoch_info_msg_t fd_epoch_info_msg_t; 59 : 60 : /* Leader schedule calculation is done based on a weighted sample of 61 : a sorted list of stake weights (see fd_leaders and fd_wsample). 62 : Because there are many consumers of the stake weights from the Replay 63 : tile, a naive approach will make the link size very large: 64 : (72 bytes per weight * 40M vote accounts) = ~2.9GB. 65 : 66 : In order to do something more clever consider some protocol/message 67 : invariants: 68 : 1. the set of stake weights that is passed around is sorted based on 69 : stake and then tiebroken on lexicographic order of the vote key. 70 : 2. There are 108,000 leaders per epoch in the worst case (432000 71 : slots per epoch / 4 leaders per slot). 72 : 3. Stake weighted sample is done to select leaders. 73 : 74 : From this we know that there can be up to 108k leaders from a much 75 : larger set of stake weights. All other pubkeys can be ignored since 76 : we know that they won't be selected. In the worst case there are 77 : 40M - 108,000 vote accounts that won't be selected. In the weighted 78 : sample, two adjacent, non-selected pubkeys can be combined into one 79 : aggregated stake weight. If we take this idea to its limit we can 80 : combine all adjacent non-selected pubkeys into aggregated weights. 81 : The worst case number of these non-selected pubkeys is 108001 (if 82 : the non-selected keys are perfectly interleaved with the selected 83 : keys with the first and last key also being non-selected). In 84 : practice, this bound is probably tighter because the higher staked 85 : nodes are almost guaranteed to be selected. 86 : 87 : Regardless, this allows us to compress the set of stake weights into 88 : a much smaller, bounded set. The worst case number of compressed 89 : stake weights is 108000 + 108001 = 216001 keys. The aggregated 90 : weights will be stored as FD_DUMMY_ACCOUNT. The consumer is 91 : responsible for post-processing the aggregated weights to make sure 92 : they aren't inserted into any downstream data structures. */ 93 : 94 360 : #define FD_EPOCH_INFO_MSG_HEADER_SZ (sizeof(fd_epoch_info_msg_t)) 95 0 : #define FD_EPOCH_INFO_MAX_MSG_SZ (FD_EPOCH_INFO_MSG_HEADER_SZ + MAX_COMPRESSED_STAKE_WEIGHTS * sizeof(fd_vote_stake_weight_t) + MAX_SHRED_DESTS * sizeof(fd_stake_weight_t)) 96 0 : #define FD_EPOCH_OUT_MTU FD_EPOCH_INFO_MAX_MSG_SZ 97 : 98 : static inline ulong fd_epoch_info_msg_sz( ulong vote_cnt, 99 0 : ulong id_weight_cnt ) { 100 0 : return FD_EPOCH_INFO_MSG_HEADER_SZ + 101 0 : (vote_cnt * sizeof(fd_vote_stake_weight_t)) + 102 0 : (id_weight_cnt * sizeof(fd_stake_weight_t)); 103 0 : } 104 : 105 : static inline fd_vote_stake_weight_t * 106 180 : fd_epoch_info_msg_stake_weights( fd_epoch_info_msg_t const * epoch_info_msg ) { 107 180 : return (fd_vote_stake_weight_t *)fd_type_pun( (uchar *)epoch_info_msg + FD_EPOCH_INFO_MSG_HEADER_SZ ); 108 180 : } 109 : 110 : static inline fd_stake_weight_t * 111 180 : fd_epoch_info_msg_id_weights( fd_epoch_info_msg_t const * epoch_info_msg ) { 112 180 : return (fd_stake_weight_t *)fd_type_pun( (uchar *)epoch_info_msg + FD_EPOCH_INFO_MSG_HEADER_SZ + epoch_info_msg->staked_vote_cnt * sizeof(fd_vote_stake_weight_t) ); 113 180 : } 114 : 115 : /* compute_id_weights_from_vote_weights() translates vote-based 116 : stake weights into (older) identity-based stake weights. 117 : 118 : Before SIMD-0180, the leader schedule was generated starting from 119 : a list [(id, stake)] where `id` is the validator identity and 120 : `stake` its aggregated stake, and the same list was used to build 121 : the Turbine tree. 122 : 123 : After SIMD-0180, the leader schedule is generated by vote 124 : accounts, i.e. starting from a list [(vote, id, stake)] instead. 125 : This makes it easier to send rewards to the expected vote account. 126 : Notably, turbine tree doesn't change with SIMD-0180, so the old 127 : list [(id, stake)] is still necessary. 128 : 129 : Realistically, there should be a 1:1 relationship between id and 130 : vote, but unfortunately the on chain state allows for a 1:N 131 : relationship (1 id could be associated to N vote accounts). 132 : At the time of writing, testnet has one such example. 133 : id: DtSguGSHVrXdqZU1mKWKocsAjrXMhaC7YJic5xxN1Uom 134 : votes: 135 : - https://solscan.io/account/BbtyLT1ntMFbbXtsJRCZnYjpe7d7TUtyZeGKzod3eNsN?cluster=testnet 136 : - https://solscan.io/account/FFr8Gyjy3Wjeqv6oD4RjbwqD1mVfKycAFxQdASYAfR75?cluster=testnet 137 : 138 : Even when there is a 1:1 relationship, the order of the 2 lists 139 : can be different because validators with the same stake could 140 : be ordered differently by vote vs id. 141 : 142 : Last consideration, this operation is done only once per epoch, twice 143 : at startup. 144 : 145 : The current implementation uses sort in place to avoid extra memory 146 : for a map or tree. */ 147 : ulong 148 : compute_id_weights_from_vote_weights( fd_stake_weight_t * stake_weight, 149 : fd_vote_stake_weight_t const * vote_stake_weight, 150 : ulong staked_cnt ); 151 : 152 : #endif /* HEADER_fd_src_flamenco_leaders_fd_leaders_base_h */