Line data Source code
1 : #ifndef HEADER_fd_src_app_fdctl_run_tiles_fd_stake_ci_h
2 : #define HEADER_fd_src_app_fdctl_run_tiles_fd_stake_ci_h
3 :
4 : /* fd_stake_ci handles the thorny problem of keeping track of leader
5 : schedules and shred destinations, which are epoch specific. Around
6 : epoch boundaries, we may need to query information from the epoch on
7 : either side of the boundary.
8 :
9 : When you make a stake delegation change during epoch N, it becomes
10 : active at the start of the first slot of epoch N+1, but it doesn't
11 : affect the leader schedule or the shred destinations until epoch N+2.
12 : These methods take care of all that complexity, so the caller does not
13 : need to do any adjustment. */
14 :
15 : #include "fd_shred_dest.h"
16 : #include "../../flamenco/leaders/fd_leaders.h"
17 :
18 241725 : #define MAX_SHRED_DESTS MAX_STAKED_LEADERS
19 : /* staked+unstaked <= MAX_SHRED_DESTS implies
20 : MAX_SHRED_DEST_FOOTPRINT>=fd_shred_dest_footprint( staked, unstaked )
21 : This is asserted in the tests. The size of fd_shred_dest_t varies
22 : based on FD_SHA256_BATCH_FOOTPRINT, which depends on the compiler
23 : settings. */
24 : #define MAX_SHRED_DEST_FOOTPRINT (8386688UL + sizeof(fd_shred_dest_t))
25 :
/* fd_per_epoch_info_t holds the leader schedule and shred destination
   state for a single epoch, along with the slot window in which that
   state is valid.  fd_stake_ci keeps two of these (see epoch_info
   below) so queries around an epoch boundary can be served from either
   side. */
struct fd_per_epoch_info_private {
  /* Epoch, and [start_slot, start_slot+slot_cnt) refer to the time
     period for which lsched and sdest are valid.  I.e. if you're
     interested in the leader or computing a shred destination for a
     slot s, this struct has the right data when s is in [start_slot,
     start_slot+slot_cnt). */
  ulong epoch;      /* epoch number this entry describes */
  ulong start_slot; /* first slot of the validity window */
  ulong slot_cnt;   /* number of slots in the validity window */
  ulong excluded_stake;    /* NOTE(review): presumably stake not represented in the stored
                              weight list (e.g. a truncated tail beyond MAX_SHRED_DESTS) --
                              confirm against the .c file */
  ulong vote_keyed_lsched; /* NOTE(review): looks like a flag selecting the vote-account-keyed
                              leader schedule introduced by SIMD-0180 (see
                              compute_id_weights_from_vote_weights below) -- confirm */

  /* Invariant: These are always joined and use the memory below for
     their footprint. */
  fd_epoch_leaders_t * lsched; /* joined view of _lsched */
  fd_shred_dest_t * sdest;     /* joined view of _sdest  */

  /* Backing storage for the two joined objects above.  Sized and
     aligned for the worst case; see the note on MAX_SHRED_DEST_FOOTPRINT
     above. */
  uchar __attribute__((aligned(FD_EPOCH_LEADERS_ALIGN))) _lsched[ FD_EPOCH_LEADERS_FOOTPRINT(MAX_SHRED_DESTS, MAX_SLOTS_PER_EPOCH) ];
  uchar __attribute__((aligned(FD_SHRED_DEST_ALIGN   ))) _sdest [ MAX_SHRED_DEST_FOOTPRINT ];
};
typedef struct fd_per_epoch_info_private fd_per_epoch_info_t;
47 :
/* fd_stake_ci_t is the top-level stake contact info store.  It is
   statically sized (~30 MB; see the note on footprint/align below), so
   it should normally live in a workspace rather than on the stack. */
struct fd_stake_ci {
  /* Public key of the local validator's identity keypair, copied in at
     _new / _set_identity.  Used by fd_shred_dest to locate this
     validator in the Turbine tree. */
  fd_pubkey_t identity_key[ 1 ];

  /* scratch and stake_weight are only relevant between stake_msg_init
     and stake_msg_fini.  shred_dest is only relevant between
     dest_add_init and dest_add_fini. */
  struct {
    /* Staging copy of an in-progress stake message's header fields;
       mirrors the leading fields of fd_per_epoch_info_t, plus the
       count of staked entries in the message. */
    ulong epoch;
    ulong start_slot;
    ulong slot_cnt;
    ulong staked_cnt;
    ulong excluded_stake;
    ulong vote_keyed_lsched;
  } scratch[1];

  /* Staging arrays, each sized for the worst case of MAX_SHRED_DESTS
     entries.  shred_dest is the array handed to the caller by
     dest_add_init (see that function's contract below). */
  fd_vote_stake_weight_t vote_stake_weight[ MAX_SHRED_DESTS ];
  fd_stake_weight_t stake_weight [ MAX_SHRED_DESTS ];
  fd_shred_dest_weighted_t shred_dest [ MAX_SHRED_DESTS ];

  /* Working space used internally when rebuilding destination lists. */
  fd_shred_dest_weighted_t shred_dest_temp[ MAX_SHRED_DESTS ];

  /* The information to be used for epoch i can be found at
     epoch_info[ i%2 ] if it is known. */
  fd_per_epoch_info_t epoch_info[ 2 ];
};
typedef struct fd_stake_ci fd_stake_ci_t;
74 :
75 : /* fd_stake_ci_{footprint, align} return the footprint and alignment
76 : required of a region of memory to be used as an fd_stake_ci_t.
77 : fd_stake_ci_t is statically sized, so it can just be declared
78 : outright if needed, but it's pretty large (~30 MB!), so you probably
79 : don't want it on the stack. */
80 :
81 0 : FD_FN_CONST static inline ulong fd_stake_ci_footprint( void ) { return sizeof (fd_stake_ci_t); }
82 0 : FD_FN_CONST static inline ulong fd_stake_ci_align ( void ) { return alignof(fd_stake_ci_t); }
83 :
84 : /* fd_stake_ci_new formats a piece of memory as a valid stake contact
85 : information store. `identity_key` is a pointer to the public key of
86 : the identity keypair of the local validator. This is used by
87 : fd_shred_dest to know where in the Turbine tree it belongs.
88 : Does NOT retain a read interest in identity_key after the function
89 : returns. */
90 : void * fd_stake_ci_new ( void * mem, fd_pubkey_t const * identity_key );
91 : fd_stake_ci_t * fd_stake_ci_join( void * mem );
92 :
93 : void * fd_stake_ci_leave ( fd_stake_ci_t * info );
94 : void * fd_stake_ci_delete( void * mem );
95 :
96 : /* Frankendancer and Firedancer's Gossip impls follow different regimes
97 : for broadcasting Contact Infos. Firedancer employs an update-based
98 : regime where we receive update/remove messages for individual contact
99 : info entries. Frankendancer (and thusly Agave) performs a full table
100 : broadcast. fd_stake_ci offers two sets of APIs that cater to the
101 : different regimes. */
102 :
103 : /* Frankendancer only:
104 : fd_stake_ci_stake_msg_{init, fini} are used to handle messages
105 : containing stake weight updates from the Rust side of the splice, and
106 : fd_stake_ci_dest_add_{init, fini} are used to handle messages
107 : containing contact info (potential shred destinations) updates from
108 : the Rust side of the splice.
109 :
110 : These are very specific to the current splices, but rather than parse
111 : the message in the pack and shred tiles, we parse it here. Since
112 : these messages arrive on a dcache and can get overrun, both expose a
113 : init/fini model.
114 :
115 : Upon returning from a call to fd_stake_ci_{stake_msg, dest_add}_init,
116 : the stake contact info object will be in a stake-msg-pending or
117 : dest-add-pending mode, respectively, regardless of what mode it was
118 : in before. In either of these modes, calls to the query functions
119 : (get_*_for slot) are okay and will return the same values they
120 : returned prior to the _init call.
121 :
122 : In order to call fd_stake_ci_{stake_msg, dest_add}_fini, the stake
123 : contact info must be in stake-msg-pending / dest-add-pending mode,
124 : respectively. This means, for example, you cannot call
125 : fd_stake_ci_stake_msg_init followed by fd_stake_ci_dest_add_fini
126 : without an intervening call to fd_stake_ci_dest_add_init. There's no
127 : need to cancel an operation that began but didn't finish. Calling
128 : init multiple times without calling fini will not leak any resources.
129 :
130 : msg should be a pointer to the first byte of the dcache entry
131 : containing the stakes update. msg will be accessed msg->weights[i]
132 : for i in [0, msg->staked_cnt). msg must contain at least one
133 : staked pubkey, and the pubkeys must be sorted in the usual way (by
134 : stake descending, ties broken by pubkey ascending).
135 :
136 : fd_stake_ci_dest_add_init behaves slightly differently and returns a
137 : pointer to the first element of an array of size MAX_SHRED_DESTS-1 to
138 : be populated. This allows the caller to augment the information
139 : in the message from Rust with additional information (i.e. mac
140 : addresses). The `cnt` argument to _dest_add_fini specifies the
141 : number of elements of the array returned by _init that were
142 : populated. 0<=cnt<MAX_SHRED_DESTS. _fini will only read the first
143 : `cnt` elements of the array. The stake_lamports field of the input
144 : is ignored. The identity pubkey provided at initialization must not
145 : be one of the cnt values in the array. The caller should not retain
146 : a read or write interest in the pointer returned by _init after fini
147 : has been called, or after the caller has determined that fini will
148 : not be called for that update, e.g. because the update was overrun.
149 : Calls to _fini may clobber the array.
150 :
151 : The list used for leader schedules is always just the staked nodes.
152 : The list used for shred destinations is the staked nodes along with
153 : any unstaked nodes for which we have contact info. If a stake
154 : message doesn't have contact info for a staked node, the previous
155 : contact info will be preserved. If a stake message doesn't have
156 : contact info for an unstaked node, on the other hand, that node will
157 : be deleted from the list. */
158 : void fd_stake_ci_stake_msg_init( fd_stake_ci_t * info, fd_stake_weight_msg_t const * msg );
159 : void fd_stake_ci_stake_msg_fini( fd_stake_ci_t * info );
160 : fd_shred_dest_weighted_t * fd_stake_ci_dest_add_init ( fd_stake_ci_t * info );
161 : void fd_stake_ci_dest_add_fini ( fd_stake_ci_t * info, ulong cnt );
162 :
163 : /* Firedancer only:
164 : The full client's Gossip update model publishes individual contact
165 : info updates (update/insert or remove), which requires a different
166 : set of dest_ APIs.
167 :
168 : fd_stake_ci_dest_update updates (or adds, if necessary) a shred dest
169 : entry. ip4 is in net order, port is in host order and are both
170 : assumed to be non-zero. */
171 :
172 : void fd_stake_ci_dest_update( fd_stake_ci_t * info, fd_pubkey_t const * pubkey, uint ip4, ushort port );
173 : void fd_stake_ci_dest_remove( fd_stake_ci_t * info, fd_pubkey_t const * pubkey );
174 :
175 :
176 : /* fd_stake_ci_set_identity changes the identity of the locally running
177 : validator at runtime. */
178 : void fd_stake_ci_set_identity( fd_stake_ci_t * info,
179 : fd_pubkey_t const * identity_key );
180 :
181 : /* fd_stake_ci_get_{sdest, lsched}_for_slot respectively return a
182 : pointer to the fd_shred_dest_t and fd_epoch_leaders_t containing
183 : information about the specified slot, if it is available. These
184 : functions are the primary query functions for fd_stake_ci. They
185 : return NULL if we don't have information for that slot.
186 :
187 : The fact these take a slot perhaps makes it more clear, but, it's
188 : worth mentioning again there's nothing like the adjustment performed
189 : by Solana's get_leader_schedule_epoch going on here. If you want to
190 : know the leader in slot X, just pass slot X. The returned leader
191 : schedule will not be based on the stake weights active during slot X,
192 : but rather the stake weights offset in time by an appropriate amount
193 : so they apply to slot X. */
194 : fd_shred_dest_t * fd_stake_ci_get_sdest_for_slot ( fd_stake_ci_t const * info, ulong slot );
195 : fd_epoch_leaders_t * fd_stake_ci_get_lsched_for_slot( fd_stake_ci_t const * info, ulong slot );
196 :
197 : /* compute_id_weights_from_vote_weights() translates vote-based
198 : stake weights into (older) identity-based stake weights.
199 :
200 : Before SIMD-0180, the leader schedule was generated starting from
201 : a list [(id, stake)] where `id` is the validator identity and
202 : `stake` its aggregated stake, and the same list was used to build
203 : the Turbine tree.
204 :
205 : After SIMD-0180, the leader schedule is generated by vote
206 : accounts, i.e. starting from a list [(vote, id, stake)] instead.
207 : This makes it easier to send rewards to the expected vote account.
208 : Notably, the Turbine tree doesn't change with SIMD-0180, so the old
209 : list [(id, stake)] is still necessary.
210 :
211 : Realistically, there should be a 1:1 relationship between id and
212 : vote, but unfortunately the on chain state allows for a 1:N
213 : relationship (1 id could be associated to N vote accounts).
214 : At the time of writing, testnet has one such example.
215 : id: DtSguGSHVrXdqZU1mKWKocsAjrXMhaC7YJic5xxN1Uom
216 : votes:
217 : - https://solscan.io/account/BbtyLT1ntMFbbXtsJRCZnYjpe7d7TUtyZeGKzod3eNsN?cluster=testnet
218 : - https://solscan.io/account/FFr8Gyjy3Wjeqv6oD4RjbwqD1mVfKycAFxQdASYAfR75?cluster=testnet
219 :
220 : Even when there is a 1:1 relationship, the order of the 2 lists
221 : can be different because validators with the same stake could
222 : be ordered differently by vote vs id.
223 :
224 : Last consideration, this operation is done only once per epoch, twice
225 : at startup.
226 :
227 : The current implementation uses sort in place to avoid extra memory
228 : for a map or tree. */
229 : ulong
230 : compute_id_weights_from_vote_weights( fd_stake_weight_t * stake_weight,
231 : fd_vote_stake_weight_t const * vote_stake_weight,
232 : ulong staked_cnt );
233 :
234 : #endif /* HEADER_fd_src_app_fdctl_run_tiles_fd_stake_ci_h */
|