Line data Source code
1 : #ifndef HEADER_fd_src_ballet_wsample_fd_wsample_h 2 : #define HEADER_fd_src_ballet_wsample_fd_wsample_h 3 : 4 : /* This header defines methods for computing weighted "random" samples 5 : of the specific type used by Solana for computing the leader 6 : schedule and the Turbine tree. 7 : 8 : In particular, those random samples are generated by: 9 : 10 : 1. Seeding ChaCha20 with a 32B seed (different for leader schedule vs 11 : Turbine). 12 : 2. Using the ChaCha20 random stream to generate a uniformly 13 : distributed random integer x in [0, total_stake_weight). 14 : 3. Returning the first index such that the cumulative sum of stake is 15 : at least x. */ 16 : 17 : #include "../fd_ballet_base.h" 18 : #include "../chacha/fd_chacha_rng.h" 19 : 20 : struct fd_wsample_private; 21 : typedef struct fd_wsample_private fd_wsample_t; 22 : 23 : #define FD_WSAMPLE_RNG_CHACHA20 (0U) 24 : #define FD_WSAMPLE_RNG_CHACHA8 (1U) 25 : 26 6684 : #define FD_WSAMPLE_ALIGN (64UL) 27 : /* fd_leaders really wants a compile time-compatible footprint... The 28 : internal count is 1/8 * (9^ceil(log_9(ele_cnt)) - 1) */ 29 : #define FD_WSAMPLE_FOOTPRINT( ele_cnt, restore_enabled ) \ 30 : (192UL + 64UL*((restore_enabled)?2UL:1UL)*( \ 31 : ((ele_cnt)<= 1UL)? 0UL : \ 32 : ((ele_cnt)<= 9UL)? 1UL : \ 33 : ((ele_cnt)<= 81UL)? 10UL : \ 34 : ((ele_cnt)<= 729UL)? 91UL : \ 35 : ((ele_cnt)<= 6561UL)? 820UL : \ 36 : ((ele_cnt)<= 59049UL)? 7381UL : \ 37 : ((ele_cnt)<= 531441UL)? 66430UL : \ 38 : ((ele_cnt)<= 4782969UL)? 597871UL : \ 39 : ((ele_cnt)<= 43046721UL)? 5380840UL : \ 40 : ((ele_cnt)<= 387420489UL)? 48427561UL : \ 41 : ((ele_cnt)<=3486784401UL)?435848050UL : 3922632451UL )) \ 42 : 43 : /* fd_wsample_{align, footprint} give the alignment and footprint 44 : respectively required to create a weighted sampler with at most 45 : ele_cnt stake weights. If restore_enabled is zero, calls to 46 : wsample_restore_all will be no-ops, but the footprint required will 47 : be smaller. ele_cnt in [0, UINT_MAX-2) (note, not ULONG MAX). 48 : 49 : fd_wsample_{join,leave} join and leave a memory region formatted as a 50 : weighted sampler, respectively. They both are simple casts. 51 : 52 : fd_wsample_delete unformats a memory region used as a weighted 53 : sampler. Releases all interest in rng. */ 54 : 55 : FD_FN_CONST ulong fd_wsample_align ( void ); 56 : FD_FN_CONST ulong fd_wsample_footprint( ulong ele_cnt, int restore_enabled ); 57 : fd_wsample_t * fd_wsample_join ( void * shmem ); 58 : void * fd_wsample_leave ( fd_wsample_t * sampler ); 59 : void * fd_wsample_delete ( void * shmem ); 60 : 61 : 62 : /* FD_WSAMPLE_HINT_*: Hints that can be passed to fd_wsample_new in the 63 : opt_hint field. The hint specifies the distribution of weights: 64 : FLAT: The all weights are approximately constant. 65 : POWERLAW: The weights are sorted largest to smallest and decay with a 66 : power law distribution. At the moment, this assumes it's 67 : proportional to 1/x. 68 : 69 : The hint can also specify whether sampling will be done with deleting 70 : or without deleting: 71 : REMOVE: Sampling will be done without replacement, i.e. mostly with 72 : fd_wsample_sample_and_remove and/or 73 : fd_wsample_sample_and_remove_many. 74 : NOREMOVE: Sampling will be done with replacement, i.e. mostly with 75 : fd_wsample_sample and/or 76 : fd_wsample_sample_many. */ 77 1560 : #define FD_WSAMPLE_HINT_FLAT 0 78 6726 : #define FD_WSAMPLE_HINT_POWERLAW_NOREMOVE 2 79 313590 : #define FD_WSAMPLE_HINT_POWERLAW_REMOVE 3 80 : 81 : /* fd_wsample_new_init, fd_wsample_new_add_weight, and 82 : fd_wsample_new_fini format a memory region with the appropriate 83 : alignment and footprint to be usable as a weighted sampler. This 84 : multi-function initialization process prevents needing to construct a 85 : flat array of weights, which is often inconvenient. 86 : 87 : The caller must first call fd_wsample_new_init, then new_add_weight 88 : ele_cnt times, and finally new_fini. Only at that point will the 89 : region of memory be ready to be joined. 90 : 91 : fd_wsample_new_init begins the formatting a memory region. shmem is 92 : a pointer to the first byte of the memory region to use. rng must be 93 : a local join of a ChaCha20 RNG struct. The weighted sampler will use 94 : rng to generate random numbers. It may seem more natural for the 95 : weighted sampler to own its own rng, but this is done to facilitate 96 : sharing of rngs between weighted samplers, which is useful for 97 : Turbine. ele_cnt specifies the number of elements that can be 98 : sampled from and must be less than UINT_MAX. If restore_enabled is 99 : set to 0, fd_wsample_restore_all will not work but the required 100 : footprint is smaller. opt_hint gives a hint of the shape of the 101 : weights and the style of queries that will be most common; this hint 102 : impacts query performance but not correctness. opt_hint must be one 103 : of FD_WSAMPLE_HINT_*. 104 : 105 : fd_wsample_new_add adds a weight to a partially formatted memory 106 : region. shmem must be a partially constructed region of memory, as 107 : returned by fd_wsample_new_init or fd_wsample_new_add_weight, weight 108 : must be strictly positive, and the cumulative sum of this weight and 109 : all other weights must be no more than ULONG_MAX. 110 : 111 : fd_wsample_new_fini finalizes the formatting of a partially formatted 112 : memory region. shmem must be a partially constructed region of 113 : memory, as returned by fd_wsample_new_add_weight (or 114 : fd_wsample_new_init if ele_cnt==0). If poisoned_weight is non-zero, 115 : the weighted sampler will end with a poisoned region representing an 116 : indeterminate number of unknown elements with total weight equal to 117 : poisoned_weight. This is useful for chopping off a long tail so that 118 : the number of elements can be bounded easily. The sum of all weights 119 : and poisoned_weight must be no more than ULONG_MAX. 120 : 121 : Retains read/write interest in rng. 122 : 123 : Each function returns shmem on success and NULL on failure. It's 124 : safe to pass NULL as shmem, in which case NULL will be returned, so 125 : you only need to check the final result. 126 : 127 : On successful completion of the formatting process, the weighted 128 : sampler will contain an element corresponding to each provided 129 : weight. Caller is not joined on return. */ 130 : void * fd_wsample_new_init( void * shmem, 131 : fd_chacha_rng_t * rng, 132 : ulong ele_cnt, 133 : int restore_enabled, 134 : int opt_hint ); 135 : void * fd_wsample_new_add ( void * shmem, ulong weight ); 136 : void * fd_wsample_new_fini( void * shmem, ulong poisoned_weight ); 137 : 138 : /* fd_wsample_get_rng returns the value provided for rng in new. */ 139 : fd_chacha_rng_t * fd_wsample_get_rng( fd_wsample_t * sampler ); 140 : 141 : /* fd_wsample_seed_rng seeds the ChaCha rng with the provided seed in 142 : preparation for sampling. This function is compatible with Solana's 143 : ChaChaRng::from_seed. */ 144 : void fd_wsample_seed_rng( fd_wsample_t * sampler, 145 : uchar seed[ 32 ], 146 : int use_chacha8 ); 147 : 148 : /* fd_wsample_sample{_and_remove}{,_many} produces one or cnt (in the 149 : _many case) weighted random samples from the sampler. If the 150 : _and_remove variant of the function is called, the returned node will 151 : be temporarily removed from the sampler, i.e. for sampling without 152 : replacement. Random samples are produced using the Solana-required 153 : method. Sampler's RNG must be seeded appropriately 154 : prior to using these functions. 155 : 156 : The _many variants of the function store the ith index they sampled 157 : in idxs[i] for i in [0, cnt). The other variants of the function 158 : simply return the sampled index. 159 : 160 : If the sampler has no unremoved elements, these functions will 161 : return/store FD_WSAMPLE_EMPTY. 162 : 163 : If the RNG sample lands in the poisoned region, these functions will 164 : return/store FD_WSAMPLE_INDETERMINATE. If such a sample is produced 165 : in the without replacement mode, all subsequent calls (with and 166 : without replacement) will also return FD_WSAMPLE_INDETERMINATE until 167 : the next call to fd_wsample_restore_all. This is necessary because 168 : the poisoned region represents an indeterminate number of elements, 169 : so it's not possible to know how removing one of them will affect the 170 : total weight, and thus all subsequent random samples. 171 : 172 : For each index i, fd_wsample_sample returns i with 173 : probability weights[i]/sum(weights), only considering weights of 174 : elements that have not been removed. */ 175 341628 : #define FD_WSAMPLE_EMPTY UINT_MAX 176 23866362 : #define FD_WSAMPLE_INDETERMINATE (UINT_MAX-1UL) 177 : ulong fd_wsample_sample ( fd_wsample_t * sampler ); 178 : ulong fd_wsample_sample_and_remove ( fd_wsample_t * sampler ); 179 : void fd_wsample_sample_many ( fd_wsample_t * sampler, ulong * idxs, ulong cnt ); 180 : void fd_wsample_sample_and_remove_many( fd_wsample_t * sampler, ulong * idxs, ulong cnt ); 181 : 182 : /* fd_wsample_remove_idx removes an element by index as if it had been selected 183 : for sampling without replacement. Unless restore_all is called, this 184 : index will no longer be returned by any of the sample methods, and 185 : its weight will be excluded from the remaining sampling operations. 186 : Removing an element that has already been removed is a no-op. idx 187 : must be in [0, ele_cnt). */ 188 : void fd_wsample_remove_idx( fd_wsample_t * sampler, ulong idx ); 189 : 190 : /* fd_wsample_restore_all restores all elements removed with one of the 191 : sample_and_remove functions or with fd_wsample_remove_idx. The 192 : elements with have their original weight. This is faster than 193 : recreating the weighted sampler, even if all elements have been 194 : removed. Returns sampler on success and NULL on failure. The only 195 : error case is if sampler was constructed with restore_enabled set to 0, 196 : in which case no elements are restored. */ 197 : fd_wsample_t * fd_wsample_restore_all( fd_wsample_t * sampler ); 198 : 199 : #endif /* HEADER_fd_src_ballet_wsample_fd_wsample_h */