Line data Source code
1 : #ifndef HEADER_fd_src_ballet_shred_fd_wsample_h 2 : #define HEADER_fd_src_ballet_shred_fd_wsample_h 3 : 4 : /* This header defines methods for computing weighted "random" samples 5 : of the specific type used by Solana for computing the leader 6 : schedule and the Turbine tree. 7 : 8 : In particular, those random samples are generated by: 9 : 10 : 1. Seeding ChaCha20 with a 32B seed (different for leader schedule vs 11 : Turbine). 12 : 2. Using the ChaCha20 random stream to generate a uniformly 13 : distributed random integer x in [0, total_stake_weight). 14 : 3. Returning the first index such that the cumulative sum of stake is 15 : at least x. */ 16 : 17 : #include "../fd_ballet_base.h" 18 : #include "../chacha20/fd_chacha20rng.h" 19 : 20 : struct fd_wsample_private; 21 : typedef struct fd_wsample_private fd_wsample_t; 22 : 23 6180 : #define FD_WSAMPLE_ALIGN (64UL) 24 : /* fd_leaders really wants a compile time-compatible footprint... The 25 : internal count is 1/8 * (9^ceil(log_9(ele_cnt)) - 1) */ 26 : #define FD_WSAMPLE_FOOTPRINT( ele_cnt, restore_enabled ) \ 27 : (192UL + 64UL*((restore_enabled)?2UL:1UL)*( \ 28 : ((ele_cnt)<= 1UL)? 0UL : \ 29 : ((ele_cnt)<= 9UL)? 1UL : \ 30 : ((ele_cnt)<= 81UL)? 10UL : \ 31 : ((ele_cnt)<= 729UL)? 91UL : \ 32 : ((ele_cnt)<= 6561UL)? 820UL : \ 33 : ((ele_cnt)<= 59049UL)? 7381UL : \ 34 : ((ele_cnt)<= 531441UL)? 66430UL : \ 35 : ((ele_cnt)<= 4782969UL)? 597871UL : \ 36 : ((ele_cnt)<= 43046721UL)? 5380840UL : \ 37 : ((ele_cnt)<= 387420489UL)? 48427561UL : \ 38 : ((ele_cnt)<=3486784401UL)?435848050UL : 3922632451UL )) \ 39 : 40 : /* fd_wsample_{align, footprint} give the alignment and footprint 41 : respectively required to create a weighted sampler with at most 42 : ele_cnt stake weights. If restore_enabled is zero, calls to 43 : wsample_restore_all will be no-ops, but the footprint required will 44 : be smaller. ele_cnt in [0, UINT_MAX-2) (note, not ULONG MAX). 45 : 46 : fd_wsample_{join,leave} join and leave a memory region formatted as a 47 : weighted sampler, respectively. They both are simple casts. 48 : 49 : fd_wsample_delete unformats a memory region used as a weighted 50 : sampler. Releases all interest in rng. */ 51 : 52 : FD_FN_CONST ulong fd_wsample_align ( void ); 53 : FD_FN_CONST ulong fd_wsample_footprint( ulong ele_cnt, int restore_enabled ); 54 : fd_wsample_t * fd_wsample_join ( void * shmem ); 55 : void * fd_wsample_leave ( fd_wsample_t * sampler ); 56 : void * fd_wsample_delete ( void * shmem ); 57 : 58 : 59 : /* FD_WSAMPLE_HINT_*: Hints that can be passed to fd_wsample_new in the 60 : opt_hint field. The hint specifies the distribution of weights: 61 : FLAT: The all weights are approximately constant. 62 : POWERLAW: The weights are sorted largest to smallest and decay with a 63 : power law distribution. At the moment, this assumes it's 64 : proportional to 1/x. 65 : 66 : The hint can also specify whether sampling will be done with deleting 67 : or without deleting: 68 : REMOVE: Sampling will be done without replacement, i.e. mostly with 69 : fd_wsample_sample_and_remove and/or 70 : fd_wsample_sample_and_remove_many. 71 : NOREMOVE: Sampling will be done with replacement, i.e. mostly with 72 : fd_wsample_sample and/or 73 : fd_wsample_sample_many. */ 74 1554 : #define FD_WSAMPLE_HINT_FLAT 0 75 6222 : #define FD_WSAMPLE_HINT_POWERLAW_NOREMOVE 2 76 312939 : #define FD_WSAMPLE_HINT_POWERLAW_REMOVE 3 77 : 78 : /* fd_wsample_new_init, fd_wsample_new_add_weight, and 79 : fd_wsample_new_fini format a memory region with the appropriate 80 : alignment and footprint to be usable as a weighted sampler. This 81 : multi-function initialization process prevents needing to construct a 82 : flat array of weights, which is often inconvenient. 83 : 84 : The caller must first call fd_wsample_new_init, then new_add_weight 85 : ele_cnt times, and finally new_fini. Only at that point will the 86 : region of memory be ready to be joined. 87 : 88 : fd_wsample_new_init begins the formatting a memory region. shmem is 89 : a pointer to the first byte of the memory region to use. rng must be 90 : a local join of a ChaCha20 RNG struct. The weighted sampler will use 91 : rng to generate random numbers. It may seem more natural for the 92 : weighted sampler to own its own rng, but this is done to facilitate 93 : sharing of rngs between weighted samplers, which is useful for 94 : Turbine. ele_cnt specifies the number of elements that can be 95 : sampled from and must be less than UINT_MAX. If restore_enabled is 96 : set to 0, fd_wsample_restore_all will not work but the required 97 : footprint is smaller. opt_hint gives a hint of the shape of the 98 : weights and the style of queries that will be most common; this hint 99 : impacts query performance but not correctness. opt_hint must be one 100 : of FD_WSAMPLE_HINT_*. 101 : 102 : fd_wsample_new_add adds a weight to a partially formatted memory 103 : region. shmem must be a partially constructed region of memory, as 104 : returned by fd_wsample_new_init or fd_wsample_new_add_weight, weight 105 : must be strictly positive, and the cumulative sum of this weight and 106 : all other weights must be no more than ULONG_MAX. 107 : 108 : fd_wsample_new_fini finalizes the formatting of a partially formatted 109 : memory region. shmem must be a partially constructed region of 110 : memory, as returned by fd_wsample_new_add_weight (or 111 : fd_wsample_new_init if ele_cnt==0). If poisoned_weight is non-zero, 112 : the weighted sampler will end with a poisoned region representing an 113 : indeterminate number of unknown elements with total weight equal to 114 : poisoned_weight. This is useful for chopping off a long tail so that 115 : the number of elements can be bounded easily. The sum of all weights 116 : and poisoned_weight must be no more than ULONG_MAX. 117 : 118 : Retains read/write interest in rng. 119 : 120 : Each function returns shmem on success and NULL on failure. It's 121 : safe to pass NULL as shmem, in which case NULL will be returned, so 122 : you only need to check the final result. 123 : 124 : On successful completion of the formatting process, the weighted 125 : sampler will contain an element corresponding to each provided 126 : weight. Caller is not joined on return. */ 127 : void * fd_wsample_new_init( void * shmem, 128 : fd_chacha20rng_t * rng, 129 : ulong ele_cnt, 130 : int restore_enabled, 131 : int opt_hint ); 132 : void * fd_wsample_new_add ( void * shmem, ulong weight ); 133 : void * fd_wsample_new_fini( void * shmem, ulong poisoned_weight ); 134 : 135 : /* fd_wsample_get_rng returns the value provided for rng in new. */ 136 : fd_chacha20rng_t * fd_wsample_get_rng( fd_wsample_t * sampler ); 137 : 138 : /* fd_wsample_seed_rng seeds the ChaCha20 rng with the provided seed in 139 : preparation for sampling. This function is compatible with Solana's 140 : ChaChaRng::from_seed. */ 141 : void fd_wsample_seed_rng( fd_chacha20rng_t * rng, uchar seed[static 32] ); 142 : 143 : /* fd_wsample_sample{_and_remove}{,_many} produces one or cnt (in the 144 : _many case) weighted random samples from the sampler. If the 145 : _and_remove variant of the function is called, the returned node will 146 : be temporarily removed from the sampler, i.e. for sampling without 147 : replacement. Random samples are produced using the Solana-required 148 : method. Sampler's RNG must be seeded appropriately 149 : prior to using these functions. 150 : 151 : The _many variants of the function store the ith index they sampled 152 : in idxs[i] for i in [0, cnt). The other variants of the function 153 : simply return the sampled index. 154 : 155 : If the sampler has no unremoved elements, these functions will 156 : return/store FD_WSAMPLE_EMPTY. 157 : 158 : If the RNG sample lands in the poisoned region, these functions will 159 : return/store FD_WSAMPLE_INDETERMINATE. If such a sample is produced 160 : in the without replacement mode, all subsequent calls (with and 161 : without replacement) will also return FD_WSAMPLE_INDETERMINATE until 162 : the next call to fd_wsample_restore_all. This is necessary because 163 : the poisoned region represents an indeterminate number of elements, 164 : so it's not possible to know how removing one of them will affect the 165 : total weight, and thus all subsequent random samples. 166 : 167 : For each index i, fd_wsample_sample returns i with 168 : probability weights[i]/sum(weights), only considering weights of 169 : elements that have not been removed. */ 170 341619 : #define FD_WSAMPLE_EMPTY UINT_MAX 171 23866362 : #define FD_WSAMPLE_INDETERMINATE (UINT_MAX-1UL) 172 : ulong fd_wsample_sample ( fd_wsample_t * sampler ); 173 : ulong fd_wsample_sample_and_remove ( fd_wsample_t * sampler ); 174 : void fd_wsample_sample_many ( fd_wsample_t * sampler, ulong * idxs, ulong cnt ); 175 : void fd_wsample_sample_and_remove_many( fd_wsample_t * sampler, ulong * idxs, ulong cnt ); 176 : 177 : /* fd_wsample_remove_idx removes an element by index as if it had been selected 178 : for sampling without replacement. Unless restore_all is called, this 179 : index will no longer be returned by any of the sample methods, and 180 : its weight will be excluded from the remaining sampling operations. 181 : Removing an element that has already been removed is a no-op. idx 182 : must be in [0, ele_cnt). */ 183 : void fd_wsample_remove_idx( fd_wsample_t * sampler, ulong idx ); 184 : 185 : /* fd_wsample_restore_all restores all elements removed with one of the 186 : sample_and_remove functions or with fd_wsample_remove_idx. The 187 : elements with have their original weight. This is faster than 188 : recreating the weighted sampler, even if all elements have been 189 : removed. Returns sampler on success and NULL on failure. The only 190 : error case is if sampler was constructed with restore_enabled set to 0, 191 : in which case no elements are restored. */ 192 : fd_wsample_t * fd_wsample_restore_all( fd_wsample_t * sampler ); 193 : 194 : 195 : 196 : #endif /* HEADER_fd_src_ballet_shred_fd_wsample_h */