Line data Source code
1 : #ifndef HEADER_fd_src_ballet_chacha_fd_chacha_rng_h 2 : #define HEADER_fd_src_ballet_chacha_fd_chacha_rng_h 3 : 4 : /* fd_chacha_rng provides APIs for ChaCha-based RNG, as used in the 5 : Solana protocol. This API should only be used where necessary. 6 : fd_rng is a better choice in all other cases. */ 7 : 8 : #include "fd_chacha.h" 9 : #include "../../util/bits/fd_uwide.h" 10 : 11 : /* FD_CHACHA_RNG_DEBUG controls debug logging. 0 is off; 1 is on. */ 12 : 13 : #ifndef FD_CHACHA_RNG_DEBUG 14 : #define FD_CHACHA_RNG_DEBUG 0 15 : #endif 16 : 17 : /* Solana uses different mechanisms of mapping a ulong to an unbiased 18 : integer in [0, n) in different parts of the code. In particular, 19 : leader schedule generation uses MODE_MOD and Turbine uses MODE_SHIFT. 20 : See the note in fd_chacha_rng_ulong_roll for more details. */ 21 1199003316 : #define FD_CHACHA_RNG_MODE_MOD 1 22 314355 : #define FD_CHACHA_RNG_MODE_SHIFT 2 23 : 24 : /* Solana uses different ChaCha algorithms. Leader schedule generation 25 : uses ChaCha20 and Turbine switched from ChaCha20 to ChaCha8. */ 26 2135268 : #define FD_CHACHA_RNG_ALGO_CHACHA20 1 27 23202700 : #define FD_CHACHA_RNG_ALGO_CHACHA8 2 28 : 29 : /* FD_CHACHA_RNG_BUFSZ is the internal buffer size of pre-generated 30 : ChaCha20 blocks. Multiple of block size (64 bytes) and a power of 2. */ 31 : 32 : #if FD_HAS_AVX512 33 4399998 : #define FD_CHACHA_RNG_BUFSZ (16*FD_CHACHA_BLOCK_SZ) 34 : #elif FD_HAS_AVX 35 8799996 : #define FD_CHACHA_RNG_BUFSZ (8*FD_CHACHA_BLOCK_SZ) 36 : #else 37 : #define FD_CHACHA_RNG_BUFSZ (256UL) 38 : #endif 39 : 40 : struct __attribute__((aligned(32UL))) fd_chacha_rng_private { 41 : /* ChaCha20 encryption key */ 42 : uchar key[ 32UL ] __attribute__((aligned(32UL))); 43 : 44 : /* Ring buffer of pre-generated ChaCha20 RNG data. 45 : Note: We currently assume all reads are 8 byte. This means the 46 : cursor is always aligned by 8 and strictly increases in 47 : increments of 8. Thus, we really only have to refill the 48 : buffer if buf_off==buf_fill. */ 49 : uchar buf[ FD_CHACHA_RNG_BUFSZ ] __attribute__((aligned(FD_CHACHA_BLOCK_SZ))); 50 : ulong buf_off; /* Total number of bytes consumed */ 51 : ulong buf_fill; /* Total number of bytes produced 52 : Always aligned by FD_CHACHA_BLOCK_SZ */ 53 : 54 : int mode; 55 : int algo; 56 : }; 57 : typedef struct fd_chacha_rng_private fd_chacha_rng_t; 58 : 59 : FD_PROTOTYPES_BEGIN 60 : 61 : /* fd_chacha_rng_{align,footprint} give the needed alignment and 62 : footprint of a memory region suitable to hold a ChaCha20-based RNG. 63 : 64 : fd_chacha_rng_new formats a memory region with suitable alignment 65 : and footprint for holding a chacha20_rng object. Assumes shmem 66 : points on the caller to the first byte of the memory region owned by 67 : the caller to use. `mode` must be one of the FD_CHACHA_RNG_MODE_* 68 : constants defined above and dictates what mode this object will use 69 : to generate random numbers. Returns shmem on success and NULL on 70 : failure (logs details). The memory region will be owned by the 71 : object on successful return. The caller is not joined on return. 72 : 73 : fd_chacha_rng_join joins the caller to a chacha20_rng object. 74 : Assumes shrng points to the first byte of the memory region holding 75 : the object. Returns a local handle to the join on success (this is 76 : not necessarily a simple cast of the address) and NULL on failure 77 : (logs details). 78 : 79 : fd_chacha_rng_leave leaves the caller's current local join to a 80 : ChaCha20 RNG object. Returns a pointer to the memory region holding 81 : the object on success this is not necessarily a simple cast of the 82 : address) and NULL on failure (logs details). The caller is not 83 : joined on successful return. 84 : 85 : fd_chacha_rng_delete unformats a memory region that holds a ChaCha20 86 : RNG object. Assumes shrng points on the caller to the first byte of 87 : the memory region holding the state and that nobody is joined. 88 : Returns a pointer to the memory region on success and NULL on failure 89 : (logs details). The caller has ownership of the memory region on 90 : successful return. */ 91 : 92 : FD_FN_CONST ulong 93 : fd_chacha_rng_align( void ); 94 : 95 : FD_FN_CONST ulong 96 : fd_chacha_rng_footprint( void ); 97 : 98 : void * 99 : fd_chacha_rng_new( void * shmem, int mode ); 100 : 101 : fd_chacha_rng_t * 102 : fd_chacha_rng_join( void * shrng ); 103 : 104 : void * 105 : fd_chacha_rng_leave( fd_chacha_rng_t * ); 106 : 107 : void * 108 : fd_chacha_rng_delete( void * shrng ); 109 : 110 : /* fd_chacha_rng_init starts a ChaCha{8,20} RNG stream. rng is 111 : assumed to be a current local join to a chacha_rng object with no 112 : other concurrent operation that would modify the state while this is 113 : executing. seed points to the first byte of the RNG seed byte vector 114 : with 32 byte size. Any preexisting state for an in-progress or 115 : recently completed calculation will be discarded. Returns rng (on 116 : return, rng will have the state of a new in-progress calculation). 117 : 118 : The param algo is expected to be FD_CHACHA_RNG_ALGO_CHACHA20 or 119 : FD_CHACHA_RNG_ALGO_CHACH8. If invalid, it defaults to chacha20. 120 : 121 : Compatible with Rust fn rand_chacha::ChaCha20Rng::from_seed 122 : https://docs.rs/rand_chacha/latest/rand_chacha/struct.ChaCha20Rng.html#method.from_seed 123 : (and ChaCha8Rng) */ 124 : 125 : fd_chacha_rng_t * 126 : fd_chacha_rng_init( fd_chacha_rng_t * rng, 127 : void const * key, 128 : int algo ); 129 : 130 : /* The refill function. Not part of the public API. */ 131 : 132 : #if FD_HAS_AVX512 133 : void fd_chacha8_rng_refill_avx512 ( fd_chacha_rng_t * rng ); 134 : void fd_chacha20_rng_refill_avx512( fd_chacha_rng_t * rng ); 135 : #endif 136 : 137 : #if FD_HAS_AVX 138 : void fd_chacha8_rng_refill_avx ( fd_chacha_rng_t * rng ); 139 : void fd_chacha20_rng_refill_avx( fd_chacha_rng_t * rng ); 140 : #endif 141 : 142 : void fd_chacha8_rng_refill_seq ( fd_chacha_rng_t * rng ); 143 : void fd_chacha20_rng_refill_seq( fd_chacha_rng_t * rng ); 144 : 145 : #if FD_HAS_AVX512 146 791108 : #define fd_chacha8_rng_private_refill fd_chacha8_rng_refill_avx512 147 3967978 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_avx512 148 : #elif FD_HAS_AVX 149 2795118 : #define fd_chacha8_rng_private_refill fd_chacha8_rng_refill_avx 150 14868460 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_avx 151 : #else 152 : #define fd_chacha8_rng_private_refill fd_chacha8_rng_refill_seq 153 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_seq 154 : #endif 155 : 156 : /* fd_chacha_rng_avail returns the number of buffered bytes. */ 157 : 158 : FD_FN_PURE static inline ulong 159 1597418946 : fd_chacha_rng_avail( fd_chacha_rng_t const * rng ) { 160 1597418946 : return rng->buf_fill - rng->buf_off; 161 1597418946 : } 162 : 163 : /* fd_chacha_rng_ulong read a 64-bit integer in [0,2^64) from the 164 : RNG stream. */ 165 : 166 : static inline ulong 167 1597418946 : fd_chacha_rng_ulong( fd_chacha_rng_t * rng ) { 168 1597418946 : if( FD_UNLIKELY( fd_chacha_rng_avail( rng ) < sizeof(ulong) ) ) { 169 19828385 : if( rng->algo==FD_CHACHA_RNG_ALGO_CHACHA8 ) { 170 2806180 : fd_chacha8_rng_private_refill( rng ); 171 17022205 : } else { 172 17022205 : fd_chacha20_rng_private_refill( rng ); 173 17022205 : } 174 19828385 : } 175 1597418946 : ulong x = FD_LOAD( ulong, rng->buf + (rng->buf_off % FD_CHACHA_RNG_BUFSZ) ); 176 1597418946 : rng->buf_off += 8U; 177 1597418946 : return x; 178 1597418946 : } 179 : 180 : /* fd_chacha_rng_ulong_roll returns an uniform IID rand in [0,n) 181 : analogous to fd_rng_ulong_roll. Rejection method based using 182 : fd_chacha_rng_ulong. 183 : 184 : Compatible with Rust type 185 : <rand_chacha::ChaCha20Rng as rand::Rng>::gen<rand::distributions::Uniform<u64>>() 186 : as of version 0.7.0 of the crate 187 : https://docs.rs/rand/latest/rand/distributions/struct.Uniform.html */ 188 : 189 : static inline ulong 190 : fd_chacha_rng_ulong_roll( fd_chacha_rng_t * rng, 191 1198996626 : ulong n ) { 192 : /* We use a pretty standard rejection-sampling based approach here, 193 : but for future reference, here's an explanation: 194 : 195 : We know that v can take 2^64 values, and so any method that maps 196 : each of the 2^64 values to the range directly [0, n) will not be 197 : uniform distribution when 2^64 is not divisible by n. This 198 : motivates using rejection sampling. 199 : 200 : The most basic approach is to map v from [0, n*floor(2^64/n) ) to 201 : [0, n) using v%n, but that puts a modulus on the critical path. To 202 : avoid that, the Rust rand crate uses a different approach: compute 203 : v*n/2^64, which is also in [0, n). 204 : 205 : Now the question to answer is which values to throw out. We pick a 206 : large integer k such that k*n<=2^64 and map [0, k*n) -> 0, [2^64, 207 : 2^64+k*n) -> 1, etc. Since k*n might be 2^64 and then not fit in a 208 : long, we define zone=k*n-1 <= ULONG_MAX, and make the intervals 209 : closed instead of half-open. 210 : 211 : Here's where the mode comes in. Depending on what method you call 212 : and what datatype you use, the Rust crate uses different values of 213 : k. When MODE_MOD is set, we use largest possible value of k, 214 : namely floor(2^64/n). You can compute zone directly as follows: 215 : zone = k*n-1 216 : = floor(2^64/n)*n - 1 217 : = 2^64 - (2^64%n) - 1 218 : = 2^64-1 - (2^64-n)%n, since n<2^64 219 : = 2^64-1 - ((2^64-1)-n+1)%n 220 : Which is back to having a mod... But at least if n is a 221 : compile-time constant then the whole zone computation becomes a 222 : compile-time constant. 223 : 224 : When MODE_SHIFT is set, we use uses almost the largest possible 225 : power of two for k. Precisely, it uses the smallest power of two 226 : such that k*n >= 2^63, which is the largest power of two such that 227 : k*n<=2^64 unless n is a power of two. This approach eliminates the 228 : mod calculation but increases the expected number of samples 229 : required. */ 230 1198996626 : ulong const zone = fd_ulong_if( rng->mode==FD_CHACHA_RNG_MODE_MOD, 231 1198996626 : ULONG_MAX - (ULONG_MAX-n+1UL)%n, 232 1198996626 : (n << (63 - fd_ulong_find_msb( n ) )) - 1UL ); 233 : 234 1531118943 : for( int i=0; 1; i++ ) { 235 1531118943 : ulong v = fd_chacha_rng_ulong( rng ); 236 1531118943 : ulong hi, lo; 237 1531118943 : fd_uwide_mul( &hi, &lo, v, n ); 238 : 239 : # if FD_CHACHA_RNG_DEBUG 240 : FD_LOG_DEBUG(( "roll (attempt %d): n=%016lx zone: %016lx v=%016lx lo=%016lx hi=%016lx", i, n, zone, v, lo, hi )); 241 : # else 242 1531118943 : (void)i; 243 1531118943 : # endif /* FD_CHACHA_RNG_DEBUG */ 244 : 245 1531118943 : if( FD_LIKELY( lo<=zone ) ) return hi; 246 1531118943 : } 247 1198996626 : } 248 : 249 : FD_PROTOTYPES_END 250 : 251 : #endif /* HEADER_fd_src_ballet_chacha20_fd_chacha20_rng_h */