Line data Source code
1 : #ifndef HEADER_fd_src_ballet_chacha_fd_chacha_rng_h 2 : #define HEADER_fd_src_ballet_chacha_fd_chacha_rng_h 3 : 4 : /* fd_chacha_rng provides APIs for ChaCha-based RNG, as used in the 5 : Solana protocol. This API should only be used where necessary. 6 : fd_rng is a better choice in all other cases. */ 7 : 8 : #include "fd_chacha.h" 9 : #if !FD_HAS_INT128 10 : #include "../../util/bits/fd_uwide.h" 11 : #endif 12 : 13 : /* FD_CHACHA_RNG_DEBUG controls debug logging. 0 is off; 1 is on. */ 14 : 15 : #ifndef FD_CHACHA_RNG_DEBUG 16 : #define FD_CHACHA_RNG_DEBUG 0 17 : #endif 18 : 19 : /* Solana uses different mechanisms of mapping a ulong to an unbiased 20 : integer in [0, n) in different parts of the code. In particular, 21 : leader schedule generation uses MODE_MOD and Turbine uses MODE_SHIFT. 22 : See the note in fd_chacha20_rng_ulong_roll for more details. */ 23 1578622923 : #define FD_CHACHA_RNG_MODE_MOD 1 24 313848 : #define FD_CHACHA_RNG_MODE_SHIFT 2 25 : 26 : /* FD_CHACHA_RNG_BUFSZ is the internal buffer size of pre-generated 27 : ChaCha20 blocks. Multiple of block size (64 bytes) and a power of 2. */ 28 : 29 : #if FD_HAS_AVX512 30 4399998 : #define FD_CHACHA_RNG_BUFSZ (16*FD_CHACHA_BLOCK_SZ) 31 : #elif FD_HAS_AVX 32 8799996 : #define FD_CHACHA_RNG_BUFSZ (8*FD_CHACHA_BLOCK_SZ) 33 : #else 34 : #define FD_CHACHA_RNG_BUFSZ (256UL) 35 : #endif 36 : 37 : struct __attribute__((aligned(32UL))) fd_chacha_rng_private { 38 : /* ChaCha20 encryption key */ 39 : uchar key[ 32UL ] __attribute__((aligned(32UL))); 40 : 41 : /* Ring buffer of pre-generated ChaCha20 RNG data. 42 : Note: We currently assume all reads are 8 byte. This means the 43 : cursor is always aligned by 8 and strictly increases in 44 : increments of 8. Thus, we really only have to refill the 45 : buffer if buf_off==buf_fill. */ 46 : uchar buf[ FD_CHACHA_RNG_BUFSZ ] __attribute__((aligned(FD_CHACHA_BLOCK_SZ))); 47 : ulong buf_off; /* Total number of bytes consumed */ 48 : ulong buf_fill; /* Total number of bytes produced 49 : Always aligned by FD_CHACHA_BLOCK_SZ */ 50 : 51 : int mode; 52 : }; 53 : typedef struct fd_chacha_rng_private fd_chacha_rng_t; 54 : 55 : FD_PROTOTYPES_BEGIN 56 : 57 : /* fd_chacha_rng_{align,footprint} give the needed alignment and 58 : footprint of a memory region suitable to hold a ChaCha20-based RNG. 59 : 60 : fd_chacha_rng_new formats a memory region with suitable alignment 61 : and footprint for holding a chacha20_rng object. Assumes shmem 62 : points on the caller to the first byte of the memory region owned by 63 : the caller to use. `mode` must be one of the FD_CHACHA_RNG_MODE_* 64 : constants defined above and dictates what mode this object will use 65 : to generate random numbers. Returns shmem on success and NULL on 66 : failure (logs details). The memory region will be owned by the 67 : object on successful return. The caller is not joined on return. 68 : 69 : fd_chacha_rng_join joins the caller to a chacha20_rng object. 70 : Assumes shrng points to the first byte of the memory region holding 71 : the object. Returns a local handle to the join on success (this is 72 : not necessarily a simple cast of the address) and NULL on failure 73 : (logs details). 74 : 75 : fd_chacha_rng_leave leaves the caller's current local join to a 76 : ChaCha20 RNG object. Returns a pointer to the memory region holding 77 : the object on success this is not necessarily a simple cast of the 78 : address) and NULL on failure (logs details). The caller is not 79 : joined on successful return. 80 : 81 : fd_chacha_rng_delete unformats a memory region that holds a ChaCha20 82 : RNG object. Assumes shrng points on the caller to the first byte of 83 : the memory region holding the state and that nobody is joined. 84 : Returns a pointer to the memory region on success and NULL on failure 85 : (logs details). The caller has ownership of the memory region on 86 : successful return. */ 87 : 88 : FD_FN_CONST ulong 89 : fd_chacha_rng_align( void ); 90 : 91 : FD_FN_CONST ulong 92 : fd_chacha_rng_footprint( void ); 93 : 94 : void * 95 : fd_chacha_rng_new( void * shmem, int mode ); 96 : 97 : fd_chacha_rng_t * 98 : fd_chacha_rng_join( void * shrng ); 99 : 100 : void * 101 : fd_chacha_rng_leave( fd_chacha_rng_t * ); 102 : 103 : void * 104 : fd_chacha_rng_delete( void * shrng ); 105 : 106 : /* fd_chacha_rng{8,20}_init starts a ChaCha{8,20} RNG stream. rng is 107 : assumed to be a current local join to a chacha20_rng object with no 108 : other concurrent operation that would modify the state while this is 109 : executing. seed points to the first byte of the RNG seed byte vector 110 : with 32 byte size. Any preexisting state for an in-progress or 111 : recently completed calculation will be discarded. Returns rng (on 112 : return, rng will have the state of a new in-progress calculation). 113 : 114 : Compatible with Rust fn rand_chacha::ChaCha20Rng::from_seed 115 : https://docs.rs/rand_chacha/latest/rand_chacha/struct.ChaCha20Rng.html#method.from_seed */ 116 : 117 : fd_chacha_rng_t * 118 : fd_chacha8_rng_init( fd_chacha_rng_t * rng, 119 : void const * key ); 120 : 121 : fd_chacha_rng_t * 122 : fd_chacha20_rng_init( fd_chacha_rng_t * rng, 123 : void const * key ); 124 : 125 : /* The refill function. Not part of the public API. */ 126 : 127 : #if FD_HAS_AVX512 128 : void fd_chacha8_rng_refill_avx512 ( fd_chacha_rng_t * rng ); 129 : void fd_chacha20_rng_refill_avx512( fd_chacha_rng_t * rng ); 130 : #endif 131 : 132 : #if FD_HAS_AVX 133 : void fd_chacha8_rng_refill_avx ( fd_chacha_rng_t * rng ); 134 : void fd_chacha20_rng_refill_avx( fd_chacha_rng_t * rng ); 135 : #endif 136 : 137 : void fd_chacha8_rng_refill_seq ( fd_chacha_rng_t * rng ); 138 : void fd_chacha20_rng_refill_seq( fd_chacha_rng_t * rng ); 139 : 140 : #if FD_HAS_AVX512 141 85937 : #define fd_chacha8_rng_private_refill fd_chacha8_rng_refill_avx512 142 6392087 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_avx512 143 : #elif FD_HAS_AVX 144 343748 : #define fd_chacha8_rng_private_refill fd_chacha8_rng_refill_avx 145 23285488 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_avx 146 : #else 147 : #define fd_chacha8_rng_private_refill fd_chacha8_rng_refill_seq 148 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_seq 149 : #endif 150 : 151 : /* fd_chacha_rng_avail returns the number of buffered bytes. */ 152 : 153 : FD_FN_PURE static inline ulong 154 2097812754 : fd_chacha_rng_avail( fd_chacha_rng_t const * rng ) { 155 2097812754 : return rng->buf_fill - rng->buf_off; 156 2097812754 : } 157 : 158 : /* fd_chacha{8,20}_rng_ulong read a 64-bit integer in [0,2^64) from the 159 : RNG stream. */ 160 : 161 : static inline ulong 162 33000000 : fd_chacha8_rng_ulong( fd_chacha_rng_t * rng ) { 163 33000000 : if( FD_UNLIKELY( fd_chacha_rng_avail( rng ) < sizeof(ulong) ) ) 164 429685 : fd_chacha8_rng_private_refill( rng ); 165 33000000 : ulong x = FD_LOAD( ulong, rng->buf + (rng->buf_off % FD_CHACHA_RNG_BUFSZ) ); 166 33000000 : rng->buf_off += 8U; 167 33000000 : return x; 168 33000000 : } 169 : 170 : static inline ulong 171 2064812754 : fd_chacha20_rng_ulong( fd_chacha_rng_t * rng ) { 172 2064812754 : if( FD_UNLIKELY( fd_chacha_rng_avail( rng ) < sizeof(ulong) ) ) 173 25163683 : fd_chacha20_rng_private_refill( rng ); 174 2064812754 : ulong x = FD_LOAD( ulong, rng->buf + (rng->buf_off % FD_CHACHA_RNG_BUFSZ) ); 175 2064812754 : rng->buf_off += 8U; 176 2064812754 : return x; 177 2064812754 : } 178 : 179 : /* fd_chacha20_rng_ulong_roll returns an uniform IID rand in [0,n) 180 : analogous to fd_rng_ulong_roll. Rejection method based using 181 : fd_chacha20_rng_ulong. 182 : 183 : Compatible with Rust type 184 : <rand_chacha::ChaCha20Rng as rand::Rng>::gen<rand::distributions::Uniform<u64>>() 185 : as of version 0.7.0 of the crate 186 : https://docs.rs/rand/latest/rand/distributions/struct.Uniform.html */ 187 : 188 : static inline ulong 189 : fd_chacha20_rng_ulong_roll( fd_chacha_rng_t * rng, 190 1578616560 : ulong n ) { 191 : /* We use a pretty standard rejection-sampling based approach here, 192 : but for future reference, here's an explanation: 193 : 194 : We know that v can take 2^64 values, and so any method that maps 195 : each of the 2^64 values to the range directly [0, n) will not be 196 : uniform distribution when 2^64 is not divisible by n. This 197 : motivates using rejection sampling. 198 : 199 : The most basic approach is to map v from [0, n*floor(2^64/n) ) to 200 : [0, n) using v%n, but that puts a modulus on the critical path. To 201 : avoid that, the Rust rand crate uses a different approach: compute 202 : v*n/2^64, which is also in [0, n). 203 : 204 : Now the question to answer is which values to throw out. We pick a 205 : large integer k such that k*n<=2^64 and map [0, k*n) -> 0, [2^64, 206 : 2^64+k*n) -> 1, etc. Since k*n might be 2^64 and then not fit in a 207 : long, we define zone=k*n-1 <= ULONG_MAX, and make the intervals 208 : closed instead of half-open. 209 : 210 : Here's where the mode comes in. Depending on what method you call 211 : and what datatype you use, the Rust crate uses different values of 212 : k. When MODE_MOD is set, we use largest possible value of k, 213 : namely floor(2^64/n). You can compute zone directly as follows: 214 : zone = k*n-1 215 : = floor(2^64/n)*n - 1 216 : = 2^64 - (2^64%n) - 1 217 : = 2^64-1 - (2^64-n)%n, since n<2^64 218 : = 2^64-1 - ((2^64-1)-n+1)%n 219 : Which is back to having a mod... But at least if n is a 220 : compile-time constant then the whole zone computation becomes a 221 : compile-time constant. 222 : 223 : When MODE_SHIFT is set, we use uses almost the largest possible 224 : power of two for k. Precisely, it uses the smallest power of two 225 : such that k*n >= 2^63, which is the largest power of two such that 226 : k*n<=2^64 unless n is a power of two. This approach eliminates the 227 : mod calculation but increases the expected number of samples 228 : required. */ 229 1578616560 : ulong const zone = fd_ulong_if( rng->mode==FD_CHACHA_RNG_MODE_MOD, 230 1578616560 : ULONG_MAX - (ULONG_MAX-n+1UL)%n, 231 1578616560 : (n << (63 - fd_ulong_find_msb( n ) )) - 1UL ); 232 : 233 2031512751 : for( int i=0; 1; i++ ) { 234 2031512751 : ulong v = fd_chacha20_rng_ulong( rng ); 235 2031512751 : #if FD_HAS_INT128 236 : /* Compiles to one mulx instruction */ 237 2031512751 : uint128 res = (uint128)v * (uint128)n; 238 2031512751 : ulong hi = (ulong)(res>>64); 239 2031512751 : ulong lo = (ulong) res; 240 : #else 241 : ulong hi, lo; 242 : fd_uwide_mul( &hi, &lo, v, n ); 243 : #endif 244 : 245 : # if FD_CHACHA_RNG_DEBUG 246 : FD_LOG_DEBUG(( "roll (attempt %d): n=%016lx zone: %016lx v=%016lx lo=%016lx hi=%016lx", i, n, zone, v, lo, hi )); 247 : # else 248 2031512751 : (void)i; 249 2031512751 : # endif /* FD_CHACHA_RNG_DEBUG */ 250 : 251 2031512751 : if( FD_LIKELY( lo<=zone ) ) return hi; 252 2031512751 : } 253 1578616560 : } 254 : 255 : FD_PROTOTYPES_END 256 : 257 : #endif /* HEADER_fd_src_ballet_chacha20_fd_chacha20_rng_h */