Line data Source code
1 : #ifndef HEADER_fd_src_ballet_chacha20_fd_chacha20rng_h 2 : #define HEADER_fd_src_ballet_chacha20_fd_chacha20rng_h 3 : 4 : /* fd_chacha20rng provides APIs for ChaCha20-based RNG, as used in the 5 : Solana protocol. This API should only be used where necessary. 6 : fd_rng is a better choice in all other cases. */ 7 : 8 : #include "fd_chacha20.h" 9 : #if !FD_HAS_INT128 10 : #include "../../util/bits/fd_uwide.h" 11 : #endif 12 : 13 : /* FD_CHACHA20RNG_DEBUG controls debug logging. 0 is off; 1 is on. */ 14 : 15 : #ifndef FD_CHACHA20RNG_DEBUG 16 : #define FD_CHACHA20RNG_DEBUG 0 17 : #endif 18 : 19 : /* Solana uses different mechanisms of mapping a ulong to an unbiased 20 : integer in [0, n) in different parts of the code. In particular, 21 : leader schedule generation uses MODE_MOD and Turbine uses MODE_SHIFT. 22 : See the note in fd_chacha20rng_ulong_roll for more details. */ 23 1578622926 : #define FD_CHACHA20RNG_MODE_MOD 1 24 313848 : #define FD_CHACHA20RNG_MODE_SHIFT 2 25 : 26 : /* FD_CHACHA20RNG_BUFSZ is the internal buffer size of pre-generated 27 : ChaCha20 blocks. Multiple of block size (64 bytes) and a power of 2. */ 28 : 29 : #if FD_HAS_AVX512 30 2199999 : #define FD_CHACHA20RNG_BUFSZ (16*FD_CHACHA20_BLOCK_SZ) 31 : #elif FD_HAS_AVX 32 4399998 : #define FD_CHACHA20RNG_BUFSZ (8*FD_CHACHA20_BLOCK_SZ) 33 : #else 34 : #define FD_CHACHA20RNG_BUFSZ (256UL) 35 : #endif 36 : 37 : struct __attribute__((aligned(32UL))) fd_chacha20rng_private { 38 : /* ChaCha20 encryption key */ 39 : uchar key[ 32UL ] __attribute__((aligned(32UL))); 40 : 41 : /* Ring buffer of pre-generated ChaCha20 RNG data. 42 : Note: We currently assume all reads are 8 byte. This means the 43 : cursor is always aligned by 8 and strictly increases in 44 : increments of 8. Thus, we really only have to refill the 45 : buffer if buf_off==buf_fill. */ 46 : uchar buf[ FD_CHACHA20RNG_BUFSZ ] __attribute__((aligned(FD_CHACHA20_BLOCK_SZ))); 47 : ulong buf_off; /* Total number of bytes consumed */ 48 : ulong buf_fill; /* Total number of bytes produced 49 : Always aligned by FD_CHACHA20_BLOCK_SZ */ 50 : 51 : int mode; 52 : }; 53 : typedef struct fd_chacha20rng_private fd_chacha20rng_t; 54 : 55 : FD_PROTOTYPES_BEGIN 56 : 57 : /* fd_chacha20rng_{align,footprint} give the needed alignment and 58 : footprint of a memory region suitable to hold a ChaCha20-based RNG. 59 : 60 : fd_chacha20rng_new formats a memory region with suitable alignment 61 : and footprint for holding a chacha20rng object. Assumes shmem 62 : points on the caller to the first byte of the memory region owned by 63 : the caller to use. `mode` must be one of the FD_CHACHA20RNG_MODE_* 64 : constants defined above and dictates what mode this object will use 65 : to generate random numbers. Returns shmem on success and NULL on 66 : failure (logs details). The memory region will be owned by the 67 : object on successful return. The caller is not joined on return. 68 : 69 : fd_chacha20rng_join joins the caller to a chacha20rng object. 70 : Assumes shrng points to the first byte of the memory region holding 71 : the object. Returns a local handle to the join on success (this is 72 : not necessarily a simple cast of the address) and NULL on failure 73 : (logs details). 74 : 75 : fd_chacha20rng_leave leaves the caller's current local join to a 76 : ChaCha20 RNG object. Returns a pointer to the memory region holding 77 : the object on success this is not necessarily a simple cast of the 78 : address) and NULL on failure (logs details). The caller is not 79 : joined on successful return. 80 : 81 : fd_chacha20rng_delete unformats a memory region that holds a ChaCha20 82 : RNG object. Assumes shrng points on the caller to the first byte of 83 : the memory region holding the state and that nobody is joined. 84 : Returns a pointer to the memory region on success and NULL on failure 85 : (logs details). The caller has ownership of the memory region on 86 : successful return. */ 87 : 88 : FD_FN_CONST ulong 89 : fd_chacha20rng_align( void ); 90 : 91 : FD_FN_CONST ulong 92 : fd_chacha20rng_footprint( void ); 93 : 94 : void * 95 : fd_chacha20rng_new( void * shmem, int mode ); 96 : 97 : fd_chacha20rng_t * 98 : fd_chacha20rng_join( void * shrng ); 99 : 100 : void * 101 : fd_chacha20rng_leave( fd_chacha20rng_t * ); 102 : 103 : void * 104 : fd_chacha20rng_delete( void * shrng ); 105 : 106 : /* fd_chacha20rng_init starts a ChaCha20 RNG stream. rng is assumed to 107 : be a current local join to a chacha20rng object with no other 108 : concurrent operation that would modify the state while this is 109 : executing. seed points to the first byte of the RNG seed byte vector 110 : with 32 byte size. Any preexisting state for an in-progress or 111 : recently completed calculation will be discarded. Returns rng (on 112 : return, rng will have the state of a new in-progress calculation). 113 : 114 : Compatible with Rust fn rand_chacha::ChaCha20Rng::from_seed 115 : https://docs.rs/rand_chacha/latest/rand_chacha/struct.ChaCha20Rng.html#method.from_seed */ 116 : 117 : fd_chacha20rng_t * 118 : fd_chacha20rng_init( fd_chacha20rng_t * rng, 119 : void const * key ); 120 : 121 : /* The refill function. Not part of the public API. */ 122 : 123 : #if FD_HAS_AVX512 124 : void 125 : fd_chacha20rng_refill_avx512( fd_chacha20rng_t * rng ); 126 : #endif 127 : 128 : #if FD_HAS_AVX 129 : void 130 : fd_chacha20rng_refill_avx( fd_chacha20rng_t * rng ); 131 : #endif 132 : 133 : void 134 : fd_chacha20rng_refill_seq( fd_chacha20rng_t * rng ); 135 : 136 : #if FD_HAS_AVX512 137 6392083 : #define fd_chacha20rng_private_refill fd_chacha20rng_refill_avx512 138 : #elif FD_HAS_AVX 139 23285482 : #define fd_chacha20rng_private_refill fd_chacha20rng_refill_avx 140 : #else 141 : #define fd_chacha20rng_private_refill fd_chacha20rng_refill_seq 142 : #endif 143 : 144 : /* fd_chacha20rng_avail returns the number of buffered bytes. */ 145 : 146 : FD_FN_PURE static inline ulong 147 2064812754 : fd_chacha20rng_avail( fd_chacha20rng_t const * rng ) { 148 2064812754 : return rng->buf_fill - rng->buf_off; 149 2064812754 : } 150 : 151 : /* fd_chacha20rng_ulong reads a 64-bit integer in [0,2^64) from the RNG 152 : stream. */ 153 : 154 : static ulong 155 2064812754 : fd_chacha20rng_ulong( fd_chacha20rng_t * rng ) { 156 2064812754 : if( FD_UNLIKELY( fd_chacha20rng_avail( rng ) < sizeof(ulong) ) ) 157 25163683 : fd_chacha20rng_private_refill( rng ); 158 2064812754 : ulong x = FD_LOAD( ulong, rng->buf + (rng->buf_off % FD_CHACHA20RNG_BUFSZ) ); 159 2064812754 : rng->buf_off += 8U; 160 2064812754 : return x; 161 2064812754 : } 162 : 163 : /* fd_chacha20rng_ulong_roll returns an uniform IID rand in [0,n) 164 : analogous to fd_rng_ulong_roll. Rejection method based using 165 : fd_chacha20rng_ulong. 166 : 167 : Compatible with Rust type 168 : <rand_chacha::ChaCha20Rng as rand::Rng>::gen<rand::distributions::Uniform<u64>>() 169 : as of version 0.7.0 of the crate 170 : https://docs.rs/rand/latest/rand/distributions/struct.Uniform.html */ 171 : 172 : static inline ulong 173 : fd_chacha20rng_ulong_roll( fd_chacha20rng_t * rng, 174 1578616560 : ulong n ) { 175 : /* We use a pretty standard rejection-sampling based approach here, 176 : but for future reference, here's an explanation: 177 : 178 : We know that v can take 2^64 values, and so any method that maps 179 : each of the 2^64 values to the range directly [0, n) will not be 180 : uniform distribution when 2^64 is not divisible by n. This 181 : motivates using rejection sampling. 182 : 183 : The most basic approach is to map v from [0, n*floor(2^64/n) ) to 184 : [0, n) using v%n, but that puts a modulus on the critical path. To 185 : avoid that, the Rust rand crate uses a different approach: compute 186 : v*n/2^64, which is also in [0, n). 187 : 188 : Now the question to answer is which values to throw out. We pick a 189 : large integer k such that k*n<=2^64 and map [0, k*n) -> 0, [2^64, 190 : 2^64+k*n) -> 1, etc. Since k*n might be 2^64 and then not fit in a 191 : long, we define zone=k*n-1 <= ULONG_MAX, and make the intervals 192 : closed instead of half-open. 193 : 194 : Here's where the mode comes in. Depending on what method you call 195 : and what datatype you use, the Rust crate uses different values of 196 : k. When MODE_MOD is set, we use largest possible value of k, 197 : namely floor(2^64/n). You can compute zone directly as follows: 198 : zone = k*n-1 199 : = floor(2^64/n)*n - 1 200 : = 2^64 - (2^64%n) - 1 201 : = 2^64-1 - (2^64-n)%n, since n<2^64 202 : = 2^64-1 - ((2^64-1)-n+1)%n 203 : Which is back to having a mod... But at least if n is a 204 : compile-time constant then the whole zone computation becomes a 205 : compile-time constant. 206 : 207 : When MODE_SHIFT is set, we use uses almost the largest possible 208 : power of two for k. Precisely, it uses the smallest power of two 209 : such that k*n >= 2^63, which is the largest power of two such that 210 : k*n<=2^64 unless n is a power of two. This approach eliminates the 211 : mod calculation but increases the expected number of samples 212 : required. */ 213 1578616560 : ulong const zone = fd_ulong_if( rng->mode==FD_CHACHA20RNG_MODE_MOD, 214 1578616560 : ULONG_MAX - (ULONG_MAX-n+1UL)%n, 215 1578616560 : (n << (63 - fd_ulong_find_msb( n ) )) - 1UL ); 216 : 217 2031512751 : for( int i=0; 1; i++ ) { 218 2031512751 : ulong v = fd_chacha20rng_ulong( rng ); 219 2031512751 : #if FD_HAS_INT128 220 : /* Compiles to one mulx instruction */ 221 2031512751 : uint128 res = (uint128)v * (uint128)n; 222 2031512751 : ulong hi = (ulong)(res>>64); 223 2031512751 : ulong lo = (ulong) res; 224 : #else 225 : ulong hi, lo; 226 : fd_uwide_mul( &hi, &lo, v, n ); 227 : #endif 228 : 229 : # if FD_CHACHA20RNG_DEBUG 230 : FD_LOG_DEBUG(( "roll (attempt %d): n=%016lx zone: %016lx v=%016lx lo=%016lx hi=%016lx", i, n, zone, v, lo, hi )); 231 : # else 232 2031512751 : (void)i; 233 2031512751 : # endif /* FD_CHACHA20RNG_DEBUG */ 234 : 235 2031512751 : if( FD_LIKELY( lo<=zone ) ) return hi; 236 2031512751 : } 237 1578616560 : } 238 : 239 : FD_PROTOTYPES_END 240 : 241 : #endif /* HEADER_fd_src_ballet_chacha20_fd_chacha20rng_h */