Line data Source code
1 : #ifndef HEADER_fd_src_ballet_chacha20_fd_chacha20rng_h 2 : #define HEADER_fd_src_ballet_chacha20_fd_chacha20rng_h 3 : 4 : /* fd_chacha20rng provides APIs for ChaCha20-based RNG, as used in the 5 : Solana protocol. This API should only be used where necessary. 6 : fd_rng is a better choice in all other cases. */ 7 : 8 : #include "fd_chacha20.h" 9 : #if !FD_HAS_INT128 10 : #include "../../util/bits/fd_uwide.h" 11 : #endif 12 : 13 : /* FD_CHACHA20RNG_DEBUG controls debug logging. 0 is off; 1 is on. */ 14 : 15 : #ifndef FD_CHACHA20RNG_DEBUG 16 : #define FD_CHACHA20RNG_DEBUG 0 17 : #endif 18 : 19 : /* Solana uses different mechanisms of mapping a ulong to an unbiased 20 : integer in [0, n) in different parts of the code. In particular, 21 : leader schedule generation uses MODE_MOD and Turbine uses MODE_SHIFT. 22 : See the note in fd_chacha20rng_ulong_roll for more details. */ 23 1578772578 : #define FD_CHACHA20RNG_MODE_MOD 1 24 313701 : #define FD_CHACHA20RNG_MODE_SHIFT 2 25 : 26 : /* FD_CHACHA20RNG_BUFSZ is the internal buffer size of pre-generated 27 : ChaCha20 blocks. Multiple of block size (64 bytes) and a power of 2. */ 28 : 29 : #if FD_HAS_AVX 30 : #define FD_CHACHA20RNG_BUFSZ (8*FD_CHACHA20_BLOCK_SZ) 31 : #else 32 : #define FD_CHACHA20RNG_BUFSZ (256UL) 33 : #endif 34 : 35 : struct __attribute__((aligned(32UL))) fd_chacha20rng_private { 36 : /* ChaCha20 encryption key */ 37 : uchar key[ 32UL ] __attribute__((aligned(32UL))); 38 : 39 : /* Ring buffer of pre-generated ChaCha20 RNG data. 40 : Note: We currently assume all reads are 8 byte. This means the 41 : cursor is always aligned by 8 and strictly increases in 42 : increments of 8. Thus, we really only have to refill the 43 : buffer if buf_off==buf_fill. */ 44 : uchar buf[ FD_CHACHA20RNG_BUFSZ ] __attribute__((aligned(FD_CHACHA20_BLOCK_SZ))); 45 : ulong buf_off; /* Total number of bytes consumed */ 46 : ulong buf_fill; /* Total number of bytes produced 47 : Always aligned by FD_CHACHA20_BLOCK_SZ */ 48 : 49 : int mode; 50 : }; 51 : typedef struct fd_chacha20rng_private fd_chacha20rng_t; 52 : 53 : FD_PROTOTYPES_BEGIN 54 : 55 : /* fd_chacha20rng_{align,footprint} give the needed alignment and 56 : footprint of a memory region suitable to hold a ChaCha20-based RNG. 57 : 58 : fd_chacha20rng_new formats a memory region with suitable alignment 59 : and footprint for holding a chacha20rng object. Assumes shmem 60 : points on the caller to the first byte of the memory region owned by 61 : the caller to use. `mode` must be one of the FD_CHACHA20RNG_MODE_* 62 : constants defined above and dictates what mode this object will use 63 : to generate random numbers. Returns shmem on success and NULL on 64 : failure (logs details). The memory region will be owned by the 65 : object on successful return. The caller is not joined on return. 66 : 67 : fd_chacha20rng_join joins the caller to a chacha20rng object. 68 : Assumes shrng points to the first byte of the memory region holding 69 : the object. Returns a local handle to the join on success (this is 70 : not necessarily a simple cast of the address) and NULL on failure 71 : (logs details). 72 : 73 : fd_chacha20rng_leave leaves the caller's current local join to a 74 : ChaCha20 RNG object. Returns a pointer to the memory region holding 75 : the object on success this is not necessarily a simple cast of the 76 : address) and NULL on failure (logs details). The caller is not 77 : joined on successful return. 78 : 79 : fd_chacha20rng_delete unformats a memory region that holds a ChaCha20 80 : RNG object. Assumes shrng points on the caller to the first byte of 81 : the memory region holding the state and that nobody is joined. 82 : Returns a pointer to the memory region on success and NULL on failure 83 : (logs details). The caller has ownership of the memory region on 84 : successful return. */ 85 : 86 : FD_FN_CONST ulong 87 : fd_chacha20rng_align( void ); 88 : 89 : FD_FN_CONST ulong 90 : fd_chacha20rng_footprint( void ); 91 : 92 : void * 93 : fd_chacha20rng_new( void * shmem, int mode ); 94 : 95 : fd_chacha20rng_t * 96 : fd_chacha20rng_join( void * shrng ); 97 : 98 : void * 99 : fd_chacha20rng_leave( fd_chacha20rng_t * ); 100 : 101 : void * 102 : fd_chacha20rng_delete( void * shrng ); 103 : 104 : /* fd_chacha20rng_init starts a ChaCha20 RNG stream. rng is assumed to 105 : be a current local join to a chacha20rng object with no other 106 : concurrent operation that would modify the state while this is 107 : executing. seed points to the first byte of the RNG seed byte vector 108 : with 32 byte size. Any preexisting state for an in-progress or 109 : recently completed calculation will be discarded. Returns rng (on 110 : return, rng will have the state of a new in-progress calculation). 111 : 112 : Compatible with Rust fn rand_chacha::ChaCha20Rng::from_seed 113 : https://docs.rs/rand_chacha/latest/rand_chacha/struct.ChaCha20Rng.html#method.from_seed */ 114 : 115 : fd_chacha20rng_t * 116 : fd_chacha20rng_init( fd_chacha20rng_t * rng, 117 : void const * key ); 118 : 119 : /* The refill function . Not part of the public API. */ 120 : 121 : void 122 : fd_chacha20rng_refill_avx( fd_chacha20rng_t * rng ); 123 : 124 : void 125 : fd_chacha20rng_refill_seq( fd_chacha20rng_t * rng ); 126 : 127 : #if FD_HAS_AVX 128 34931019 : #define fd_chacha20rng_private_refill fd_chacha20rng_refill_avx 129 : #else 130 : #define fd_chacha20rng_private_refill fd_chacha20rng_refill_seq 131 : #endif 132 : 133 : /* fd_chacha20rng_avail returns the number of buffered bytes. */ 134 : 135 : FD_FN_PURE static inline ulong 136 2065076313 : fd_chacha20rng_avail( fd_chacha20rng_t const * rng ) { 137 2065076313 : return rng->buf_fill - rng->buf_off; 138 2065076313 : } 139 : 140 : /* fd_chacha20rng_ulong reads a 64-bit integer in [0,2^64) from the RNG 141 : stream. */ 142 : 143 : static ulong 144 2065076313 : fd_chacha20rng_ulong( fd_chacha20rng_t * rng ) { 145 2065076313 : if( FD_UNLIKELY( fd_chacha20rng_avail( rng ) < sizeof(ulong) ) ) 146 30417321 : fd_chacha20rng_private_refill( rng ); 147 2065076313 : ulong x = FD_LOAD( ulong, rng->buf + (rng->buf_off % FD_CHACHA20RNG_BUFSZ) ); 148 2065076313 : rng->buf_off += 8U; 149 2065076313 : return x; 150 2065076313 : } 151 : 152 : /* fd_chacha20rng_ulong_roll returns an uniform IID rand in [0,n) 153 : analogous to fd_rng_ulong_roll. Rejection method based using 154 : fd_chacha20rng_ulong. 155 : 156 : Compatible with Rust type 157 : <rand_chacha::ChaCha20Rng as rand::Rng>::gen<rand::distributions::Uniform<u64>>() 158 : as of version 0.7.0 of the crate 159 : https://docs.rs/rand/latest/rand/distributions/struct.Uniform.html */ 160 : 161 : static inline ulong 162 : fd_chacha20rng_ulong_roll( fd_chacha20rng_t * rng, 163 1578766392 : ulong n ) { 164 : /* We use a pretty standard rejection-sampling based approach here, 165 : but for future reference, here's an explanation: 166 : 167 : We know that v can take 2^64 values, and so any method that maps 168 : each of the 2^64 values to the range directly [0, n) will not be 169 : uniform distribution when 2^64 is not divisible by n. This 170 : motivates using rejection sampling. 171 : 172 : The most basic approach is to map v from [0, n*floor(2^64/n) ) to 173 : [0, n) using v%n, but that puts a modulus on the critical path. To 174 : avoid that, the Rust rand crate uses a different approach: compute 175 : v*n/2^64, which is also in [0, n). 176 : 177 : Now the question to answer is which values to throw out. We pick a 178 : large integer k such that k*n<=2^64 and map [0, k*n) -> 0, [2^64, 179 : 2^64+k*n) -> 1, etc. Since k*n might be 2^64 and then not fit in a 180 : long, we define zone=k*n-1 <= ULONG_MAX, and make the intervals 181 : closed instead of half-open. 182 : 183 : Here's where the mode comes in. Depending on what method you call 184 : and what datatype you use, the Rust crate uses different values of 185 : k. When MODE_MOD is set, we use largest possible value of k, 186 : namely floor(2^64/n). You can compute zone directly as follows: 187 : zone = k*n-1 188 : = floor(2^64/n)*n - 1 189 : = 2^64 - (2^64%n) - 1 190 : = 2^64-1 - (2^64-n)%n, since n<2^64 191 : = 2^64-1 - ((2^64-1)-n+1)%n 192 : Which is back to having a mod... But at least if n is a 193 : compile-time constant then the whole zone computation becomes a 194 : compile-time constant. 195 : 196 : When MODE_SHIFT is set, we use uses almost the largest possible 197 : power of two for k. Precisely, it uses the smallest power of two 198 : such that k*n >= 2^63, which is the largest power of two such that 199 : k*n<=2^64 unless n is a power of two. This approach eliminates the 200 : mod calculation but increases the expected number of samples 201 : required. */ 202 1578766392 : ulong const zone = fd_ulong_if( rng->mode==FD_CHACHA20RNG_MODE_MOD, 203 1578766392 : ULONG_MAX - (ULONG_MAX-n+1UL)%n, 204 1578766392 : (n << (63 - fd_ulong_find_msb( n ) )) - 1UL ); 205 : 206 2031776307 : for( int i=0; 1; i++ ) { 207 2031776307 : ulong v = fd_chacha20rng_ulong( rng ); 208 2031776307 : #if FD_HAS_INT128 209 : /* Compiles to one mulx instruction */ 210 2031776307 : uint128 res = (uint128)v * (uint128)n; 211 2031776307 : ulong hi = (ulong)(res>>64); 212 2031776307 : ulong lo = (ulong) res; 213 : #else 214 : ulong hi, lo; 215 : fd_uwide_mul( &hi, &lo, v, n ); 216 : #endif 217 : 218 : # if FD_CHACHA20RNG_DEBUG 219 : FD_LOG_DEBUG(( "roll (attempt %d): n=%016lx zone: %016lx v=%016lx lo=%016lx hi=%016lx", i, n, zone, v, lo, hi )); 220 : # else 221 2031776307 : (void)i; 222 2031776307 : # endif /* FD_CHACHA20RNG_DEBUG */ 223 : 224 2031776307 : if( FD_LIKELY( lo<=zone ) ) return hi; 225 2031776307 : } 226 1578766392 : } 227 : 228 : FD_PROTOTYPES_END 229 : 230 : #endif /* HEADER_fd_src_ballet_chacha20_fd_chacha20rng_h */