Line data Source code
1 : #ifndef HEADER_fd_src_rng_fd_rng_h
2 : #define HEADER_fd_src_rng_fd_rng_h
3 :
4 : /* Simple fast high quality non-cryptographic pseudo random number
5 : generator. Supports parallel generation, interprocess shared memory
6 : usage, checkpointing, random access, reversible, atomic, etc. Passes
7 : extremely strict tests of randomness.
8 :
9 : Assumes fd_bits provides a high quality 64<>64-bit integer hash
10 : functions (i.e. full avalanche) with the property
11 : fd_ulong_hash(0)==0, fd_ulong_hash(i) for i in [0,2^64) yields a
12 : permutation of [0,2^64) and also provides reasonably efficient
13 : inverse of this. */
14 :
15 : #include "../bits/fd_bits.h"
16 :
17 : /* FD_RNG_{ALIGN,FOOTPRINT} describe the alignment and footprint needed
18 : for a rng. ALIGN should be a positive integer power of 2. FOOTPRINT
19 : is multiple of ALIGN. These are provided to facilitate compile time
20 : declarations. */
21 :
22 : #define FD_RNG_ALIGN (16UL)
23 : #define FD_RNG_FOOTPRINT (16UL)
24 :
25 : /* fd_rng_t should be treated as an opaque handle of a pseudo random
26 : number generator. (It technically isn't here to facilitate inlining
27 : of fd_rng operations.) */
28 :
29 : struct __attribute__((aligned(FD_RNG_ALIGN))) fd_rng_private {
30 : ulong seq;
31 : ulong idx;
32 : };
33 :
34 : typedef struct fd_rng_private fd_rng_t;
35 :
36 : FD_PROTOTYPES_BEGIN
37 :
38 : /* Private: fd_rng_private_expand(seq) randomly expands an arbitrary
39 : 32-bit value into a unique 64-bit non-sparse value such that the
40 : original 32-bit value can be recovered and that 0 expands to
41 : something non-zero (the non-sparse expansion helps reduce
42 : correlations between different sequences, the zero to non-zero
43 : expansion means the common initialization of seq=0, idx=0 doesn't
44 : yield 0 for the first random value as would happen for hash functions
45 : that have the property fd_ulong_hash(0)==0, the XOR 64-bit const here
46 : is zero in the lower 32-bits and an arbitrary non-zero in the upper
47 : 32-bits). For the current fd_ulong_hash implementation and XOR
48 : 64-bit constant, the pop count of the expanded seq is ~32.000 +/-
49 : ~4.000 and in [8,56] for all possible values of seq (i.e. the
50 : expanded seq popcount is well approximated as a normal with mean 32
51 : and rms 4 and the extremes are in line with the expected extremes for
52 : 2^32 samples). */
53 :
54 : FD_FN_CONST static inline ulong
55 12884988717 : fd_rng_private_expand( uint seq ) {
56 12884988717 : return fd_ulong_hash( 0x900df00d00000000UL ^ (ulong)seq );
57 12884988717 : }
58 :
59 : /* Private: fd_rng_private_contract(seq) extract the original 32-bit seq
60 : from its expanded value */
61 :
62 : FD_FN_CONST static inline uint
63 12884949903 : fd_rng_private_contract( ulong eseq ) {
64 12884949903 : return (uint)fd_ulong_hash_inverse( eseq );
65 12884949903 : }
66 :
67 : /* fd_rng_{align,footprint} give the needed alignment and footprint
68 : for a memory region suitable to hold a fd_rng's state. Declaration /
69 : aligned_alloc / fd_alloca friendly (e.g. a memory region declared as
70 : "fd_rng_t _rng[1];", or created by
71 : "aligned_alloc(alignof(fd_rng_t),sizeof(fd_rng_t))" or created by
72 : "fd_alloca(alignof(fd_rng_t),sizeof(fd_rng_t))" will all
73 : automatically have the needed alignment and footprint).
74 : fd_rng_{align,footprint} return the same value as
75 : FD_RNG_{ALIGN,FOOTPRINT}.
76 :
77 : fd_rng_new takes ownership of the memory region pointed to by mem
78 : (which is assumed to be non-NULL with the appropriate alignment and
79 : footprint) and formats it as a fd_rng. The random number generator
80 : stream will initialized to use pseudo random sequence "seq" and will
81 : start at slot "idx". Returns mem (which will be formatted for use).
82 : The caller will not be joined to the region on return.
83 :
84 : fd_rng_join joins the caller to a memory region holding the state of
85 : a fd_rng. _rng points to a memory region in the local address space
86 : that holds a fd_rng. Returns an opaque handle of the local join in
87 : the local address space to the fd_rng (which might not be the same
88 : thing as _rng ... the separation of new and join is to facilitate
89 : interprocess shared memory usage patterns while supporting
90 : transparent upgrades users of this to more elaborate algorithms where
91 : address translations under the hood may not be trivial).
92 :
93 : fd_rng_leave leaves the current rng join. Returns a pointer in the
94 : local address space to the memory region holding the state of the
95 : fd_rng. The join should not be used afterward.
96 :
97 : fd_rng_delete unformats the memory region currently used to hold the
98 : state of a _rng and returns ownership of the underlying memory region
99 : to the caller. There should be no joins in the system on the fd_rng.
100 : Returns a pointer to the underlying memory region. */
101 :
102 0 : FD_FN_CONST static inline ulong fd_rng_align ( void ) { return alignof( fd_rng_t ); }
103 0 : FD_FN_CONST static inline ulong fd_rng_footprint( void ) { return sizeof ( fd_rng_t ); }
104 :
105 : static inline void *
106 : fd_rng_new( void * mem,
107 : uint seq,
108 62874 : ulong idx ) {
109 62874 : fd_rng_t * rng = (fd_rng_t *)mem;
110 62874 : rng->seq = fd_rng_private_expand( seq );
111 62874 : rng->idx = idx;
112 62874 : return (void *)rng;
113 62874 : }
114 :
115 59553 : static inline fd_rng_t * fd_rng_join ( void * _rng ) { return (fd_rng_t *)_rng; }
116 45606 : static inline void * fd_rng_leave ( fd_rng_t * rng ) { return (void *) rng; }
117 45606 : static inline void * fd_rng_delete( void * _rng ) { return (void *)_rng; }
118 :
119 : /* fd_rng_seq returns the sequence used by the rng. fd_rng_idx returns
120 : the next slot that will be consumed by the rng. */
121 :
122 48015 : static inline uint fd_rng_seq( fd_rng_t * rng ) { return fd_rng_private_contract( rng->seq ); }
123 48012 : static inline ulong fd_rng_idx( fd_rng_t * rng ) { return rng->idx; }
124 :
125 : /* fd_rng_seq_set sets the sequence to be used by rng and returns
126 : the replaced value. fd_rng_idx_set sets the next slot that will be
127 : consumed next by rng and returns the replaced value. */
128 :
129 : static inline uint
130 : fd_rng_seq_set( fd_rng_t * rng,
131 24006 : uint seq ) {
132 24006 : uint old = fd_rng_seq( rng );
133 24006 : rng->seq = fd_rng_private_expand( seq );
134 24006 : return old;
135 24006 : }
136 :
137 : static inline ulong
138 : fd_rng_idx_set( fd_rng_t * rng,
139 24003 : ulong idx ) {
140 24003 : ulong old = fd_rng_idx( rng );
141 24003 : rng->idx = idx;
142 24003 : return old;
143 24003 : }
144 :
145 : /* fd_rng_{uchar,ushort,uint,ulong} returns the next integer in the PRNG
146 : sequence in [0,2^N) for N in {8,16,32,64} respectively with uniform
147 : probability with a period of 2^64 (fd_rng_ulong has a period of 2^63
148 : as it consumes two slots). Passes various strict PRNG tests (e.g.
149 : diehard, dieharder, testu01, etc). Assumes rng is a current join.
150 : fd_rng_{schar,short,int,long} are the same but return a value in
151 : [0,2^(N-1)). (A signed generator that can assume all possible values
152 : of a signed int uniform IID can be obtained by casting the output of
153 : the unsigned generator of the same, assuming a typical twos
154 : complement arithmetic platform.)
155 :
156 : The theory for this that fd_ulong_hash(i) for i in [0,2^64) specifies
157 : a random looking permutation of the integers in [0,2^64). Returning
158 : the low order bits of this random permutation then yields a high
159 : quality non-cryptographic random number stream automatically as it:
160 :
161 : - Has a long period (2^64). This is implied by the permutation
162 : property.
163 :
164 : - Has the expected random properties (as theoretically best possible
165 : for a finite period generator) of a true uniform IID bit source.
166 : For example, the probability of next random number is uniform and
167 : independent of previous N random numbers for N<<2^64). This is
168 : also implied by the full avalanche and permutation property.
169 :
170 : - Is "unpredictable". That is, the internal state of the generator
171 : is difficult to recover from its outputs, e.g. a return from
172 : fd_rng_uint could be have been generated from 2^32 internal states
173 : (if the sequence is known), up to 2^32 sequences (if the state is
174 : known) and up to 2^64 (state,seq) pairs neither is known (the state
175 : / sequence is potentially recoverable given a long enough stream of
176 : values though). This is implied by the truncation of hash values.
177 :
178 : To turn this into parameterizable family of generators, note that
179 : fd_ulong_hash( perm_j( i ) ) where j is some parameterized family of
180 : random permutations is still a permutation and would have all the
181 : above properties for free so long as no perm_j is similar to the hash
182 : permutation inverse. Practically, xoring i by a non-sparse 64-bit
183 : number will ultra cheaply generate a wide family of "good enough"
184 : permutations to do a parameterized shuffling of the original
185 : fd_ulong_hash permutation, creating a large number of parallel
186 : sequences. Since users are empirically notoriously bad at seeding
187 : though, we only let the user specify a 32-bit sequence id and then
188 : generate a unique non-sparse 64-bit random looking seed from it. */
189 :
190 4432087687 : static inline uchar fd_rng_uchar ( fd_rng_t * rng ) { return (uchar )fd_ulong_hash( rng->seq ^ (rng->idx++) ); }
191 264328743 : static inline ushort fd_rng_ushort( fd_rng_t * rng ) { return (ushort)fd_ulong_hash( rng->seq ^ (rng->idx++) ); }
192 71292141748 : static inline uint fd_rng_uint ( fd_rng_t * rng ) { return (uint )fd_ulong_hash( rng->seq ^ (rng->idx++) ); }
193 :
194 : FD_FN_UNUSED static ulong /* Work around -Winline */
195 25558341240 : fd_rng_ulong( fd_rng_t * rng ) {
196 25558341240 : ulong hi = (ulong)fd_rng_uint( rng );
197 25558341240 : return (hi<<32) | (ulong)fd_rng_uint( rng );
198 25558341240 : }
199 :
200 3003000 : static inline schar fd_rng_schar( fd_rng_t * rng ) { return (schar)( fd_rng_uchar ( rng ) >> 1 ); }
201 3003000 : static inline short fd_rng_short( fd_rng_t * rng ) { return (short)( fd_rng_ushort( rng ) >> 1 ); }
202 4203000 : static inline int fd_rng_int ( fd_rng_t * rng ) { return (int )( fd_rng_uint ( rng ) >> 1 ); }
203 3003000 : static inline long fd_rng_long ( fd_rng_t * rng ) { return (long )( fd_rng_ulong ( rng ) >> 1 ); }
204 :
205 : #if FD_HAS_INT128
206 : FD_FN_UNUSED static uint128 /* Work around -Winline */
207 5105803281 : fd_rng_uint128( fd_rng_t * rng ) {
208 5105803281 : return (((uint128)fd_rng_ulong( rng ))<<64) | ((uint128)fd_rng_ulong( rng ));
209 5105803281 : }
210 :
211 : /* FIXME: MIGHT BE BETTER TO MASK OR (hi<<63) ^ lo */
212 3003000 : static inline int128 fd_rng_int128( fd_rng_t * rng ) { return (int128)( fd_rng_uint128( rng ) >> 1 ); }
213 : #endif
214 :
215 : /* fd_rng_{uint_to_float,ulong_to_double}_{c0,c1,c,o}( u ): These
216 : quickly and robustly convert uniform random integers into uniform
217 : random floating point with appropriate rounding. Intervals are:
218 :
219 : c0 -> [0,1)
220 : c1 -> (0,1]
221 : c -> [0,1]
222 : o -> (0,1)
223 :
224 : To provide more specifics, let the real numbers from [0,1) be
225 : partitioned into N uniform disjoint intervals such that interval i
226 : goes from [i/N,(i+1)/N) where i is in [0,N). For single (double)
227 : precision, "float" ("double"), the largest N for which the range of
228 : each interval is _exactly_ representable is N = 2^24 (2^53).
229 :
230 : Given then a uniform IID uint random input, the
231 : fd_rng_uint_to_float_c0 converter output is as though a continuous
232 : IID uniform random in [0,1) was generated and then rounded down to
233 : the start of the containing interval (2^24 intervals). As such, this
234 : generator can never return exactly 1 but it can exactly return +0.
235 : Since floats have higher resolution near 0 than 1, this will not
236 : return all float possible representations in [0,1) but it can return
237 : all possible float representations in [1/2,1). In particular, this
238 : will never return a denorm or -0.
239 :
240 : Similarly for fd_rng_uint_to_float_c1 converter rounds up to the end
241 : of the containing interval / start of the next interval (2^24
242 : intervals). As such, this converter can never return exactly +0 but
243 : it can exactly return 1. It will not return all possible float
244 : representations in (0,1] but it can return all possible float
245 : representations in [1/2,1]. This will never return a denorm or -0.
246 :
247 : The fd_rng_uint_to_float_c converter rounds toward nearest even
248 : toward the start containing interval or start of the next interval
249 : (2^24 intervals). As such, this can return both exactly +0 and
250 : exactly 1 (and the probability of returning exactly +0 == probability
251 : of exactly 1 == (1/2) probability all other possible return values).
252 : It will not return all possible float representations in [0,1] but it
253 : can return all float possible representations in [1/2,1]. This will
254 : never return a denorm or -0.
255 :
256 : The fd_rng_uint_to_float_o converter rounds toward the middle of
257 : containing internal (2^23 intervals ... note that then in a sense
258 : this converter is 1-bit less accurate than the above). As such, this
259 : can neither return +0 nor 1 and will not return all possible float
260 : representations in (0,1). This will never return a denorm or -0.
261 :
262 : Similarly for the double variants (*_{c0,c1,c} uses 2^53 intervals
263 : and o uses 2^52 intervals). */
264 :
265 30003006 : FD_FN_CONST static inline float fd_rng_uint_to_float_c0 ( uint u ) { return (1.f/(float )(1 <<24))*(float )(int )( u>>(32-24) ); }
266 3006 : FD_FN_CONST static inline float fd_rng_uint_to_float_c1 ( uint u ) { return (1.f/(float )(1 <<24))*(float )(int )((u>>(32-24))+ 1U ); }
267 25496106 : FD_FN_CONST static inline float fd_rng_uint_to_float_c ( uint u ) { return (1.f/(float )(1 <<24))*(float )(int )((u>>(32-24))+(u&1U )); }
268 3006 : FD_FN_CONST static inline float fd_rng_uint_to_float_o ( uint u ) { return (1.f/(float )(1 <<24))*(float )(int )((u>>(32-24))| 1U ); }
269 :
270 : #if FD_HAS_DOUBLE
271 30003006 : FD_FN_CONST static inline double fd_rng_ulong_to_double_c0( ulong u ) { return (1. /(double)(1L<<53))*(double)(long)( u>>(64-53) ); }
272 3006 : FD_FN_CONST static inline double fd_rng_ulong_to_double_c1( ulong u ) { return (1. /(double)(1L<<53))*(double)(long)((u>>(64-53))+ 1UL ); }
273 25576242 : FD_FN_CONST static inline double fd_rng_ulong_to_double_c ( ulong u ) { return (1. /(double)(1L<<53))*(double)(long)((u>>(64-53))+(u&1UL)); }
274 3006 : FD_FN_CONST static inline double fd_rng_ulong_to_double_o ( ulong u ) { return (1. /(double)(1L<<53))*(double)(long)((u>>(64-53))| 1UL ); }
275 : #endif
276 :
277 : /* fd_rng_{float,double}_{c0,c1,c,o} are basic uniform generators on the
278 : appropriate interval of 0 to 1 based on the above converters. The
279 : float variant consumes 1 slot, the double variant consumes 2 slots. */
280 :
281 30003000 : static inline float fd_rng_float_c0 ( fd_rng_t * rng ) { return fd_rng_uint_to_float_c0 ( fd_rng_uint ( rng ) ); }
282 3000 : static inline float fd_rng_float_c1 ( fd_rng_t * rng ) { return fd_rng_uint_to_float_c1 ( fd_rng_uint ( rng ) ); }
283 25496100 : static inline float fd_rng_float_c ( fd_rng_t * rng ) { return fd_rng_uint_to_float_c ( fd_rng_uint ( rng ) ); }
284 3000 : static inline float fd_rng_float_o ( fd_rng_t * rng ) { return fd_rng_uint_to_float_o ( fd_rng_uint ( rng ) ); }
285 :
286 : #if FD_HAS_DOUBLE
287 30003000 : static inline double fd_rng_double_c0( fd_rng_t * rng ) { return fd_rng_ulong_to_double_c0( fd_rng_ulong( rng ) ); }
288 3000 : static inline double fd_rng_double_c1( fd_rng_t * rng ) { return fd_rng_ulong_to_double_c1( fd_rng_ulong( rng ) ); }
289 25576236 : static inline double fd_rng_double_c ( fd_rng_t * rng ) { return fd_rng_ulong_to_double_c ( fd_rng_ulong( rng ) ); }
290 3000 : static inline double fd_rng_double_o ( fd_rng_t * rng ) { return fd_rng_ulong_to_double_o ( fd_rng_ulong( rng ) ); }
291 : #endif
292 :
293 : /* fd_rng_int_roll uses the given rng to roll an n-sided die where n is
294 : the number of sides (assumed to be positive). That is returns
295 : uniform IID rand in [0,n) even if n is not an exact power of two.
296 : Similarly for the other types.
297 :
298 : Rejection method based. Specifically, the number of rng slots
299 : consumed is typically 1 but theoretically might be higher
300 : occasionally (64-bit wide types consume rng slots twice as fast).
301 :
302 : Deterministic_rng slot consumption possible with a slightly more
303 : approximate implementation (bound the number of iterations such that
304 : this always consumes a fixed number of slot and accept the
305 : practically infinitesimal bias when n is not a power of 2). */
306 :
307 : static inline uint
308 : fd_rng_private_roll32( fd_rng_t * rng,
309 9340103185 : uint n ) {
310 9340103185 : uint r = (-n) % n; /* Compute 2^32 mod n = (2^32 - n) mod n = (-n) mod n, compile time for compile time n */
311 9340103347 : uint u; do u = fd_rng_uint( rng ); while( FD_UNLIKELY( u<r ) ); /* Rejection unlikely (highly unlikely for n<<<2^32) */
312 : /* At this point, u is uniform in [r,2^32) which has an integer
313 : multiple of n elements (thus u % n is in [0,n) uniform) */
314 9340103185 : return u % n;
315 9340103185 : }
316 :
317 : static inline ulong
318 : fd_rng_private_roll64( fd_rng_t * rng,
319 2012270036 : ulong n ) {
320 2012270036 : ulong r = (-n) % n; /* Compute 2^64 mod n = (2^64 - n) mod n = (-n) mod n, compile time for compile time n */
321 2012270036 : ulong u; do u = fd_rng_ulong( rng ); while( FD_UNLIKELY( u<r ) ); /* Rejection unlikely (highly unlikely for n<<<2^64) */
322 : /* At this point, u is uniform in [r,2^64) which has an integer
323 : multiple of n elements (thus u % n is in [0,n) uniform) */
324 2012270036 : return u % n;
325 2012270036 : }
326 :
327 1610625000 : static inline uchar fd_rng_uchar_roll ( fd_rng_t * rng, uchar n ) { return (uchar )fd_rng_private_roll32( rng, (uint )n ); }
328 0 : static inline ushort fd_rng_ushort_roll( fd_rng_t * rng, ushort n ) { return (ushort)fd_rng_private_roll32( rng, (uint )n ); }
329 7110178131 : static inline uint fd_rng_uint_roll ( fd_rng_t * rng, uint n ) { return (uint )fd_rng_private_roll32( rng, (uint )n ); }
330 2012270036 : static inline ulong fd_rng_ulong_roll ( fd_rng_t * rng, ulong n ) { return (ulong )fd_rng_private_roll64( rng, (ulong)n ); }
331 :
332 0 : static inline schar fd_rng_schar_roll ( fd_rng_t * rng, schar n ) { return (schar )fd_rng_private_roll32( rng, (uint )n ); }
333 0 : static inline short fd_rng_short_roll ( fd_rng_t * rng, short n ) { return (short )fd_rng_private_roll32( rng, (uint )n ); }
334 619300054 : static inline int fd_rng_int_roll ( fd_rng_t * rng, int n ) { return (int )fd_rng_private_roll32( rng, (uint )n ); }
335 0 : static inline long fd_rng_long_roll ( fd_rng_t * rng, long n ) { return (long )fd_rng_private_roll64( rng, (ulong)n ); }
336 :
337 : /* fd_rng_coin_tosses tosses a fair coin until it comes up tails and
338 : returns the number of tosses taken (including the final toss that
339 : came up tails). That is, the PDF of coin tosses is:
340 :
341 : Pr(cnt) = 2^-cnt, cnt>0
342 : 0, otherwise
343 :
344 : Typically consumes 1 slot but can consume more with exceedingly low
345 : probability (~2^-32). Deterministic slot consumption is possible by
346 : truncating the maximum number of tosses. Practically a fast O(1). */
347 :
348 : FD_FN_UNUSED static ulong /* Work around -Winline */
349 761100 : fd_rng_coin_tosses( fd_rng_t * rng ) {
350 761100 : ulong cnt = 1UL;
351 761100 : ulong u;
352 761100 : for(;;) {
353 761100 : u = (ulong)fd_rng_uint( rng );
354 761100 : if( FD_LIKELY( u ) ) break;
355 0 : cnt += 32UL;
356 0 : }
357 761100 : cnt += (ulong)fd_ulong_find_lsb( u );
358 761100 : return cnt;
359 761100 : }
360 :
361 : /* fd_rng_float_robust generates a uniform random number in [0,1) and
362 : rounds this infinite precision result to the closest exactly
363 : representable float in [0,1]. As such, this can theoretically
364 : generate any exactly representable floating point number in [0,1],
365 : including 0 (with an exceedingly small probability), denorms (also
366 : with an exceedingly small probability) and exact 1 (with a
367 : probability of ~2^-25). This will never produce a value larger than
368 : 1, a negative value or -0. This is slower than the above uniform
369 : floating point generators above but still a reasonably fast O(1).
370 : Typically consumes 2 slots but can consume more with exceedingly low
371 : probability.
372 :
373 : Similarly for the double precision variant. Typically consumes 3
374 : slots. Reasonably fast O(1).
375 :
376 : fd_rng_float_exp generates a random number with an exponential
377 : distribution.
378 :
379 : PDF:
380 : f(x) ~ exp(-x), x>=0
381 : 0, otherwise
382 : CDF:
383 : F(x) ~ 1-exp(-x), x>=0
384 : 0, otherwise
385 :
386 : Based on transformation method applied to a 63-bit uniform rand. As
387 : such, some quantization due to the floating point limitations and
388 : generator precision is present around zero and in the extreme tails
389 : but these rarely affect typical use cases (variants that can generate
390 : every non-negative float possible are possible in principle but these
391 : are more expensive under the hood). Extreme values are:
392 :
393 : output | prob
394 : 0 ~ -ln( 1 ) | ~2^-25 (limited by floating rep)
395 : ~5.96e-8 ~ -ln( 1- 2^-24 ) | ~2^-24 (limited by floating rep)
396 : ~1.19e-7 ~ -ln( 1-2*2^-24 ) | ~2^-24 (limited by floating rep)
397 : ...
398 : ~42.975 ~ -ln( 2*2^-63 ) | ~2^-63 (limited by generator)
399 : ~43.668 ~ -ln( 2^-63 ) | ~2^-63 (limited by generator)
400 :
401 : The current implementation will only generate bit level identical
402 : results between machine targets that have a libm with correctly
403 : rounded exp and log functions. It is possible to do this reasonably
404 : fast without use of libm if necessary though. Consumes 2 slots.
405 : Reasonably fast O(1).
406 :
407 : For the double precision generator, similar considerations apply:
408 :
409 : output | prob
410 : 0 ~ -ln( 1 ) | ~2^-54 (limited by floating rep)
411 : ~1.11e-16 ~ -ln( 1- 2^-53 ) | ~2^-53 (limited by floating rep)
412 : ~2.22e-16 ~ -ln( 1-2*2^-53 ) | ~2^-53 (limited by floating rep)
413 : ...
414 : ~42.975 ~ -ln( 2*2^-63 ) | ~2^-63 (limited by generator)
415 : ~43.668 ~ -ln( 2^-63 ) | ~2^-63 (limited by generator)
416 :
417 : Consumes 2 slots. Reasonably fast O(1).
418 :
419 : fd_rng_float_norm generates a random number with a normal
420 : distribution.
421 :
422 : PDF:
423 : f(x) ~ exp(-x^2/2) / sqrt(2 pi)
424 :
425 : Based on the Ziggurat method. User should assume any finite value is
426 : possible (including denorms and -0 though note that denorms will
427 : likely be flushed to zero if the processor is configured to do so for
428 : performance, as is typical and that values larger in magnitude than
429 : sqrt( 2 log N ) where N is the number of invocations of this will be
430 : exceedingly rare). Typically consumes 1 slot but can consume more
431 : with moderately low probability. Reasonably fast O(1).
432 : Deterministic slot consumption can be obtained by using the
433 : Box-Muller method (will consume more slots on average though).
434 :
435 : Similarly for the double precision variant. Typically consumes 2
436 : slots. Reasonably fast O(1). */
437 :
438 : float fd_rng_float_robust( fd_rng_t * rng );
439 : float fd_rng_float_exp ( fd_rng_t * rng );
440 : float fd_rng_float_norm ( fd_rng_t * rng );
441 :
442 : #if FD_HAS_DOUBLE
443 : double fd_rng_double_robust( fd_rng_t * rng );
444 : double fd_rng_double_exp ( fd_rng_t * rng );
445 : double fd_rng_double_norm ( fd_rng_t * rng );
446 : #endif
447 :
448 : /* FIXME: IMPORT ATOMIC VARIANTS FOR REENTRANT USAGE (E.G. ATOMIC_XCHG
449 : FOR SET, ATOMIC_INC OF INDEX FOR THE RETURN TYPES, CAS STATE UPDATES,
450 : ETC) */
451 :
452 : /* fd_rng_secure reads random bytes from a cryptographically secure
453 : source provided by the platform. Features /dev/urandom like entropy.
454 :
455 : On success, returns d and guarantees that [d,d+sz) is filled with
456 : unguessable random bytes. On failure, returns NULL and prints reason
457 : for failure to warning log.
458 :
459 : (!!!) This operation may fail if no secure RNG is available or the
460 : RNG failed for some reason. Always check the return code.
461 :
462 : Currently available on Linux, FreeBSD, and macOS.
463 : On Linux and FreeBSD uses getrandom(2).
464 : On macOS uses CommonCrypto's CommonRandom. */
465 :
466 : FD_FN_SENSITIVE __attribute__((warn_unused_result))
467 : void *
468 : fd_rng_secure( void * d,
469 : ulong sz );
470 :
471 : FD_PROTOTYPES_END
472 :
473 : #if FD_HAS_X86
474 :
475 : #include <immintrin.h>
476 :
477 : /* rdrand reads sz cryptographically secure bytes using the RDRAND x86
478 : instruction. Returns 1 if the read succeeded, 0 on failure.
479 :
480 : Intel architecture manual section 7.3.17.1 (December 2023):
481 :
482 : The RDRAND instruction returns a random number. All Intel
483 : processors that support the RDRAND instruction indicate the
484 : availability of the RDRAND instruction via reporting
485 : CPUID.01H:ECX.RDRAND[bit 30] = 1.
486 :
487 : RDRAND returns random numbers that are supplied by a
488 : cryptographically secure, deterministic] random bit generator DRBG.
489 : The DRBG is designed to meet the NIST SP 800-90A standard. The DRBG
490 : is re-seeded frequently from an on-chip non-deterministic entropy
491 : source to guarantee data returned by RDRAND is statistically
492 : uniform, nonperiodic and non-deterministic.
493 :
494 : In order for the hardware design to meet its security goals, the
495 : random number generator continuously tests itself and the random
496 : data it is generating. Runtime failures in the random number
497 : generator circuitry or statistically anomalous data occurring by
498 : chance will be detected by the self test hardware and flag the
499 : resulting data as being bad. In such extremely rare cases, the
500 : RDRAND instruction will return no data instead of bad data.
501 :
502 : Under heavy load, with multiple cores executing RDRAND in parallel,
503 : it is possible, though unlikely, for the demand of random numbers
504 : by software processes/threads to exceed the rate at which the
505 : random number generator hardware can supply them. This will lead to
506 : the RDRAND instruction returning no data transitorily. */
507 :
508 : FD_PROTOTYPES_BEGIN
509 :
510 : __attribute__((warn_unused_result)) static inline int
511 : fd_rdrand( uchar * dst,
512 12582912 : ulong sz ) {
513 :
514 12582912 : uchar * cur = dst;
515 12582912 : ulong align_sz = sz & ~0x7UL;
516 25165824 : while( cur < dst + align_sz ) {
517 12582912 : unsigned long long slice;
518 12582912 : if( FD_UNLIKELY( !_rdrand64_step( &slice ) ) )
519 0 : return 0;
520 12582912 : FD_STORE( ulong, dst, slice );
521 12582912 : cur += 8UL;
522 12582912 : }
523 12582912 : if( FD_UNLIKELY( cur < dst ) ) {
524 0 : unsigned long long slice;
525 0 : if( FD_UNLIKELY( !_rdrand64_step( &slice ) ) )
526 0 : return 0;
527 0 : ulong tail = (ulong)( cur - dst );
528 0 : fd_memcpy( cur, &slice, tail );
529 0 : cur += tail;
530 0 : }
531 :
532 12582912 : return 1;
533 12582912 : }
534 :
535 : FD_PROTOTYPES_END
536 :
537 : #endif /* FD_HAS_X86 */
538 :
539 : #endif /* HEADER_fd_src_rng_fd_rng_h */
|