LCOV - code coverage report
Current view: top level - util/rng - fd_rng.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 93 109 85.3 %
Date: 2024-11-13 11:58:15 Functions: 863 31250 2.8 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_rng_fd_rng_h
       2             : #define HEADER_fd_src_rng_fd_rng_h
       3             : 
       4             : /* Simple fast high quality non-cryptographic pseudo random number
       5             :    generator.  Supports parallel generation, interprocess shared memory
       6             :    usage, checkpointing, random access, reversible, atomic, etc.  Passes
       7             :    extremely strict tests of randomness.
       8             : 
       9             :    Assumes fd_bits provides a high quality 64<>64-bit integer hash
      10             :    functions (i.e. full avalanche) with the property
      11             :    fd_ulong_hash(0)==0, fd_ulong_hash(i) for i in [0,2^64) yields a
      12             :    permutation of [0,2^64) and also provides reasonably efficient
      13             :    inverse of this. */
      14             : 
      15             : #include "../bits/fd_bits.h"
      16             : 
      17             : /* FD_RNG_{ALIGN,FOOTPRINT} describe the alignment and footprint needed
      18             :    for a rng.  ALIGN should be a positive integer power of 2.  FOOTPRINT
      19             :    is multiple of ALIGN.  These are provided to facilitate compile time
      20             :    declarations.  */
      21             : 
      22             : #define FD_RNG_ALIGN     (16UL)
      23             : #define FD_RNG_FOOTPRINT (16UL)
      24             : 
      25             : /* fd_rng_t should be treated as an opaque handle of a pseudo random
      26             :    number generator.  (It technically isn't here to facilitate inlining
      27             :    of fd_rng operations.) */
      28             : 
      29             : struct __attribute__((aligned(FD_RNG_ALIGN))) fd_rng_private {
      30             :   ulong seq;
      31             :   ulong idx;
      32             : };
      33             : 
      34             : typedef struct fd_rng_private fd_rng_t;
      35             : 
      36             : FD_PROTOTYPES_BEGIN
      37             : 
      38             : /* Private: fd_rng_private_expand(seq) randomly expands an arbitrary
      39             :    32-bit value into a unique 64-bit non-sparse value such that the
      40             :    original 32-bit value can be recovered and that 0 expands to
      41             :    something non-zero (the non-sparse expansion helps reduce
      42             :    correlations between different sequences, the zero to non-zero
      43             :    expansion means the common initialization of seq=0, idx=0 doesn't
      44             :    yield 0 for the first random value as would happen for hash functions
      45             :    that have the property fd_ulong_hash(0)==0, the XOR 64-bit const here
      46             :    is zero in the lower 32-bits and an arbitrary non-zero in the upper
      47             :    32-bits).  For the current fd_ulong_hash implementation and XOR
      48             :    64-bit constant, the pop count of the expanded seq is ~32.000 +/-
      49             :    ~4.000 and in [8,56] for all possible values of seq (i.e. the
      50             :    expanded seq popcount is well approximated as a normal with mean 32
      51             :    and rms 4 and the extremes are in line with the expected extremes for
      52             :    2^32 samples). */
      53             : 
      54             : FD_FN_CONST static inline ulong
      55 12884988717 : fd_rng_private_expand( uint seq ) {
      56 12884988717 :   return fd_ulong_hash( 0x900df00d00000000UL ^ (ulong)seq );
      57 12884988717 : }
      58             : 
      59             : /* Private: fd_rng_private_contract(seq) extract the original 32-bit seq
      60             :    from its expanded value */
      61             : 
      62             : FD_FN_CONST static inline uint
      63 12884949903 : fd_rng_private_contract( ulong eseq ) {
      64 12884949903 :   return (uint)fd_ulong_hash_inverse( eseq );
      65 12884949903 : }
      66             : 
      67             : /* fd_rng_{align,footprint} give the needed alignment and footprint
      68             :    for a memory region suitable to hold a fd_rng's state.  Declaration /
      69             :    aligned_alloc / fd_alloca friendly (e.g. a memory region declared as
      70             :    "fd_rng_t _rng[1];", or created by
      71             :    "aligned_alloc(alignof(fd_rng_t),sizeof(fd_rng_t))" or created by
      72             :    "fd_alloca(alignof(fd_rng_t),sizeof(fd_rng_t))" will all
      73             :    automatically have the needed alignment and footprint).
      74             :    fd_rng_{align,footprint} return the same value as
      75             :    FD_RNG_{ALIGN,FOOTPRINT}.
      76             : 
      77             :    fd_rng_new takes ownership of the memory region pointed to by mem
      78             :    (which is assumed to be non-NULL with the appropriate alignment and
      79             :    footprint) and formats it as a fd_rng.  The random number generator
      80             :    stream will initialized to use pseudo random sequence "seq" and will
      81             :    start at slot "idx".  Returns mem (which will be formatted for use).
      82             :    The caller will not be joined to the region on return.
      83             : 
      84             :    fd_rng_join joins the caller to a memory region holding the state of
      85             :    a fd_rng.  _rng points to a memory region in the local address space
      86             :    that holds a fd_rng.  Returns an opaque handle of the local join in
      87             :    the local address space to the fd_rng (which might not be the same
      88             :    thing as _rng ... the separation of new and join is to facilitate
      89             :    interprocess shared memory usage patterns while supporting
      90             :    transparent upgrades users of this to more elaborate algorithms where
      91             :    address translations under the hood may not be trivial).
      92             : 
      93             :    fd_rng_leave leaves the current rng join.  Returns a pointer in the
      94             :    local address space to the memory region holding the state of the
      95             :    fd_rng.  The join should not be used afterward.
      96             : 
      97             :    fd_rng_delete unformats the memory region currently used to hold the
      98             :    state of a _rng and returns ownership of the underlying memory region
      99             :    to the caller.  There should be no joins in the system on the fd_rng.
     100             :    Returns a pointer to the underlying memory region. */
     101             : 
     102           0 : FD_FN_CONST static inline ulong fd_rng_align    ( void ) { return alignof( fd_rng_t ); }
     103           0 : FD_FN_CONST static inline ulong fd_rng_footprint( void ) { return sizeof ( fd_rng_t ); }
     104             : 
     105             : static inline void *
     106             : fd_rng_new( void * mem,
     107             :             uint   seq,
     108       62874 :             ulong  idx ) {
     109       62874 :   fd_rng_t * rng = (fd_rng_t *)mem;
     110       62874 :   rng->seq = fd_rng_private_expand( seq );
     111       62874 :   rng->idx = idx;
     112       62874 :   return (void *)rng;
     113       62874 : }
     114             : 
     115       59553 : static inline fd_rng_t * fd_rng_join  ( void     * _rng ) { return (fd_rng_t *)_rng; }
     116       45606 : static inline void     * fd_rng_leave ( fd_rng_t *  rng ) { return (void     *) rng; }
     117       45606 : static inline void     * fd_rng_delete( void     * _rng ) { return (void     *)_rng; }
     118             : 
     119             : /* fd_rng_seq returns the sequence used by the rng.  fd_rng_idx returns
     120             :    the next slot that will be consumed by the rng. */
     121             : 
     122       48015 : static inline uint  fd_rng_seq( fd_rng_t * rng ) { return fd_rng_private_contract( rng->seq ); }
     123       48012 : static inline ulong fd_rng_idx( fd_rng_t * rng ) { return rng->idx;                            }
     124             : 
     125             : /* fd_rng_seq_set sets the sequence to be used by rng and returns
     126             :    the replaced value.  fd_rng_idx_set sets the next slot that will be
     127             :    consumed next by rng and returns the replaced value. */
     128             : 
     129             : static inline uint
     130             : fd_rng_seq_set( fd_rng_t * rng,
     131       24006 :                 uint       seq ) {
     132       24006 :   uint old = fd_rng_seq( rng );
     133       24006 :   rng->seq = fd_rng_private_expand( seq );
     134       24006 :   return old;
     135       24006 : }
     136             : 
     137             : static inline ulong
     138             : fd_rng_idx_set( fd_rng_t * rng,
     139       24003 :                 ulong      idx ) {
     140       24003 :   ulong old = fd_rng_idx( rng );
     141       24003 :   rng->idx = idx;
     142       24003 :   return old;
     143       24003 : }
     144             : 
     145             : /* fd_rng_{uchar,ushort,uint,ulong} returns the next integer in the PRNG
     146             :    sequence in [0,2^N) for N in {8,16,32,64} respectively with uniform
     147             :    probability with a period of 2^64 (fd_rng_ulong has a period of 2^63
     148             :    as it consumes two slots).  Passes various strict PRNG tests (e.g.
     149             :    diehard, dieharder, testu01, etc).  Assumes rng is a current join.
     150             :    fd_rng_{schar,short,int,long} are the same but return a value in
     151             :    [0,2^(N-1)).  (A signed generator that can assume all possible values
     152             :    of a signed int uniform IID can be obtained by casting the output of
     153             :    the unsigned generator of the same, assuming a typical twos
     154             :    complement arithmetic platform.)
     155             : 
     156             :    The theory for this that fd_ulong_hash(i) for i in [0,2^64) specifies
     157             :    a random looking permutation of the integers in [0,2^64).  Returning
     158             :    the low order bits of this random permutation then yields a high
     159             :    quality non-cryptographic random number stream automatically as it:
     160             : 
     161             :    - Has a long period (2^64).  This is implied by the permutation
     162             :      property.
     163             : 
     164             :    - Has the expected random properties (as theoretically best possible
     165             :      for a finite period generator) of a true uniform IID bit source.
     166             :      For example, the probability of next random number is uniform and
     167             :      independent of previous N random numbers for N<<2^64).  This is
     168             :      also implied by the full avalanche and permutation property.
     169             : 
     170             :    - Is "unpredictable".  That is, the internal state of the generator
     171             :      is difficult to recover from its outputs, e.g. a return from
     172             :      fd_rng_uint could be have been generated from 2^32 internal states
     173             :      (if the sequence is known), up to 2^32 sequences (if the state is
     174             :      known) and up to 2^64 (state,seq) pairs neither is known (the state
     175             :      / sequence is potentially recoverable given a long enough stream of
     176             :      values though).  This is implied by the truncation of hash values.
     177             : 
     178             :    To turn this into parameterizable family of generators, note that
     179             :    fd_ulong_hash( perm_j( i ) ) where j is some parameterized family of
     180             :    random permutations is still a permutation and would have all the
     181             :    above properties for free so long as no perm_j is similar to the hash
     182             :    permutation inverse.  Practically, xoring i by a non-sparse 64-bit
     183             :    number will ultra cheaply generate a wide family of "good enough"
     184             :    permutations to do a parameterized shuffling of the original
     185             :    fd_ulong_hash permutation, creating a large number of parallel
     186             :    sequences.  Since users are empirically notoriously bad at seeding
     187             :    though, we only let the user specify a 32-bit sequence id and then
     188             :    generate a unique non-sparse 64-bit random looking seed from it. */
     189             : 
     190  4432087687 : static inline uchar  fd_rng_uchar ( fd_rng_t * rng ) { return (uchar )fd_ulong_hash( rng->seq ^ (rng->idx++) ); }
     191   264328743 : static inline ushort fd_rng_ushort( fd_rng_t * rng ) { return (ushort)fd_ulong_hash( rng->seq ^ (rng->idx++) ); }
     192 71292141748 : static inline uint   fd_rng_uint  ( fd_rng_t * rng ) { return (uint  )fd_ulong_hash( rng->seq ^ (rng->idx++) ); }
     193             : 
     194             : FD_FN_UNUSED static ulong /* Work around -Winline */
     195 25558341240 : fd_rng_ulong( fd_rng_t * rng ) {
     196 25558341240 :   ulong hi = (ulong)fd_rng_uint( rng );
     197 25558341240 :   return (hi<<32) | (ulong)fd_rng_uint( rng );
     198 25558341240 : }
     199             : 
     200     3003000 : static inline schar fd_rng_schar( fd_rng_t * rng ) { return (schar)( fd_rng_uchar ( rng ) >> 1 ); }
     201     3003000 : static inline short fd_rng_short( fd_rng_t * rng ) { return (short)( fd_rng_ushort( rng ) >> 1 ); }
     202     4203000 : static inline int   fd_rng_int  ( fd_rng_t * rng ) { return (int  )( fd_rng_uint  ( rng ) >> 1 ); }
     203     3003000 : static inline long  fd_rng_long ( fd_rng_t * rng ) { return (long )( fd_rng_ulong ( rng ) >> 1 ); }
     204             : 
     205             : #if FD_HAS_INT128
     206             : FD_FN_UNUSED static uint128 /* Work around -Winline */
     207  5105803281 : fd_rng_uint128( fd_rng_t * rng ) {
     208  5105803281 :   return (((uint128)fd_rng_ulong( rng ))<<64) | ((uint128)fd_rng_ulong( rng ));
     209  5105803281 : }
     210             : 
     211             : /* FIXME: MIGHT BE BETTER TO MASK OR (hi<<63) ^ lo */
     212     3003000 : static inline int128 fd_rng_int128( fd_rng_t * rng ) { return (int128)( fd_rng_uint128( rng ) >> 1 ); }
     213             : #endif
     214             : 
     215             : /* fd_rng_{uint_to_float,ulong_to_double}_{c0,c1,c,o}( u ):  These
     216             :    quickly and robustly convert uniform random integers into uniform
     217             :    random floating point with appropriate rounding.  Intervals are:
     218             : 
     219             :      c0 -> [0,1)
     220             :      c1 -> (0,1]
     221             :      c  -> [0,1]
     222             :      o  -> (0,1)
     223             : 
     224             :    To provide more specifics, let the real numbers from [0,1) be
     225             :    partitioned into N uniform disjoint intervals such that interval i
     226             :    goes from [i/N,(i+1)/N) where i is in [0,N).  For single (double)
     227             :    precision, "float" ("double"), the largest N for which the range of
     228             :    each interval is _exactly_ representable is N = 2^24 (2^53).
     229             : 
     230             :    Given then a uniform IID uint random input, the
     231             :    fd_rng_uint_to_float_c0 converter output is as though a continuous
     232             :    IID uniform random in [0,1) was generated and then rounded down to
     233             :    the start of the containing interval (2^24 intervals).  As such, this
     234             :    generator can never return exactly 1 but it can exactly return +0.
     235             :    Since floats have higher resolution near 0 than 1, this will not
     236             :    return all float possible representations in [0,1) but it can return
     237             :    all possible float representations in [1/2,1).  In particular, this
     238             :    will never return a denorm or -0.
     239             : 
     240             :    Similarly for fd_rng_uint_to_float_c1 converter rounds up to the end
     241             :    of the containing interval / start of the next interval (2^24
     242             :    intervals).  As such, this converter can never return exactly +0 but
     243             :    it can exactly return 1.  It will not return all possible float
     244             :    representations in (0,1] but it can return all possible float
     245             :    representations in [1/2,1].  This will never return a denorm or -0.
     246             : 
     247             :    The fd_rng_uint_to_float_c converter rounds toward nearest even
     248             :    toward the start containing interval or start of the next interval
     249             :    (2^24 intervals).  As such, this can return both exactly +0 and
     250             :    exactly 1 (and the probability of returning exactly +0 == probability
     251             :    of exactly 1 == (1/2) probability all other possible return values).
     252             :    It will not return all possible float representations in [0,1] but it
     253             :    can return all float possible representations in [1/2,1].  This will
     254             :    never return a denorm or -0.
     255             : 
     256             :    The fd_rng_uint_to_float_o converter rounds toward the middle of
     257             :    containing internal (2^23 intervals ... note that then in a sense
     258             :    this converter is 1-bit less accurate than the above).  As such, this
     259             :    can neither return +0 nor 1 and will not return all possible float
     260             :    representations in (0,1).  This will never return a denorm or -0.
     261             : 
     262             :    Similarly for the double variants (*_{c0,c1,c} uses 2^53 intervals
     263             :    and o uses 2^52 intervals). */
     264             : 
     265    30003006 : FD_FN_CONST static inline float  fd_rng_uint_to_float_c0  ( uint  u ) { return (1.f/(float )(1 <<24))*(float )(int )( u>>(32-24)         ); }
     266        3006 : FD_FN_CONST static inline float  fd_rng_uint_to_float_c1  ( uint  u ) { return (1.f/(float )(1 <<24))*(float )(int )((u>>(32-24))+   1U  ); }
     267    25496106 : FD_FN_CONST static inline float  fd_rng_uint_to_float_c   ( uint  u ) { return (1.f/(float )(1 <<24))*(float )(int )((u>>(32-24))+(u&1U )); }
     268        3006 : FD_FN_CONST static inline float  fd_rng_uint_to_float_o   ( uint  u ) { return (1.f/(float )(1 <<24))*(float )(int )((u>>(32-24))|   1U  ); }
     269             : 
     270             : #if FD_HAS_DOUBLE
     271    30003006 : FD_FN_CONST static inline double fd_rng_ulong_to_double_c0( ulong u ) { return (1. /(double)(1L<<53))*(double)(long)( u>>(64-53)         ); }
     272        3006 : FD_FN_CONST static inline double fd_rng_ulong_to_double_c1( ulong u ) { return (1. /(double)(1L<<53))*(double)(long)((u>>(64-53))+   1UL ); }
     273    25576242 : FD_FN_CONST static inline double fd_rng_ulong_to_double_c ( ulong u ) { return (1. /(double)(1L<<53))*(double)(long)((u>>(64-53))+(u&1UL)); }
     274        3006 : FD_FN_CONST static inline double fd_rng_ulong_to_double_o ( ulong u ) { return (1. /(double)(1L<<53))*(double)(long)((u>>(64-53))|   1UL ); }
     275             : #endif
     276             : 
     277             : /* fd_rng_{float,double}_{c0,c1,c,o} are basic uniform generators on the
     278             :    appropriate interval of 0 to 1 based on the above converters.  The
     279             :    float variant consumes 1 slot, the double variant consumes 2 slots. */
     280             : 
     281    30003000 : static inline float  fd_rng_float_c0 ( fd_rng_t * rng ) { return fd_rng_uint_to_float_c0  ( fd_rng_uint ( rng ) ); }
     282        3000 : static inline float  fd_rng_float_c1 ( fd_rng_t * rng ) { return fd_rng_uint_to_float_c1  ( fd_rng_uint ( rng ) ); }
     283    25496100 : static inline float  fd_rng_float_c  ( fd_rng_t * rng ) { return fd_rng_uint_to_float_c   ( fd_rng_uint ( rng ) ); }
     284        3000 : static inline float  fd_rng_float_o  ( fd_rng_t * rng ) { return fd_rng_uint_to_float_o   ( fd_rng_uint ( rng ) ); }
     285             : 
     286             : #if FD_HAS_DOUBLE
     287    30003000 : static inline double fd_rng_double_c0( fd_rng_t * rng ) { return fd_rng_ulong_to_double_c0( fd_rng_ulong( rng ) ); }
     288        3000 : static inline double fd_rng_double_c1( fd_rng_t * rng ) { return fd_rng_ulong_to_double_c1( fd_rng_ulong( rng ) ); }
     289    25576236 : static inline double fd_rng_double_c ( fd_rng_t * rng ) { return fd_rng_ulong_to_double_c ( fd_rng_ulong( rng ) ); }
     290        3000 : static inline double fd_rng_double_o ( fd_rng_t * rng ) { return fd_rng_ulong_to_double_o ( fd_rng_ulong( rng ) ); }
     291             : #endif
     292             : 
     293             : /* fd_rng_int_roll uses the given rng to roll an n-sided die where n is
     294             :    the number of sides (assumed to be positive).  That is returns
     295             :    uniform IID rand in [0,n) even if n is not an exact power of two.
     296             :    Similarly for the other types.
     297             : 
     298             :    Rejection method based.  Specifically, the number of rng slots
     299             :    consumed is typically 1 but theoretically might be higher
     300             :    occasionally (64-bit wide types consume rng slots twice as fast).
     301             : 
     302             :    Deterministic_rng slot consumption possible with a slightly more
     303             :    approximate implementation (bound the number of iterations such that
     304             :    this always consumes a fixed number of slot and accept the
     305             :    practically infinitesimal bias when n is not a power of 2). */
     306             : 
     307             : static inline uint
     308             : fd_rng_private_roll32( fd_rng_t * rng,
     309  9340103185 :                        uint       n ) {
     310  9340103185 :   uint r = (-n) % n; /* Compute 2^32 mod n = (2^32 - n) mod n = (-n) mod n, compile time for compile time n */
     311  9340103347 :   uint u; do u = fd_rng_uint( rng ); while( FD_UNLIKELY( u<r ) ); /* Rejection unlikely (highly unlikely for n<<<2^32) */
     312             :   /* At this point, u is uniform in [r,2^32) which has an integer
     313             :      multiple of n elements (thus u % n is in [0,n) uniform) */
     314  9340103185 :   return u % n;
     315  9340103185 : }
     316             : 
     317             : static inline ulong
     318             : fd_rng_private_roll64( fd_rng_t * rng,
     319  2012270036 :                        ulong      n ) {
     320  2012270036 :   ulong r = (-n) % n; /* Compute 2^64 mod n = (2^64 - n) mod n = (-n) mod n, compile time for compile time n */
     321  2012270036 :   ulong u; do u = fd_rng_ulong( rng ); while( FD_UNLIKELY( u<r ) ); /* Rejection unlikely (highly unlikely for n<<<2^64) */
     322             :   /* At this point, u is uniform in [r,2^64) which has an integer
     323             :      multiple of n elements (thus u % n is in [0,n) uniform) */
     324  2012270036 :   return u % n;
     325  2012270036 : }
     326             : 
     327  1610625000 : static inline uchar  fd_rng_uchar_roll ( fd_rng_t * rng, uchar  n ) { return (uchar )fd_rng_private_roll32( rng, (uint )n ); }
     328           0 : static inline ushort fd_rng_ushort_roll( fd_rng_t * rng, ushort n ) { return (ushort)fd_rng_private_roll32( rng, (uint )n ); }
     329  7110178131 : static inline uint   fd_rng_uint_roll  ( fd_rng_t * rng, uint   n ) { return (uint  )fd_rng_private_roll32( rng, (uint )n ); }
     330  2012270036 : static inline ulong  fd_rng_ulong_roll ( fd_rng_t * rng, ulong  n ) { return (ulong )fd_rng_private_roll64( rng, (ulong)n ); }
     331             : 
     332           0 : static inline schar  fd_rng_schar_roll ( fd_rng_t * rng, schar  n ) { return (schar )fd_rng_private_roll32( rng, (uint )n ); }
     333           0 : static inline short  fd_rng_short_roll ( fd_rng_t * rng, short  n ) { return (short )fd_rng_private_roll32( rng, (uint )n ); }
     334   619300054 : static inline int    fd_rng_int_roll   ( fd_rng_t * rng, int    n ) { return (int   )fd_rng_private_roll32( rng, (uint )n ); }
     335           0 : static inline long   fd_rng_long_roll  ( fd_rng_t * rng, long   n ) { return (long  )fd_rng_private_roll64( rng, (ulong)n ); }
     336             : 
     337             : /* fd_rng_coin_tosses tosses a fair coin until it comes up tails and
     338             :    returns the number of tosses taken (including the final toss that
     339             :    came up tails).  That is, the PDF of coin tosses is:
     340             : 
     341             :      Pr(cnt) = 2^-cnt, cnt>0
     342             :                0,      otherwise
     343             : 
     344             :    Typically consumes 1 slot but can consume more with exceedingly low
     345             :    probability (~2^-32).  Deterministic slot consumption is possible by
     346             :    truncating the maximum number of tosses.  Practically a fast O(1). */
     347             : 
     348             : FD_FN_UNUSED static ulong /* Work around -Winline */
     349      761100 : fd_rng_coin_tosses( fd_rng_t * rng ) {
     350      761100 :   ulong cnt = 1UL;
     351      761100 :   ulong u;
     352      761100 :   for(;;) {
     353      761100 :     u = (ulong)fd_rng_uint( rng );
     354      761100 :     if( FD_LIKELY( u ) ) break;
     355           0 :     cnt += 32UL;
     356           0 :   }
     357      761100 :   cnt += (ulong)fd_ulong_find_lsb( u );
     358      761100 :   return cnt;
     359      761100 : }
     360             : 
     361             : /* fd_rng_float_robust generates a uniform random number in [0,1) and
     362             :    rounds this infinite precision result to the closest exactly
     363             :    representable float in [0,1].  As such, this can theoretically
     364             :    generate any exactly representable floating point number in [0,1],
     365             :    including 0 (with an exceedingly small probability), denorms (also
     366             :    with an exceedingly small probability) and exact 1 (with a
     367             :    probability of ~2^-25).  This will never produce a value larger than
     368             :    1, a negative value or -0.  This is slower than the above uniform
     369             :    floating point generators above but still a reasonably fast O(1).
     370             :    Typically consumes 2 slots but can consume more with exceedingly low
     371             :    probability.
     372             : 
     373             :    Similarly for the double precision variant.  Typically consumes 3
     374             :    slots.  Reasonably fast O(1).
     375             : 
     376             :    fd_rng_float_exp generates a random number with an exponential
     377             :    distribution.
     378             : 
     379             :    PDF:
     380             :      f(x) ~ exp(-x), x>=0
     381             :             0,       otherwise
     382             :    CDF:
     383             :      F(x) ~ 1-exp(-x), x>=0
     384             :             0,         otherwise
     385             : 
     386             :    Based on transformation method applied to a 63-bit uniform rand.  As
     387             :    such, some quantization due to the floating point limitations and
     388             :    generator precision is present around zero and in the extreme tails
     389             :    but these rarely affect typical use cases (variants that can generate
     390             :    every non-negative float possible are possible in principle but these
     391             :    are more expensive under the hood).  Extreme values are:
     392             : 
     393             :      output                      | prob
     394             :      0        ~ -ln( 1         ) | ~2^-25 (limited by floating rep)
     395             :      ~5.96e-8 ~ -ln( 1-  2^-24 ) | ~2^-24 (limited by floating rep)
     396             :      ~1.19e-7 ~ -ln( 1-2*2^-24 ) | ~2^-24 (limited by floating rep)
     397             :      ...
     398             :      ~42.975  ~ -ln(   2*2^-63 ) | ~2^-63 (limited by generator)
     399             :      ~43.668  ~ -ln(     2^-63 ) | ~2^-63 (limited by generator)
     400             : 
     401             :    The current implementation will only generate bit level identical
     402             :    results between machine targets that have a libm with correctly
     403             :    rounded exp and log functions.  It is possible to do this reasonably
     404             :    fast without use of libm if necessary though.  Consumes 2 slots.
     405             :    Reasonably fast O(1).
     406             : 
     407             :    For the double precision generator, similar considerations apply:
     408             : 
     409             :      output                       | prob
     410             :      0         ~ -ln( 1         ) | ~2^-54 (limited by floating rep)
     411             :      ~1.11e-16 ~ -ln( 1-  2^-53 ) | ~2^-53 (limited by floating rep)
     412             :      ~2.22e-16 ~ -ln( 1-2*2^-53 ) | ~2^-53 (limited by floating rep)
     413             :      ...
     414             :      ~42.975   ~ -ln(   2*2^-63 ) | ~2^-63 (limited by generator)
     415             :      ~43.668   ~ -ln(     2^-63 ) | ~2^-63 (limited by generator)
     416             : 
     417             :    Consumes 2 slots.  Reasonably fast O(1).
     418             : 
     419             :    fd_rng_float_norm generates a random number with a normal
     420             :    distribution.
     421             : 
     422             :    PDF:
     423             :      f(x) ~ exp(-x^2/2) / sqrt(2 pi)
     424             : 
     425             :    Based on the Ziggurat method.  User should assume any finite value is
     426             :    possible (including denorms and -0 though note that denorms will
     427             :    likely be flushed to zero if the processor is configured to do so for
     428             :    performance, as is typical and that values larger in magnitude than
     429             :    sqrt( 2 log N ) where N is the number of invocations of this will be
     430             :    exceedingly rare).  Typically consumes 1 slot but can consume more
     431             :    with moderately low probability.  Reasonably fast O(1).
     432             :    Deterministic slot consumption can be obtained by using the
     433             :    Box-Muller method (will consume more slots on average though).
     434             : 
     435             :    Similarly for the double precision variant.  Typically consumes 2
     436             :    slots.  Reasonably fast O(1). */
     437             : 
     438             : float fd_rng_float_robust( fd_rng_t * rng );
     439             : float fd_rng_float_exp   ( fd_rng_t * rng );
     440             : float fd_rng_float_norm  ( fd_rng_t * rng );
     441             : 
     442             : #if FD_HAS_DOUBLE
     443             : double fd_rng_double_robust( fd_rng_t * rng );
     444             : double fd_rng_double_exp   ( fd_rng_t * rng );
     445             : double fd_rng_double_norm  ( fd_rng_t * rng );
     446             : #endif
     447             : 
     448             : /* FIXME: IMPORT ATOMIC VARIANTS FOR REENTRANT USAGE (E.G. ATOMIC_XCHG
     449             :    FOR SET, ATOMIC_INC OF INDEX FOR THE RETURN TYPES, CAS STATE UPDATES,
     450             :    ETC) */
     451             : 
     452             : /* fd_rng_secure reads random bytes from a cryptographically secure
     453             :    source provided by the platform.  Features /dev/urandom like entropy.  
     454             : 
     455             :    On success, returns d and guarantees that [d,d+sz) is filled with 
     456             :    unguessable random bytes.  On failure, returns NULL and prints reason
     457             :    for failure to warning log.
     458             : 
     459             :    (!!!) This operation may fail if no secure RNG is available or the
     460             :          RNG failed for some reason.  Always check the return code.
     461             :    
     462             :    Currently available on Linux, FreeBSD, and macOS.
     463             :    On Linux and FreeBSD uses getrandom(2).
     464             :    On macOS uses CommonCrypto's CommonRandom. */
     465             : 
     466             : FD_FN_SENSITIVE __attribute__((warn_unused_result))
     467             : void *
     468             : fd_rng_secure( void * d,
     469             :                ulong  sz );
     470             : 
     471             : FD_PROTOTYPES_END
     472             : 
     473             : #if FD_HAS_X86
     474             : 
     475             : #include <immintrin.h>
     476             : 
     477             : /* rdrand reads sz cryptographically secure bytes using the RDRAND x86
     478             :    instruction.  Returns 1 if the read succeeded, 0 on failure.
     479             : 
     480             :    Intel architecture manual section 7.3.17.1 (December 2023):
     481             : 
     482             :      The RDRAND instruction returns a random number. All Intel
     483             :      processors that support the RDRAND instruction indicate the
     484             :      availability of the RDRAND instruction via reporting
     485             :      CPUID.01H:ECX.RDRAND[bit 30] = 1.
     486             : 
     487             :      RDRAND returns random numbers that are supplied by a
     488             :      cryptographically secure, deterministic] random bit generator DRBG.
     489             :      The DRBG is designed to meet the NIST SP 800-90A standard. The DRBG
     490             :      is re-seeded frequently from an on-chip non-deterministic entropy
     491             :      source to guarantee data returned by RDRAND is statistically
     492             :      uniform, nonperiodic and non-deterministic.
     493             : 
     494             :      In order for the hardware design to meet its security goals, the
     495             :      random number generator continuously tests itself and the random
     496             :      data it is generating. Runtime failures in the random number
     497             :      generator circuitry or statistically anomalous data occurring by
     498             :      chance will be detected by the self test hardware and flag the
     499             :      resulting data as being bad. In such extremely rare cases, the
     500             :      RDRAND instruction will return no data instead of bad data.
     501             : 
     502             :      Under heavy load, with multiple cores executing RDRAND in parallel,
     503             :      it is possible, though unlikely, for the demand of random numbers
     504             :      by software processes/threads to exceed the rate at which the
     505             :      random number generator hardware can supply them. This will lead to
     506             :      the RDRAND instruction returning no data transitorily. */
     507             : 
     508             : FD_PROTOTYPES_BEGIN
     509             : 
     510             : __attribute__((warn_unused_result)) static inline int
     511             : fd_rdrand( uchar * dst,
     512    12582912 :            ulong   sz ) {
     513             : 
     514    12582912 :   uchar * cur      = dst;
     515    12582912 :   ulong   align_sz = sz & ~0x7UL;
     516    25165824 :   while( cur < dst + align_sz ) {
     517    12582912 :     unsigned long long slice;
     518    12582912 :     if( FD_UNLIKELY( !_rdrand64_step( &slice ) ) )
     519           0 :       return 0;
     520    12582912 :     FD_STORE( ulong, dst, slice );
     521    12582912 :     cur += 8UL;
     522    12582912 :   }
     523    12582912 :   if( FD_UNLIKELY( cur < dst ) ) {
     524           0 :     unsigned long long slice;
     525           0 :     if( FD_UNLIKELY( !_rdrand64_step( &slice ) ) )
     526           0 :       return 0;
     527           0 :     ulong tail = (ulong)( cur - dst );
     528           0 :     fd_memcpy( cur, &slice, tail );
     529           0 :     cur += tail;
     530           0 :   }
     531             : 
     532    12582912 :   return 1;
     533    12582912 : }
     534             : 
     535             : FD_PROTOTYPES_END
     536             : 
     537             : #endif /* FD_HAS_X86 */
     538             : 
     539             : #endif /* HEADER_fd_src_rng_fd_rng_h */

Generated by: LCOV version 1.14