LCOV - code coverage report
Current view: top level - ballet/chacha - fd_chacha_rng.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 40 40 100.0 %
Date: 2025-09-19 04:41:14 Functions: 9 676 1.3 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_chacha_fd_chacha_rng_h
       2             : #define HEADER_fd_src_ballet_chacha_fd_chacha_rng_h
       3             : 
       4             : /* fd_chacha_rng provides APIs for ChaCha-based RNG, as used in the
       5             :    Solana protocol.  This API should only be used where necessary.
       6             :    fd_rng is a better choice in all other cases. */
       7             : 
       8             : #include "fd_chacha.h"
       9             : #if !FD_HAS_INT128
      10             : #include "../../util/bits/fd_uwide.h"
      11             : #endif
      12             : 
      13             : /* FD_CHACHA_RNG_DEBUG controls debug logging.  0 is off; 1 is on. */
      14             : 
      15             : #ifndef FD_CHACHA_RNG_DEBUG
      16             : #define FD_CHACHA_RNG_DEBUG 0
      17             : #endif
      18             : 
      19             : /* Solana uses different mechanisms of mapping a ulong to an unbiased
      20             :    integer in [0, n) in different parts of the code.  In particular,
      21             :    leader schedule generation uses MODE_MOD and Turbine uses MODE_SHIFT.
      22             :    See the note in fd_chacha20_rng_ulong_roll for more details. */
      23  1578622923 : #define FD_CHACHA_RNG_MODE_MOD   1
      24      313848 : #define FD_CHACHA_RNG_MODE_SHIFT 2
      25             : 
      26             : /* FD_CHACHA_RNG_BUFSZ is the internal buffer size of pre-generated
      27             :    ChaCha20 blocks.  Multiple of block size (64 bytes) and a power of 2. */
      28             : 
      29             : #if FD_HAS_AVX512
      30     4399998 : #define FD_CHACHA_RNG_BUFSZ (16*FD_CHACHA_BLOCK_SZ)
      31             : #elif FD_HAS_AVX
      32     8799996 : #define FD_CHACHA_RNG_BUFSZ (8*FD_CHACHA_BLOCK_SZ)
      33             : #else
      34             : #define FD_CHACHA_RNG_BUFSZ (256UL)
      35             : #endif
      36             : 
      37             : struct __attribute__((aligned(32UL))) fd_chacha_rng_private {
      38             :   /* ChaCha20 encryption key */
      39             :   uchar key[ 32UL ] __attribute__((aligned(32UL)));
      40             : 
      41             :   /* Ring buffer of pre-generated ChaCha20 RNG data.
      42             :      Note: We currently assume all reads are 8 byte.  This means the
      43             :            cursor is always aligned by 8 and strictly increases in
      44             :            increments of 8.  Thus, we really only have to refill the
      45             :            buffer if buf_off==buf_fill.  */
      46             :   uchar buf[ FD_CHACHA_RNG_BUFSZ ] __attribute__((aligned(FD_CHACHA_BLOCK_SZ)));
      47             :   ulong buf_off;   /* Total number of bytes consumed */
      48             :   ulong buf_fill;  /* Total number of bytes produced
      49             :                       Always aligned by FD_CHACHA_BLOCK_SZ */
      50             : 
      51             :   int mode;
      52             : };
      53             : typedef struct fd_chacha_rng_private fd_chacha_rng_t;
      54             : 
      55             : FD_PROTOTYPES_BEGIN
      56             : 
      57             : /* fd_chacha_rng_{align,footprint} give the needed alignment and
      58             :    footprint of a memory region suitable to hold a ChaCha20-based RNG.
      59             : 
      60             :    fd_chacha_rng_new formats a memory region with suitable alignment
      61             :    and footprint for holding a chacha20_rng object.  Assumes shmem
      62             :    points on the caller to the first byte of the memory region owned by
      63             :    the caller to use.  `mode` must be one of the FD_CHACHA_RNG_MODE_*
      64             :    constants defined above and dictates what mode this object will use
      65             :    to generate random numbers. Returns shmem on success and NULL on
      66             :    failure (logs details).  The memory region will be owned by the
      67             :    object on successful return.  The caller is not joined on return.
      68             : 
      69             :    fd_chacha_rng_join joins the caller to a chacha20_rng object.
      70             :    Assumes shrng points to the first byte of the memory region holding
      71             :    the object.  Returns a local handle to the join on success (this is
      72             :    not necessarily a simple cast of the address) and NULL on failure
      73             :    (logs details).
      74             : 
      75             :    fd_chacha_rng_leave leaves the caller's current local join to a
      76             :    ChaCha20 RNG object.  Returns a pointer to the memory region holding
      77             :    the object on success this is not necessarily a simple cast of the
      78             :    address) and NULL on failure (logs details).  The caller is not
      79             :    joined on successful return.
      80             : 
      81             :    fd_chacha_rng_delete unformats a memory region that holds a ChaCha20
      82             :    RNG object.  Assumes shrng points on the caller to the first byte of
      83             :    the memory region holding the state and that nobody is joined.
      84             :    Returns a pointer to the memory region on success and NULL on failure
      85             :    (logs details).  The caller has ownership of the memory region on
      86             :    successful return. */
      87             : 
      88             : FD_FN_CONST ulong
      89             : fd_chacha_rng_align( void );
      90             : 
      91             : FD_FN_CONST ulong
      92             : fd_chacha_rng_footprint( void );
      93             : 
      94             : void *
      95             : fd_chacha_rng_new( void * shmem, int mode );
      96             : 
      97             : fd_chacha_rng_t *
      98             : fd_chacha_rng_join( void * shrng );
      99             : 
     100             : void *
     101             : fd_chacha_rng_leave( fd_chacha_rng_t * );
     102             : 
     103             : void *
     104             : fd_chacha_rng_delete( void * shrng );
     105             : 
     106             : /* fd_chacha_rng{8,20}_init starts a ChaCha{8,20} RNG stream.  rng is
     107             :    assumed to be a current local join to a chacha20_rng object with no
     108             :    other concurrent operation that would modify the state while this is
     109             :    executing.  seed points to the first byte of the RNG seed byte vector
     110             :    with 32 byte size.  Any preexisting state for an in-progress or
     111             :    recently completed calculation will be discarded.  Returns rng (on
     112             :    return, rng will have the state of a new in-progress calculation).
     113             : 
     114             :    Compatible with Rust fn rand_chacha::ChaCha20Rng::from_seed
     115             :    https://docs.rs/rand_chacha/latest/rand_chacha/struct.ChaCha20Rng.html#method.from_seed */
     116             : 
     117             : fd_chacha_rng_t *
     118             : fd_chacha8_rng_init( fd_chacha_rng_t * rng,
     119             :                      void const *      key );
     120             : 
     121             : fd_chacha_rng_t *
     122             : fd_chacha20_rng_init( fd_chacha_rng_t * rng,
     123             :                       void const *      key );
     124             : 
     125             : /* The refill function.  Not part of the public API. */
     126             : 
     127             : #if FD_HAS_AVX512
     128             : void fd_chacha8_rng_refill_avx512 ( fd_chacha_rng_t * rng );
     129             : void fd_chacha20_rng_refill_avx512( fd_chacha_rng_t * rng );
     130             : #endif
     131             : 
     132             : #if FD_HAS_AVX
     133             : void fd_chacha8_rng_refill_avx ( fd_chacha_rng_t * rng );
     134             : void fd_chacha20_rng_refill_avx( fd_chacha_rng_t * rng );
     135             : #endif
     136             : 
     137             : void fd_chacha8_rng_refill_seq ( fd_chacha_rng_t * rng );
     138             : void fd_chacha20_rng_refill_seq( fd_chacha_rng_t * rng );
     139             : 
     140             : #if FD_HAS_AVX512
     141       85937 : #define fd_chacha8_rng_private_refill  fd_chacha8_rng_refill_avx512
     142     6392087 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_avx512
     143             : #elif FD_HAS_AVX
     144      343748 : #define fd_chacha8_rng_private_refill  fd_chacha8_rng_refill_avx
     145    23285488 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_avx
     146             : #else
     147             : #define fd_chacha8_rng_private_refill  fd_chacha8_rng_refill_seq
     148             : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_seq
     149             : #endif
     150             : 
     151             : /* fd_chacha_rng_avail returns the number of buffered bytes. */
     152             : 
     153             : FD_FN_PURE static inline ulong
     154  2097812754 : fd_chacha_rng_avail( fd_chacha_rng_t const * rng ) {
     155  2097812754 :   return rng->buf_fill - rng->buf_off;
     156  2097812754 : }
     157             : 
     158             : /* fd_chacha{8,20}_rng_ulong read a 64-bit integer in [0,2^64) from the
     159             :    RNG stream. */
     160             : 
     161             : static inline ulong
     162    33000000 : fd_chacha8_rng_ulong( fd_chacha_rng_t * rng ) {
     163    33000000 :   if( FD_UNLIKELY( fd_chacha_rng_avail( rng ) < sizeof(ulong) ) )
     164      429685 :     fd_chacha8_rng_private_refill( rng );
     165    33000000 :   ulong x = FD_LOAD( ulong, rng->buf + (rng->buf_off % FD_CHACHA_RNG_BUFSZ) );
     166    33000000 :   rng->buf_off += 8U;
     167    33000000 :   return x;
     168    33000000 : }
     169             : 
     170             : static inline ulong
     171  2064812754 : fd_chacha20_rng_ulong( fd_chacha_rng_t * rng ) {
     172  2064812754 :   if( FD_UNLIKELY( fd_chacha_rng_avail( rng ) < sizeof(ulong) ) )
     173    25163683 :     fd_chacha20_rng_private_refill( rng );
     174  2064812754 :   ulong x = FD_LOAD( ulong, rng->buf + (rng->buf_off % FD_CHACHA_RNG_BUFSZ) );
     175  2064812754 :   rng->buf_off += 8U;
     176  2064812754 :   return x;
     177  2064812754 : }
     178             : 
     179             : /* fd_chacha20_rng_ulong_roll returns an uniform IID rand in [0,n)
     180             :    analogous to fd_rng_ulong_roll.  Rejection method based using
     181             :    fd_chacha20_rng_ulong.
     182             : 
     183             :    Compatible with Rust type
     184             :    <rand_chacha::ChaCha20Rng as rand::Rng>::gen<rand::distributions::Uniform<u64>>()
     185             :    as of version 0.7.0 of the crate
     186             :    https://docs.rs/rand/latest/rand/distributions/struct.Uniform.html */
     187             : 
     188             : static inline ulong
     189             : fd_chacha20_rng_ulong_roll( fd_chacha_rng_t * rng,
     190  1578616560 :                            ulong             n ) {
     191             :   /* We use a pretty standard rejection-sampling based approach here,
     192             :      but for future reference, here's an explanation:
     193             : 
     194             :      We know that v can take 2^64 values, and so any method that maps
     195             :      each of the 2^64 values to the range directly [0, n) will not be
     196             :      uniform distribution when 2^64 is not divisible by n.  This
     197             :      motivates using rejection sampling.
     198             : 
     199             :      The most basic approach is to map v from [0, n*floor(2^64/n) ) to
     200             :      [0, n) using v%n, but that puts a modulus on the critical path.  To
     201             :      avoid that, the Rust rand crate uses a different approach: compute
     202             :      v*n/2^64, which is also in [0, n).
     203             : 
     204             :      Now the question to answer is which values to throw out.  We pick a
     205             :      large integer k such that k*n<=2^64 and map [0, k*n) -> 0, [2^64,
     206             :      2^64+k*n) -> 1, etc.  Since k*n might be 2^64 and then not fit in a
     207             :      long, we define zone=k*n-1 <= ULONG_MAX, and make the intervals
     208             :      closed instead of half-open.
     209             : 
     210             :      Here's where the mode comes in.  Depending on what method you call
     211             :      and what datatype you use, the Rust crate uses different values of
     212             :      k.  When MODE_MOD is set, we use largest possible value of k,
     213             :      namely floor(2^64/n).  You can compute zone directly as follows:
     214             :                zone  = k*n-1
     215             :                      = floor(2^64/n)*n - 1
     216             :                      = 2^64 - (2^64%n) - 1
     217             :                      = 2^64-1 - (2^64-n)%n, since n<2^64
     218             :                      = 2^64-1 - ((2^64-1)-n+1)%n
     219             :      Which is back to having a mod... But at least if n is a
     220             :      compile-time constant then the whole zone computation becomes a
     221             :      compile-time constant.
     222             : 
     223             :      When MODE_SHIFT is set, we use uses almost the largest possible
     224             :      power of two for k.  Precisely, it uses the smallest power of two
     225             :      such that k*n >= 2^63, which is the largest power of two such that
     226             :      k*n<=2^64 unless n is a power of two.  This approach eliminates the
     227             :      mod calculation but increases the expected number of samples
     228             :      required. */
     229  1578616560 :   ulong const zone = fd_ulong_if( rng->mode==FD_CHACHA_RNG_MODE_MOD,
     230  1578616560 :                                   ULONG_MAX - (ULONG_MAX-n+1UL)%n,
     231  1578616560 :                                   (n << (63 - fd_ulong_find_msb( n ) )) - 1UL );
     232             : 
     233  2031512751 :   for( int i=0; 1; i++ ) {
     234  2031512751 :     ulong   v   = fd_chacha20_rng_ulong( rng );
     235  2031512751 : #if FD_HAS_INT128
     236             :     /* Compiles to one mulx instruction */
     237  2031512751 :     uint128 res = (uint128)v * (uint128)n;
     238  2031512751 :     ulong   hi  = (ulong)(res>>64);
     239  2031512751 :     ulong   lo  = (ulong) res;
     240             : #else
     241             :     ulong hi, lo;
     242             :     fd_uwide_mul( &hi, &lo, v, n );
     243             : #endif
     244             : 
     245             : #   if FD_CHACHA_RNG_DEBUG
     246             :     FD_LOG_DEBUG(( "roll (attempt %d): n=%016lx zone: %016lx v=%016lx lo=%016lx hi=%016lx", i, n, zone, v, lo, hi ));
     247             : #   else
     248  2031512751 :     (void)i;
     249  2031512751 : #   endif /* FD_CHACHA_RNG_DEBUG */
     250             : 
     251  2031512751 :     if( FD_LIKELY( lo<=zone ) ) return hi;
     252  2031512751 :   }
     253  1578616560 : }
     254             : 
     255             : FD_PROTOTYPES_END
     256             : 
     257             : #endif /* HEADER_fd_src_ballet_chacha20_fd_chacha20_rng_h */

Generated by: LCOV version 1.14