LCOV - code coverage report
Current view: top level - ballet/chacha - fd_chacha_rng.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 38 38 100.0 %
Date: 2026-01-23 05:02:40 Functions: 8 618 1.3 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_chacha_fd_chacha_rng_h
       2             : #define HEADER_fd_src_ballet_chacha_fd_chacha_rng_h
       3             : 
       4             : /* fd_chacha_rng provides APIs for ChaCha-based RNG, as used in the
       5             :    Solana protocol.  This API should only be used where necessary.
       6             :    fd_rng is a better choice in all other cases. */
       7             : 
       8             : #include "fd_chacha.h"
       9             : #include "../../util/bits/fd_uwide.h"
      10             : 
      11             : /* FD_CHACHA_RNG_DEBUG controls debug logging.  0 is off; 1 is on. */
      12             : 
      13             : #ifndef FD_CHACHA_RNG_DEBUG
      14             : #define FD_CHACHA_RNG_DEBUG 0
      15             : #endif
      16             : 
      17             : /* Solana uses different mechanisms of mapping a ulong to an unbiased
      18             :    integer in [0, n) in different parts of the code.  In particular,
      19             :    leader schedule generation uses MODE_MOD and Turbine uses MODE_SHIFT.
      20             :    See the note in fd_chacha_rng_ulong_roll for more details. */
      21  1199003316 : #define FD_CHACHA_RNG_MODE_MOD   1
      22      314355 : #define FD_CHACHA_RNG_MODE_SHIFT 2
      23             : 
      24             : /* Solana uses different ChaCha algorithms. Leader schedule generation
      25             :    uses ChaCha20 and Turbine switched from ChaCha20 to ChaCha8. */
      26     2135268 : #define FD_CHACHA_RNG_ALGO_CHACHA20   1
      27    23202700 : #define FD_CHACHA_RNG_ALGO_CHACHA8    2
      28             : 
      29             : /* FD_CHACHA_RNG_BUFSZ is the internal buffer size of pre-generated
      30             :    ChaCha20 blocks.  Multiple of block size (64 bytes) and a power of 2. */
      31             : 
      32             : #if FD_HAS_AVX512
      33     4399998 : #define FD_CHACHA_RNG_BUFSZ (16*FD_CHACHA_BLOCK_SZ)
      34             : #elif FD_HAS_AVX
      35     8799996 : #define FD_CHACHA_RNG_BUFSZ (8*FD_CHACHA_BLOCK_SZ)
      36             : #else
      37             : #define FD_CHACHA_RNG_BUFSZ (256UL)
      38             : #endif
      39             : 
      40             : struct __attribute__((aligned(32UL))) fd_chacha_rng_private {
      41             :   /* ChaCha20 encryption key */
      42             :   uchar key[ 32UL ] __attribute__((aligned(32UL)));
      43             : 
      44             :   /* Ring buffer of pre-generated ChaCha20 RNG data.
      45             :      Note: We currently assume all reads are 8 byte.  This means the
      46             :            cursor is always aligned by 8 and strictly increases in
      47             :            increments of 8.  Thus, we really only have to refill the
      48             :            buffer if buf_off==buf_fill.  */
      49             :   uchar buf[ FD_CHACHA_RNG_BUFSZ ] __attribute__((aligned(FD_CHACHA_BLOCK_SZ)));
      50             :   ulong buf_off;   /* Total number of bytes consumed */
      51             :   ulong buf_fill;  /* Total number of bytes produced
      52             :                       Always aligned by FD_CHACHA_BLOCK_SZ */
      53             : 
      54             :   int mode;
      55             :   int algo;
      56             : };
      57             : typedef struct fd_chacha_rng_private fd_chacha_rng_t;
      58             : 
      59             : FD_PROTOTYPES_BEGIN
      60             : 
      61             : /* fd_chacha_rng_{align,footprint} give the needed alignment and
      62             :    footprint of a memory region suitable to hold a ChaCha20-based RNG.
      63             : 
      64             :    fd_chacha_rng_new formats a memory region with suitable alignment
      65             :    and footprint for holding a chacha20_rng object.  Assumes shmem
      66             :    points on the caller to the first byte of the memory region owned by
      67             :    the caller to use.  `mode` must be one of the FD_CHACHA_RNG_MODE_*
      68             :    constants defined above and dictates what mode this object will use
      69             :    to generate random numbers. Returns shmem on success and NULL on
      70             :    failure (logs details).  The memory region will be owned by the
      71             :    object on successful return.  The caller is not joined on return.
      72             : 
      73             :    fd_chacha_rng_join joins the caller to a chacha20_rng object.
      74             :    Assumes shrng points to the first byte of the memory region holding
      75             :    the object.  Returns a local handle to the join on success (this is
      76             :    not necessarily a simple cast of the address) and NULL on failure
      77             :    (logs details).
      78             : 
      79             :    fd_chacha_rng_leave leaves the caller's current local join to a
      80             :    ChaCha20 RNG object.  Returns a pointer to the memory region holding
      81             :    the object on success this is not necessarily a simple cast of the
      82             :    address) and NULL on failure (logs details).  The caller is not
      83             :    joined on successful return.
      84             : 
      85             :    fd_chacha_rng_delete unformats a memory region that holds a ChaCha20
      86             :    RNG object.  Assumes shrng points on the caller to the first byte of
      87             :    the memory region holding the state and that nobody is joined.
      88             :    Returns a pointer to the memory region on success and NULL on failure
      89             :    (logs details).  The caller has ownership of the memory region on
      90             :    successful return. */
      91             : 
      92             : FD_FN_CONST ulong
      93             : fd_chacha_rng_align( void );
      94             : 
      95             : FD_FN_CONST ulong
      96             : fd_chacha_rng_footprint( void );
      97             : 
      98             : void *
      99             : fd_chacha_rng_new( void * shmem, int mode );
     100             : 
     101             : fd_chacha_rng_t *
     102             : fd_chacha_rng_join( void * shrng );
     103             : 
     104             : void *
     105             : fd_chacha_rng_leave( fd_chacha_rng_t * );
     106             : 
     107             : void *
     108             : fd_chacha_rng_delete( void * shrng );
     109             : 
     110             : /* fd_chacha_rng_init starts a ChaCha{8,20} RNG stream.  rng is
     111             :    assumed to be a current local join to a chacha_rng object with no
     112             :    other concurrent operation that would modify the state while this is
     113             :    executing.  seed points to the first byte of the RNG seed byte vector
     114             :    with 32 byte size.  Any preexisting state for an in-progress or
     115             :    recently completed calculation will be discarded.  Returns rng (on
     116             :    return, rng will have the state of a new in-progress calculation).
     117             : 
     118             :    The param algo is expected to be FD_CHACHA_RNG_ALGO_CHACHA20 or
     119             :    FD_CHACHA_RNG_ALGO_CHACH8. If invalid, it defaults to chacha20.
     120             : 
     121             :    Compatible with Rust fn rand_chacha::ChaCha20Rng::from_seed
     122             :    https://docs.rs/rand_chacha/latest/rand_chacha/struct.ChaCha20Rng.html#method.from_seed
     123             :    (and ChaCha8Rng) */
     124             : 
     125             : fd_chacha_rng_t *
     126             : fd_chacha_rng_init( fd_chacha_rng_t * rng,
     127             :                     void const *      key,
     128             :                     int               algo );
     129             : 
     130             : /* The refill function.  Not part of the public API. */
     131             : 
     132             : #if FD_HAS_AVX512
     133             : void fd_chacha8_rng_refill_avx512 ( fd_chacha_rng_t * rng );
     134             : void fd_chacha20_rng_refill_avx512( fd_chacha_rng_t * rng );
     135             : #endif
     136             : 
     137             : #if FD_HAS_AVX
     138             : void fd_chacha8_rng_refill_avx ( fd_chacha_rng_t * rng );
     139             : void fd_chacha20_rng_refill_avx( fd_chacha_rng_t * rng );
     140             : #endif
     141             : 
     142             : void fd_chacha8_rng_refill_seq ( fd_chacha_rng_t * rng );
     143             : void fd_chacha20_rng_refill_seq( fd_chacha_rng_t * rng );
     144             : 
     145             : #if FD_HAS_AVX512
     146      791108 : #define fd_chacha8_rng_private_refill  fd_chacha8_rng_refill_avx512
     147     3967978 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_avx512
     148             : #elif FD_HAS_AVX
     149     2795118 : #define fd_chacha8_rng_private_refill  fd_chacha8_rng_refill_avx
     150    14868460 : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_avx
     151             : #else
     152             : #define fd_chacha8_rng_private_refill  fd_chacha8_rng_refill_seq
     153             : #define fd_chacha20_rng_private_refill fd_chacha20_rng_refill_seq
     154             : #endif
     155             : 
     156             : /* fd_chacha_rng_avail returns the number of buffered bytes. */
     157             : 
     158             : FD_FN_PURE static inline ulong
     159  1597418946 : fd_chacha_rng_avail( fd_chacha_rng_t const * rng ) {
     160  1597418946 :   return rng->buf_fill - rng->buf_off;
     161  1597418946 : }
     162             : 
     163             : /* fd_chacha_rng_ulong read a 64-bit integer in [0,2^64) from the
     164             :    RNG stream. */
     165             : 
     166             : static inline ulong
     167  1597418946 : fd_chacha_rng_ulong( fd_chacha_rng_t * rng ) {
     168  1597418946 :   if( FD_UNLIKELY( fd_chacha_rng_avail( rng ) < sizeof(ulong) ) ) {
     169    19828385 :     if( rng->algo==FD_CHACHA_RNG_ALGO_CHACHA8 ) {
     170     2806180 :       fd_chacha8_rng_private_refill( rng );
     171    17022205 :     } else {
     172    17022205 :       fd_chacha20_rng_private_refill( rng );
     173    17022205 :     }
     174    19828385 :   }
     175  1597418946 :   ulong x = FD_LOAD( ulong, rng->buf + (rng->buf_off % FD_CHACHA_RNG_BUFSZ) );
     176  1597418946 :   rng->buf_off += 8U;
     177  1597418946 :   return x;
     178  1597418946 : }
     179             : 
     180             : /* fd_chacha_rng_ulong_roll returns an uniform IID rand in [0,n)
     181             :    analogous to fd_rng_ulong_roll.  Rejection method based using
     182             :    fd_chacha_rng_ulong.
     183             : 
     184             :    Compatible with Rust type
     185             :    <rand_chacha::ChaCha20Rng as rand::Rng>::gen<rand::distributions::Uniform<u64>>()
     186             :    as of version 0.7.0 of the crate
     187             :    https://docs.rs/rand/latest/rand/distributions/struct.Uniform.html */
     188             : 
     189             : static inline ulong
     190             : fd_chacha_rng_ulong_roll( fd_chacha_rng_t * rng,
     191  1198996626 :                           ulong             n ) {
     192             :   /* We use a pretty standard rejection-sampling based approach here,
     193             :      but for future reference, here's an explanation:
     194             : 
     195             :      We know that v can take 2^64 values, and so any method that maps
     196             :      each of the 2^64 values to the range directly [0, n) will not be
     197             :      uniform distribution when 2^64 is not divisible by n.  This
     198             :      motivates using rejection sampling.
     199             : 
     200             :      The most basic approach is to map v from [0, n*floor(2^64/n) ) to
     201             :      [0, n) using v%n, but that puts a modulus on the critical path.  To
     202             :      avoid that, the Rust rand crate uses a different approach: compute
     203             :      v*n/2^64, which is also in [0, n).
     204             : 
     205             :      Now the question to answer is which values to throw out.  We pick a
     206             :      large integer k such that k*n<=2^64 and map [0, k*n) -> 0, [2^64,
     207             :      2^64+k*n) -> 1, etc.  Since k*n might be 2^64 and then not fit in a
     208             :      long, we define zone=k*n-1 <= ULONG_MAX, and make the intervals
     209             :      closed instead of half-open.
     210             : 
     211             :      Here's where the mode comes in.  Depending on what method you call
     212             :      and what datatype you use, the Rust crate uses different values of
     213             :      k.  When MODE_MOD is set, we use largest possible value of k,
     214             :      namely floor(2^64/n).  You can compute zone directly as follows:
     215             :                zone  = k*n-1
     216             :                      = floor(2^64/n)*n - 1
     217             :                      = 2^64 - (2^64%n) - 1
     218             :                      = 2^64-1 - (2^64-n)%n, since n<2^64
     219             :                      = 2^64-1 - ((2^64-1)-n+1)%n
     220             :      Which is back to having a mod... But at least if n is a
     221             :      compile-time constant then the whole zone computation becomes a
     222             :      compile-time constant.
     223             : 
     224             :      When MODE_SHIFT is set, we use uses almost the largest possible
     225             :      power of two for k.  Precisely, it uses the smallest power of two
     226             :      such that k*n >= 2^63, which is the largest power of two such that
     227             :      k*n<=2^64 unless n is a power of two.  This approach eliminates the
     228             :      mod calculation but increases the expected number of samples
     229             :      required. */
     230  1198996626 :   ulong const zone = fd_ulong_if( rng->mode==FD_CHACHA_RNG_MODE_MOD,
     231  1198996626 :                                   ULONG_MAX - (ULONG_MAX-n+1UL)%n,
     232  1198996626 :                                   (n << (63 - fd_ulong_find_msb( n ) )) - 1UL );
     233             : 
     234  1531118943 :   for( int i=0; 1; i++ ) {
     235  1531118943 :     ulong v = fd_chacha_rng_ulong( rng );
     236  1531118943 :     ulong hi, lo;
     237  1531118943 :     fd_uwide_mul( &hi, &lo, v, n );
     238             : 
     239             : #   if FD_CHACHA_RNG_DEBUG
     240             :     FD_LOG_DEBUG(( "roll (attempt %d): n=%016lx zone: %016lx v=%016lx lo=%016lx hi=%016lx", i, n, zone, v, lo, hi ));
     241             : #   else
     242  1531118943 :     (void)i;
     243  1531118943 : #   endif /* FD_CHACHA_RNG_DEBUG */
     244             : 
     245  1531118943 :     if( FD_LIKELY( lo<=zone ) ) return hi;
     246  1531118943 :   }
     247  1198996626 : }
     248             : 
     249             : FD_PROTOTYPES_END
     250             : 
     251             : #endif /* HEADER_fd_src_ballet_chacha20_fd_chacha20_rng_h */

Generated by: LCOV version 1.14