LCOV - code coverage report
Current view: top level - ballet/chacha20 - fd_chacha20rng.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 28 28 100.0 %
Date: 2024-11-13 11:58:15 Functions: 8 429 1.9 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_chacha20_fd_chacha20rng_h
       2             : #define HEADER_fd_src_ballet_chacha20_fd_chacha20rng_h
       3             : 
       4             : /* fd_chacha20rng provides APIs for ChaCha20-based RNG, as used in the
       5             :    Solana protocol.  This API should only be used where necessary.
       6             :    fd_rng is a better choice in all other cases. */
       7             : 
       8             : #include "fd_chacha20.h"
       9             : #if !FD_HAS_INT128
      10             : #include "../../util/bits/fd_uwide.h"
      11             : #endif
      12             : 
      13             : /* FD_CHACHA20RNG_DEBUG controls debug logging.  0 is off; 1 is on. */
      14             : 
      15             : #ifndef FD_CHACHA20RNG_DEBUG
      16             : #define FD_CHACHA20RNG_DEBUG 0
      17             : #endif
      18             : 
      19             : /* Solana uses different mechanisms of mapping a ulong to an unbiased
      20             :    integer in [0, n) in different parts of the code.  In particular,
      21             :    leader schedule generation uses MODE_MOD and Turbine uses MODE_SHIFT.
      22             :    See the note in fd_chacha20rng_ulong_roll for more details. */
      23  1578772578 : #define FD_CHACHA20RNG_MODE_MOD   1
      24      313701 : #define FD_CHACHA20RNG_MODE_SHIFT 2
      25             : 
      26             : /* FD_CHACHA20RNG_BUFSZ is the internal buffer size of pre-generated
      27             :    ChaCha20 blocks.  Multiple of block size (64 bytes) and a power of 2. */
      28             : 
      29             : #if FD_HAS_AVX
      30             : #define FD_CHACHA20RNG_BUFSZ (8*FD_CHACHA20_BLOCK_SZ)
      31             : #else
      32             : #define FD_CHACHA20RNG_BUFSZ (256UL)
      33             : #endif
      34             : 
      35             : struct __attribute__((aligned(32UL))) fd_chacha20rng_private {
      36             :   /* ChaCha20 encryption key */
      37             :   uchar key[ 32UL ] __attribute__((aligned(32UL)));
      38             : 
      39             :   /* Ring buffer of pre-generated ChaCha20 RNG data.
      40             :      Note: We currently assume all reads are 8 byte.  This means the
      41             :            cursor is always aligned by 8 and strictly increases in
      42             :            increments of 8.  Thus, we really only have to refill the
      43             :            buffer if buf_off==buf_fill.  */
      44             :   uchar buf[ FD_CHACHA20RNG_BUFSZ ] __attribute__((aligned(FD_CHACHA20_BLOCK_SZ)));
      45             :   ulong buf_off;   /* Total number of bytes consumed */
      46             :   ulong buf_fill;  /* Total number of bytes produced
      47             :                       Always aligned by FD_CHACHA20_BLOCK_SZ */
      48             : 
      49             :   int mode;
      50             : };
      51             : typedef struct fd_chacha20rng_private fd_chacha20rng_t;
      52             : 
      53             : FD_PROTOTYPES_BEGIN
      54             : 
      55             : /* fd_chacha20rng_{align,footprint} give the needed alignment and
      56             :    footprint of a memory region suitable to hold a ChaCha20-based RNG.
      57             : 
      58             :    fd_chacha20rng_new formats a memory region with suitable alignment
      59             :    and footprint for holding a chacha20rng object.  Assumes shmem
      60             :    points on the caller to the first byte of the memory region owned by
      61             :    the caller to use.  `mode` must be one of the FD_CHACHA20RNG_MODE_*
      62             :    constants defined above and dictates what mode this object will use
      63             :    to generate random numbers. Returns shmem on success and NULL on
      64             :    failure (logs details).  The memory region will be owned by the
      65             :    object on successful return.  The caller is not joined on return.
      66             : 
      67             :    fd_chacha20rng_join joins the caller to a chacha20rng object.
      68             :    Assumes shrng points to the first byte of the memory region holding
      69             :    the object.  Returns a local handle to the join on success (this is
      70             :    not necessarily a simple cast of the address) and NULL on failure
      71             :    (logs details).
      72             : 
      73             :    fd_chacha20rng_leave leaves the caller's current local join to a
      74             :    ChaCha20 RNG object.  Returns a pointer to the memory region holding
      75             :    the object on success this is not necessarily a simple cast of the
      76             :    address) and NULL on failure (logs details).  The caller is not
      77             :    joined on successful return.
      78             : 
      79             :    fd_chacha20rng_delete unformats a memory region that holds a ChaCha20
      80             :    RNG object.  Assumes shrng points on the caller to the first byte of
      81             :    the memory region holding the state and that nobody is joined.
      82             :    Returns a pointer to the memory region on success and NULL on failure
      83             :    (logs details).  The caller has ownership of the memory region on
      84             :    successful return. */
      85             : 
      86             : FD_FN_CONST ulong
      87             : fd_chacha20rng_align( void );
      88             : 
      89             : FD_FN_CONST ulong
      90             : fd_chacha20rng_footprint( void );
      91             : 
      92             : void *
      93             : fd_chacha20rng_new( void * shmem, int mode );
      94             : 
      95             : fd_chacha20rng_t *
      96             : fd_chacha20rng_join( void * shrng );
      97             : 
      98             : void *
      99             : fd_chacha20rng_leave( fd_chacha20rng_t * );
     100             : 
     101             : void *
     102             : fd_chacha20rng_delete( void * shrng );
     103             : 
     104             : /* fd_chacha20rng_init starts a ChaCha20 RNG stream.  rng is assumed to
     105             :    be a current local join to a chacha20rng object with no other
     106             :    concurrent operation that would modify the state while this is
     107             :    executing.  seed points to the first byte of the RNG seed byte vector
     108             :    with 32 byte size.  Any preexisting state for an in-progress or
     109             :    recently completed calculation will be discarded.  Returns rng (on
     110             :    return, rng will have the state of a new in-progress calculation).
     111             : 
     112             :    Compatible with Rust fn rand_chacha::ChaCha20Rng::from_seed
     113             :    https://docs.rs/rand_chacha/latest/rand_chacha/struct.ChaCha20Rng.html#method.from_seed */
     114             : 
     115             : fd_chacha20rng_t *
     116             : fd_chacha20rng_init( fd_chacha20rng_t * rng,
     117             :                      void const *       key );
     118             : 
     119             : /* The refill function .  Not part of the public API. */
     120             : 
     121             : void
     122             : fd_chacha20rng_refill_avx( fd_chacha20rng_t * rng );
     123             : 
     124             : void
     125             : fd_chacha20rng_refill_seq( fd_chacha20rng_t * rng );
     126             : 
     127             : #if FD_HAS_AVX
     128    34931019 : #define fd_chacha20rng_private_refill fd_chacha20rng_refill_avx
     129             : #else
     130             : #define fd_chacha20rng_private_refill fd_chacha20rng_refill_seq
     131             : #endif
     132             : 
     133             : /* fd_chacha20rng_avail returns the number of buffered bytes. */
     134             : 
     135             : FD_FN_PURE static inline ulong
     136  2065076313 : fd_chacha20rng_avail( fd_chacha20rng_t const * rng ) {
     137  2065076313 :   return rng->buf_fill - rng->buf_off;
     138  2065076313 : }
     139             : 
     140             : /* fd_chacha20rng_ulong reads a 64-bit integer in [0,2^64) from the RNG
     141             :    stream. */
     142             : 
     143             : static ulong
     144  2065076313 : fd_chacha20rng_ulong( fd_chacha20rng_t * rng ) {
     145  2065076313 :   if( FD_UNLIKELY( fd_chacha20rng_avail( rng ) < sizeof(ulong) ) )
     146    30417321 :     fd_chacha20rng_private_refill( rng );
     147  2065076313 :   ulong x = FD_LOAD( ulong, rng->buf + (rng->buf_off % FD_CHACHA20RNG_BUFSZ) );
     148  2065076313 :   rng->buf_off += 8U;
     149  2065076313 :   return x;
     150  2065076313 : }
     151             : 
     152             : /* fd_chacha20rng_ulong_roll returns an uniform IID rand in [0,n)
     153             :    analogous to fd_rng_ulong_roll.  Rejection method based using
     154             :    fd_chacha20rng_ulong.
     155             : 
     156             :    Compatible with Rust type
     157             :    <rand_chacha::ChaCha20Rng as rand::Rng>::gen<rand::distributions::Uniform<u64>>()
     158             :    as of version 0.7.0 of the crate
     159             :    https://docs.rs/rand/latest/rand/distributions/struct.Uniform.html */
     160             : 
     161             : static inline ulong
     162             : fd_chacha20rng_ulong_roll( fd_chacha20rng_t * rng,
     163  1578766392 :                            ulong              n ) {
     164             :   /* We use a pretty standard rejection-sampling based approach here,
     165             :      but for future reference, here's an explanation:
     166             : 
     167             :      We know that v can take 2^64 values, and so any method that maps
     168             :      each of the 2^64 values to the range directly [0, n) will not be
     169             :      uniform distribution when 2^64 is not divisible by n.  This
     170             :      motivates using rejection sampling.
     171             : 
     172             :      The most basic approach is to map v from [0, n*floor(2^64/n) ) to
     173             :      [0, n) using v%n, but that puts a modulus on the critical path.  To
     174             :      avoid that, the Rust rand crate uses a different approach: compute
     175             :      v*n/2^64, which is also in [0, n).
     176             : 
     177             :      Now the question to answer is which values to throw out.  We pick a
     178             :      large integer k such that k*n<=2^64 and map [0, k*n) -> 0, [2^64,
     179             :      2^64+k*n) -> 1, etc.  Since k*n might be 2^64 and then not fit in a
     180             :      long, we define zone=k*n-1 <= ULONG_MAX, and make the intervals
     181             :      closed instead of half-open.
     182             : 
     183             :      Here's where the mode comes in.  Depending on what method you call
     184             :      and what datatype you use, the Rust crate uses different values of
     185             :      k.  When MODE_MOD is set, we use largest possible value of k,
     186             :      namely floor(2^64/n).  You can compute zone directly as follows:
     187             :                zone  = k*n-1
     188             :                      = floor(2^64/n)*n - 1
     189             :                      = 2^64 - (2^64%n) - 1
     190             :                      = 2^64-1 - (2^64-n)%n, since n<2^64
     191             :                      = 2^64-1 - ((2^64-1)-n+1)%n
     192             :      Which is back to having a mod... But at least if n is a
     193             :      compile-time constant then the whole zone computation becomes a
     194             :      compile-time constant.
     195             : 
     196             :      When MODE_SHIFT is set, we use uses almost the largest possible
     197             :      power of two for k.  Precisely, it uses the smallest power of two
     198             :      such that k*n >= 2^63, which is the largest power of two such that
     199             :      k*n<=2^64 unless n is a power of two.  This approach eliminates the
     200             :      mod calculation but increases the expected number of samples
     201             :      required. */
     202  1578766392 :   ulong const zone = fd_ulong_if( rng->mode==FD_CHACHA20RNG_MODE_MOD,
     203  1578766392 :                                   ULONG_MAX - (ULONG_MAX-n+1UL)%n,
     204  1578766392 :                                   (n << (63 - fd_ulong_find_msb( n ) )) - 1UL );
     205             : 
     206  2031776307 :   for( int i=0; 1; i++ ) {
     207  2031776307 :     ulong   v   = fd_chacha20rng_ulong( rng );
     208  2031776307 : #if FD_HAS_INT128
     209             :     /* Compiles to one mulx instruction */
     210  2031776307 :     uint128 res = (uint128)v * (uint128)n;
     211  2031776307 :     ulong   hi  = (ulong)(res>>64);
     212  2031776307 :     ulong   lo  = (ulong) res;
     213             : #else
     214             :     ulong hi, lo;
     215             :     fd_uwide_mul( &hi, &lo, v, n );
     216             : #endif
     217             : 
     218             : #   if FD_CHACHA20RNG_DEBUG
     219             :     FD_LOG_DEBUG(( "roll (attempt %d): n=%016lx zone: %016lx v=%016lx lo=%016lx hi=%016lx", i, n, zone, v, lo, hi ));
     220             : #   else
     221  2031776307 :     (void)i;
     222  2031776307 : #   endif /* FD_CHACHA20RNG_DEBUG */
     223             : 
     224  2031776307 :     if( FD_LIKELY( lo<=zone ) ) return hi;
     225  2031776307 :   }
     226  1578766392 : }
     227             : 
     228             : FD_PROTOTYPES_END
     229             : 
     230             : #endif /* HEADER_fd_src_ballet_chacha20_fd_chacha20rng_h */

Generated by: LCOV version 1.14