LCOV - code coverage report
Current view: top level - ballet/wsample - fd_wsample.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 6 6 100.0 %
Date: 2026-01-23 05:02:40 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_wsample_fd_wsample_h
       2             : #define HEADER_fd_src_ballet_wsample_fd_wsample_h
       3             : 
       4             : /* This header defines methods for computing weighted "random" samples
       5             :    of the specific type used by Solana for computing the leader
       6             :    schedule and the Turbine tree.
       7             : 
       8             :    In particular, those random samples are generated by:
       9             : 
      10             :    1. Seeding ChaCha20 with a 32B seed (different for leader schedule vs
      11             :       Turbine).
      12             :    2. Using the ChaCha20 random stream to generate a uniformly
      13             :       distributed random integer x in [0, total_stake_weight).
      14             :    3. Returning the first index such that the cumulative sum of stake is
      15             :       at least x. */
      16             : 
      17             : #include "../fd_ballet_base.h"
      18             : #include "../chacha/fd_chacha_rng.h"
      19             : 
      20             : struct fd_wsample_private;
      21             : typedef struct fd_wsample_private fd_wsample_t;
      22             : 
      23             : #define FD_WSAMPLE_RNG_CHACHA20 (0U)
      24             : #define FD_WSAMPLE_RNG_CHACHA8  (1U)
      25             : 
      26        6684 : #define FD_WSAMPLE_ALIGN (64UL)
      27             : /* fd_leaders really wants a compile time-compatible footprint... The
      28             :    internal count is 1/8 * (9^ceil(log_9(ele_cnt)) - 1) */
      29             : #define FD_WSAMPLE_FOOTPRINT( ele_cnt, restore_enabled )                   \
      30             :    (192UL + 64UL*((restore_enabled)?2UL:1UL)*(                             \
      31             :                  ((ele_cnt)<=         1UL)?        0UL :                   \
      32             :                  ((ele_cnt)<=         9UL)?        1UL :                   \
      33             :                  ((ele_cnt)<=        81UL)?       10UL :                   \
      34             :                  ((ele_cnt)<=       729UL)?       91UL :                   \
      35             :                  ((ele_cnt)<=      6561UL)?      820UL :                   \
      36             :                  ((ele_cnt)<=     59049UL)?     7381UL :                   \
      37             :                  ((ele_cnt)<=    531441UL)?    66430UL :                   \
      38             :                  ((ele_cnt)<=   4782969UL)?   597871UL :                   \
      39             :                  ((ele_cnt)<=  43046721UL)?  5380840UL :                   \
      40             :                  ((ele_cnt)<= 387420489UL)? 48427561UL :                   \
      41             :                  ((ele_cnt)<=3486784401UL)?435848050UL : 3922632451UL ))   \
      42             : 
      43             : /* fd_wsample_{align, footprint} give the alignment and footprint
      44             :    respectively required to create a weighted sampler with at most
      45             :    ele_cnt stake weights.  If restore_enabled is zero, calls to
      46             :    wsample_restore_all will be no-ops, but the footprint required will
      47             :    be smaller. ele_cnt in [0, UINT_MAX-2) (note, not ULONG MAX).
      48             : 
      49             :    fd_wsample_{join,leave} join and leave a memory region formatted as a
      50             :    weighted sampler, respectively.  They both are simple casts.
      51             : 
      52             :    fd_wsample_delete unformats a memory region used as a weighted
      53             :    sampler.  Releases all interest in rng. */
      54             : 
      55             : FD_FN_CONST ulong fd_wsample_align    ( void          );
      56             : FD_FN_CONST ulong fd_wsample_footprint( ulong ele_cnt, int restore_enabled );
      57             : fd_wsample_t *    fd_wsample_join     ( void * shmem  );
      58             : void *            fd_wsample_leave    ( fd_wsample_t * sampler );
      59             : void *            fd_wsample_delete   ( void * shmem  );
      60             : 
      61             : 
      62             : /* FD_WSAMPLE_HINT_*: Hints that can be passed to fd_wsample_new in the
      63             :    opt_hint field.  The hint specifies the distribution of weights:
      64             :    FLAT: The all weights are approximately constant.
      65             :    POWERLAW: The weights are sorted largest to smallest and decay with a
      66             :        power law distribution.  At the moment, this assumes it's
      67             :        proportional to 1/x.
      68             : 
      69             :    The hint can also specify whether sampling will be done with deleting
      70             :    or without deleting:
      71             :    REMOVE: Sampling will be done without replacement, i.e. mostly with
      72             :        fd_wsample_sample_and_remove and/or
      73             :        fd_wsample_sample_and_remove_many.
      74             :    NOREMOVE: Sampling will be done with replacement, i.e. mostly with
      75             :        fd_wsample_sample and/or
      76             :        fd_wsample_sample_many. */
      77        1560 : #define FD_WSAMPLE_HINT_FLAT              0
      78        6726 : #define FD_WSAMPLE_HINT_POWERLAW_NOREMOVE 2
      79      313590 : #define FD_WSAMPLE_HINT_POWERLAW_REMOVE   3
      80             : 
      81             : /* fd_wsample_new_init, fd_wsample_new_add_weight, and
      82             :    fd_wsample_new_fini format a memory region with the appropriate
      83             :    alignment and footprint to be usable as a weighted sampler.  This
      84             :    multi-function initialization process prevents needing to construct a
      85             :    flat array of weights, which is often inconvenient.
      86             : 
      87             :    The caller must first call fd_wsample_new_init, then new_add_weight
      88             :    ele_cnt times, and finally new_fini.  Only at that point will the
      89             :    region of memory be ready to be joined.
      90             : 
      91             :    fd_wsample_new_init begins the formatting a memory region.  shmem is
      92             :    a pointer to the first byte of the memory region to use.  rng must be
      93             :    a local join of a ChaCha20 RNG struct.  The weighted sampler will use
      94             :    rng to generate random numbers.  It may seem more natural for the
      95             :    weighted sampler to own its own rng, but this is done to facilitate
      96             :    sharing of rngs between weighted samplers, which is useful for
      97             :    Turbine.  ele_cnt specifies the number of elements that can be
      98             :    sampled from and must be less than UINT_MAX.  If restore_enabled is
      99             :    set to 0, fd_wsample_restore_all will not work but the required
     100             :    footprint is smaller.  opt_hint gives a hint of the shape of the
     101             :    weights and the style of queries that will be most common; this hint
     102             :    impacts query performance but not correctness.  opt_hint must be one
     103             :    of FD_WSAMPLE_HINT_*.
     104             : 
     105             :    fd_wsample_new_add adds a weight to a partially formatted memory
     106             :    region.  shmem must be a partially constructed region of memory, as
     107             :    returned by fd_wsample_new_init or fd_wsample_new_add_weight, weight
     108             :    must be strictly positive, and the cumulative sum of this weight and
     109             :    all other weights must be no more than ULONG_MAX.
     110             : 
     111             :    fd_wsample_new_fini finalizes the formatting of a partially formatted
     112             :    memory region.  shmem must be a partially constructed region of
     113             :    memory, as returned by fd_wsample_new_add_weight (or
     114             :    fd_wsample_new_init if ele_cnt==0).  If poisoned_weight is non-zero,
     115             :    the weighted sampler will end with a poisoned region representing an
     116             :    indeterminate number of unknown elements with total weight equal to
     117             :    poisoned_weight.  This is useful for chopping off a long tail so that
     118             :    the number of elements can be bounded easily.  The sum of all weights
     119             :    and poisoned_weight must be no more than ULONG_MAX.
     120             : 
     121             :    Retains read/write interest in rng.
     122             : 
     123             :    Each function returns shmem on success and NULL on failure.  It's
     124             :    safe to pass NULL as shmem, in which case NULL will be returned, so
     125             :    you only need to check the final result.
     126             : 
     127             :    On successful completion of the formatting process, the weighted
     128             :    sampler will contain an element corresponding to each provided
     129             :    weight.  Caller is not joined on return. */
     130             : void * fd_wsample_new_init( void            * shmem,
     131             :                             fd_chacha_rng_t * rng,
     132             :                             ulong             ele_cnt,
     133             :                             int               restore_enabled,
     134             :                             int               opt_hint );
     135             : void * fd_wsample_new_add ( void * shmem, ulong weight          );
     136             : void * fd_wsample_new_fini( void * shmem, ulong poisoned_weight );
     137             : 
     138             : /* fd_wsample_get_rng returns the value provided for rng in new. */
     139             : fd_chacha_rng_t * fd_wsample_get_rng( fd_wsample_t * sampler );
     140             : 
     141             : /* fd_wsample_seed_rng seeds the ChaCha rng with the provided seed in
     142             :    preparation for sampling.  This function is compatible with Solana's
     143             :    ChaChaRng::from_seed. */
     144             : void fd_wsample_seed_rng( fd_wsample_t * sampler,
     145             :                           uchar          seed[ 32 ],
     146             :                           int            use_chacha8 );
     147             : 
     148             : /* fd_wsample_sample{_and_remove}{,_many} produces one or cnt (in the
     149             :    _many case) weighted random samples from the sampler.  If the
     150             :    _and_remove variant of the function is called, the returned node will
     151             :    be temporarily removed from the sampler, i.e. for sampling without
     152             :    replacement.  Random samples are produced using the Solana-required
     153             :    method.  Sampler's RNG must be seeded appropriately
     154             :    prior to using these functions.
     155             : 
     156             :    The _many variants of the function store the ith index they sampled
     157             :    in idxs[i] for i in [0, cnt).  The other variants of the function
     158             :    simply return the sampled index.
     159             : 
     160             :    If the sampler has no unremoved elements, these functions will
     161             :    return/store FD_WSAMPLE_EMPTY.
     162             : 
     163             :    If the RNG sample lands in the poisoned region, these functions will
     164             :    return/store FD_WSAMPLE_INDETERMINATE.  If such a sample is produced
     165             :    in the without replacement mode, all subsequent calls (with and
     166             :    without replacement) will also return FD_WSAMPLE_INDETERMINATE until
     167             :    the next call to fd_wsample_restore_all.  This is necessary because
     168             :    the poisoned region represents an indeterminate number of elements,
     169             :    so it's not possible to know how removing one of them will affect the
     170             :    total weight, and thus all subsequent random samples.
     171             : 
     172             :    For each index i, fd_wsample_sample returns i with
     173             :    probability weights[i]/sum(weights), only considering weights of
     174             :    elements that have not been removed. */
     175      341628 : #define FD_WSAMPLE_EMPTY          UINT_MAX
     176    23866362 : #define FD_WSAMPLE_INDETERMINATE (UINT_MAX-1UL)
     177             : ulong fd_wsample_sample                ( fd_wsample_t * sampler );
     178             : ulong fd_wsample_sample_and_remove     ( fd_wsample_t * sampler );
     179             : void  fd_wsample_sample_many           ( fd_wsample_t * sampler, ulong * idxs, ulong cnt );
     180             : void  fd_wsample_sample_and_remove_many( fd_wsample_t * sampler, ulong * idxs, ulong cnt );
     181             : 
     182             : /* fd_wsample_remove_idx removes an element by index as if it had been selected
     183             :    for sampling without replacement.  Unless restore_all is called, this
     184             :    index will no longer be returned by any of the sample methods, and
     185             :    its weight will be excluded from the remaining sampling operations.
     186             :    Removing an element that has already been removed is a no-op.  idx
     187             :    must be in [0, ele_cnt). */
     188             : void fd_wsample_remove_idx( fd_wsample_t * sampler, ulong idx );
     189             : 
     190             : /* fd_wsample_restore_all restores all elements removed with one of the
     191             :    sample_and_remove functions or with fd_wsample_remove_idx.  The
     192             :    elements with have their original weight.  This is faster than
     193             :    recreating the weighted sampler, even if all elements have been
     194             :    removed.  Returns sampler on success and NULL on failure.  The only
     195             :    error case is if sampler was constructed with restore_enabled set to 0,
     196             :    in which case no elements are restored. */
     197             : fd_wsample_t * fd_wsample_restore_all( fd_wsample_t * sampler );
     198             : 
     199             : #endif /* HEADER_fd_src_ballet_wsample_fd_wsample_h */

Generated by: LCOV version 1.14