LCOV - code coverage report
Current view: top level - ballet/wsample - fd_wsample.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 6 6 100.0 %
Date: 2025-01-08 12:08:44 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_shred_fd_wsample_h
       2             : #define HEADER_fd_src_ballet_shred_fd_wsample_h
       3             : 
       4             : /* This header defines methods for computing weighted "random" samples
       5             :    of the specific type used by Solana for computing the leader
       6             :    schedule and the Turbine tree.
       7             : 
       8             :    In particular, those random samples are generated by:
       9             : 
      10             :    1. Seeding ChaCha20 with a 32B seed (different for leader schedule vs
      11             :       Turbine).
      12             :    2. Using the ChaCha20 random stream to generate a uniformly
      13             :       distributed random integer x in [0, total_stake_weight).
      14             :    3. Returning the first index such that the cumulative sum of stake is
      15             :       at least x. */
      16             : 
      17             : #include "../fd_ballet_base.h"
      18             : #include "../chacha20/fd_chacha20rng.h"
      19             : 
      20             : struct fd_wsample_private;
      21             : typedef struct fd_wsample_private fd_wsample_t;
      22             : 
      23        6180 : #define FD_WSAMPLE_ALIGN (64UL)
      24             : /* fd_leaders really wants a compile time-compatible footprint... The
      25             :    internal count is 1/8 * (9^ceil(log_9(ele_cnt)) - 1) */
      26             : #define FD_WSAMPLE_FOOTPRINT( ele_cnt, restore_enabled )                   \
      27             :    (192UL + 64UL*((restore_enabled)?2UL:1UL)*(                             \
      28             :                  ((ele_cnt)<=         1UL)?        0UL :                   \
      29             :                  ((ele_cnt)<=         9UL)?        1UL :                   \
      30             :                  ((ele_cnt)<=        81UL)?       10UL :                   \
      31             :                  ((ele_cnt)<=       729UL)?       91UL :                   \
      32             :                  ((ele_cnt)<=      6561UL)?      820UL :                   \
      33             :                  ((ele_cnt)<=     59049UL)?     7381UL :                   \
      34             :                  ((ele_cnt)<=    531441UL)?    66430UL :                   \
      35             :                  ((ele_cnt)<=   4782969UL)?   597871UL :                   \
      36             :                  ((ele_cnt)<=  43046721UL)?  5380840UL :                   \
      37             :                  ((ele_cnt)<= 387420489UL)? 48427561UL :                   \
      38             :                  ((ele_cnt)<=3486784401UL)?435848050UL : 3922632451UL ))   \
      39             : 
      40             : /* fd_wsample_{align, footprint} give the alignment and footprint
      41             :    respectively required to create a weighted sampler with at most
      42             :    ele_cnt stake weights.  If restore_enabled is zero, calls to
      43             :    wsample_restore_all will be no-ops, but the footprint required will
      44             :    be smaller. ele_cnt in [0, UINT_MAX-2) (note, not ULONG MAX).
      45             : 
      46             :    fd_wsample_{join,leave} join and leave a memory region formatted as a
      47             :    weighted sampler, respectively.  They both are simple casts.
      48             : 
      49             :    fd_wsample_delete unformats a memory region used as a weighted
      50             :    sampler.  Releases all interest in rng. */
      51             : 
      52             : FD_FN_CONST ulong fd_wsample_align    ( void          );
      53             : FD_FN_CONST ulong fd_wsample_footprint( ulong ele_cnt, int restore_enabled );
      54             : fd_wsample_t *    fd_wsample_join     ( void * shmem  );
      55             : void *            fd_wsample_leave    ( fd_wsample_t * sampler );
      56             : void *            fd_wsample_delete   ( void * shmem  );
      57             : 
      58             : 
      59             : /* FD_WSAMPLE_HINT_*: Hints that can be passed to fd_wsample_new in the
      60             :    opt_hint field.  The hint specifies the distribution of weights:
      61             :    FLAT: The all weights are approximately constant.
      62             :    POWERLAW: The weights are sorted largest to smallest and decay with a
      63             :        power law distribution.  At the moment, this assumes it's
      64             :        proportional to 1/x.
      65             : 
      66             :    The hint can also specify whether sampling will be done with deleting
      67             :    or without deleting:
      68             :    REMOVE: Sampling will be done without replacement, i.e. mostly with
      69             :        fd_wsample_sample_and_remove and/or
      70             :        fd_wsample_sample_and_remove_many.
      71             :    NOREMOVE: Sampling will be done with replacement, i.e. mostly with
      72             :        fd_wsample_sample and/or
      73             :        fd_wsample_sample_many. */
      74        1554 : #define FD_WSAMPLE_HINT_FLAT              0
      75        6222 : #define FD_WSAMPLE_HINT_POWERLAW_NOREMOVE 2
      76      312939 : #define FD_WSAMPLE_HINT_POWERLAW_REMOVE   3
      77             : 
      78             : /* fd_wsample_new_init, fd_wsample_new_add_weight, and
      79             :    fd_wsample_new_fini format a memory region with the appropriate
      80             :    alignment and footprint to be usable as a weighted sampler.  This
      81             :    multi-function initialization process prevents needing to construct a
      82             :    flat array of weights, which is often inconvenient.
      83             : 
      84             :    The caller must first call fd_wsample_new_init, then new_add_weight
      85             :    ele_cnt times, and finally new_fini.  Only at that point will the
      86             :    region of memory be ready to be joined.
      87             : 
      88             :    fd_wsample_new_init begins the formatting a memory region.  shmem is
      89             :    a pointer to the first byte of the memory region to use.  rng must be
      90             :    a local join of a ChaCha20 RNG struct.  The weighted sampler will use
      91             :    rng to generate random numbers.  It may seem more natural for the
      92             :    weighted sampler to own its own rng, but this is done to facilitate
      93             :    sharing of rngs between weighted samplers, which is useful for
      94             :    Turbine.  ele_cnt specifies the number of elements that can be
      95             :    sampled from and must be less than UINT_MAX.  If restore_enabled is
      96             :    set to 0, fd_wsample_restore_all will not work but the required
      97             :    footprint is smaller.  opt_hint gives a hint of the shape of the
      98             :    weights and the style of queries that will be most common; this hint
      99             :    impacts query performance but not correctness.  opt_hint must be one
     100             :    of FD_WSAMPLE_HINT_*.
     101             : 
     102             :    fd_wsample_new_add adds a weight to a partially formatted memory
     103             :    region.  shmem must be a partially constructed region of memory, as
     104             :    returned by fd_wsample_new_init or fd_wsample_new_add_weight, weight
     105             :    must be strictly positive, and the cumulative sum of this weight and
     106             :    all other weights must be no more than ULONG_MAX.
     107             : 
     108             :    fd_wsample_new_fini finalizes the formatting of a partially formatted
     109             :    memory region.  shmem must be a partially constructed region of
     110             :    memory, as returned by fd_wsample_new_add_weight (or
     111             :    fd_wsample_new_init if ele_cnt==0).  If poisoned_weight is non-zero,
     112             :    the weighted sampler will end with a poisoned region representing an
     113             :    indeterminate number of unknown elements with total weight equal to
     114             :    poisoned_weight.  This is useful for chopping off a long tail so that
     115             :    the number of elements can be bounded easily.  The sum of all weights
     116             :    and poisoned_weight must be no more than ULONG_MAX.
     117             : 
     118             :    Retains read/write interest in rng.
     119             : 
     120             :    Each function returns shmem on success and NULL on failure.  It's
     121             :    safe to pass NULL as shmem, in which case NULL will be returned, so
     122             :    you only need to check the final result.
     123             : 
     124             :    On successful completion of the formatting process, the weighted
     125             :    sampler will contain an element corresponding to each provided
     126             :    weight.  Caller is not joined on return. */
     127             : void * fd_wsample_new_init( void             * shmem,
     128             :                             fd_chacha20rng_t * rng,
     129             :                             ulong              ele_cnt,
     130             :                             int                restore_enabled,
     131             :                             int                opt_hint );
     132             : void * fd_wsample_new_add ( void * shmem, ulong weight          );
     133             : void * fd_wsample_new_fini( void * shmem, ulong poisoned_weight );
     134             : 
     135             : /* fd_wsample_get_rng returns the value provided for rng in new. */
     136             : fd_chacha20rng_t * fd_wsample_get_rng( fd_wsample_t * sampler );
     137             : 
     138             : /* fd_wsample_seed_rng seeds the ChaCha20 rng with the provided seed in
     139             :    preparation for sampling.  This function is compatible with Solana's
     140             :    ChaChaRng::from_seed. */
     141             : void fd_wsample_seed_rng( fd_chacha20rng_t * rng, uchar seed[static 32] );
     142             : 
     143             : /* fd_wsample_sample{_and_remove}{,_many} produces one or cnt (in the
     144             :    _many case) weighted random samples from the sampler.  If the
     145             :    _and_remove variant of the function is called, the returned node will
     146             :    be temporarily removed from the sampler, i.e. for sampling without
     147             :    replacement.  Random samples are produced using the Solana-required
     148             :    method.  Sampler's RNG must be seeded appropriately
     149             :    prior to using these functions.
     150             : 
     151             :    The _many variants of the function store the ith index they sampled
     152             :    in idxs[i] for i in [0, cnt).  The other variants of the function
     153             :    simply return the sampled index.
     154             : 
     155             :    If the sampler has no unremoved elements, these functions will
     156             :    return/store FD_WSAMPLE_EMPTY.
     157             : 
     158             :    If the RNG sample lands in the poisoned region, these functions will
     159             :    return/store FD_WSAMPLE_INDETERMINATE.  If such a sample is produced
     160             :    in the without replacement mode, all subsequent calls (with and
     161             :    without replacement) will also return FD_WSAMPLE_INDETERMINATE until
     162             :    the next call to fd_wsample_restore_all.  This is necessary because
     163             :    the poisoned region represents an indeterminate number of elements,
     164             :    so it's not possible to know how removing one of them will affect the
     165             :    total weight, and thus all subsequent random samples.
     166             : 
     167             :    For each index i, fd_wsample_sample returns i with
     168             :    probability weights[i]/sum(weights), only considering weights of
     169             :    elements that have not been removed. */
     170      341619 : #define FD_WSAMPLE_EMPTY          UINT_MAX
     171    23866362 : #define FD_WSAMPLE_INDETERMINATE (UINT_MAX-1UL)
     172             : ulong fd_wsample_sample                ( fd_wsample_t * sampler );
     173             : ulong fd_wsample_sample_and_remove     ( fd_wsample_t * sampler );
     174             : void  fd_wsample_sample_many           ( fd_wsample_t * sampler, ulong * idxs, ulong cnt );
     175             : void  fd_wsample_sample_and_remove_many( fd_wsample_t * sampler, ulong * idxs, ulong cnt );
     176             : 
     177             : /* fd_wsample_remove_idx removes an element by index as if it had been selected
     178             :    for sampling without replacement.  Unless restore_all is called, this
     179             :    index will no longer be returned by any of the sample methods, and
     180             :    its weight will be excluded from the remaining sampling operations.
     181             :    Removing an element that has already been removed is a no-op.  idx
     182             :    must be in [0, ele_cnt). */
     183             : void fd_wsample_remove_idx( fd_wsample_t * sampler, ulong idx );
     184             : 
     185             : /* fd_wsample_restore_all restores all elements removed with one of the
     186             :    sample_and_remove functions or with fd_wsample_remove_idx.  The
     187             :    elements with have their original weight.  This is faster than
     188             :    recreating the weighted sampler, even if all elements have been
     189             :    removed.  Returns sampler on success and NULL on failure.  The only
     190             :    error case is if sampler was constructed with restore_enabled set to 0,
     191             :    in which case no elements are restored. */
     192             : fd_wsample_t * fd_wsample_restore_all( fd_wsample_t * sampler );
     193             : 
     194             : 
     195             : 
     196             : #endif /* HEADER_fd_src_ballet_shred_fd_wsample_h */

Generated by: LCOV version 1.14