LCOV - code coverage report
Current view: top level - ballet/pack - fd_est_tbl.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 43 57 75.4 %
Date: 2024-11-13 11:58:15 Functions: 4 248 1.6 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_pack_fd_est_tbl_h
       2             : #define HEADER_fd_src_ballet_pack_fd_est_tbl_h
       3             : 
       4             : #include "../fd_ballet_base.h"
       5             : 
       6             : #if FD_HAS_DOUBLE
       7             : 
       8             : /* This header defines a data structure for estimating the sliding-window mean
       9             :    and variance of tagged data.  It takes in real-valued input, with each value
      10             :    tagged with an opaque tag.  The data structure gives an estimated answer to
      11             :    queries of the form "what is the mean and variance of recent data with tag
      12             :    X" for a given value of X.  For best use, the tag should correspond to the
      13             :    distribution that the random variable comes from, although there's no
      14             :    specific need for this to be true, and no assumptions are made about
      15             :    normality etc.
      16             :    The answers this data structure gives are approximate because tags are
      17             :    mapped to an array of bins that is much smaller than the universe of tags
      18             :    and thus tags can alias.  This is actually desirable behavior because it
      19             :    means that if you have inserted lots of data but then query for a brand-new
      20             :    tag, the expected value of the returned data is close to the overall mean of
      21             :    all values that have been inserted. */
      22             : 
      23           3 : #define FD_EST_TBL_MAGIC (0xF17EDA2C37E57B10UL) /* F17E=FIRE,DA2C/37=DANCER,E5/7B1=ESTBL,0=V0 / FIREDANCER EST TBL V0 */
      24             : 
      25           0 : #define FD_EST_TBL_ALIGN                   (32UL)
      26             : #define FD_EST_TBL_FOOTPRINT( bin_cnt ) ( sizeof(fd_est_tbl_t) + ((bin_cnt)-1UL)*sizeof(fd_est_tbl_bin_t) )
      27             : 
      28             : /* Internal table bin structure used to accumulate statistics about tags that
      29             :    map to this bin index */
      30             : /* FIXME: With doubles, this struct is 32B. With floats, it is 16B, which means
      31             :    that reads and writes to it can be atomic if done carefully.  That will make
      32             :    updating the table while it's in use much easier.  On some platforms, 32B
      33             :    reads and writes will also be atomic. */
      34             : struct fd_private_est_tbl_bin {
      35             :   /* x: The numerator of the EMA of the values that have mapped to this
      36             :      bin */
      37             :   double x;
      38             :   /* x2: The numerator of the EMA of the square of values that have mapped
      39             :      to this bin */
      40             :   double x2;
      41             :   /* d: The denominator for EMA(x), paired with the numerator from above.
      42             :      */
      43             :   double d;
      44             :   double d2;
      45             : };
      46             : typedef struct fd_private_est_tbl_bin fd_est_tbl_bin_t;
      47             : 
      48             : /* The main data structure described in the overall header comment */
      49             : struct __attribute__((aligned(FD_EST_TBL_ALIGN))) fd_private_est_tbl {
      50             :   /* magic: set to FD_EST_TBL_MAGIC */
      51             :   ulong  magic;
      52             :   /* bin_cnt_mask: (bin_cnt_mask+1) is the number of bins in the table, a power
      53             :      of two */
      54             :   ulong  bin_cnt_mask;
      55             :   /* ema_coeff: the decay coefficient used in EMA computations. Near 1.0. */
      56             :   double ema_coeff;
      57             :   /* default_val: the value to return as mean when the query maps to a bin with
      58             :      very few values */
      59             :   double default_val;
      60             :   /* 32 byte aligned at this point */
      61             :   /* bins: the array of (bin_cnt_mask+1) bins follows.  The array size of 1 is
      62             :      just convention. */
      63             :   fd_est_tbl_bin_t bins[1];
      64             : };
      65             : typedef struct fd_private_est_tbl fd_est_tbl_t;
      66             : 
      67             : 
      68             : FD_PROTOTYPES_BEGIN
      69             : /* fd_est_tbl_{align, footprint} given the needed alignment and footprint for a
      70             :    memory region suitable to hold fd_est_tbl's state.  bin_cnt specifies the
      71             :    number of bins that the estimation table stores.  bin_cnt must be a
      72             :    power-of-two greater than 0.  Increasing the number of bins increases the
      73             :    footprint requirements but also increases the accuracy slightly (by reducing
      74             :    collisions).  fd_est_tbl_{align, footprint} return the same value as
      75             :    FD_EST_TBL_{ALIGN, FOOTPRINT}.
      76             : 
      77             :    fd_est_tbl_new takes ownership of the memory region pointed to by mem (which
      78             :    is assumed to be non-NULL and have the appropriate alignment and footprint)
      79             :    and formats it as a fd_est_tbl.  The estimation table will use bin_cnt bins,
      80             :    and each bin's EMA will be tuned for an a window size of history.  history
      81             :    must be positive.  The table will use a default value of default_val for the
      82             :    mean whenever a query indicates a bin has had no data.  Returns mem (which
      83             :    will be formatted for use) on success and NULL on failure (bad inputs).  The
      84             :    caller will not be joined to the region on return.
      85             : 
      86             :    fd_est_tbl_join joins the caller to a memory region holding the state of a
      87             :    fd_est_tbl.
      88             : 
      89             :    fd_est_tbl_leave leaves the current join.  Returns a pointer in the local
      90             :    address space to the memory region holding the table state.  The join should
      91             :    not be used after calling _leave.
      92             : 
      93             :    fd_est_tbl_delete unformats the memory region used to hold the state of an
      94             :    fd_est_tbl and returns ownership of the underlying memory region to the
      95             :    caller.  There should be no joins in the system on the fd_est_tbl.  Returns
      96             :    a pointer to the underlying memory region. */
      97             : 
      98           0 : FD_FN_CONST static inline ulong fd_est_tbl_align    ( void ) { return FD_EST_TBL_ALIGN; }
      99           0 : FD_FN_CONST static inline ulong fd_est_tbl_footprint( ulong bin_cnt ) {
     100           0 :   if( FD_UNLIKELY( !bin_cnt || !fd_ulong_is_pow2( bin_cnt )                                 ) ) return 0UL;
     101           0 :   if( FD_UNLIKELY(  bin_cnt > ((ULONG_MAX - sizeof(fd_est_tbl_t))/sizeof(fd_est_tbl_bin_t)) ) ) return 0UL;
     102           0 :   return sizeof(fd_est_tbl_t) + (bin_cnt-1UL)*sizeof(fd_est_tbl_bin_t);
     103           0 : }
     104             : 
     105             : static inline void *
     106             : fd_est_tbl_new( void * mem,
     107             :                 ulong  bin_cnt,
     108             :                 ulong  history,
     109           3 :                 uint   default_val ) {
     110           3 :   if( FD_UNLIKELY( !bin_cnt || !fd_ulong_is_pow2( bin_cnt )                                 ) ) return NULL;
     111           3 :   if( FD_UNLIKELY(  bin_cnt > ((ULONG_MAX - sizeof(fd_est_tbl_t))/sizeof(fd_est_tbl_bin_t)) ) ) return NULL;
     112           3 :   if( FD_UNLIKELY( !history                                                                 ) ) return NULL;
     113             :   /* The largest ema_d can get is around history, and the largest the value can
     114             :      get is UINT_MAX.  Their product is then less than 2^96 approx 8*10^28,
     115             :      which is comfortably in the range of a double. */
     116           3 :   fd_est_tbl_t * tbl  = (fd_est_tbl_t *)mem;
     117           3 :   tbl->bin_cnt_mask   = bin_cnt-1UL;
     118           3 :   tbl->ema_coeff      = 1.0 - 1.0/(double)history;
     119           3 :   tbl->default_val    = default_val;
     120             : 
     121           3 :   fd_memset( tbl->bins, 0, bin_cnt*sizeof(fd_est_tbl_bin_t) );
     122           3 :   FD_COMPILER_MFENCE();
     123           3 :   FD_VOLATILE( tbl->magic ) = FD_EST_TBL_MAGIC;
     124           3 :   FD_COMPILER_MFENCE();
     125             : 
     126           3 :   return (void *)tbl;
     127           3 : }
     128             : 
     129             : static inline fd_est_tbl_t *
     130           3 : fd_est_tbl_join  ( void         * _tbl ) {
     131           3 :   fd_est_tbl_t * tbl = (fd_est_tbl_t *)_tbl;
     132           3 :   if( FD_UNLIKELY( tbl->magic != FD_EST_TBL_MAGIC ) ) return NULL;
     133           3 :   return tbl;
     134           3 : }
     135           0 : static inline void         * fd_est_tbl_leave ( fd_est_tbl_t *  tbl ) { return (void         *) tbl; }
     136           0 : static inline void         * fd_est_tbl_delete( fd_est_tbl_t *  tbl ) {
     137           0 :   FD_COMPILER_MFENCE();
     138           0 :   FD_VOLATILE( tbl->magic ) = 0UL;
     139           0 :   FD_COMPILER_MFENCE();
     140           0 :   return (void         *) tbl;
     141           0 : }
     142             : 
     143             : /* fd_est_tbl_estimate: estimate the mean and variance of the distribution from
     144             :    which data tagged with tag is drawn.  Since this function cannot return two
     145             :    doubles, if variance_out is non-NULL, it will be set to the variance.  If 0
     146             :    values have been inserted with the specified tag (or a tag that aliases to
     147             :    it), this function will return a mean of default_val and a variance of 0. */
     148             : static inline double
     149             : fd_est_tbl_estimate( fd_est_tbl_t const * tbl,
     150             :                      ulong                tag,
     151          54 :                      double *             variance_out ) {
     152          54 :   fd_est_tbl_bin_t const * bin = tbl->bins + (tag & tbl->bin_cnt_mask);
     153          54 :   double mean, var;
     154          54 :   if( FD_UNLIKELY( !(bin->d > 0.0) ) ) {
     155           3 :     mean = tbl->default_val;
     156           3 :     var  = 0.0;
     157          51 :   } else {
     158          51 :     mean = bin->x / bin->d;
     159          51 :     var  = (bin->d * bin->x2 - (bin->x*bin->x)) / ( bin->d * bin->d - bin->d2 );
     160          51 :   }
     161          54 :   var  = fd_double_if( var>0.0, var, 0.0 );
     162          54 :   if( FD_LIKELY( variance_out ) ) *variance_out = var;
     163          54 :   return mean;
     164          54 : }
     165             : 
     166             : /* fd_est_tbl_update: inserts a new tagged value into this data structure */
     167             : static inline void
     168             : fd_est_tbl_update( fd_est_tbl_t * tbl,
     169             :                    ulong          tag,
     170     1524027 :                    uint           value ) {
     171     1524027 :   fd_est_tbl_bin_t * bin = tbl->bins + (tag & tbl->bin_cnt_mask);
     172             : #ifdef FD_EST_TBL_ADAPTIVE
     173             :   double mean, variance;
     174             :   mean = fd_est_tbl_estimate( tbl, tag, &variance );
     175             :   double dev_sq = (value - mean)*(value - mean) / variance; /* Normalized squared deviation */
     176             :   double alpha = 0.25;
     177             :   double C = fd_double_if( dev_sq<log(DBL_MAX)/alpha, 1.0/(1.0 + exp(alpha*dev_sq)*tbl->ema_coeff), 0.0 );
     178             : #else
     179     1524027 :   double C = tbl->ema_coeff;
     180     1524027 : #endif
     181     1524027 :   bin->x  = value       + fd_double_if( C*bin->x >DBL_MIN, C*bin->x , 0.0 );
     182     1524027 :   bin->x2 = value*value + fd_double_if( C*bin->x2>DBL_MIN, C*bin->x2, 0.0 );
     183     1524027 :   bin->d  = 1.0         +   C*bin->d ; /* Can't go denormal */
     184     1524027 :   bin->d2 = 1.0         + C*C*bin->d2; /* Can't go denormal */
     185     1524027 : }
     186             : 
     187             : #endif /* FD_HAS_DOUBLE */
     188             : 
     189             : #endif /* HEADER_fd_src_ballet_pack_fd_est_tbl_h */

Generated by: LCOV version 1.14