Line data Source code
1 : #ifndef HEADER_fd_src_ballet_pack_fd_est_tbl_h 2 : #define HEADER_fd_src_ballet_pack_fd_est_tbl_h 3 : 4 : #include "../fd_ballet_base.h" 5 : 6 : #if FD_HAS_DOUBLE 7 : 8 : /* This header defines a data structure for estimating the sliding-window mean 9 : and variance of tagged data. It takes in real-valued input, with each value 10 : tagged with an opaque tag. The data structure gives an estimated answer to 11 : queries of the form "what is the mean and variance of recent data with tag 12 : X" for a given value of X. For best use, the tag should correspond to the 13 : distribution that the random variable comes from, although there's no 14 : specific need for this to be true, and no assumptions are made about 15 : normality etc. 16 : The answers this data structure gives are approximate because tags are 17 : mapped to an array of bins that is much smaller than the universe of tags 18 : and thus tags can alias. This is actually desirable behavior because it 19 : means that if you have inserted lots of data but then query for a brand-new 20 : tag, the expected value of the returned data is close to the overall mean of 21 : all values that have been inserted. */ 22 : 23 3 : #define FD_EST_TBL_MAGIC (0xF17EDA2C37E57B10UL) /* F17E=FIRE,DA2C/37=DANCER,E5/7B1=ESTBL,0=V0 / FIREDANCER EST TBL V0 */ 24 : 25 0 : #define FD_EST_TBL_ALIGN (32UL) 26 : #define FD_EST_TBL_FOOTPRINT( bin_cnt ) ( sizeof(fd_est_tbl_t) + ((bin_cnt)-1UL)*sizeof(fd_est_tbl_bin_t) ) 27 : 28 : /* Internal table bin structure used to accumulate statistics about tags that 29 : map to this bin index */ 30 : /* FIXME: With doubles, this struct is 32B. With floats, it is 16B, which means 31 : that reads and writes to it can be atomic if done carefully. That will make 32 : updating the table while it's in use much easier. On some platforms, 32B 33 : reads and writes will also be atomic. */ 34 : struct fd_private_est_tbl_bin { 35 : /* x: The numerator of the EMA of the values that have mapped to this 36 : bin */ 37 : double x; 38 : /* x2: The numerator of the EMA of the square of values that have mapped 39 : to this bin */ 40 : double x2; 41 : /* d: The denominator for EMA(x), paired with the numerator from above. 42 : */ 43 : double d; 44 : double d2; 45 : }; 46 : typedef struct fd_private_est_tbl_bin fd_est_tbl_bin_t; 47 : 48 : /* The main data structure described in the overall header comment */ 49 : struct __attribute__((aligned(FD_EST_TBL_ALIGN))) fd_private_est_tbl { 50 : /* magic: set to FD_EST_TBL_MAGIC */ 51 : ulong magic; 52 : /* bin_cnt_mask: (bin_cnt_mask+1) is the number of bins in the table, a power 53 : of two */ 54 : ulong bin_cnt_mask; 55 : /* ema_coeff: the decay coefficient used in EMA computations. Near 1.0. */ 56 : double ema_coeff; 57 : /* default_val: the value to return as mean when the query maps to a bin with 58 : very few values */ 59 : double default_val; 60 : /* 32 byte aligned at this point */ 61 : /* bins: the array of (bin_cnt_mask+1) bins follows. The array size of 1 is 62 : just convention. */ 63 : fd_est_tbl_bin_t bins[1]; 64 : }; 65 : typedef struct fd_private_est_tbl fd_est_tbl_t; 66 : 67 : 68 : FD_PROTOTYPES_BEGIN 69 : /* fd_est_tbl_{align, footprint} given the needed alignment and footprint for a 70 : memory region suitable to hold fd_est_tbl's state. bin_cnt specifies the 71 : number of bins that the estimation table stores. bin_cnt must be a 72 : power-of-two greater than 0. Increasing the number of bins increases the 73 : footprint requirements but also increases the accuracy slightly (by reducing 74 : collisions). fd_est_tbl_{align, footprint} return the same value as 75 : FD_EST_TBL_{ALIGN, FOOTPRINT}. 76 : 77 : fd_est_tbl_new takes ownership of the memory region pointed to by mem (which 78 : is assumed to be non-NULL and have the appropriate alignment and footprint) 79 : and formats it as a fd_est_tbl. The estimation table will use bin_cnt bins, 80 : and each bin's EMA will be tuned for an a window size of history. history 81 : must be positive. The table will use a default value of default_val for the 82 : mean whenever a query indicates a bin has had no data. Returns mem (which 83 : will be formatted for use) on success and NULL on failure (bad inputs). The 84 : caller will not be joined to the region on return. 85 : 86 : fd_est_tbl_join joins the caller to a memory region holding the state of a 87 : fd_est_tbl. 88 : 89 : fd_est_tbl_leave leaves the current join. Returns a pointer in the local 90 : address space to the memory region holding the table state. The join should 91 : not be used after calling _leave. 92 : 93 : fd_est_tbl_delete unformats the memory region used to hold the state of an 94 : fd_est_tbl and returns ownership of the underlying memory region to the 95 : caller. There should be no joins in the system on the fd_est_tbl. Returns 96 : a pointer to the underlying memory region. */ 97 : 98 0 : FD_FN_CONST static inline ulong fd_est_tbl_align ( void ) { return FD_EST_TBL_ALIGN; } 99 0 : FD_FN_CONST static inline ulong fd_est_tbl_footprint( ulong bin_cnt ) { 100 0 : if( FD_UNLIKELY( !bin_cnt || !fd_ulong_is_pow2( bin_cnt ) ) ) return 0UL; 101 0 : if( FD_UNLIKELY( bin_cnt > ((ULONG_MAX - sizeof(fd_est_tbl_t))/sizeof(fd_est_tbl_bin_t)) ) ) return 0UL; 102 0 : return sizeof(fd_est_tbl_t) + (bin_cnt-1UL)*sizeof(fd_est_tbl_bin_t); 103 0 : } 104 : 105 : static inline void * 106 : fd_est_tbl_new( void * mem, 107 : ulong bin_cnt, 108 : ulong history, 109 3 : uint default_val ) { 110 3 : if( FD_UNLIKELY( !bin_cnt || !fd_ulong_is_pow2( bin_cnt ) ) ) return NULL; 111 3 : if( FD_UNLIKELY( bin_cnt > ((ULONG_MAX - sizeof(fd_est_tbl_t))/sizeof(fd_est_tbl_bin_t)) ) ) return NULL; 112 3 : if( FD_UNLIKELY( !history ) ) return NULL; 113 : /* The largest ema_d can get is around history, and the largest the value can 114 : get is UINT_MAX. Their product is then less than 2^96 approx 8*10^28, 115 : which is comfortably in the range of a double. */ 116 3 : fd_est_tbl_t * tbl = (fd_est_tbl_t *)mem; 117 3 : tbl->bin_cnt_mask = bin_cnt-1UL; 118 3 : tbl->ema_coeff = 1.0 - 1.0/(double)history; 119 3 : tbl->default_val = default_val; 120 : 121 3 : fd_memset( tbl->bins, 0, bin_cnt*sizeof(fd_est_tbl_bin_t) ); 122 3 : FD_COMPILER_MFENCE(); 123 3 : FD_VOLATILE( tbl->magic ) = FD_EST_TBL_MAGIC; 124 3 : FD_COMPILER_MFENCE(); 125 : 126 3 : return (void *)tbl; 127 3 : } 128 : 129 : static inline fd_est_tbl_t * 130 3 : fd_est_tbl_join ( void * _tbl ) { 131 3 : fd_est_tbl_t * tbl = (fd_est_tbl_t *)_tbl; 132 3 : if( FD_UNLIKELY( tbl->magic != FD_EST_TBL_MAGIC ) ) return NULL; 133 3 : return tbl; 134 3 : } 135 0 : static inline void * fd_est_tbl_leave ( fd_est_tbl_t * tbl ) { return (void *) tbl; } 136 0 : static inline void * fd_est_tbl_delete( fd_est_tbl_t * tbl ) { 137 0 : FD_COMPILER_MFENCE(); 138 0 : FD_VOLATILE( tbl->magic ) = 0UL; 139 0 : FD_COMPILER_MFENCE(); 140 0 : return (void *) tbl; 141 0 : } 142 : 143 : /* fd_est_tbl_estimate: estimate the mean and variance of the distribution from 144 : which data tagged with tag is drawn. Since this function cannot return two 145 : doubles, if variance_out is non-NULL, it will be set to the variance. If 0 146 : values have been inserted with the specified tag (or a tag that aliases to 147 : it), this function will return a mean of default_val and a variance of 0. */ 148 : static inline double 149 : fd_est_tbl_estimate( fd_est_tbl_t const * tbl, 150 : ulong tag, 151 54 : double * variance_out ) { 152 54 : fd_est_tbl_bin_t const * bin = tbl->bins + (tag & tbl->bin_cnt_mask); 153 54 : double mean, var; 154 54 : if( FD_UNLIKELY( !(bin->d > 0.0) ) ) { 155 3 : mean = tbl->default_val; 156 3 : var = 0.0; 157 51 : } else { 158 51 : mean = bin->x / bin->d; 159 51 : var = (bin->d * bin->x2 - (bin->x*bin->x)) / ( bin->d * bin->d - bin->d2 ); 160 51 : } 161 54 : var = fd_double_if( var>0.0, var, 0.0 ); 162 54 : if( FD_LIKELY( variance_out ) ) *variance_out = var; 163 54 : return mean; 164 54 : } 165 : 166 : /* fd_est_tbl_update: inserts a new tagged value into this data structure */ 167 : static inline void 168 : fd_est_tbl_update( fd_est_tbl_t * tbl, 169 : ulong tag, 170 1524027 : uint value ) { 171 1524027 : fd_est_tbl_bin_t * bin = tbl->bins + (tag & tbl->bin_cnt_mask); 172 : #ifdef FD_EST_TBL_ADAPTIVE 173 : double mean, variance; 174 : mean = fd_est_tbl_estimate( tbl, tag, &variance ); 175 : double dev_sq = (value - mean)*(value - mean) / variance; /* Normalized squared deviation */ 176 : double alpha = 0.25; 177 : double C = fd_double_if( dev_sq<log(DBL_MAX)/alpha, 1.0/(1.0 + exp(alpha*dev_sq)*tbl->ema_coeff), 0.0 ); 178 : #else 179 1524027 : double C = tbl->ema_coeff; 180 1524027 : #endif 181 1524027 : bin->x = value + fd_double_if( C*bin->x >DBL_MIN, C*bin->x , 0.0 ); 182 1524027 : bin->x2 = value*value + fd_double_if( C*bin->x2>DBL_MIN, C*bin->x2, 0.0 ); 183 1524027 : bin->d = 1.0 + C*bin->d ; /* Can't go denormal */ 184 1524027 : bin->d2 = 1.0 + C*C*bin->d2; /* Can't go denormal */ 185 1524027 : } 186 : 187 : #endif /* FD_HAS_DOUBLE */ 188 : 189 : #endif /* HEADER_fd_src_ballet_pack_fd_est_tbl_h */