LCOV - code coverage report
Current view: top level - util/math - fd_stat.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 70 104 67.3 %
Date: 2024-11-13 11:58:15 Functions: 18 28 64.3 %

          Line data    Source code
       1             : #include "fd_stat.h"
       2             : 
       3             : #define FD_STAT_IMPL( T, UT )                                     \
       4             : ulong                                                             \
       5             : fd_stat_filter_##T( T *       y,                                  \
       6             :                     T const * x,                                  \
       7             :                     ulong     n,                                  \
       8    36012021 :                     T         thresh ) { /* assumed positive */   \
       9    36012021 :   ulong j = 0UL;                                                  \
      10   306779271 :   for( ulong i=0UL; i<n; i++ ) {                                  \
      11   270767250 :     T xi = x[i];                                                  \
      12   270767250 :     y[j] = xi;                                    /* speculate */ \
      13   270767250 :     j += (ulong)(fd_##T##_abs( xi )<=(UT)thresh); /* commit */    \
      14   270767250 :   }                                                               \
      15    36012021 :   return j;                                                       \
      16    36012021 : }                                                                 \
      17             :                                                                   \
      18             : T                                                                 \
      19             : fd_stat_median_##T( T *   x,                                      \
      20       24042 :                     ulong cnt ) {                                 \
      21       24042 :   if( FD_UNLIKELY( !cnt ) ) return (T)0;                          \
      22       24042 :   ulong i1 = cnt >> 1;                                            \
      23       23862 :   T m1 = fd_sort_up_##T##_select( x, cnt, i1 )[ i1 ];             \
      24       23862 :   if( (cnt & 1UL) ) return m1;                                    \
      25       23862 :   ulong i0 = i1-1UL;                                              \
      26       11790 :   T m0 = fd_sort_up_##T##_select( x, cnt, i0 )[ i0 ];             \
      27       11790 :   return fd_stat_avg2_##T( m0, m1 );                              \
      28       23862 : }
      29             : 
      30             : FD_STAT_IMPL( schar,   uchar   )
      31             : FD_STAT_IMPL( short,   ushort  )
      32             : FD_STAT_IMPL( int,     uint    )
      33             : FD_STAT_IMPL( long,    ulong   )
      34             : FD_STAT_IMPL( uchar,   uchar   )
      35             : FD_STAT_IMPL( ushort,  ushort  )
      36             : FD_STAT_IMPL( uint,    uint    )
      37             : FD_STAT_IMPL( ulong,   ulong   )
      38             : #if FD_HAS_INT128
      39             : FD_STAT_IMPL( int128,  uint128 )
      40             : FD_STAT_IMPL( uint128, uint128 )
      41             : #endif
      42             : FD_STAT_IMPL( float,   float   )
      43             : #if FD_HAS_DOUBLE
      44             : FD_STAT_IMPL( double,  double  )
      45             : #endif
      46             : 
      47             : #undef FD_STAT_FILTER_IMPL
      48             : 
      49             : ulong
      50             : fd_stat_robust_norm_fit_float( float *       opt_mu,
      51             :                                float *       opt_sigma,
      52             :                                float const * x,
      53             :                                ulong         cnt,
      54        3000 :                                void  *       scratch ) {
      55        3000 :   float * y = (float *)scratch;
      56             : 
      57             :   /* Filter out weird data points.  The threshold is such the sigma
      58             :      calculation cannot overflow.  Specifically consider an x whose
      59             :      elements are +/-thresh.  The median would end being exactly
      60             :      +/-thresh.  The absolute deviations from the median then would then
      61             :      be either 0,2*thresh.  Such that the median absolute deviation
      62             :      could be 2*thresh.  And normalizing such like a normal would be
      63             :      sigma 2*1.48... thresh <= FLT_MAX.  We use a slightly more
      64             :      conservative FLT_MAX/5 to keep things simple to explain and
      65             :      consistent with the thresh used by the robust exp fit below. */
      66             : 
      67        3000 :   cnt = fd_stat_filter_float( y, x, cnt, FLT_MAX/5.f );
      68        3000 :   if( FD_LIKELY( opt_mu || opt_sigma ) ) {
      69             : 
      70             :     /* Compute the median.  This is an unbiased and maximally robust
      71             :        estimator of the average in the presence of corruption.  It is
      72             :        not the most accurate estimator if the data is clean though.  If
      73             :        cnt is zero post filtering, mu (and sigma) will be zero. */
      74             : 
      75        3000 :     float mu = fd_stat_median_float( y, cnt );
      76        3000 :     if( opt_mu ) *opt_mu = mu;
      77             : 
      78        3000 :     if( opt_sigma ) {
      79      199551 :       for( ulong i=0UL; i<cnt; i++ ) y[i] = fd_float_abs( y[i] - mu );
      80        3000 :       *opt_sigma = 1.48260221850560f*fd_stat_median_float( y, cnt ); /* 1 / inv_cdf(0.75) */
      81        3000 :     }
      82        3000 :   }
      83             : 
      84        3000 :   return cnt;
      85        3000 : }
      86             : 
      87             : ulong
      88             : fd_stat_robust_exp_fit_float( float *       opt_x0,
      89             :                               float *       opt_tau,
      90             :                               float const * x,
      91             :                               ulong         cnt,
      92        3000 :                               void *        scratch ) {
      93        3000 :   float * y = (float *)scratch;
      94             : 
      95             :   /* Filter out weird data points.  The threshold is such the x0
      96             :      and tau calculations cannot overflow.  Specifically consider an x
      97             :      whose elements are +/-thresh.  The magnitude of the median is at
      98             :      most thresh and the median absolute deviation is at most 2*thresh.
      99             :      x0 then is at most (1+2*1.44...)*thresh and tau is at most 2*2.07
     100             :      thresh.  We use a slightly more conservative FLT_MAX/5 for thresh
     101             :      to keep things simple. */
     102             : 
     103        3000 :   cnt = fd_stat_filter_float( y, x, cnt, FLT_MAX/5.f );
     104        3000 :   if( FD_LIKELY( opt_x0 || opt_tau ) ) {
     105             : 
     106             :     /* Compute the median and median absolute deviation from the median. */
     107        3000 :     float med = fd_stat_median_float( y, cnt );
     108      199551 :     for( ulong i=0UL; i<cnt; i++ ) y[i] = fd_float_abs( y[i] - med );
     109        3000 :     float mad = fd_stat_median_float( y, cnt );
     110             : 
     111             :     /* Estimate the parameters from the distribution */
     112        3000 :     if( opt_x0  ) *opt_x0  = med - mad*1.44042009041256f; /* (ln 2) / asinh(1/2) */
     113        3000 :     if( opt_tau ) *opt_tau = mad*2.07808692123503f;       /* 1 / asinh(1/2) */
     114             : 
     115        3000 :   }
     116        3000 :   return cnt;
     117        3000 : }
     118             : 
     119             : #if FD_HAS_DOUBLE /* See above for details */
     120             : 
     121             : ulong
     122             : fd_stat_robust_norm_fit_double( double *       opt_mu,
     123             :                                 double *       opt_sigma,
     124             :                                 double const * x,
     125             :                                 ulong          cnt,
     126        3015 :                                 void  *        scratch ) {
     127        3015 :   double * y = (double *)scratch;
     128        3015 :   cnt = fd_stat_filter_double( y, x, cnt, DBL_MAX/5. );
     129        3015 :   if( FD_LIKELY( opt_mu || opt_sigma ) ) {
     130        3015 :     double mu = fd_stat_median_double( y, cnt );
     131        3015 :     if( opt_mu ) *opt_mu = mu;
     132        3015 :     if( opt_sigma ) {
     133      198396 :       for( ulong i=0UL; i<cnt; i++ ) y[i] = fd_double_abs( y[i] - mu );
     134        3015 :       *opt_sigma = 1.48260221850560*fd_stat_median_double( y, cnt );
     135        3015 :     }
     136        3015 :   }
     137             : 
     138        3015 :   return cnt;
     139        3015 : }
     140             : 
     141             : ulong
     142             : fd_stat_robust_exp_fit_double( double *       opt_x0,
     143             :                                double *       opt_tau,
     144             :                                double const * x,
     145             :                                ulong          cnt,
     146        3006 :                                void *         scratch ) {
     147        3006 :   double * y = (double *)scratch;
     148        3006 :   cnt = fd_stat_filter_double( y, x, cnt, DBL_MAX/5. );
     149        3006 :   if( FD_LIKELY( opt_x0 || opt_tau ) ) {
     150        3006 :     double med = fd_stat_median_double( y, cnt );
     151      200331 :     for( ulong i=0UL; i<cnt; i++ ) y[i] = fd_double_abs( y[i] - med );
     152        3006 :     double mad = fd_stat_median_double( y, cnt );
     153        3006 :     if( opt_x0  ) *opt_x0  = med - mad*1.44042009041256;
     154        3006 :     if( opt_tau ) *opt_tau = mad*2.07808692123503;
     155        3006 :   }
     156        3006 :   return cnt;
     157        3006 : }
     158             : 
     159             : #endif
     160             : 
     161             : /* ascending sorts */
     162             : 
     163             : #define SORT_NAME       fd_sort_up_schar
     164           0 : #define SORT_KEY_T      schar
     165             : #define SORT_IMPL_STYLE 2
     166             : #include "../tmpl/fd_sort.c"
     167             : 
     168             : #define SORT_NAME       fd_sort_up_short
     169           0 : #define SORT_KEY_T      short
     170             : #define SORT_IMPL_STYLE 2
     171             : #include "../tmpl/fd_sort.c"
     172             : 
     173             : #define SORT_NAME       fd_sort_up_int
     174           0 : #define SORT_KEY_T      int
     175             : #define SORT_IMPL_STYLE 2
     176             : #include "../tmpl/fd_sort.c"
     177             : 
     178             : #define SORT_NAME       fd_sort_up_long
     179           0 : #define SORT_KEY_T      long
     180             : #define SORT_IMPL_STYLE 2
     181             : #include "../tmpl/fd_sort.c"
     182             : 
     183             : #define SORT_NAME       fd_sort_up_uchar
     184           0 : #define SORT_KEY_T      uchar
     185             : #define SORT_IMPL_STYLE 2
     186             : #include "../tmpl/fd_sort.c"
     187             : 
     188             : #define SORT_NAME       fd_sort_up_ushort
     189           0 : #define SORT_KEY_T      ushort
     190             : #define SORT_IMPL_STYLE 2
     191             : #include "../tmpl/fd_sort.c"
     192             : 
     193             : #define SORT_NAME       fd_sort_up_uint
     194           0 : #define SORT_KEY_T      uint
     195             : #define SORT_IMPL_STYLE 2
     196             : #include "../tmpl/fd_sort.c"
     197             : 
     198             : #define SORT_NAME       fd_sort_up_ulong
     199           0 : #define SORT_KEY_T      ulong
     200             : #define SORT_IMPL_STYLE 2
     201             : #include "../tmpl/fd_sort.c"
     202             : 
     203             : #if FD_HAS_INT128
     204             : #define SORT_NAME       fd_sort_up_int128
     205           0 : #define SORT_KEY_T      int128
     206             : #define SORT_IMPL_STYLE 2
     207             : #include "../tmpl/fd_sort.c"
     208             : 
     209             : #define SORT_NAME       fd_sort_up_uint128
     210           0 : #define SORT_KEY_T      uint128
     211             : #define SORT_IMPL_STYLE 2
     212             : #include "../tmpl/fd_sort.c"
     213             : #endif
     214             : 
     215             : #define SORT_NAME       fd_sort_up_float
     216     3387984 : #define SORT_KEY_T      float
     217             : #define SORT_IMPL_STYLE 2
     218             : #include "../tmpl/fd_sort.c"
     219             : 
     220             : #if FD_HAS_DOUBLE
     221             : #define SORT_NAME       fd_sort_up_double
     222     3346094 : #define SORT_KEY_T      double
     223             : #define SORT_IMPL_STYLE 2
     224             : #include "../tmpl/fd_sort.c"
     225             : #endif
     226             : 
     227             : /* descending sorts */
     228             : 
     229             : #define SORT_NAME        fd_sort_dn_schar
     230           0 : #define SORT_KEY_T       schar
     231           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     232             : #define SORT_IMPL_STYLE  2
     233             : #include "../tmpl/fd_sort.c"
     234             : 
     235             : #define SORT_NAME        fd_sort_dn_short
     236           0 : #define SORT_KEY_T       short
     237           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     238             : #define SORT_IMPL_STYLE 2
     239             : #include "../tmpl/fd_sort.c"
     240             : 
     241             : #define SORT_NAME        fd_sort_dn_int
     242           0 : #define SORT_KEY_T       int
     243           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     244             : #define SORT_IMPL_STYLE  2
     245             : #include "../tmpl/fd_sort.c"
     246             : 
     247             : #define SORT_NAME        fd_sort_dn_long
     248           0 : #define SORT_KEY_T       long
     249           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     250             : #define SORT_IMPL_STYLE  2
     251             : #include "../tmpl/fd_sort.c"
     252             : 
     253             : #define SORT_NAME        fd_sort_dn_uchar
     254           0 : #define SORT_KEY_T       uchar
     255           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     256             : #define SORT_IMPL_STYLE  2
     257             : #include "../tmpl/fd_sort.c"
     258             : 
     259             : #define SORT_NAME        fd_sort_dn_ushort
     260           0 : #define SORT_KEY_T       ushort
     261           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     262             : #define SORT_IMPL_STYLE  2
     263             : #include "../tmpl/fd_sort.c"
     264             : 
     265             : #define SORT_NAME        fd_sort_dn_uint
     266           0 : #define SORT_KEY_T       uint
     267           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     268             : #define SORT_IMPL_STYLE  2
     269             : #include "../tmpl/fd_sort.c"
     270             : 
     271             : #define SORT_NAME        fd_sort_dn_ulong
     272           0 : #define SORT_KEY_T       ulong
     273           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     274             : #define SORT_IMPL_STYLE  2
     275             : #include "../tmpl/fd_sort.c"
     276             : 
     277             : #if FD_HAS_INT128
     278             : #define SORT_NAME        fd_sort_dn_int128
     279           0 : #define SORT_KEY_T       int128
     280           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     281             : #define SORT_IMPL_STYLE  2
     282             : #include "../tmpl/fd_sort.c"
     283             : 
     284             : #define SORT_NAME        fd_sort_dn_uint128
     285           0 : #define SORT_KEY_T       uint128
     286           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     287             : #define SORT_IMPL_STYLE  2
     288             : #include "../tmpl/fd_sort.c"
     289             : #endif
     290             : 
     291             : #define SORT_NAME        fd_sort_dn_float
     292           0 : #define SORT_KEY_T       float
     293           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     294             : #define SORT_IMPL_STYLE  2
     295             : #include "../tmpl/fd_sort.c"
     296             : 
     297             : #if FD_HAS_DOUBLE
     298             : #define SORT_NAME        fd_sort_dn_double
     299           0 : #define SORT_KEY_T       double
     300           0 : #define SORT_BEFORE(a,b) ((a)>(b))
     301             : #define SORT_IMPL_STYLE  2
     302             : #include "../tmpl/fd_sort.c"
     303             : #endif
     304             : 

Generated by: LCOV version 1.14