LCOV - code coverage report
Current view: top level - tango/tempo - fd_tempo.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 130 175 74.3 %
Date: 2025-07-01 05:00:49 Functions: 6 6 100.0 %

          Line data    Source code
       1             : #include "../fd_tango.h"
       2             : #include "../../util/math/fd_stat.h"
       3             : 
       4             : #if FD_HAS_DOUBLE
       5             : 
       6             : double
       7          60 : fd_tempo_wallclock_model( double * opt_tau ) {
       8          60 :   static double t0;
       9          60 :   static double tau;
      10             : 
      11         126 :   FD_ONCE_BEGIN {
      12             : 
      13             :     /* Assuming fd_log_wallclock() observes the application wallclock at
      14             :        a consistent point between when the call was made and when it
      15             :        returns, the difference between two adjacent calls is an estimate
      16             :        of the number of ns required for a call.  We expect this
      17             :        difference to have a well defined minimum time with sporadic
      18             :        delays due to various sources of jitter.  The natural approach is
      19             :        to model call overhead then as a shifted exponential random
      20             :        variable.  To parameterize the model, we repeatedly measure how
      21             :        long a call takes.  The minimum of a bunch of IID samples is very
      22             :        fast converging for estimating the minimum but easily corrupted
      23             :        if there are weird outliers on the negative side.  As such, we
      24             :        use a robust estimator to estimate the minimal overhead and
      25             :        jitter. */
      26             : 
      27           3 :     ulong iter = 0UL;
      28           3 :     for(;;) {
      29        1542 : #     define TRIAL_CNT 512UL
      30           6 : #     define TRIM_CNT  64UL
      31           3 :       double trial[ TRIAL_CNT ];
      32        1539 :       for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
      33        1536 :         FD_COMPILER_MFENCE();
      34        1536 :         long tic = fd_log_wallclock();
      35        1536 :         FD_COMPILER_MFENCE();
      36        1536 :         long toc = fd_log_wallclock();
      37        1536 :         FD_COMPILER_MFENCE();
      38        1536 :         trial[ trial_idx ] = (double)(toc - tic);
      39        1536 :         FD_COMPILER_MFENCE();
      40        1536 :       }
      41           3 :       double * sample     = trial + TRIM_CNT;
      42           3 :       ulong    sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
      43           3 :       ulong    thresh     = sample_cnt >> 1;
      44           3 :       if( FD_LIKELY( fd_stat_robust_exp_fit_double( &t0, &tau, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( t0>0. ) ) break;
      45           0 : #     undef TRIM_CNT
      46           0 : #     undef TRIAL_CNT
      47           0 :       iter++;
      48           0 :       if( iter==3UL ) {
      49           0 :         FD_LOG_WARNING(( "unable to model fd_log_wallclock() performance; using fallback and attempting to continue" ));
      50           0 :         t0 = 27.; tau = 1.;
      51           0 :         break;
      52           0 :       }
      53           0 :     }
      54             : 
      55           3 :   } FD_ONCE_END;
      56             : 
      57          60 :   if( opt_tau ) opt_tau[0] = tau;
      58          60 :   return t0;
      59          60 : }
      60             : 
      61             : double
      62          60 : fd_tempo_tickcount_model( double * opt_tau ) {
      63          60 :   static double t0;
      64          60 :   static double tau;
      65             : 
      66         126 :   FD_ONCE_BEGIN {
      67             : 
      68             :     /* Same as the above but for fd_tickcount(). */
      69             : 
      70           3 :     ulong iter = 0UL;
      71           3 :     for(;;) {
      72        1542 : #     define TRIAL_CNT 512UL
      73           6 : #     define TRIM_CNT  64UL
      74           3 :       double trial[ TRIAL_CNT ];
      75        1539 :       for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
      76        1536 :         FD_COMPILER_MFENCE();
      77        1536 :         long tic = fd_tickcount();
      78        1536 :         FD_COMPILER_MFENCE();
      79        1536 :         long toc = fd_tickcount();
      80        1536 :         FD_COMPILER_MFENCE();
      81        1536 :         trial[ trial_idx ] = (double)(toc - tic);
      82        1536 :         FD_COMPILER_MFENCE();
      83        1536 :       }
      84           3 :       double * sample     = trial + TRIM_CNT;
      85           3 :       ulong    sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
      86           3 :       ulong    thresh     = sample_cnt >> 1;
      87           3 :       if( FD_LIKELY( fd_stat_robust_exp_fit_double( &t0, &tau, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( t0>0. ) ) break;
      88           0 : #     undef TRIM_CNT
      89           0 : #     undef TRIAL_CNT
      90           0 :       iter++;
      91           0 :       if( iter==3UL ) {
      92           0 :         FD_LOG_WARNING(( "unable to model fd_tickcount() performance; using fallback and attempting to continue" ));
      93           0 :         t0 = 24.; tau = 4.;
      94           0 :         break;
      95           0 :       }
      96           0 :     }
      97             : 
      98           3 :   } FD_ONCE_END;
      99             : 
     100          60 :   if( opt_tau ) opt_tau[0] = tau;
     101          60 :   return t0;
     102          60 : }
     103             : 
     104             : static double mu;
     105             : static double sigma;
     106             : static int explicit_set;
     107             : 
     108             : void
     109             : fd_tempo_set_tick_per_ns( double _mu,
     110           3 :                           double _sigma ) {
     111           3 :   explicit_set = 1;
     112           3 :   mu    = _mu;
     113           3 :   sigma = _sigma;
     114           3 : }
     115             : 
     116             : double
     117         183 : fd_tempo_tick_per_ns( double * opt_sigma ) {
     118             : 
     119         402 :   FD_ONCE_BEGIN {
     120             : 
     121             :     /* If the value has already been set explicitly, no need to sample. */
     122             : 
     123          18 :     if( FD_LIKELY( !explicit_set ) ) {
     124             : 
     125             :       /* We measure repeatedly how much the tickcount and wallclock change
     126             :          over the same approximately constant time interval.  We do a pair
     127             :          observations to minimize errors in computing the interval (note
     128             :          that any remaining jitters should be zero mean such that they
     129             :          should statistically cancel in the rate calculation).  We use a
     130             :          robust estimate to get the avg and rms in the face of random
     131             :          sources of noise, assuming the sample distribution is reasonably
     132             :          well modeled as normal. */
     133             : 
     134          15 :       ulong iter = 0UL;
     135          15 :       for(;;) {
     136         510 :   #     define TRIAL_CNT 32UL
     137          30 :   #     define TRIM_CNT   4UL
     138          15 :         double trial[ TRIAL_CNT ];
     139         495 :         for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
     140         480 :           long then; long toc; fd_tempo_observe_pair( &then, &toc );
     141         480 :           fd_log_sleep( 16777216L ); /* ~16.8 ms */
     142         480 :           long now; long tic; fd_tempo_observe_pair( &now, &tic );
     143         480 :           trial[ trial_idx ] = (double)(tic-toc) / (double)(now-then);
     144         480 :         }
     145          15 :         double * sample     = trial + TRIM_CNT;
     146          15 :         ulong    sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
     147          15 :         ulong    thresh     = sample_cnt >> 1;
     148          15 :         if( FD_LIKELY( fd_stat_robust_norm_fit_double( &mu, &sigma, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( mu>0. ) )
     149          15 :           break;
     150           0 :   #     undef TRIM_CNT
     151           0 :   #     undef TRIAL_CNT
     152           0 :         iter++;
     153           0 :         if( iter==3UL ) {
     154           0 :           FD_LOG_WARNING(( "unable to measure tick_per_ns accurately; using fallback and attempting to continue" ));
     155           0 :           mu = 3.; sigma = 1e-7;
     156           0 :           break;
     157           0 :         }
     158           0 :       }
     159          15 :     }
     160             : 
     161          18 :   } FD_ONCE_END;
     162             : 
     163         183 :   if( opt_sigma ) opt_sigma[0] = sigma;
     164         183 :   return mu;
     165         183 : }
     166             : 
     167             : #endif
     168             : 
     169             : long
     170             : fd_tempo_observe_pair( long * opt_now,
     171        1011 :                        long * opt_tic ) {
     172        1011 :   long best_wc;
     173        1011 :   long best_tc;
     174        1011 :   long best_jt;
     175             : 
     176        1011 :   do {
     177             : 
     178             :     /* Do an alternating series of:
     179             : 
     180             :          tickcount
     181             :          wallclock
     182             :          tickcount
     183             :          wallclock
     184             :          tickcount
     185             :          ...
     186             :          wallclock
     187             :          tickcount
     188             : 
     189             :        observations and pick the wallclock observation that had the
     190             :        smallest elapsed number of ticks between adjacent tickcount
     191             :        observations.
     192             : 
     193             :        Since the wallclock / tickcounter returns a monotonically
     194             :        non-decreasing observation of the wallclock / tickcount at a
     195             :        point in time between when the call was made and when it
     196             :        returned, we know that this wallclock observation is the one we
     197             :        made that we know best when it was made in the tickcount stream.
     198             :        Further, we have lower and upper bounds of the value of the
     199             :        tickcounter in this read.  We start the alternation with the
     200             :        tickcount because that is typically the lower overhead, more
     201             :        deterministic one and less likely to get jerked around behind our
     202             :        back.
     203             : 
     204             :        Theoretically, this exploits how the minimum of a shifted
     205             :        exponential random variable converges.  Since the time to read
     206             :        the various clocks is expected to be reasonably modeled as a
     207             :        shifted exponential random variable, it doesn't take many trials
     208             :        to get something close to the minimum (estimating the minimum of
     209             :        a shifted exponential random variable takes way fewer samples and
     210             :        is way more accurate than say the estimating the average of a
     211             :        normally distributed random variable). */
     212             : 
     213        9099 : #   define TRIAL_CNT (4) /* 1 "warmup", 3 real reads */
     214             : 
     215        1011 :     long wc[ TRIAL_CNT+1 ];
     216        1011 :     long tc[ TRIAL_CNT+1 ];
     217        1011 :     FD_COMPILER_MFENCE();
     218        1011 :     tc[0] = fd_tickcount();
     219        1011 :     FD_COMPILER_MFENCE();
     220        5055 :     for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
     221        4044 :       wc[ trial_idx+1UL ] = fd_log_wallclock();
     222        4044 :       FD_COMPILER_MFENCE();
     223        4044 :       tc[ trial_idx+1UL ] = fd_tickcount();
     224        4044 :       FD_COMPILER_MFENCE();
     225        4044 :     }
     226             : 
     227        1011 :     best_wc = wc[1];
     228        1011 :     best_tc = tc[1];
     229        1011 :     best_jt = best_tc - tc[0];
     230        4044 :     for( ulong trial_idx=1UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
     231        3033 :       long wci = wc[ trial_idx+1UL ];
     232        3033 :       long tci = tc[ trial_idx+1UL ];
     233        3033 :       long jti = tci - tc[ trial_idx ];
     234        3033 :       int  c   = (jti<=best_jt);
     235        3033 :       best_wc  = fd_long_if( c, wci, best_wc );
     236        3033 :       best_tc  = fd_long_if( c, tci, best_tc );
     237        3033 :       best_jt  = fd_long_if( c, jti, best_jt );
     238        3033 :     }
     239             : 
     240        1011 : #   undef TRIAL_CNT
     241             : 
     242        1011 :   } while(0);
     243             : 
     244        1011 :   if( FD_UNLIKELY( best_jt<0L ) ) { /* paranoia */
     245           0 :     FD_LOG_WARNING(( "fd_tickcount() does not appear to be monotonic; joint read may not be accurate; attempting to continue" ));
     246           0 :     best_jt = 0L;
     247           0 :   }
     248             : 
     249        1011 :   if( opt_now ) opt_now[0] = best_wc;
     250        1011 :   if( opt_tic ) opt_tic[0] = best_tc - (best_jt>>1); /* Use lower and upper bound midpoint (could be improved statistically) */
     251        1011 :   return best_jt;
     252        1011 : }
     253             : 
     254             : ulong
     255             : fd_tempo_async_min( long  lazy,
     256             :                     ulong event_cnt,
     257          12 :                     float tick_per_ns ) {
     258          12 :   if( FD_UNLIKELY( !((1L<=lazy) & (lazy<(1L<<31))) ) ) {
     259           0 :     FD_LOG_WARNING(( "lazy should be in [1,2^31)" ));
     260           0 :     return 0UL;
     261           0 :   }
     262             : 
     263          12 :   if( FD_UNLIKELY( !((1UL<=event_cnt) & (event_cnt<(1UL<<31)) ) ) ) {
     264           0 :     FD_LOG_WARNING(( "event_cnt should be in [1,2^31)" ));
     265           0 :     return 0UL;
     266           0 :   }
     267             : 
     268          12 :   float tick_per_ns_max = FLT_MAX / (float)(1L<<31); /* exact, compile time, ~1.5e29 */
     269          12 :   if( FD_UNLIKELY( !((0.f<tick_per_ns) & (tick_per_ns<=tick_per_ns_max)) ) ) { /* robust against nan */
     270           0 :     FD_LOG_WARNING(( "tick_per_ns should in (0,~1.5e29)" ));
     271           0 :     return 0UL;
     272           0 :   }
     273             : 
     274          12 :   float _lazy         = (float)lazy;      /* typically exact, up to 0.5 ulp error if >~ 2^24 */
     275          12 :   float _event_cnt    = (float)event_cnt; /* typically exact, up to 0.5 ulp error if >~ 2^24 */
     276          12 :   float _async_target = (tick_per_ns*_lazy) / _event_cnt; /* non-negative finite result, O(1) ulp error typically */
     277             : 
     278          12 :   if( FD_UNLIKELY( !(1.f<=_async_target) ) ) {
     279           0 :     FD_LOG_WARNING(( "lazy, event_cnt and tick_per_ns imply an unreasonably small async_min" ));
     280           0 :     return 0UL;
     281           0 :   }
     282             : 
     283          12 :   if( FD_UNLIKELY( !(_async_target<((float)(1UL<<32))) ) ) {
     284           0 :     FD_LOG_WARNING(( "lazy, event_cnt and tick_per_ns imply an unreasonably large async_min" ));
     285           0 :     return 0UL;
     286           0 :   }
     287             : 
     288          12 :   ulong async_target = (ulong)_async_target;       /* in [1,2^32), O(1) ulp error typically (biased conservative) */
     289          12 :   return 1UL << fd_ulong_find_msb( async_target ); /* guaranteed power of 2 in [1,2^31] */
     290          12 : }
     291             : 

Generated by: LCOV version 1.14