LCOV - code coverage report
Current view: top level - tango/tempo - fd_tempo.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 130 175 74.3 %
Date: 2025-01-08 12:08:44 Functions: 6 6 100.0 %

          Line data    Source code
       1             : #include "../fd_tango.h"
       2             : 
       3             : #if FD_HAS_DOUBLE
       4             : 
       5             : double
       6          60 : fd_tempo_wallclock_model( double * opt_tau ) {
       7          60 :   static double t0;
       8          60 :   static double tau;
       9             : 
      10         126 :   FD_ONCE_BEGIN {
      11             : 
      12             :     /* Assuming fd_log_wallclock() observes the application wallclock at
      13             :        a consistent point between when the call was made and when it
      14             :        returns, the difference between two adjacent calls is an estimate
      15             :        of the number of ns required for a call.  We expect this
      16             :        difference to have a well defined minimum time with sporadic
      17             :        delays due to various sources of jitter.  The natural approach is
      18             :        to model call overhead then as a shifted exponential random
      19             :        variable.  To parameterize the model, we repeatedly measure how
      20             :        long a call takes.  The minimum of a bunch of IID samples is very
      21             :        fast converging for estimating the minimum but easily corrupted
      22             :        if there are weird outliers on the negative side.  As such, we
      23             :        use a robust estimator to estimate the minimal overhead and
      24             :        jitter. */
      25             : 
      26           3 :     ulong iter = 0UL;
      27           3 :     for(;;) { 
      28        1542 : #     define TRIAL_CNT 512UL
      29           6 : #     define TRIM_CNT  64UL
      30           3 :       double trial[ TRIAL_CNT ]; 
      31        1539 :       for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
      32        1536 :         FD_COMPILER_MFENCE();
      33        1536 :         long tic = fd_log_wallclock();
      34        1536 :         FD_COMPILER_MFENCE();
      35        1536 :         long toc = fd_log_wallclock();
      36        1536 :         FD_COMPILER_MFENCE();
      37        1536 :         trial[ trial_idx ] = (double)(toc - tic);
      38        1536 :         FD_COMPILER_MFENCE();
      39        1536 :       }
      40           3 :       double * sample     = trial + TRIM_CNT;
      41           3 :       ulong    sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
      42           3 :       ulong    thresh     = sample_cnt >> 1;
      43           3 :       if( FD_LIKELY( fd_stat_robust_exp_fit_double( &t0, &tau, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( t0>0. ) ) break;
      44           0 : #     undef TRIM_CNT
      45           0 : #     undef TRIAL_CNT
      46           0 :       iter++;
      47           0 :       if( iter==3UL ) {
      48           0 :         FD_LOG_WARNING(( "unable to model fd_log_wallclock() performance; using fallback and attempting to continue" ));
      49           0 :         t0 = 27.; tau = 1.;
      50           0 :         break;
      51           0 :       }
      52           0 :     }
      53             : 
      54           3 :   } FD_ONCE_END;
      55             : 
      56          60 :   if( opt_tau ) opt_tau[0] = tau;
      57          60 :   return t0;
      58          60 : }
      59             : 
      60             : double
      61          60 : fd_tempo_tickcount_model( double * opt_tau ) {
      62          60 :   static double t0;
      63          60 :   static double tau;
      64             : 
      65         126 :   FD_ONCE_BEGIN {
      66             : 
      67             :     /* Same as the above but for fd_tickcount(). */
      68             : 
      69           3 :     ulong iter = 0UL;
      70           3 :     for(;;) { 
      71        1542 : #     define TRIAL_CNT 512UL
      72           6 : #     define TRIM_CNT  64UL
      73           3 :       double trial[ TRIAL_CNT ]; 
      74        1539 :       for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
      75        1536 :         FD_COMPILER_MFENCE();
      76        1536 :         long tic = fd_tickcount();
      77        1536 :         FD_COMPILER_MFENCE();
      78        1536 :         long toc = fd_tickcount();
      79        1536 :         FD_COMPILER_MFENCE();
      80        1536 :         trial[ trial_idx ] = (double)(toc - tic);
      81        1536 :         FD_COMPILER_MFENCE();
      82        1536 :       }
      83           3 :       double * sample     = trial + TRIM_CNT;
      84           3 :       ulong    sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
      85           3 :       ulong    thresh     = sample_cnt >> 1;
      86           3 :       if( FD_LIKELY( fd_stat_robust_exp_fit_double( &t0, &tau, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( t0>0. ) ) break;
      87           0 : #     undef TRIM_CNT
      88           0 : #     undef TRIAL_CNT
      89           0 :       iter++;
      90           0 :       if( iter==3UL ) {
      91           0 :         FD_LOG_WARNING(( "unable to model fd_tickcount() performance; using fallback and attempting to continue" ));
      92           0 :         t0 = 24.; tau = 4.;
      93           0 :         break;
      94           0 :       }
      95           0 :     }
      96             : 
      97           3 :   } FD_ONCE_END;
      98             : 
      99          60 :   if( opt_tau ) opt_tau[0] = tau;
     100          60 :   return t0;
     101          60 : }
     102             : 
     103             : static double mu;
     104             : static double sigma;
     105             : static int explicit_set;
     106             : 
     107             : void
     108             : fd_tempo_set_tick_per_ns( double _mu,
     109           3 :                           double _sigma ) {
     110           3 :   explicit_set = 1;
     111           3 :   mu    = _mu;
     112           3 :   sigma = _sigma;
     113           3 : }
     114             : 
     115             : double
     116         186 : fd_tempo_tick_per_ns( double * opt_sigma ) {
     117             : 
     118         414 :   FD_ONCE_BEGIN {
     119             : 
     120             :     /* If the value has already been set explicitly, no need to sample. */
     121             : 
     122          21 :     if( FD_LIKELY( !explicit_set ) ) {
     123             : 
     124             :       /* We measure repeatedly how much the tickcount and wallclock change
     125             :          over the same approximately constant time interval.  We do a pair
     126             :          observations to minimize errors in computing the interval (note
     127             :          that any remaining jitters should be zero mean such that they
     128             :          should statistically cancel in the rate calculation).  We use a
     129             :          robust estimate to get the avg and rms in the face of random
     130             :          sources of noise, assuming the sample distribution is reasonably
     131             :          well modeled as normal. */
     132             : 
     133          18 :       ulong iter = 0UL;
     134          18 :       for(;;) { 
     135         612 :   #     define TRIAL_CNT 32UL
     136          36 :   #     define TRIM_CNT   4UL
     137          18 :         double trial[ TRIAL_CNT ]; 
     138         594 :         for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
     139         576 :           long then; long toc; fd_tempo_observe_pair( &then, &toc );
     140         576 :           fd_log_sleep( 16777216L ); /* ~16.8 ms */
     141         576 :           long now; long tic; fd_tempo_observe_pair( &now, &tic );
     142         576 :           trial[ trial_idx ] = (double)(tic-toc) / (double)(now-then);
     143         576 :         }
     144          18 :         double * sample     = trial + TRIM_CNT;
     145          18 :         ulong    sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
     146          18 :         ulong    thresh     = sample_cnt >> 1;
     147          18 :         if( FD_LIKELY( fd_stat_robust_norm_fit_double( &mu, &sigma, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( mu>0. ) )
     148          18 :           break;
     149           0 :   #     undef TRIM_CNT
     150           0 :   #     undef TRIAL_CNT
     151           0 :         iter++;
     152           0 :         if( iter==3UL ) {
     153           0 :           FD_LOG_WARNING(( "unable to measure tick_per_ns accurately; using fallback and attempting to continue" ));
     154           0 :           mu = 3.; sigma = 1e-7;
     155           0 :           break;
     156           0 :         }
     157           0 :       }
     158          18 :     }
     159             : 
     160          21 :   } FD_ONCE_END;
     161             : 
     162         186 :   if( opt_sigma ) opt_sigma[0] = sigma;
     163         186 :   return mu;
     164         186 : }
     165             : 
     166             : #endif
     167             : 
     168             : long
     169             : fd_tempo_observe_pair( long * opt_now,
     170        1203 :                        long * opt_tic ) {
     171        1203 :   long best_wc;
     172        1203 :   long best_tc;
     173        1203 :   long best_jt;
     174             : 
     175        1203 :   do {
     176             : 
     177             :     /* Do an alternating series of:
     178             : 
     179             :          tickcount
     180             :          wallclock
     181             :          tickcount
     182             :          wallclock
     183             :          tickcount
     184             :          ...
     185             :          wallclock
     186             :          tickcount
     187             : 
     188             :        observations and pick the wallclock observation that had the
     189             :        smallest elapsed number of ticks between adjacent tickcount
     190             :        observations.
     191             : 
     192             :        Since the wallclock / tickcounter returns a monotonically
     193             :        non-decreasing observation of the wallclock / tickcount at a
     194             :        point in time between when the call was made and when it
     195             :        returned, we know that this wallclock observation is the one we
     196             :        made that we know best when it was made in the tickcount stream.
     197             :        Further, we have lower and upper bounds of the value of the
     198             :        tickcounter in this read.  We start the alternation with the
     199             :        tickcount because that is typically the lower overhead, more
     200             :        deterministic one and less likely to get jerked around behind our
     201             :        back.
     202             :        
     203             :        Theoretically, this exploits how the minimum of a shifted
     204             :        exponential random variable converges.  Since the time to read
     205             :        the various clocks is expected to be reasonably modeled as a
     206             :        shifted exponential random variable, it doesn't take many trials
     207             :        to get something close to the minimum (estimating the minimum of
     208             :        a shifted exponential random variable takes way fewer samples and
     209             :        is way more accurate than say the estimating the average of a
     210             :        normally distributed random variable). */
     211             : 
     212       10827 : #   define TRIAL_CNT (4) /* 1 "warmup", 3 real reads */
     213             : 
     214        1203 :     long wc[ TRIAL_CNT+1 ];
     215        1203 :     long tc[ TRIAL_CNT+1 ];
     216        1203 :     FD_COMPILER_MFENCE();
     217        1203 :     tc[0] = fd_tickcount();
     218        1203 :     FD_COMPILER_MFENCE();
     219        6015 :     for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
     220        4812 :       wc[ trial_idx+1UL ] = fd_log_wallclock();
     221        4812 :       FD_COMPILER_MFENCE();
     222        4812 :       tc[ trial_idx+1UL ] = fd_tickcount();
     223        4812 :       FD_COMPILER_MFENCE();
     224        4812 :     }
     225             : 
     226        1203 :     best_wc = wc[1];
     227        1203 :     best_tc = tc[1];
     228        1203 :     best_jt = best_tc - tc[0];
     229        4812 :     for( ulong trial_idx=1UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
     230        3609 :       long wci = wc[ trial_idx+1UL ];
     231        3609 :       long tci = tc[ trial_idx+1UL ];
     232        3609 :       long jti = tci - tc[ trial_idx ];
     233        3609 :       int  c   = (jti<=best_jt);
     234        3609 :       best_wc  = fd_long_if( c, wci, best_wc );
     235        3609 :       best_tc  = fd_long_if( c, tci, best_tc );
     236        3609 :       best_jt  = fd_long_if( c, jti, best_jt );
     237        3609 :     }
     238             : 
     239        1203 : #   undef TRIAL_CNT
     240             : 
     241        1203 :   } while(0);
     242             : 
     243        1203 :   if( FD_UNLIKELY( best_jt<0L ) ) { /* paranoia */
     244           0 :     FD_LOG_WARNING(( "fd_tickcount() does not appear to be monotonic; joint read may not be accurate; attempting to continue" ));
     245           0 :     best_jt = 0L;
     246           0 :   }
     247             : 
     248        1203 :   if( opt_now ) opt_now[0] = best_wc;
     249        1203 :   if( opt_tic ) opt_tic[0] = best_tc - (best_jt>>1); /* Use lower and upper bound midpoint (could be improved statistically) */
     250        1203 :   return best_jt;
     251        1203 : }
     252             : 
     253             : ulong
     254             : fd_tempo_async_min( long  lazy,
     255             :                     ulong event_cnt,
     256          12 :                     float tick_per_ns ) {
     257          12 :   if( FD_UNLIKELY( !((1L<=lazy) & (lazy<(1L<<31))) ) ) {
     258           0 :     FD_LOG_WARNING(( "lazy should be in [1,2^31)" ));
     259           0 :     return 0UL;
     260           0 :   }
     261             : 
     262          12 :   if( FD_UNLIKELY( !((1UL<=event_cnt) & (event_cnt<(1UL<<31)) ) ) ) {
     263           0 :     FD_LOG_WARNING(( "event_cnt should be in [1,2^31)" ));
     264           0 :     return 0UL;
     265           0 :   }
     266             : 
     267          12 :   float tick_per_ns_max = FLT_MAX / (float)(1L<<31); /* exact, compile time, ~1.5e29 */
     268          12 :   if( FD_UNLIKELY( !((0.f<tick_per_ns) & (tick_per_ns<=tick_per_ns_max)) ) ) { /* robust against nan */
     269           0 :     FD_LOG_WARNING(( "tick_per_ns should in (0,~1.5e29)" ));
     270           0 :     return 0UL;
     271           0 :   }
     272             : 
     273          12 :   float _lazy         = (float)lazy;      /* typically exact, up to 0.5 ulp error if >~ 2^24 */
     274          12 :   float _event_cnt    = (float)event_cnt; /* typically exact, up to 0.5 ulp error if >~ 2^24 */
     275          12 :   float _async_target = (tick_per_ns*_lazy) / _event_cnt; /* non-negative finite result, O(1) ulp error typically */
     276             : 
     277          12 :   if( FD_UNLIKELY( !(1.f<=_async_target) ) ) {
     278           0 :     FD_LOG_WARNING(( "lazy, event_cnt and tick_per_ns imply an unreasonably small async_min" ));
     279           0 :     return 0UL;
     280           0 :   }
     281             : 
     282          12 :   if( FD_UNLIKELY( !(_async_target<((float)(1UL<<32))) ) ) {
     283           0 :     FD_LOG_WARNING(( "lazy, event_cnt and tick_per_ns imply an unreasonably large async_min" ));
     284           0 :     return 0UL;
     285           0 :   }
     286             : 
     287          12 :   ulong async_target = (ulong)_async_target;       /* in [1,2^32), O(1) ulp error typically (biased conservative) */
     288          12 :   return 1UL << fd_ulong_find_msb( async_target ); /* guaranteed power of 2 in [1,2^31] */
     289          12 : }
     290             : 

Generated by: LCOV version 1.14