Line data Source code
1 : #include "../fd_tango.h"
2 : #include "../../util/math/fd_stat.h"
3 :
4 : #if FD_HAS_DOUBLE
5 :
/* fd_tempo_wallclock_model estimates the overhead of a fd_log_wallclock()
   call, modeled as a shifted exponential random variable with minimum
   t0 ns and jitter scale tau ns.  Returns t0 and, if opt_tau is
   non-NULL, writes tau to opt_tau[0].  The fit is computed on first
   invocation (guarded by FD_ONCE_BEGIN/END) and cached in statics for
   all subsequent calls. */

double
fd_tempo_wallclock_model( double * opt_tau ) {
  static double t0;
  static double tau;

  FD_ONCE_BEGIN {

    /* Assuming fd_log_wallclock() observes the application wallclock at
       a consistent point between when the call was made and when it
       returns, the difference between two adjacent calls is an estimate
       of the number of ns required for a call.  We expect this
       difference to have a well defined minimum time with sporadic
       delays due to various sources of jitter.  The natural approach is
       to model call overhead then as a shifted exponential random
       variable.  To parameterize the model, we repeatedly measure how
       long a call takes.  The minimum of a bunch of IID samples is very
       fast converging for estimating the minimum but easily corrupted
       if there are weird outliers on the negative side.  As such, we
       use a robust estimator to estimate the minimal overhead and
       jitter. */

    ulong iter = 0UL;
    for(;;) {
#     define TRIAL_CNT 512UL
#     define TRIM_CNT   64UL
      double trial[ TRIAL_CNT ];
      for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
        /* Fences keep the compiler from reordering the two reads or
           hoisting work into the timed window. */
        FD_COMPILER_MFENCE();
        long tic = fd_log_wallclock();
        FD_COMPILER_MFENCE();
        long toc = fd_log_wallclock();
        FD_COMPILER_MFENCE();
        trial[ trial_idx ] = (double)(toc - tic);
        FD_COMPILER_MFENCE();
      }
      /* Fit over the middle of the trial array, skipping the first and
         last TRIM_CNT observations (in acquisition order).  Accept the
         fit only if the fitter used more than half the samples and the
         estimated minimum is strictly positive. */
      double * sample     = trial + TRIM_CNT;
      ulong    sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
      ulong    thresh     = sample_cnt >> 1;
      if( FD_LIKELY( fd_stat_robust_exp_fit_double( &t0, &tau, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( t0>0. ) ) break;
#     undef TRIM_CNT
#     undef TRIAL_CNT
      iter++;
      if( iter==3UL ) {
        /* Three failed attempts: fall back on hardcoded ballpark values
           rather than spinning forever. */
        FD_LOG_WARNING(( "unable to model fd_log_wallclock() performance; using fallback and attempting to continue" ));
        t0 = 27.; tau = 1.;
        break;
      }
    }

  } FD_ONCE_END;

  if( opt_tau ) opt_tau[0] = tau;
  return t0;
}
60 :
/* fd_tempo_tickcount_model estimates the overhead of a fd_tickcount()
   call, modeled as a shifted exponential random variable with minimum
   t0 ticks and jitter scale tau ticks.  Returns t0 and, if opt_tau is
   non-NULL, writes tau to opt_tau[0].  The fit is computed on first
   invocation (guarded by FD_ONCE_BEGIN/END) and cached in statics for
   all subsequent calls. */

double
fd_tempo_tickcount_model( double * opt_tau ) {
  static double t0;
  static double tau;

  FD_ONCE_BEGIN {

    /* Same as the above but for fd_tickcount(). */

    ulong iter = 0UL;
    for(;;) {
#     define TRIAL_CNT 512UL
#     define TRIM_CNT   64UL
      double trial[ TRIAL_CNT ];
      for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
        /* Fences keep the compiler from reordering the two reads or
           hoisting work into the timed window. */
        FD_COMPILER_MFENCE();
        long tic = fd_tickcount();
        FD_COMPILER_MFENCE();
        long toc = fd_tickcount();
        FD_COMPILER_MFENCE();
        trial[ trial_idx ] = (double)(toc - tic);
        FD_COMPILER_MFENCE();
      }
      /* Fit over the middle of the trial array, skipping the first and
         last TRIM_CNT observations (in acquisition order).  Accept the
         fit only if the fitter used more than half the samples and the
         estimated minimum is strictly positive. */
      double * sample     = trial + TRIM_CNT;
      ulong    sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
      ulong    thresh     = sample_cnt >> 1;
      if( FD_LIKELY( fd_stat_robust_exp_fit_double( &t0, &tau, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( t0>0. ) ) break;
#     undef TRIM_CNT
#     undef TRIAL_CNT
      iter++;
      if( iter==3UL ) {
        /* Three failed attempts: fall back on hardcoded ballpark values
           rather than spinning forever. */
        FD_LOG_WARNING(( "unable to model fd_tickcount() performance; using fallback and attempting to continue" ));
        t0 = 24.; tau = 4.;
        break;
      }
    }

  } FD_ONCE_END;

  if( opt_tau ) opt_tau[0] = tau;
  return t0;
}
103 :
/* Cached tick-per-ns model shared by fd_tempo_set_tick_per_ns and
   fd_tempo_tick_per_ns.  mu is the observed average fd_tickcount()
   ticks elapsed per fd_log_wallclock() ns, sigma its observed rms.
   explicit_set is non-zero if the values were supplied by the caller
   (in which case fd_tempo_tick_per_ns skips measurement). */

static double mu;
static double sigma;
static int    explicit_set;

/* fd_tempo_set_tick_per_ns explicitly sets the (mu,sigma) tick rate
   model used by fd_tempo_tick_per_ns, bypassing the measurement it
   would otherwise do on first use.  NOTE(review): plain non-atomic
   stores; presumably meant to be called before any thread calls
   fd_tempo_tick_per_ns — confirm intended usage. */

void
fd_tempo_set_tick_per_ns( double _mu,
                          double _sigma ) {
  explicit_set = 1;
  mu           = _mu;
  sigma        = _sigma;
}
115 :
/* fd_tempo_tick_per_ns returns the observed average rate mu of
   fd_tickcount() ticks per fd_log_wallclock() ns and, if opt_sigma is
   non-NULL, writes the observed rms sigma of that rate to opt_sigma[0].
   The rate is measured on first call (guarded by FD_ONCE_BEGIN/END)
   unless previously provided via fd_tempo_set_tick_per_ns, and is
   cached in statics thereafter.  Note the first call is expensive
   (~32 sleeps of ~16.8 ms each). */

double
fd_tempo_tick_per_ns( double * opt_sigma ) {

  FD_ONCE_BEGIN {

    /* If the value has already been set explicitly, no need to sample. */

    if( FD_LIKELY( !explicit_set ) ) {

      /* We measure repeatedly how much the tickcount and wallclock change
         over the same approximately constant time interval.  We do a pair
         observations to minimize errors in computing the interval (note
         that any remaining jitters should be zero mean such that they
         should statistically cancel in the rate calculation).  We use a
         robust estimate to get the avg and rms in the face of random
         sources of noise, assuming the sample distribution is reasonably
         well modeled as normal. */

      ulong iter = 0UL;
      for(;;) {
#       define TRIAL_CNT 32UL
#       define TRIM_CNT   4UL
        double trial[ TRIAL_CNT ];
        for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
          /* Joint (wallclock,tickcount) reads bracketing the sleep give
             tick and ns deltas over the same interval. */
          long then; long toc; fd_tempo_observe_pair( &then, &toc );
          fd_log_sleep( 16777216L ); /* ~16.8 ms */
          long now; long tic; fd_tempo_observe_pair( &now, &tic );
          trial[ trial_idx ] = (double)(tic-toc) / (double)(now-then); /* ticks per ns over this interval */
        }
        /* Fit over the middle of the trial array, skipping the first and
           last TRIM_CNT observations (in acquisition order).  Accept the
           fit only if the fitter used more than half the samples and the
           estimated rate is strictly positive. */
        double * sample     = trial + TRIM_CNT;
        ulong    sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
        ulong    thresh     = sample_cnt >> 1;
        if( FD_LIKELY( fd_stat_robust_norm_fit_double( &mu, &sigma, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( mu>0. ) )
          break;
#       undef TRIM_CNT
#       undef TRIAL_CNT
        iter++;
        if( iter==3UL ) {
          /* Three failed attempts: fall back on hardcoded ballpark
             values rather than spinning forever. */
          FD_LOG_WARNING(( "unable to measure tick_per_ns accurately; using fallback and attempting to continue" ));
          mu = 3.; sigma = 1e-7;
          break;
        }
      }
    }

  } FD_ONCE_END;

  if( opt_sigma ) opt_sigma[0] = sigma;
  return mu;
}
166 :
167 : #endif
168 :
/* fd_tempo_observe_pair jointly observes the wallclock
   (fd_log_wallclock, ns) and the tickcount (fd_tickcount, ticks) at
   approximately the same instant.  If opt_now is non-NULL, opt_now[0]
   will hold the wallclock observation; if opt_tic is non-NULL,
   opt_tic[0] will hold the estimated tickcount at that same instant
   (midpoint of the bracketing tickcount reads).  Returns the width in
   ticks of the interval bracketing the wallclock read (a measure of the
   pairing uncertainty; 0 is ideal, always non-negative). */

long
fd_tempo_observe_pair( long * opt_now,
                       long * opt_tic ) {
  long best_wc;
  long best_tc;
  long best_jt;

  do {

    /* Do an alternating series of:

         tickcount
         wallclock
         tickcount
         wallclock
         tickcount
         ...
         wallclock
         tickcount

       observations and pick the wallclock observation that had the
       smallest elapsed number of ticks between adjacent tickcount
       observations.

       Since the wallclock / tickcounter returns a monotonically
       non-decreasing observation of the wallclock / tickcount at a
       point in time between when the call was made and when it
       returned, we know that this wallclock observation is the one we
       made that we know best when it was made in the tickcount stream.
       Further, we have lower and upper bounds of the value of the
       tickcounter in this read.  We start the alternation with the
       tickcount because that is typically the lower overhead, more
       deterministic one and less likely to get jerked around behind our
       back.

       Theoretically, this exploits how the minimum of a shifted
       exponential random variable converges.  Since the time to read
       the various clocks is expected to be reasonably modeled as a
       shifted exponential random variable, it doesn't take many trials
       to get something close to the minimum (estimating the minimum of
       a shifted exponential random variable takes way fewer samples and
       is way more accurate than say the estimating the average of a
       normally distributed random variable). */

#   define TRIAL_CNT (4) /* 1 "warmup", 3 real reads */

    long wc[ TRIAL_CNT+1 ];
    long tc[ TRIAL_CNT+1 ];
    /* Fences pin the read ordering so each wc[i] is genuinely bracketed
       by tc[i-1] and tc[i]. */
    FD_COMPILER_MFENCE();
    tc[0] = fd_tickcount();
    FD_COMPILER_MFENCE();
    for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
      wc[ trial_idx+1UL ] = fd_log_wallclock();
      FD_COMPILER_MFENCE();
      tc[ trial_idx+1UL ] = fd_tickcount();
      FD_COMPILER_MFENCE();
    }

    /* Select the wallclock read with the tightest tick bracket.  Note
       trial 0 (the warmup) seeds the running best and can only be kept
       if no later trial is at least as tight (<=). */
    best_wc = wc[1];
    best_tc = tc[1];
    best_jt = best_tc - tc[0];
    for( ulong trial_idx=1UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
      long wci = wc[ trial_idx+1UL ];
      long tci = tc[ trial_idx+1UL ];
      long jti = tci - tc[ trial_idx ];
      int  c   = (jti<=best_jt); /* branchless keep-if-tighter */
      best_wc  = fd_long_if( c, wci, best_wc );
      best_tc  = fd_long_if( c, tci, best_tc );
      best_jt  = fd_long_if( c, jti, best_jt );
    }

#   undef TRIAL_CNT

  } while(0);

  if( FD_UNLIKELY( best_jt<0L ) ) { /* paranoia */
    FD_LOG_WARNING(( "fd_tickcount() does not appear to be monotonic; joint read may not be accurate; attempting to continue" ));
    best_jt = 0L;
  }

  if( opt_now ) opt_now[0] = best_wc;
  if( opt_tic ) opt_tic[0] = best_tc - (best_jt>>1); /* Use lower and upper bound midpoint (could be improved statistically) */
  return best_jt;
}
253 :
254 : ulong
255 : fd_tempo_async_min( long lazy,
256 : ulong event_cnt,
257 12 : float tick_per_ns ) {
258 12 : if( FD_UNLIKELY( !((1L<=lazy) & (lazy<(1L<<31))) ) ) {
259 0 : FD_LOG_WARNING(( "lazy should be in [1,2^31)" ));
260 0 : return 0UL;
261 0 : }
262 :
263 12 : if( FD_UNLIKELY( !((1UL<=event_cnt) & (event_cnt<(1UL<<31)) ) ) ) {
264 0 : FD_LOG_WARNING(( "event_cnt should be in [1,2^31)" ));
265 0 : return 0UL;
266 0 : }
267 :
268 12 : float tick_per_ns_max = FLT_MAX / (float)(1L<<31); /* exact, compile time, ~1.5e29 */
269 12 : if( FD_UNLIKELY( !((0.f<tick_per_ns) & (tick_per_ns<=tick_per_ns_max)) ) ) { /* robust against nan */
270 0 : FD_LOG_WARNING(( "tick_per_ns should in (0,~1.5e29)" ));
271 0 : return 0UL;
272 0 : }
273 :
274 12 : float _lazy = (float)lazy; /* typically exact, up to 0.5 ulp error if >~ 2^24 */
275 12 : float _event_cnt = (float)event_cnt; /* typically exact, up to 0.5 ulp error if >~ 2^24 */
276 12 : float _async_target = (tick_per_ns*_lazy) / _event_cnt; /* non-negative finite result, O(1) ulp error typically */
277 :
278 12 : if( FD_UNLIKELY( !(1.f<=_async_target) ) ) {
279 0 : FD_LOG_WARNING(( "lazy, event_cnt and tick_per_ns imply an unreasonably small async_min" ));
280 0 : return 0UL;
281 0 : }
282 :
283 12 : if( FD_UNLIKELY( !(_async_target<((float)(1UL<<32))) ) ) {
284 0 : FD_LOG_WARNING(( "lazy, event_cnt and tick_per_ns imply an unreasonably large async_min" ));
285 0 : return 0UL;
286 0 : }
287 :
288 12 : ulong async_target = (ulong)_async_target; /* in [1,2^32), O(1) ulp error typically (biased conservative) */
289 12 : return 1UL << fd_ulong_find_msb( async_target ); /* guaranteed power of 2 in [1,2^31] */
290 12 : }
291 :
|