Line data Source code
1 : #include "../fd_tango.h"
2 :
3 : #if FD_HAS_DOUBLE
4 :
5 : double
6 60 : fd_tempo_wallclock_model( double * opt_tau ) {
7 60 : static double t0;
8 60 : static double tau;
9 :
10 126 : FD_ONCE_BEGIN {
11 :
12 : /* Assuming fd_log_wallclock() observes the application wallclock at
13 : a consistent point between when the call was made and when it
14 : returns, the difference between two adjacent calls is an estimate
15 : of the number of ns required for a call. We expect this
16 : difference to have a well defined minimum time with sporadic
17 : delays due to various sources of jitter. The natural approach is
18 : to model call overhead then as a shifted exponential random
19 : variable. To parameterize the model, we repeatedly measure how
20 : long a call takes. The minimum of a bunch of IID samples is very
21 : fast converging for estimating the minimum but easily corrupted
22 : if there are weird outliers on the negative side. As such, we
23 : use a robust estimator to estimate the minimal overhead and
24 : jitter. */
25 :
26 3 : ulong iter = 0UL;
27 3 : for(;;) {
28 1542 : # define TRIAL_CNT 512UL
29 6 : # define TRIM_CNT 64UL
30 3 : double trial[ TRIAL_CNT ];
31 1539 : for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
32 1536 : FD_COMPILER_MFENCE();
33 1536 : long tic = fd_log_wallclock();
34 1536 : FD_COMPILER_MFENCE();
35 1536 : long toc = fd_log_wallclock();
36 1536 : FD_COMPILER_MFENCE();
37 1536 : trial[ trial_idx ] = (double)(toc - tic);
38 1536 : FD_COMPILER_MFENCE();
39 1536 : }
40 3 : double * sample = trial + TRIM_CNT;
41 3 : ulong sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
42 3 : ulong thresh = sample_cnt >> 1;
43 3 : if( FD_LIKELY( fd_stat_robust_exp_fit_double( &t0, &tau, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( t0>0. ) ) break;
44 0 : # undef TRIM_CNT
45 0 : # undef TRIAL_CNT
46 0 : iter++;
47 0 : if( iter==3UL ) {
48 0 : FD_LOG_WARNING(( "unable to model fd_log_wallclock() performance; using fallback and attempting to continue" ));
49 0 : t0 = 27.; tau = 1.;
50 0 : break;
51 0 : }
52 0 : }
53 :
54 3 : } FD_ONCE_END;
55 :
56 60 : if( opt_tau ) opt_tau[0] = tau;
57 60 : return t0;
58 60 : }
59 :
60 : double
61 60 : fd_tempo_tickcount_model( double * opt_tau ) {
62 60 : static double t0;
63 60 : static double tau;
64 :
65 126 : FD_ONCE_BEGIN {
66 :
67 : /* Same as the above but for fd_tickcount(). */
68 :
69 3 : ulong iter = 0UL;
70 3 : for(;;) {
71 1542 : # define TRIAL_CNT 512UL
72 6 : # define TRIM_CNT 64UL
73 3 : double trial[ TRIAL_CNT ];
74 1539 : for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
75 1536 : FD_COMPILER_MFENCE();
76 1536 : long tic = fd_tickcount();
77 1536 : FD_COMPILER_MFENCE();
78 1536 : long toc = fd_tickcount();
79 1536 : FD_COMPILER_MFENCE();
80 1536 : trial[ trial_idx ] = (double)(toc - tic);
81 1536 : FD_COMPILER_MFENCE();
82 1536 : }
83 3 : double * sample = trial + TRIM_CNT;
84 3 : ulong sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
85 3 : ulong thresh = sample_cnt >> 1;
86 3 : if( FD_LIKELY( fd_stat_robust_exp_fit_double( &t0, &tau, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( t0>0. ) ) break;
87 0 : # undef TRIM_CNT
88 0 : # undef TRIAL_CNT
89 0 : iter++;
90 0 : if( iter==3UL ) {
91 0 : FD_LOG_WARNING(( "unable to model fd_tickcount() performance; using fallback and attempting to continue" ));
92 0 : t0 = 24.; tau = 4.;
93 0 : break;
94 0 : }
95 0 : }
96 :
97 3 : } FD_ONCE_END;
98 :
99 60 : if( opt_tau ) opt_tau[0] = tau;
100 60 : return t0;
101 60 : }
102 :
103 : static double mu;
104 : static double sigma;
105 : static int explicit_set;
106 :
107 : void
108 : fd_tempo_set_tick_per_ns( double _mu,
109 3 : double _sigma ) {
110 3 : explicit_set = 1;
111 3 : mu = _mu;
112 3 : sigma = _sigma;
113 3 : }
114 :
115 : double
116 186 : fd_tempo_tick_per_ns( double * opt_sigma ) {
117 :
118 414 : FD_ONCE_BEGIN {
119 :
120 : /* If the value has already been set explicitly, no need to sample. */
121 :
122 21 : if( FD_LIKELY( !explicit_set ) ) {
123 :
124 : /* We measure repeatedly how much the tickcount and wallclock change
125 : over the same approximately constant time interval. We do a pair
126 : observations to minimize errors in computing the interval (note
127 : that any remaining jitters should be zero mean such that they
128 : should statistically cancel in the rate calculation). We use a
129 : robust estimate to get the avg and rms in the face of random
130 : sources of noise, assuming the sample distribution is reasonably
131 : well modeled as normal. */
132 :
133 18 : ulong iter = 0UL;
134 18 : for(;;) {
135 612 : # define TRIAL_CNT 32UL
136 36 : # define TRIM_CNT 4UL
137 18 : double trial[ TRIAL_CNT ];
138 594 : for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
139 576 : long then; long toc; fd_tempo_observe_pair( &then, &toc );
140 576 : fd_log_sleep( 16777216L ); /* ~16.8 ms */
141 576 : long now; long tic; fd_tempo_observe_pair( &now, &tic );
142 576 : trial[ trial_idx ] = (double)(tic-toc) / (double)(now-then);
143 576 : }
144 18 : double * sample = trial + TRIM_CNT;
145 18 : ulong sample_cnt = TRIAL_CNT - 2UL*TRIM_CNT;
146 18 : ulong thresh = sample_cnt >> 1;
147 18 : if( FD_LIKELY( fd_stat_robust_norm_fit_double( &mu, &sigma, sample, sample_cnt, sample )>thresh ) && FD_LIKELY( mu>0. ) )
148 18 : break;
149 0 : # undef TRIM_CNT
150 0 : # undef TRIAL_CNT
151 0 : iter++;
152 0 : if( iter==3UL ) {
153 0 : FD_LOG_WARNING(( "unable to measure tick_per_ns accurately; using fallback and attempting to continue" ));
154 0 : mu = 3.; sigma = 1e-7;
155 0 : break;
156 0 : }
157 0 : }
158 18 : }
159 :
160 21 : } FD_ONCE_END;
161 :
162 186 : if( opt_sigma ) opt_sigma[0] = sigma;
163 186 : return mu;
164 186 : }
165 :
166 : #endif
167 :
168 : long
169 : fd_tempo_observe_pair( long * opt_now,
170 1203 : long * opt_tic ) {
171 1203 : long best_wc;
172 1203 : long best_tc;
173 1203 : long best_jt;
174 :
175 1203 : do {
176 :
177 : /* Do an alternating series of:
178 :
179 : tickcount
180 : wallclock
181 : tickcount
182 : wallclock
183 : tickcount
184 : ...
185 : wallclock
186 : tickcount
187 :
188 : observations and pick the wallclock observation that had the
189 : smallest elapsed number of ticks between adjacent tickcount
190 : observations.
191 :
192 : Since the wallclock / tickcounter returns a monotonically
193 : non-decreasing observation of the wallclock / tickcount at a
194 : point in time between when the call was made and when it
195 : returned, we know that this wallclock observation is the one we
196 : made that we know best when it was made in the tickcount stream.
197 : Further, we have lower and upper bounds of the value of the
198 : tickcounter in this read. We start the alternation with the
199 : tickcount because that is typically the lower overhead, more
200 : deterministic one and less likely to get jerked around behind our
201 : back.
202 :
203 : Theoretically, this exploits how the minimum of a shifted
204 : exponential random variable converges. Since the time to read
205 : the various clocks is expected to be reasonably modeled as a
206 : shifted exponential random variable, it doesn't take many trials
207 : to get something close to the minimum (estimating the minimum of
208 : a shifted exponential random variable takes way fewer samples and
209 : is way more accurate than say the estimating the average of a
210 : normally distributed random variable). */
211 :
212 10827 : # define TRIAL_CNT (4) /* 1 "warmup", 3 real reads */
213 :
214 1203 : long wc[ TRIAL_CNT+1 ];
215 1203 : long tc[ TRIAL_CNT+1 ];
216 1203 : FD_COMPILER_MFENCE();
217 1203 : tc[0] = fd_tickcount();
218 1203 : FD_COMPILER_MFENCE();
219 6015 : for( ulong trial_idx=0UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
220 4812 : wc[ trial_idx+1UL ] = fd_log_wallclock();
221 4812 : FD_COMPILER_MFENCE();
222 4812 : tc[ trial_idx+1UL ] = fd_tickcount();
223 4812 : FD_COMPILER_MFENCE();
224 4812 : }
225 :
226 1203 : best_wc = wc[1];
227 1203 : best_tc = tc[1];
228 1203 : best_jt = best_tc - tc[0];
229 4812 : for( ulong trial_idx=1UL; trial_idx<TRIAL_CNT; trial_idx++ ) {
230 3609 : long wci = wc[ trial_idx+1UL ];
231 3609 : long tci = tc[ trial_idx+1UL ];
232 3609 : long jti = tci - tc[ trial_idx ];
233 3609 : int c = (jti<=best_jt);
234 3609 : best_wc = fd_long_if( c, wci, best_wc );
235 3609 : best_tc = fd_long_if( c, tci, best_tc );
236 3609 : best_jt = fd_long_if( c, jti, best_jt );
237 3609 : }
238 :
239 1203 : # undef TRIAL_CNT
240 :
241 1203 : } while(0);
242 :
243 1203 : if( FD_UNLIKELY( best_jt<0L ) ) { /* paranoia */
244 0 : FD_LOG_WARNING(( "fd_tickcount() does not appear to be monotonic; joint read may not be accurate; attempting to continue" ));
245 0 : best_jt = 0L;
246 0 : }
247 :
248 1203 : if( opt_now ) opt_now[0] = best_wc;
249 1203 : if( opt_tic ) opt_tic[0] = best_tc - (best_jt>>1); /* Use lower and upper bound midpoint (could be improved statistically) */
250 1203 : return best_jt;
251 1203 : }
252 :
253 : ulong
254 : fd_tempo_async_min( long lazy,
255 : ulong event_cnt,
256 12 : float tick_per_ns ) {
257 12 : if( FD_UNLIKELY( !((1L<=lazy) & (lazy<(1L<<31))) ) ) {
258 0 : FD_LOG_WARNING(( "lazy should be in [1,2^31)" ));
259 0 : return 0UL;
260 0 : }
261 :
262 12 : if( FD_UNLIKELY( !((1UL<=event_cnt) & (event_cnt<(1UL<<31)) ) ) ) {
263 0 : FD_LOG_WARNING(( "event_cnt should be in [1,2^31)" ));
264 0 : return 0UL;
265 0 : }
266 :
267 12 : float tick_per_ns_max = FLT_MAX / (float)(1L<<31); /* exact, compile time, ~1.5e29 */
268 12 : if( FD_UNLIKELY( !((0.f<tick_per_ns) & (tick_per_ns<=tick_per_ns_max)) ) ) { /* robust against nan */
269 0 : FD_LOG_WARNING(( "tick_per_ns should in (0,~1.5e29)" ));
270 0 : return 0UL;
271 0 : }
272 :
273 12 : float _lazy = (float)lazy; /* typically exact, up to 0.5 ulp error if >~ 2^24 */
274 12 : float _event_cnt = (float)event_cnt; /* typically exact, up to 0.5 ulp error if >~ 2^24 */
275 12 : float _async_target = (tick_per_ns*_lazy) / _event_cnt; /* non-negative finite result, O(1) ulp error typically */
276 :
277 12 : if( FD_UNLIKELY( !(1.f<=_async_target) ) ) {
278 0 : FD_LOG_WARNING(( "lazy, event_cnt and tick_per_ns imply an unreasonably small async_min" ));
279 0 : return 0UL;
280 0 : }
281 :
282 12 : if( FD_UNLIKELY( !(_async_target<((float)(1UL<<32))) ) ) {
283 0 : FD_LOG_WARNING(( "lazy, event_cnt and tick_per_ns imply an unreasonably large async_min" ));
284 0 : return 0UL;
285 0 : }
286 :
287 12 : ulong async_target = (ulong)_async_target; /* in [1,2^32), O(1) ulp error typically (biased conservative) */
288 12 : return 1UL << fd_ulong_find_msb( async_target ); /* guaranteed power of 2 in [1,2^31] */
289 12 : }
290 :
|