#include "fd_clock.h"

/* Let x denote x-clock observations, y denote y-clock observations and
   y_est denote estimates of the y-clock from the x-clock.  During a
   clock epoch, we use a linear approximation for y_est:

     y_est(now) = y_eff(epoch_start) + m(epoch)*( x(now) - x(epoch_start) )

   Given a short enough epoch, the jointly observed values of x-clock
   and y-clock at the epoch start and stop are well approximated as
   linearly related:

     x_actual(epoch_end) ~ x_actual(epoch_start) + w_actual(epoch)( y_actual(epoch_end) - y_actual(epoch_start) )

   where w_actual gives the x-tick per y-tick rate between the clocks
   during the epoch.  Unfortunately, we can't jointly observe the two
   clocks infinitely precisely.  We assume that:

     x_actual(sample) = x_obs(sample) + delta_x(sample)
     y_actual(sample) = y_obs(sample)

   where delta_x(sample) represents the combined effects of
   quantization error (due to finite x-tick size) and synchronization
   error (due to not observing x_obs(sample) at the exact time
   y_obs(sample) was observed on the y-clock).  That is, we assume:

     x_obs(epoch_end) + delta_x(epoch_end) = x_obs(epoch_start) + delta_x(epoch_start)
                                           + w_actual(epoch)( y_obs(epoch_end) - y_obs(epoch_start) )

   Then w_actual(epoch) = w_obs(epoch) + delta_w(epoch) where:

                      x_obs(epoch_end) - x_obs(epoch_start)
     w_obs(epoch)   = -------------------------------------
                      y_obs(epoch_end) - y_obs(epoch_start)

   and:

                      delta_x(epoch_end) - delta_x(epoch_start)
     delta_w(epoch) = -----------------------------------------
                      y_obs(epoch_end) - y_obs(epoch_start)

   Assuming, quite reasonably, the delta_x(sample) all have the same
   mean (or, even stronger but still reasonable, are IID), the
   numerator above has zero mean, so delta_w(epoch) has zero mean and
   w_obs(epoch) is an unbiased estimate of w_actual(epoch).

   Since we expect w_actual(epoch) to be nearly constant from epoch to
   epoch (i.e. these are clocks), we can use a decaying average filter
   to compute an estimate for the next epoch, w_est(next_epoch), from
   this epoch's estimate w_est(epoch) and observation w_obs(epoch):

     w_est(next_epoch) = w_est(epoch) + alpha ( w_obs(epoch) - w_est(epoch) )

   Here:

     alpha = 1 / (1 + hist)

   is in (0,1] and hist is non-negative.  hist can be thought of as the
   number of previous observations to include in w_est(next_epoch).
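
   For example, hist = 3 gives alpha = 1/4: each recalibration moves
   w_est a quarter of the way toward the latest w_obs, so an individual
   observation's influence decays geometrically by a factor of 3/4 per
   epoch (roughly, an average over the last ~4 observations).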

   If w_actual is strictly constant from epoch to epoch,
   w_est(next_epoch) is an unbiased estimate of w_actual assuming
   w_est(epoch) is also an unbiased estimate.

   In practice, we expect w_actual(epoch) to slowly vary from epoch to
   epoch.  The more epochs we include in the average (i.e. the larger
   hist), the more accurate w_est(epoch) will be but the less quickly
   w_est(epoch) will adapt to changes in w_actual(epoch).

   Given a reasonably accurate w_est(next_epoch), we want to create a
   relationship between the x-clock and y-clock that preserves
   monotonicity of y_est from epoch to epoch while having no asymptotic
   clock drift between y_est and y_obs.

   To that end, if y_est(epoch_end) is less than or equal to
   y_obs(epoch_end), we can correct for all accumulated clock drift
   immediately without breaking monotonicity by simply forward stepping
   y_eff(next_epoch_start) to y_obs(epoch_end) and using
   1/w_est(next_epoch) directly for m(next_epoch):

     y_eff(next_epoch_start) = y_obs(epoch_end)
     m(next_epoch)           = 1 / w_est(next_epoch)

   Unfortunately, this can break monotonicity when y_est(epoch_end) is
   greater than y_obs(epoch_end).  In this case, let frac be the
   fraction of this clock drift we want to absorb during the next
   epoch.  If we know the next clock epoch will be at most epoch_max
   y-ticks long, we can reduce m(next_epoch) to absorb the drift:

     y_eff(next_epoch_start) = y_est(epoch_end)

                     1 - beta ( y_est(epoch_end) - y_obs(epoch_end) )
     m(next_epoch) = ------------------------------------------------
                                  w_est(next_epoch)

   where:

     beta = frac / epoch_max
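
   For example, if the y-clock ticks in ns, epochs are at most ~1 s
   (epoch_max = 1e9 y-ticks) and frac = 1/2, then beta = 5e-10 per
   y-tick.  A 1 ms overestimate (y_est - y_obs = 1e6 ns) then scales
   m(next_epoch) by 1 - 5e-4, recovering ~0.5 ms (half the drift) over
   the next epoch.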

   To ensure m(next_epoch) is always positive (and thus preserve
   monotonicity), we tweak the above into:

                                              1
     m(next_epoch) = --------------------------------------------------------------------
                     w_est(next_epoch) ( 1 + beta ( y_est(epoch_end) - y_obs(epoch_end) ) )

   This is asymptotically identical to the above in the (common case)
   limit:

     beta (y_est-y_obs) << 1

   and asymptotes to zero when:

     beta (y_est-y_obs) >> 1.
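
   For example, at beta (y_est-y_obs) = 1, the untweaked linear form
   above would drive m(next_epoch) all the way to zero (and negative
   for any larger drift, breaking monotonicity) while the tweaked form
   gives m(next_epoch) = 1 / ( 2 w_est(next_epoch) ), still strictly
   positive.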

   These can all be combined into a branchless implementation via:

                               x_obs(epoch_end) - x_obs(epoch_start)
     w_obs(epoch)            = -------------------------------------
                               y_obs(epoch_end) - y_obs(epoch_start)

     w_est(next_epoch)       = w_est(epoch) + alpha ( w_obs(epoch) - w_est(epoch) )

     y_est(epoch_end)        = y_eff(epoch_start) + m(epoch) ( x_obs(epoch_end) - x_obs(epoch_start) )

     y_eff(next_epoch_start) = max( y_obs(epoch_end), y_est(epoch_end) )

                                                            1
     m(next_epoch)           = ---------------------------------------------------------------------------
                               w_est(next_epoch) ( 1 + beta ( y_eff(next_epoch_start) - y_obs(epoch_end) ) )

   This is the basic recalibration update used below. */
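
/* The following sketch is purely illustrative and is not compiled: a
   minimal double precision rendering of the branchless recalibration
   update above, with a hypothetical plain C struct standing in for the
   packed epoch state used by the actual implementation below. */

#if 0
typedef struct {
  long   x0;     /* x-clock at epoch start */
  long   y0;     /* y-clock at epoch start */
  double w;      /* estimated x-ticks per y-tick over the epoch */
  long   y0_eff; /* effective (monotonicity preserving) y-clock at epoch start */
  double m;      /* y-ticks per x-tick conversion rate over the epoch */
} epoch_est_t;

/* Given the current epoch estimate est, a joint clock observation
   (x1,y1) at the epoch end and the filter parameters alpha and beta,
   returns the next epoch's estimate per the update derived above. */

static epoch_est_t
recal_update( epoch_est_t est,
              long        x1,
              long        y1,
              double      alpha,
              double      beta ) {
  double w_obs  = (double)(x1-est.x0) / (double)(y1-est.y0);              /* observed rate this epoch */
  double w_est  = est.w + alpha*( w_obs - est.w );                        /* decaying average filter */
  long   y1_est = est.y0_eff + (long)( 0.5 + est.m*(double)(x1-est.x0) ); /* y-clock estimate at epoch end */
  long   y1_eff = fd_long_max( y1, y1_est );                              /* forward step (monotonic) */
  double m      = 1. / ( w_est*( 1. + beta*(double)(y1_eff-y1) ) );       /* absorb any overestimate */
  epoch_est_t next = { x1, y1, w_est, y1_eff, m };
  return next;
}
#endif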

ulong
fd_clock_align( void ) {
  return alignof( fd_clock_shmem_t );
}

ulong
fd_clock_footprint( void ) {
  return sizeof( fd_clock_shmem_t );
}

void *
fd_clock_new( void * shmem,
              long   recal_avg,
              long   recal_jit,
              double recal_hist,
              double recal_frac,
              long   init_x0,
              long   init_y0,
              double init_w ) {
  fd_clock_shmem_t * shclock = (fd_clock_shmem_t *)shmem;

  if( FD_UNLIKELY( !recal_jit     ) ) recal_jit  = (recal_avg>>7) + (long)!!(recal_avg & 127L); /* ceil( recal_avg / 128 ) */
  if( FD_UNLIKELY( recal_hist==0. ) ) recal_hist = 3.;
  if( FD_UNLIKELY( recal_frac==0. ) ) recal_frac = 1.;

  if( FD_UNLIKELY( !shclock ) ) {
    FD_LOG_WARNING(( "NULL shmem" ));
    return NULL;
  }

  if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shclock, fd_clock_align() ) ) ) {
    FD_LOG_WARNING(( "misaligned shmem" ));
    return NULL;
  }

  if( FD_UNLIKELY( !((1L<=recal_jit) & (recal_jit<=recal_avg) & (recal_avg<=(LONG_MAX-recal_jit))) ) ) {
    FD_LOG_WARNING(( "bad recal_avg / recal_jit" ));
    return NULL;
  }

  if( FD_UNLIKELY( !(recal_hist>0.) ) ) {
    FD_LOG_WARNING(( "bad recal_hist" ));
    return NULL;
  }

  if( FD_UNLIKELY( !(recal_frac>0.) ) ) {
    FD_LOG_WARNING(( "bad recal_frac" ));
    return NULL;
  }

  double init_m = 1./init_w;
  if( FD_UNLIKELY( !((init_w>0.) & (init_m>0.)) ) ) {
    FD_LOG_WARNING(( "bad w" ));
    return NULL;
  }

  ulong footprint = fd_clock_footprint();
  if( FD_UNLIKELY( !footprint ) ) {
    FD_LOG_WARNING(( "bad footprint" ));
    return NULL;
  }

  memset( shclock, 0, footprint );

  long  recal_jit_eff = (long)fd_ulong_pow2_dn( (ulong)recal_jit );
  long  recal_min     = recal_avg - recal_jit_eff;
  ulong recal_mask    = 2UL*(ulong)recal_jit_eff - 1UL;

  shclock->seq        = 0UL;
  shclock->recal_next = init_y0 + recal_min + (long)(fd_ulong_hash( FD_CLOCK_MAGIC ^ (ulong)init_x0 ) & recal_mask);
  shclock->err_cnt    = 0UL;

  shclock->recal_alpha = 1. / (1. + recal_hist);
  shclock->recal_beta  = recal_frac / (double)(recal_avg + recal_jit_eff);
  shclock->recal_min   = recal_min;
  shclock->recal_mask  = recal_mask;

  shclock->recal_avg  = recal_avg;
  shclock->recal_jit  = recal_jit;
  shclock->recal_hist = recal_hist;
  shclock->recal_frac = recal_frac;

  shclock->init_x0 = init_x0;
  shclock->init_y0 = init_y0;
  shclock->init_w  = init_w;

  fd_clock_epoch_t * epoch = shclock->epoch;

  for( ulong idx=0UL; idx<FD_CLOCK_EPOCH_CNT; idx++ ) {
    epoch[ idx ].seq0   = 0UL;
    epoch[ idx ].x0     = init_x0;
    epoch[ idx ].y0     = init_y0;
    epoch[ idx ].w      = init_w;
    epoch[ idx ].y0_eff = init_y0;
    epoch[ idx ].m      = init_m;
    epoch[ idx ].seq1   = 0UL;
  }

  FD_COMPILER_MFENCE();
  shclock->magic = FD_CLOCK_MAGIC;
  FD_COMPILER_MFENCE();

  return shclock;
}

fd_clock_t *
fd_clock_join( void *          _lmem,
               void *          _shclock,
               fd_clock_func_t clock_x,
               void const *    args_x ) {
  fd_clock_t *       clock   = (fd_clock_t *)_lmem;
  fd_clock_shmem_t * shclock = (fd_clock_shmem_t *)_shclock;

  if( FD_UNLIKELY( !clock ) ) {
    FD_LOG_WARNING(( "NULL lmem" ));
    return NULL;
  }

  if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)clock, alignof(fd_clock_t) ) ) ) {
    FD_LOG_WARNING(( "misaligned lmem" ));
    return NULL;
  }

  if( FD_UNLIKELY( !shclock ) ) {
    FD_LOG_WARNING(( "NULL shclock" ));
    return NULL;
  }

  if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shclock, fd_clock_align() ) ) ) {
    FD_LOG_WARNING(( "misaligned shclock" ));
    return NULL;
  }

  if( FD_UNLIKELY( shclock->magic!=FD_CLOCK_MAGIC ) ) {
    FD_LOG_WARNING(( "bad magic" ));
    return NULL;
  }

  if( FD_UNLIKELY( !clock_x ) ) {
    FD_LOG_WARNING(( "NULL clock_x" ));
    return NULL;
  }

  clock->shclock = shclock;
  clock->clock_x = clock_x;
  clock->args_x  = args_x;

  return clock;
}

void *
fd_clock_leave( fd_clock_t * clock ) {

  if( FD_UNLIKELY( !clock ) ) {
    FD_LOG_WARNING(( "NULL clock" ));
    return NULL;
  }

  return clock;
}

void *
fd_clock_delete( void * _shclock ) {
  fd_clock_shmem_t * shclock = (fd_clock_shmem_t *)_shclock;

  if( FD_UNLIKELY( !shclock ) ) {
    FD_LOG_WARNING(( "NULL shclock" ));
    return NULL;
  }

  if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shclock, fd_clock_align() ) ) ) {
    FD_LOG_WARNING(( "misaligned shclock" ));
    return NULL;
  }

  if( FD_UNLIKELY( shclock->magic!=FD_CLOCK_MAGIC ) ) {
    FD_LOG_WARNING(( "bad magic" ));
    return NULL;
  }

  FD_COMPILER_MFENCE();
  shclock->magic = 0UL;
  FD_COMPILER_MFENCE();

  return shclock;
}

long
fd_clock_now( void const * _clock ) {
  fd_clock_t const *       clock   = (fd_clock_t const *)_clock;
  fd_clock_shmem_t const * shclock = clock->shclock;
  fd_clock_func_t          clock_x = clock->clock_x;
  void const *             args_x  = clock->args_x;

  fd_clock_epoch_t epoch[1];
  long             x_obs;
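
  /* Speculatively read the current epoch parameters seqlock style: the
     epoch parameters and x-clock observation are accepted only if no
     recalibration was published while they were being read (seq
     unchanged and matching the epoch entry's seq0 / seq1 markers);
     otherwise, pause briefly and retry. */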
  for(;;) {
    ulong seq0 = fd_clock_seq( shclock );        /* likely l1 cache hit */
    fd_clock_epoch_read( shclock, seq0, epoch ); /* likely l1 cache hit */
    x_obs = clock_x( args_x );                   /* after seq0 read, as close to return as possible */
    ulong seq1 = fd_clock_seq( shclock );        /* likely l1 cache hit */
    if( FD_LIKELY( (seq0==seq1) & (epoch->seq0==seq0) & (epoch->seq1==seq0) ) ) break;
    FD_SPIN_PAUSE();
  }
  return fd_clock_epoch_y( epoch, x_obs );
}

int
fd_clock_joint_read( fd_clock_func_t clock_x, void const * args_x,
                     fd_clock_func_t clock_y, void const * args_y,
                     long *          opt_x,
                     long *          opt_y,
                     long *          opt_dx ) {

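  /* Sample the clocks interleaved as x[0],y[0],x[1],y[1],...,
     y[FD_CLOCK_JOINT_READ_CNT-1],x[FD_CLOCK_JOINT_READ_CNT] such that
     each y[idx] is bracketed by the x-clock reads x[idx] and x[idx+1].
     The narrowest bracket pairs its y with the least synchronization
     uncertainty; we report that y against the bracket's midpoint
     (rounded up), with half the bracket width as the error bound. */
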
  long x[ FD_CLOCK_JOINT_READ_CNT+1UL ];
  long y[ FD_CLOCK_JOINT_READ_CNT     ];

  for( ulong idx=0UL; idx<FD_CLOCK_JOINT_READ_CNT; idx++ ) {
    x[ idx ] = clock_x( args_x );
    y[ idx ] = clock_y( args_y );
  }

  x[ FD_CLOCK_JOINT_READ_CNT ] = clock_x( args_x );

  ulong best_idx = 0UL;
  long  best_dx  = x[1] - x[0]; if( FD_UNLIKELY( best_dx<=0L ) ) return FD_CLOCK_ERR_X;

  for( ulong idx=1UL; idx<FD_CLOCK_JOINT_READ_CNT; idx++ ) {
    long dy = y[ idx ] - y[ idx-1UL ]; if( FD_UNLIKELY( dy<=0L ) ) return FD_CLOCK_ERR_Y;
    long dx = x[ idx+1UL ] - x[ idx ]; if( FD_UNLIKELY( dx<=0L ) ) return FD_CLOCK_ERR_X;
    best_idx = fd_ulong_if( best_dx<dx, best_idx, idx );
    best_dx  = fd_long_min( best_dx, dx );
  }

  best_dx = (best_dx+1L) >> 1; /* ceil( (x[best_idx+1]-x[best_idx])/2 ) */

  fd_long_store_if( !!opt_x,  opt_x,  x[ best_idx ] + best_dx );
  fd_long_store_if( !!opt_y,  opt_y,  y[ best_idx ]           );
  fd_long_store_if( !!opt_dx, opt_dx, best_dx                 );

  return FD_CLOCK_SUCCESS;
}

static inline long
fd_clock_next( fd_clock_shmem_t * shclock,
               long               x0,
               long               y0,
               double             w,
               long               y0_eff,
               double             m,
               int                err ) {

  ulong seq        = shclock->seq + 1UL;
  long  recal_next = (y0_eff + shclock->recal_min) + (long)(fd_ulong_hash( FD_CLOCK_MAGIC ^ (ulong)x0 ) & shclock->recal_mask);
  ulong err_cnt    = shclock->err_cnt + (ulong)!!err;

  fd_clock_epoch_t * epoch = shclock->epoch + (seq & (FD_CLOCK_EPOCH_CNT-1UL));

  FD_COMPILER_MFENCE();
  epoch->seq1 = seq; /* Mark entry as unsafe to read */
  FD_COMPILER_MFENCE();
  epoch->x0     = x0;
  epoch->y0     = y0;
  epoch->w      = w;
  epoch->y0_eff = y0_eff;
  epoch->m      = m;
  FD_COMPILER_MFENCE();
  epoch->seq0 = seq; /* Mark entry as safe to read */
  FD_COMPILER_MFENCE();
  shclock->seq        = seq;
  shclock->recal_next = recal_next;
  shclock->err_cnt    = err_cnt;
  FD_COMPILER_MFENCE();

  return recal_next;
}

long
fd_clock_recal( fd_clock_t * clock,
                long         x1,
                long         y1 ) {

  fd_clock_shmem_t * shclock = clock->shclock;

  fd_clock_epoch_t const * epoch = shclock->epoch + (shclock->seq & (FD_CLOCK_EPOCH_CNT-1UL));

  long   x0     = epoch->x0;
  long   y0     = epoch->y0;
  double w0     = epoch->w;
  long   y0_eff = epoch->y0_eff;
  double m0     = epoch->m;

  long dx = x1 - x0;
  long dy = y1 - y0;

  double w_obs = ((double)dx) / ((double)dy);

  double w1;
  long   y1_eff;
  double m1;
  int    err;

  /* FIXME: Consider tighter rate interval?  (E.g. 0.9765625 / 1.024) */

  if( FD_UNLIKELY( !((dx>0L) & (dy>0L) & ((0.5*w0)<w_obs) & (w_obs<(2.0*w0))) ) ) {

    /* At this point, the x-clock didn't step forward between recals,
       the y-clock didn't step forward between recals, the observed
       rate this epoch slowed dramatically and/or the observed rate
       this epoch increased dramatically.  This is typically a sign
       that an operator stepped the x-clock backward (dx<<0), y-clock
       backward (dy<<0), x-clock forward (w_obs>>w0) and/or y-clock
       forward (w_obs<<w0) by a large amount out-of-band.  We start the
       new epoch at (x1,y1) with the current epoch's tick rate
       estimates and log a recal error.  This allows near immediate
       recovery from all manner of clock jankiness.  It also can break
       monotonicity of y-clock predictions but there's not a lot of
       choice given janky clocks. */

    w1     = w0;
    y1_eff = y1;
    m1     = 1./w0;
    err    = 1;

  } else {

    /* At this point, x1 / y1 are in the future of the current epoch
       start on the x-clock / y-clock and the observed tick rate this
       epoch is plausible.  We average the observed tick rate with the
       current epoch's rate estimate to get the next epoch's rate
       estimate.

       To preserve y-clock prediction monotonicity, the effective y1
       for the next epoch will be the later of the observation y1 and
       this epoch's estimate for y1 given the observation x1.

       If y1_eff == y1 (i.e. we are microstepping the fd_clock forward
       to correct immediately and fully an underestimate at the end of
       this epoch without breaking monotonicity), we just use 1/w1 for
       the y-ticks per x-ticks conversion rate m1 this epoch.

       Otherwise, we can't microstep the fd_clock backward while
       ensuring monotonicity on observers.  To correct the overestimate
       this epoch, we reduce the conversion rate to approximately
       absorb recal_frac of the overestimate over the coming epoch.
       The reduction is such that, asymptotically, the resulting
       conversion rate should always be positive.

       See the more detailed derivation above. */

    w1     = w0 + shclock->recal_alpha*(w_obs-w0);
    y1_eff = fd_long_max( y1, y0_eff + (long)(0.5 + m0*(double)dx) );
    m1     = 1. / (w1 + (shclock->recal_beta*w1)*(double)(y1_eff-y1));
    err    = 0;

  }

  return fd_clock_next( shclock, x1, y1, w1, y1_eff, m1, err );
}
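
/* The following sketch is purely illustrative and is not compiled: one
   plausible way a single calibration thread might drive this API.  The
   clock readers, recal interval and w_init handling are assumptions
   for illustration (fd_tickcount and fd_log_wallclock are the usual
   fd_util x-clock / y-clock candidates); error handling, shared memory
   setup and the concurrent observer threads (which would just call
   fd_clock_now) are elided. */

#if 0
static long my_clock_x( void const * arg ) { (void)arg; return fd_tickcount();     } /* e.g. CPU tickcounter */
static long my_clock_y( void const * arg ) { (void)arg; return fd_log_wallclock(); } /* e.g. ns since epoch */

static void
calibrate_forever( void * shmem,     /* assumed fd_clock_align() aligned with fd_clock_footprint() bytes */
                   double w_init ) { /* externally estimated initial x-ticks per y-tick rate */

  /* Jointly observe the two clocks to anchor the initial epoch */

  long x0; long y0;
  if( FD_UNLIKELY( fd_clock_joint_read( my_clock_x, NULL, my_clock_y, NULL, &x0, &y0, NULL ) ) ) return;

  /* Create and join a clock that recalibrates roughly every 0.1 s of
     y-ticks with default jitter / history / drift absorption */

  fd_clock_t   lmem[1];
  fd_clock_t * clock = fd_clock_join( lmem, fd_clock_new( shmem, (long)1e8, 0L, 0., 0., x0, y0, w_init ),
                                      my_clock_x, NULL );

  long recal_next = clock->shclock->recal_next;

  for(;;) {

    /* ... do work, timestamping via fd_clock_now( clock ) ... */

    if( FD_UNLIKELY( fd_clock_now( clock )>=recal_next ) ) {
      long x1; long y1;
      if( FD_LIKELY( !fd_clock_joint_read( my_clock_x, NULL, my_clock_y, NULL, &x1, &y1, NULL ) ) )
        recal_next = fd_clock_recal( clock, x1, y1 );
    }
  }
}
#endif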

long
fd_clock_step( fd_clock_t * clock,
               long         x0,
               long         y0,
               double       w ) {
  return fd_clock_next( clock->shclock, x0, y0, w, y0, 1./w, 0 ); /* FIXME: Consider treating these as an "err" */
}

char const *
fd_clock_strerror( int err ) {
  switch( err ) {
  case FD_CLOCK_SUCCESS: return "success";
  case FD_CLOCK_ERR_X:   return "x-clock not well-behaved";
  case FD_CLOCK_ERR_Y:   return "y-clock not well-behaved";
  default: break;
  }
  return "unknown";
}
|