Line data Source code
1 : #ifndef HEADER_fd_src_util_clock_fd_clock_h
2 : #define HEADER_fd_src_util_clock_fd_clock_h
3 :
4 : #include "../log/fd_log.h"
5 :
6 : /* fd_clock provides a persistent interprocess shared memory object for
7 : synchronizing a pair of clocks and using that synchronization
8 : lockfree between an arbitrary number of observer threads and a
9 : calibrating thread.
10 :
   Common usage is to have 1 administrative thread in an application
12 : synchronize the cheap low quality CPU invariant tickcounter, NIC
13 : tickcounters, FPGA tickcounters, GPU tickcounters, etc to the system
14 : clock so that all threads in that application can ultra cheaply
15 : convert fast-but-inaccurate tickcounter reads across a wide variety
16 : of devices into a wallclock time that can be compared enterprise
17 : wide.
18 :
19 : This is typically several times cheaper than the reading system clock
20 : with comparable accuracy, less jitter, and more flexibility
21 : (especially when using heterogeneous hardware).
22 :
23 : Note that any thread can do the calibration. The only concurrency
24 : restriction is that different threads should not attempt to calibrate
25 : a fd_clock at the same time. Among other things, this means that
26 : single threaded modes of operation are supported.
27 :
28 : Many of the below APIs make a best effort to handle common clock
   dysfunctions (like getting stepped backwards due to operators / NTP
30 : manipulating the underlying clock hardware out-of-band).
31 :
32 : Summary usage:
33 :
34 : ... create a fd_clock for the clock pair (clock_x/args_x,clock_y/args_y)
35 :
36 : ... typically clock_x/args_x is the fast-but-inaccurate local clock
37 : ... (e.g. the CPU invariant tickcounter) and clock_y/args_y is the
38 : ... slow-but-accurate reference global clock (e.g. clock_gettime /
39 : ... CLOCK_REALTIME)
40 :
41 : ... user parameters
42 :
43 : long recal_avg = (long)10e6; ... target clock epoch duration, in y-ticks, no default (10e6 <> 10 ms if y-ticks are ns)
44 : long recal_jit = 0L; ... target clock epoch jitter, in y-ticks, 0 -> use default (~recal_avg/128)
45 : double recal_hist = 0.; ... ~number of recent clock epochs to use for clock rate estimation, 0 -> use a default (3)
46 : double recal_frac = 0.; ... ~target max clock drift fraction to correct in a clock epoch, 0 -> use a default (1)
47 :
48 : FD_LOG_NOTICE(( "starting initial calibration" ));
49 :
50 : ... the user can use whatever method is best to joint read the
      ... x-clock and y-clock.  fd_clock_joint_read provides a generic
52 : ... method to approximate joint reading an arbitrary clock pair.
53 :
54 : long init_x0;
55 : long init_y0;
56 : int err = fd_clock_joint_read( clock_x, args_x, clock_y, args_y, &init_x0, &init_y0, NULL );
57 : if( FD_UNLIKELY( err ) ) FD_LOG_ERR(( "fd_clock_joint_read failed (%i-%s)", err, fd_clock_strerror( err ) ));
58 :
59 : ... wait / do other stuff for O(1) epochs
60 :
61 : FD_LOG_NOTICE(( "finishing initial calibration" ));
62 :
63 : long init_x1;
64 : long init_y1;
65 : err = fd_clock_joint_read( clock_x, args_x, clock_y, args_y, &init_x1, &init_y1, NULL );
66 : if( FD_UNLIKELY( err ) ) FD_LOG_ERR(( "fd_clock_joint_read failed (%i-%s)", err, fd_clock_strerror( err ) ));
67 :
68 : long init_dx = init_x1 - init_x0;
69 : long init_dy = init_y1 - init_y0;
70 : if( FD_UNLIKELY( !((init_dx>0L) & (init_dy>0L)) ) ) FD_LOG_ERR(( "initial calibration failed" ));
71 :
72 : double init_w = (double)init_dx / (double)init_dy;
73 :
74 : void * shmem = ... alloc fd_clock_align() / fd_clock_footprint() compat shared memory;
75 : void * shclock = fd_clock_new( shmem, recal_avg, recal_jit, recal_hist, recal_frac, init_x1, init_y1, init_w );
76 : if( FD_UNLIKELY( !shclock ) ) FD_LOG_ERR(( "fd_clock_new failed" ));
77 :
78 : ... at this point, shclock==shmem and is ready to be joined by
79 : ... observing threads and the calibrating thread
80 :
81 : ... join a fd_clock (clock_x/args_x are how to read the x-clock on the caller)
82 :
83 : void * lmem = ... alloc fd_clock_t compat local memory;
84 : void * shclock = ... map fd_clock into the caller's local address space with the proper alignment and footprint;
85 : fd_clock_t * clock = fd_clock_join( lmem, shclock, clock_x, args_x );
      if( FD_UNLIKELY( !clock ) ) FD_LOG_ERR(( "fd_clock_join failed" ));
87 :
88 : ... at this point, clock==lmem is a current local join
89 :
90 : ... leave a fd_clock (clock is a current local join)
91 :
92 : void * lmem = fd_clock_leave( clock ); // returns lmem on success, NULL on error (logs details)
93 :
94 : ... at this point, clock is no longer a join and lmem can freed /
95 : ... reused
96 :
97 : ... destroy a fd_clock (nobody should be joined at this point)
98 :
99 : void * shclock = ... map clock into the caller's local address space with the proper alignment and footprint;
      void * shmem = fd_clock_delete( shclock ); // returns shmem (==shclock) on success, NULL on error (logs details)
101 :
102 : ... at this point, shmem no longer holds the state of a fd_clock
103 : ... and can be freed / reused
104 :
105 : ... observe a fd_clock (clock is a current local join)
106 :
107 : ... this can be done by any thread at any time (non-blocking
108 : ... lockfree).
109 :
110 : long now = fd_clock_now( clock );
111 :
112 : ... now is an accurate estimate of the time on the y-clock based
113 : ... on a local read of the x-clock
114 :
115 : ... recalibrate a fd_clock (clock is a current local join to a fd_clock)
116 :
117 : ... this should be done by a single thread on roughly the
118 : ... recommended schedule (e.g. an admin tile that does low
119 : ... priority application wide chores)
120 :
121 : ... tile setup
122 :
123 : ...
124 : long recal_next = fd_clock_recal_next( clock ); ... get the current recommended next recal
125 : ...
126 :
127 : ... tile run loop
128 :
129 : for(;;) {
130 : ...
131 : long now = fd_clock_now( clock );
132 : ...
133 : if( FD_UNLIKELY( now>=recal_next ) ) {
134 :
135 : ... as per the above, user can use other methods to joint
136 : ... read the x-clock and y-clock if applicable
137 :
138 : long x1;
139 : long y1;
140 : int err = fd_clock_joint_read( clock_x, args_x, clock_y, args_y, &x1, &y1, NULL );
141 : if( FD_UNLIKELY( err ) )
142 : FD_LOG_WARNING(( "fd_clock_joint_read failed (%i-%s); attempting to continue", err, fd_clock_strerror( err ) ));
143 : else
144 : recal_next = fd_clock_recal( clock, x1, y1 );
145 : }
146 : ...
147 : }
148 :
149 : ... HPC tile usage (advanced API)
150 :
151 : ... tile setup ...
152 :
153 : ...
154 : fd_clock_shmem_t const * shclock = fd_clock_shclock_const( clock );
155 : fd_clock_epoch_t epoch[1];
156 : fd_clock_epoch_init( epoch, shclock );
157 : ...
158 :
159 : ... tile run loop
160 :
161 : for(;;) {
162 :
163 : if( ... time for tile housekeeping ... ) { ... should be done much more often than recal_avg
164 : ...
165 : fd_clock_epoch_refresh( epoch, shclock );
166 : ...
167 : }
168 :
169 : ... critical path usage ...
170 :
171 : ...
172 : long x = ... an x-clock observation, e.g. clock_x( args_x ), hardware tickcount, etc;
173 : ...
174 : long now = fd_clock_epoch_y( epoch, x ); ... O(1) ns
175 : ...
176 :
177 : }
178 : */
179 :
180 : /* FD_CLOCK_SUCCESS (0) is used by fd_clock APIs to indicate an
181 : operation succeeded. FD_CLOCK_ERR_* (negative integers) are used by
182 : these APIs to indicate an operation failed and why. */
183 :
184 5959 : #define FD_CLOCK_SUCCESS ( 0) /* Success */
185 6 : #define FD_CLOCK_ERR_X (-1) /* Failed because the x-clock is not well behaved (e.g. didn't tick forward) */
186 6 : #define FD_CLOCK_ERR_Y (-2) /* Failed because the y-clock is not well behaved (e.g. didn't tick forward) */
187 :
188 : /* FD_CLOCK_ALIGN / FD_CLOCK_FOOTPRINT allow for compile-time
189 : declarations of clock shared memory region. */
190 :
191 : #define FD_CLOCK_ALIGN (128UL)
192 : #define FD_CLOCK_FOOTPRINT (640UL)
193 :
194 : /* A fd_clock_shmem_t is a quasi-opaque handle to a shared memory region
195 : used to hold the state of a fd_clock. A fd_clock_t is a quasi-opaque
196 : handle that describes a local join to a fd_clock. A fd_clock_epoch_t
197 : is a quasi-opaque handle that describes the relationship between the
198 : x-clock and y-clock in an epoch for use by the advanced APIs. */
199 :
200 : struct fd_clock_shmem_private;
201 : typedef struct fd_clock_shmem_private fd_clock_shmem_t;
202 :
203 : struct fd_clock_private;
204 : typedef struct fd_clock_private fd_clock_t;
205 :
206 : struct fd_clock_epoch_private;
207 : typedef struct fd_clock_epoch_private fd_clock_epoch_t;
208 :
209 : /* fd_clock private API **********************************************/
210 :
211 : /* This is exposed here to facilitate inlining of various clock
212 : operations for use in high performance contexts. */
213 :
214 : /* FD_CLOCK_JOINT_READ_CNT gives the number of iterations used by
215 : fd_clock_joint_read. Larger values cost linearly more but can
216 : improve joint read accuracy approximately hyperbolically in an
217 : statistical extremal value sense (assuming clock read timing is
218 : shifted exponentially distributed). Must be at least 1. */
219 :
220 47681 : #define FD_CLOCK_JOINT_READ_CNT (3UL)
221 :
222 : /* Internals of a fd_clock_shmem_t */
223 :
224 : /* FD_CLOCK_EPOCH_CNT gives the number of epochs parameters to cache.
225 : Must be a positive integer power of 2. Larger than 1 is strongly
226 : recommended to reduce collision risks between observer threads and
227 : the calibrating thread. */
228 :
229 914624 : #define FD_CLOCK_EPOCH_CNT (4UL)
230 :
231 : /* FD_CLOCK_MAGIC specifies the fd_clock shared memory region layout. */
232 :
233 5953 : #define FD_CLOCK_MAGIC (0xfdc101c3a61c0000UL) /* fd clock magic ver 0 */
234 :
235 : /* Internals of a fd_clock_epoch_t */
236 :
struct __attribute__((aligned(128))) fd_clock_epoch_private { /* double cache line alignment to avoid false sharing */

  /* seq0 is the epoch's sequence number. */

  ulong seq0;

  /* x0 is the raw value observed on the x-clock at the epoch start.  In
     x-ticks (e.g. ticks if x is a CPU invariant tickcounter). */

  long x0;

  /* y0 is the raw value for the y-clock at the epoch start.  In
     y-ticks (e.g. ns if y is the system wallclock in ns). */

  long y0;

  /* w is the estimate of the recent x-tick per y-tick rate at epoch
     start (e.g. in GHz = tick/ns if x is a CPU invariant tickcounter
     and y is the system wallclock in ns). */

  double w;

  /* y0_eff is the effective value for the y-clock at the epoch start.
     This may be different from the raw value in order to preserve
     monotonicity of y-clock estimates across epochs.  In y-ticks (e.g.
     ns if y is the system wallclock in ns). */

  long y0_eff;

  /* m is the y-tick per x-tick rate used to estimate the value that
     would have been observed on the y-clock given an observation on the
     x-clock (e.g. in ns/tick if x is a CPU invariant tickcounter and y
     is the system wallclock in ns).  This may be different from 1/w
     because the clock might be absorbing the clock drift observed at
     the end of the previous epoch. */

  double m;

  /* seq1==seq0 when the above parameters are valid.  To update epoch
     parameters, first, seq1 is updated (marking the parameters as
     invalid).  Then the above parameters are updated.  Last, seq0 is
     updated (marking parameters as valid again).  Since sequence number
     wrapping is not an issue practically (would take eons), observers
     read this structure sequentially forward (seq0->params->seq1) and
     then validate seq0==seq1 to do a non-blocking lockfree read (i.e.
     this is a seqlock-style protocol with two sequence words
     bracketing the payload).  Assumes the usual write visibility
     ordering properties common on x86. */

  ulong seq1;

};
288 :
289 : /* Internals of a fd_clock_shmem_t */
290 :
struct __attribute__((aligned(128))) fd_clock_shmem_private {

  /* First cache line pair */

  ulong magic; /* == FD_CLOCK_MAGIC */

  /* clock epochs have sequence numbers.  seq is the most recent epoch
     sequence number published by the calibrating thread.  That is,
     epochs [0,seq] are guaranteed to have been published, seq+1 is
     either not published or in the process of getting published.
     [seq+2,inf) have definitely not been published. */
  /* FIXME: consider using a versioned lock here to make recalibration
     explicit? */

  ulong seq;

  /* recal_next gives the recommended time on the y-clock when to next
     recalibrate, in y-ticks. */

  long recal_next;

  /* err_cnt gives the number of times that recalibration potentially
     broke monotonicity of y-clock estimates due to operators jerking
     around the x-clock and/or y-clock out-of-band. */

  ulong err_cnt;

  /* parameters derived from the user configuration */

  double recal_alpha; /* == 1. / (1. + recal_hist) */
  double recal_beta;  /* == recal_frac / ( recal_avg + pow2_dn(recal_jit) ) */
  long   recal_min;   /* == recal_avg - pow2_dn(recal_jit) */
  ulong  recal_mask;  /* == 2*pow2_dn(recal_jit)-1 */

  /* recal_avg is the recommended average interval between
     recalibrations, in y-ticks.  Shorter values increase overhead,
     increase cache traffic between the calibrating thread and observer
     threads and eventually degrade accuracy due to quantization errors
     and synchronization errors in x-clock / y-clock joint reads that
     get relatively worse for smaller intervals.  Longer values decrease
     overheads and reduce cache traffic but also eventually degrade
     accuracy due to various long timescale sources of clock drift /
     non-linearities (e.g. thermal changes).

     That is, when synchronizing two clocks, there is an optimal value
     in the middle for recal avg that gets the best overall tradeoff
     between sync accuracy and sync overhead.  For typical real world
     use cases (e.g. the CPU invariant tickcount and the system
     wallclock), recal_avg should be O(10s) milliseconds for ns-scale
     accuracy.

     Similarly, recal_jit is the recommended jitter between
     recalibrations, in y-ticks.  This is a positive value much less
     than recal_avg.  In short, it is a really bad idea to do anything
     in distributed systems on completely regular intervals because
     such can become a source of all sorts of subtle and not-so-subtle
     anomalies.  ~1/128 of recal_avg is reasonable for most apps.

     recal_hist gives roughly how many recent epochs to use for
     estimating the recent relative clock rate.  Smaller values allow
     for more adaptivity when syncing low quality clocks.  Larger
     values allow higher accuracy when syncing high quality clocks.
     Positive, 3 is a reasonable value for most apps.

     recal_frac gives what fraction of clock drift observed at the end
     of an epoch the clock should try to absorb over the next epoch.
     Values not in (0,2) are likely unstable.  Values near 1 are
     recommended.  1 is a reasonable value for most apps. */

  long   recal_avg;  /* positive */
  long   recal_jit;  /* in [1,recal_avg] */
  double recal_hist; /* non-negative */
  double recal_frac; /* in ~(0,2) */

  /* init_x0 and init_y0 are the x-clock and y-clock joint read
     observations used for epoch 0 when the clock was created.  init_w
     is the initial estimate for the x-ticks per y-ticks rate. */

  long   init_x0;
  long   init_y0;
  double init_w; /* positive */

  /* FD_CLOCK_EPOCH_CNT cache line pairs */

  /* epoch is a direct mapped cache of recently published epochs.  If
     epoch seq is in the epoch cache, it will be at:

       idx = seq & (FD_CLOCK_EPOCH_CNT-1)

     Each epoch is on its own cache line pair to minimize false sharing
     between the calibrating thread and observer threads. */

  fd_clock_epoch_t epoch[ FD_CLOCK_EPOCH_CNT ];

};
386 :
387 : /* Internals of a fd_clock_t */
388 :
/* A fd_clock_t local join handle: where the clock's shared memory
   region is mapped in this join's address space plus how this join
   reads its (local) x-clock. */

struct fd_clock_private {
  fd_clock_shmem_t * shclock; /* Location of the clock shared memory in the local join's address space */
  fd_clock_func_t    clock_x; /* How to read the x-clock for this local join */
  void const *       args_x;  /* " */
};
394 :
395 : /* End of private API *************************************************/
396 :
397 : FD_PROTOTYPES_BEGIN
398 :
399 : /* Constructors / destructors *****************************************/
400 :
401 : /* fd_clock_{align,footprint,new,join,leave,delete} provide the usual
402 : persistent interprocess constructors/deconstructors. */
403 :
404 : ulong fd_clock_align ( void ); /* ==FD_CLOCK_ALIGN */
405 : ulong fd_clock_footprint( void ); /* ==FD_CLOCK_FOOTPRINT */
406 :
407 : void *
408 : fd_clock_new( void * shmem,
409 : long recal_avg,
410 : long recal_jit,
411 : double recal_hist,
412 : double recal_frac,
413 : long init_x0,
414 : long init_y0,
415 : double init_w );
416 :
417 : fd_clock_t *
418 : fd_clock_join( void * lmem,
419 : void * shclock,
420 : fd_clock_func_t clock_x,
421 : void const * args_x );
422 :
423 : void * fd_clock_leave ( fd_clock_t * clock ); /* returns lmem */
424 : void * fd_clock_delete( void * shclock ); /* returns shmem */
425 :
426 : /* Accessors **********************************************************/
427 :
/* fd_clock_{recal_avg,recal_jit,recal_hist,recal_frac} return the
   recalibration parameters and fd_clock_init_{x0,y0,w} the initial
   joint read observation / rate estimate the clock was created with
   (see fd_clock_new).  clock is a current local join.  Does no input
   argument checking. */

static inline long   fd_clock_recal_avg ( fd_clock_t const * clock ) { return clock->shclock->recal_avg; }
static inline long   fd_clock_recal_jit ( fd_clock_t const * clock ) { return clock->shclock->recal_jit; }
static inline double fd_clock_recal_hist( fd_clock_t const * clock ) { return clock->shclock->recal_hist; }
static inline double fd_clock_recal_frac( fd_clock_t const * clock ) { return clock->shclock->recal_frac; }
static inline long   fd_clock_init_x0   ( fd_clock_t const * clock ) { return clock->shclock->init_x0; }
static inline long   fd_clock_init_y0   ( fd_clock_t const * clock ) { return clock->shclock->init_y0; }
static inline double fd_clock_init_w    ( fd_clock_t const * clock ) { return clock->shclock->init_w; }

/* fd_clock_shclock{_const} return the location of the clock's shared
   memory region in the join's local address space.  fd_clock_clock_x /
   fd_clock_args_x return how this join reads the x-clock. */

static inline void const *    fd_clock_shclock_const( fd_clock_t const * clock ) { return clock->shclock; }
static inline fd_clock_func_t fd_clock_clock_x      ( fd_clock_t const * clock ) { return clock->clock_x; }
static inline void const *    fd_clock_args_x       ( fd_clock_t const * clock ) { return clock->args_x; }

static inline void * fd_clock_shclock( fd_clock_t * clock ) { return clock->shclock; }
441 :
442 : /* Basic observer API *************************************************/
443 :
444 : /* fd_clock_now returns an estimate of the y-clock (which is typically
445 : the slow-but-accurate global reference clock with the desired units
446 : ... e.g. the system wallclock in ns) by making a local observation of
447 : the x-clock (which is typically the fast-but-inaccurate local
448 : tickcounter not in the desired units ... e.g. the CPU invariant
449 : tickcounter in CPU ticks). Assumes the clock has been recently
450 : calibrated. Does no input argument checking. For common x-clock /
451 : y-clock pairs, usually several times faster, more deterministic and
452 : comparable accuracy to reading the y-clock. The return value should
453 : be interpreted as just before when the call returned (as opposed to,
454 : say, just after when the call was entered).
455 :
456 : This is a composite of several of the advanced observer API calls.
457 : As such, this can be accelerated further by deconstructing the call
458 : into lazily loading clock epoch parameters in tile housekeeping and
459 : using the lazily loaded epoch directly in the tile run loop. The
460 : result typically has O(1) ns overhead with optimal cache and NUMA
461 : behavior between the calibrating thread and all the concurrent
462 : observer threads. */
463 :
464 : long
465 : fd_clock_now( void const * clock ); /* fd_clock_func_t compat */
466 :
467 : /* Basic calibrator API ***********************************************/
468 :
469 : /* fd_clock_{recal_next,err_cnt} returns the {time on the y-clock when
470 : it is recommended to recalibrate the clock next,number of errors
471 : detected with the underlying clock sources since clock was created/
472 : counter was last reset}. */
473 :
static inline long  fd_clock_recal_next( fd_clock_t const * clock ) { return clock->shclock->recal_next; } /* in y-ticks */
static inline ulong fd_clock_err_cnt   ( fd_clock_t const * clock ) { return clock->shclock->err_cnt; }
476 :
477 : /* fd_clock_reset_err_cnt resets the error counter */
478 :
479 60 : static inline void fd_clock_reset_err_cnt( fd_clock_t * clock ) { clock->shclock->err_cnt = 0L; }
480 :
481 : /* fd_clock_joint_read reads the time on two different arbitrary clocks,
482 : an x-clock (specified by clock_x / args_x) and a y-clock (specified
483 : by clock_y / args_y), "simultaneously". The x-clock and y-clock do
484 : not have to use the same units. Returns SUCCESS (0) on success. On
485 : return, if opt_x / opt_y is non-NULL, *opt_x / *opt_y will contain
486 : the time observed on the x-clock / y-clock in x-ticks / y-ticks. If
487 : opt_dx is non-NULL, *opt_dx will contain the read accuracy in
488 : x-ticks. Specifically, the y-clock was observed at some time on the
489 : x-clock in the interval [x-dx,x+dx]. Does no input argument
490 : checking.
491 :
492 : Returns a FD_CLOCK_ERR code (negative) on failure. On return, *opt_x
493 : / *opt_y / *opt_dx are unchanged. Reasons for failure include ERR_X
494 : / ERR_Y (the x-clock / y-clock showed a negative clock interval
   between adjacent calls ... i.e. the clocks passed to joint_read aren't
496 : in fact well-behaved clocks).
497 :
498 : In typical usage, x-clock is the fast-but-inaccurate local clock
499 : (e.g. the CPU invariant tickcounter) and y-clock is the
500 : slow-but-accurate reference global clock (e.g. the system wallclock).
501 :
502 : This API is not required. If the calibrating thread has a better
503 : method (e.g. lower cost / lower jitter) for reading the two clocks
504 : "simultaneously", they can use that with the recal/step APIs below. */
505 :
506 : int
507 : fd_clock_joint_read( fd_clock_func_t clock_x, void const * args_x,
508 : fd_clock_func_t clock_y, void const * args_y,
509 : long * opt_x, long * opt_y, long * opt_dx );
510 :
511 : /* fd_clock_recal and fd_clock_step end clock's current epoch and start
512 : a new epoch. The new epoch will start at the time x1 on the x-clock.
513 : y1 gives the time jointly observed on the y-clock at x1. Ideally, x1
514 : and y1 have been recently jointly read (e.g. read via
515 : fd_clock_joint_read immediately before calling this or by any other
516 : suitable method for the specific clock pair). Returns the
517 : recommended time on the y-clock when to recalibrate next.
518 :
519 : For fd_clock_recal, the step will be such that monotonicity of
520 : y-clock estimates will be strictly preserved if the underlying clocks
521 : are proper clocks. If this detects the underlying clocks are not
522 : well-behaved (e.g. were stepped backward out-of-band), this will make
523 : a best effort to handle such and record the potential monotonicity
524 : failure.
525 :
526 : For fd_clock_step, the clock will be stepped to x1,y1 with a x-tick
527 : per y-tick rate of w1 without regard for whether or not that
528 : preserves monotonicity with the most recent epoch. This can be used
529 : to recover a dormant clock after a long period of no calibration or
   to handle situations where the calibrating thread explicitly knows
   the x-clock and/or y-clock were stepped out-of-band (e.g. the
532 : superuser manually changing the time on the system wallclock). */
533 :
534 : long fd_clock_recal( fd_clock_t * clock, long x1, long y1 );
535 : long fd_clock_step ( fd_clock_t * clock, long x1, long y1, double w1 );
536 :
537 : /* Advanced observer API **********************************************/
538 :
539 : /* fd_clock_seq returns the most recently published epoch sequence
540 : number observed at some point during the call. This is a compiler
541 : memory fence. Does no input argument checking. */
542 :
static inline ulong
fd_clock_seq( fd_clock_shmem_t const * shclock ) {
  FD_COMPILER_MFENCE();      /* pin the seq load against surrounding compiler reordering */
  ulong seq = shclock->seq;  /* most recently published epoch sequence number */
  FD_COMPILER_MFENCE();
  return seq;
}
550 :
551 : /* fd_clock_epoch_read attempts to read the synchronization parameters
552 : for clock epoch seq from the clock's epoch cache. clock is a valid
553 : local join to a fd_clock. Returns epoch and *epoch was always
554 : written.
555 :
556 : On return, if epoch->seq0!=epoch->seq1, the caller collided with an
557 : in-progress recalibration on the calibrating thread while attempting
558 : to read seq ... as the caller has probably fallen behind (or somehow
559 : ended up ahead of the calibrating thread), the caller should update
560 : seq to the most recent published sequence number and try again.
561 :
562 : Otherwise (epoch->seq0==epoch->seq1), *epoch contains the parameters
563 : for epoch->seq0. If delta=(long)(seq-epoch->seq0) is positive /
564 : negative, the caller is ahead / behind of the calibrating thread (and
565 : the magnitude gives a rough estimate of how far). If delta==0, the
566 : desired parameters are in *epoch.
567 :
568 : This is a compiler memory fence. Does no input argument checking.
569 :
570 : TL;DR If seq==epoch->seq0==epoch->seq1, the call was successful. If
571 : not, the caller should update seq and try again. */
572 :
static inline fd_clock_epoch_t *
fd_clock_epoch_read( fd_clock_shmem_t const * shclock,
                     ulong                    seq,
                     fd_clock_epoch_t *       epoch ) {

  /* Direct mapped epoch cache lookup (see fd_clock_shmem_private) */

  fd_clock_epoch_t const * e = shclock->epoch + (seq & (FD_CLOCK_EPOCH_CNT-1UL));

  /* Seqlock-style speculative read: load seq0 before the parameters
     and seq1 after, with compiler fences pinning the load order.  If
     seq0==seq1 on return, the parameters were not being rewritten by
     the calibrating thread while we read them (assumes the x86-like
     write visibility ordering noted in fd_clock_epoch_private). */

  FD_COMPILER_MFENCE();
  ulong seq0 = e->seq0;
  FD_COMPILER_MFENCE();
  long   x0     = e->x0;
  long   y0     = e->y0;
  double w      = e->w;
  long   y0_eff = e->y0_eff;
  double m      = e->m;
  FD_COMPILER_MFENCE();
  ulong seq1 = e->seq1;
  FD_COMPILER_MFENCE();

  /* Publish the (possibly torn) snapshot to the caller; caller
     validates seq==seq0==seq1 per the contract above. */

  epoch->seq0   = seq0;
  epoch->x0     = x0;
  epoch->y0     = y0;
  epoch->w      = w;
  epoch->y0_eff = y0_eff;
  epoch->m      = m;
  epoch->seq1   = seq1;

  return epoch;
}
602 :
603 : /* fd_clock_epoch_init populates epoch with parameters for fd_clock's
604 : current epoch as observed at some point during the call. Does no
605 : input argument checking. Returns epoch. */
606 :
607 : static inline fd_clock_epoch_t *
608 : fd_clock_epoch_init( fd_clock_epoch_t * epoch,
609 2973 : fd_clock_shmem_t const * shclock ) {
610 2973 : for(;;) {
611 2973 : ulong seq = fd_clock_seq( shclock );
612 2973 : fd_clock_epoch_read( shclock, seq, epoch );
613 2973 : if( FD_LIKELY( (epoch->seq0==seq) & (epoch->seq1==seq) ) ) break;
614 0 : FD_SPIN_PAUSE();
615 0 : }
616 2973 : return epoch;
617 2973 : }
618 :
619 : /* fd_clock_epoch_refresh refreshes epoch with parameters of the current
620 : epoch as observed at some point in time during the call, assuming
621 : that epoch contains previous published epoch parameters. Does no
622 : input argument checking. Returns epoch. */
623 :
624 : static inline fd_clock_epoch_t *
625 : fd_clock_epoch_refresh( fd_clock_epoch_t * epoch,
626 2970 : fd_clock_shmem_t const * shclock ) {
627 2970 : if( FD_UNLIKELY( epoch->seq0!=fd_clock_seq( shclock ) ) ) fd_clock_epoch_init( epoch, shclock );
628 2970 : return epoch;
629 2970 : }
630 :
631 : /* fd_clock_epoch_{x0,y0,w,y0_eff,m} returns the {raw x-clock epoch
632 : start time, raw y-clock epoch start time, estimated recent average
633 : x-tick per y-tick rate at epoch start,effective epoch y-clock start
634 : time, y-tick per x-tick conversion In effect for this epoch}.
635 : Specifically, this epoch estimates the y-clock from the x-clock
636 : observation x_obs via:
637 :
638 : y_est = y0_eff + round( m*(x_obs-x0) ) */
639 :
static inline long   fd_clock_epoch_x0    ( fd_clock_epoch_t const * epoch ) { return epoch->x0; }     /* raw x-clock epoch start, x-ticks */
static inline long   fd_clock_epoch_y0    ( fd_clock_epoch_t const * epoch ) { return epoch->y0; }     /* raw y-clock epoch start, y-ticks */
static inline double fd_clock_epoch_w     ( fd_clock_epoch_t const * epoch ) { return epoch->w; }      /* recent x-tick per y-tick rate estimate */
static inline long   fd_clock_epoch_y0_eff( fd_clock_epoch_t const * epoch ) { return epoch->y0_eff; } /* effective y-clock epoch start, y-ticks */
static inline double fd_clock_epoch_m     ( fd_clock_epoch_t const * epoch ) { return epoch->m; }      /* y-tick per x-tick conversion for this epoch */
645 :
646 : /* fd_clock_epoch_y returns an estimate of what would have been observed
647 : on the y-clock given the observation x from the x-clock and the clock
648 : synchronization parameters in epoch. Does no input argument
649 : checking. Ideally x should have been observed during the epoch but
650 : reads from just before or just after the epoch are typically usable
651 : too. */
652 :
653 : static inline long
654 : fd_clock_epoch_y( fd_clock_epoch_t const * epoch,
655 1201152 : long x ) {
656 1201152 : return epoch->y0_eff + (long)(0.5 + epoch->m*(double)(x-epoch->x0));
657 1201152 : }
658 :
659 : /* Misc APIs **********************************************************/
660 :
661 : /* fd_clock_strerror converts a FD_CLOCK_SUCCESS / FD_CLOCK_ERR code
662 : into a human readable cstr. The lifetime of the returned pointer is
663 : infinite. The returned pointer is always to a non-NULL cstr. */
664 :
665 : char const * fd_clock_strerror( int err );
666 :
667 : FD_PROTOTYPES_END
668 :
669 : #endif /* HEADER_fd_src_util_clock_fd_clock_h */
|