LCOV - code coverage report
Current view: top level - tango/tempo - fd_tempo.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 6 6 100.0 %
Date: 2025-01-08 12:08:44 Functions: 9 226 4.0 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_tango_tempo_fd_tempo_h
       2             : #define HEADER_fd_src_tango_tempo_fd_tempo_h
       3             : 
       4             : /* APIs for measuring time and tick intervals */
       5             : 
       6             : #include "../fd_tango_base.h"
       7             : 
       8             : FD_PROTOTYPES_BEGIN
       9             : 
      10             : /* fd_tempo_wallclock_model returns an estimate of t0, the minimum cost
      11             :    of fd_log_wallclock() in ns.  If opt_tau is non-NULL, on return,
      12             :    *opt_tau will contain an estimate of typical jitter associated with
      13             :    fd_log_wallclock() (such that fd_log_wallclock() can be roughly
      14             :    modeled as a shifted exponential distribution with minimum of t0 and
      15             :    wait time of tau, average cost of t0 + tau, rms of tau).  The first
      16             :    call of this in a thread group will be slow and all subsequent calls
      17             :    in the thread group will be fast and return the identical parameters
      18             :    to the first call.  t0 will be finite and positive and the tau will
      19             :    be finite and non-negative.  If the fd_log_wallclock() cannot be
      20             :    sanely parameterized on the first call, logs a warning and uses a
      21             :    fallback parameterization. */
      22             : 
      23             : double
      24             : fd_tempo_wallclock_model( double * opt_tau );
      25             : 
      26             : /* fd_tempo_tickcount_model does the same as fd_tempo_wallclock_model
      27             :    for fd_tickcount().  The model parameter units will be in ticks
      28             :    instead of nanoseconds. */
      29             : 
      30             : double
      31             : fd_tempo_tickcount_model( double * opt_tau );
      32             : 
      33             : /* fd_tempo_set_tick_per_ns explicitly sets the return values of
      34             :    fd_tempo_tick_per_ns below, subsequent calls to that function will
      35             :    return the values given here.
      36             : 
      37             :    These should not be arbitrarily provided, and this function is here
      38             :    primarily to enable different processes to synchronize their
      39             :    tick_per_ns value. */
      40             : 
      41             : void
      42             : fd_tempo_set_tick_per_ns( double _mu,
      43             :                           double _sigma );
      44             : 
      45             : /* fd_tempo_tick_per_ns is the same as the above but gives an estimate
      46             :    of the rate fd_tickcount() ticks relative to fd_log_wallclock() (this
      47             :    is in GHz).  The returned value is the observed rate when
      48             :    fd_tempo_tick_per_ns was first called in the thread group (this call
      49             :    will take around ~0.5 s).  If opt_sigma is non-NULL, on return,
      50             :    *opt_sigma will have an estimate how much the rate was fluctuating
      51             :    when observed during the first call.
      52             : 
      53             :    IMPORTANT!  Though fd_tickcount() is meant to tick at a constant rate
      54             :    relative to fd_log_wallclock(), the instantaneous rate it ticks can
      55             :    fluctuate for the usual clock synchronization reasons (e.g.
      56             :    thermal and electrical effects from CPU load, CPU clock timing
      57             :    jitter, similar for the wallclock, etc).  As this is an invariant
      58             :    counter, reasons for it to fluctuate do _NOT_ include directly things
      59             :    like turbo mode cpu clock frequency changes (it might slightly
      60             :    indirectly impact it due to correlated changes to system thermal and
      61             :    electric conditions from the changed power draw).  As such, this is
      62             :    mostly meant for getting a thread group wide consistent
      63             :    estimate of the number of ticks in a short interval of ns.
      64             : 
      65             :    TL;DR This returns an estimate of fd_tickcount()'s clock speed in
      66             :    GHz.  This is _NOT_ the current clock speed of the processor though
      67             :    it will usually superficially look like it.  This is _NOT_ the
      68             :    instantaneous rate the tickcounter is ticking relative to the
      69             :    wallclock though it will usually superficially look like it. */
      70             : 
      71             : double
      72             : fd_tempo_tick_per_ns( double * opt_sigma );
      73             : 
      74             : /* fd_tempo_observe_pair observes the fd_log_wallclock() and
      75             :    fd_tickcount() at the "same time".  More precisely, it alternately
      76             :    observes both a few times and estimates from the "best" wallclock
      77             :    read what tickcount would have been observed at that time had
      78             :    fd_tickcount() been called instead.  Returns a non-negative measure
      79             :    of the jitter in ticks in the sense observed tickcount is within
      80             :    +/-0.5 jitter ticks of the time the wallclock was observed.  On
      81             :    return, if opt_now is non-NULL, *opt_now will contain the actual
      82             :    fd_log_wallclock() observation and, if opt_tic is non-NULL, *opt_tic
      83             :    will contain the estimated simultaneous fd_tickcount() observation.
      84             : 
      85             :    If anything wonky is detected in the measurement, logs a warning and
      86             :    returns a best effort.  As this does multiple reads under the hood
      87             :    and uses only one of them, the observed value should be interpreted
      88             :    as at some point in time between when the call was made and when
      89             :    the call returned but not always at the same point (can be roughly
      90             :    modeled as uniformly distributed between when the call was made and
      91             :    when it returned).
      92             : 
      93             :    While this isn't particularly expensive, it isn't particularly cheap
      94             :    either.  Cost is on the order of a few calls to fd_log_wallclock() plus a
      95             :    few calls to fd_tickcount().  This is mostly meant for doing
      96             :    precision timing calibrations. */
      97             : 
      98             : long
      99             : fd_tempo_observe_pair( long * opt_now,
     100             :                        long * opt_tic );
     101             : 
     102             : /* fd_tempo_lazy_default returns a target interval between housekeeping
     103             :    events in ns (laziness) for a producer / consumer that has a maximum
     104             :    credits of cr_max / lag behind the producer of lag_max.
     105             : 
     106             :    To understand this default, note that a producer should receive / a
     107             :    consumer should transmit complete flow control credits at least as often
     108             :    as the time it takes a producer to exhaust all its credits / a slow
     109             :    consumer to process a worst case backlog of lag_max credits.
     110             :    Otherwise, the communications throughput will be limited by the rate
     111             :    credits can flow from consumer to producer rather than the rate the
     112             :    producer can publish / consumer can receive.  At the same time, we
     113             :    don't want to be too eager to return credits to avoid consumer to
     114             :    producer credit communications competing for NOC resources with
     115             :    producer to consumer communications.
     116             : 
     117             :    This implies we need to update all flow control information on a tile
     118             :    somewhat faster than:
     119             : 
     120             :      cr_max max( typical time it takes a producer to consume a credit,
     121             :                  typical time it takes a consumer to produce a credit )
     122             : 
     123             :    Practical considerations applied to this yield a useful conservative
     124             :    lower bound:
     125             : 
     126             :    Assume credits are network packets (as is often the case), the above
     127             :    times are the typical time it takes a producer / consumer to generate
     128             :    / process a packet.  Given a producer line-rating minimal sized
     129             :    Ethernet frames (672 bits) at 100G into a mcache / dcache and
     130             :    consumers that are keeping up with this producer (both highly
     131             :    unrealistically harsh situations in the real world as this implies
     132             :    Ethernet payloads much much smaller than typical real world payloads
     133             :    and a consumer that can process packets in just a handful of ns), the
     134             :    above suggests housekeeping done somewhat faster than:
     135             : 
     136             :      ~(cr_max pkt)(672 bit/pkt)/(100 bit/ns)
     137             : 
     138             :    will be adequate for all practical purposes. Given that the typical
     139             :    randomized housekeeping event will be at most ~1.5 lazy, we have:
     140             : 
     141             :      lazy < ~cr_max*672/100/1.5 ~ 4.48 cr_max ns
     142             : 
     143             :    We use 1+floor( 9*cr_max/4 ) ~ 2.25 cr_max to keep things simple.
     144             :    Note that while this might seem aggressive per credit, since
     145             :    cr_max is typically values in thousands to hundreds of thousands,
     146             :    this corresponds to default laziness in the tens of microseconds to
     147             :    milliseconds.  We also saturate cr_max to keep the returned value in
     148             :    [1,2^31) ns for all cr_max. */
     149             : 
     150             : FD_FN_CONST static inline long
     151          48 : fd_tempo_lazy_default( ulong cr_max ) {
     152          48 :   return fd_long_if( cr_max>954437176UL, (long)INT_MAX, (long)(1UL+((9UL*cr_max)>>2)) ); /* 954437176 is the largest cr_max for which 1+((9*cr_max)>>2) is still <= 2^31-1; anything larger is pinned to INT_MAX (assumes INT_MAX==2^31-1 and that fd_long_if selects its 2nd arg when the condition is true -- TODO confirm) */
     153          48 : }
     154             : 
     155             : /* fd_tempo_async_min picks a reasonable minimum interval in ticks
     156             :    between housekeeping events.  On success, returns positive integer
     157             :    power of two in [1,2^31].  On failure, returns zero (logs details).
     158             :    Reasons for failure include lazy is not in [1,2^31), event_cnt is not
     159             :    in [1,2^31), tick_per_ns is not in (0,~1.5e29), the combination would
     160             :    require an unreasonably small (sub-tick) or large (more than 2^31)
     161             :    async_min.
     162             : 
     163             :    More precisely, consider a run loop where event_cnt out-of-band
     164             :    housekeeping events are cyclically scheduled to be done with an IID
     165             :    uniform random interval between events in [async_min,2*async_min]
     166             :    ticks (as is commonly the case).  And suppose we need housekeeping
     167             :    to complete an event cycle roughly every lazy ns for system
     168             :    considerations.
     169             : 
     170             :    If we were to use a regularly scheduled interval between events (which
     171             :    is a stunningly bad idea in a distributed system and all too
     172             :    commonly done), we'd space housekeeping events by:
     173             : 
     174             :      async_target ~ tick_per_ns*lazy/event_cnt ticks
     175             : 
     176             :    where tick_per_ns is the conversion ratio to use between the
     177             :    wallclock and the tickrate of whatever counter is used to schedule
     178             :    housekeeping events.
     179             : 
     180             :    Consider using the largest integer power of two less than or equal to
     181             :    async_target for async_min.  In ns then, async_min will be at least
     182             :    ~0.5*lazy/event_cnt and at most lazy/event_cnt.  And since it takes,
     183             :    on average, 1.5*async_min*event_cnt to process a cycle, this value
     184             :    for async_min will yield an average cycle time of at least ~0.75*lazy
     185             :    in ns and at most ~1.5*lazy ns. */
     186             : 
     187             : ulong
     188             : fd_tempo_async_min( long  lazy,
     189             :                     ulong event_cnt,
     190             :                     float tick_per_ns );
     191             : 
     192             : /* fd_tempo_async_reload returns a quality random number very quickly in
     193             :    [async_min,2*async_min).  Assumes async_min is an integer power of 2
     194             :    in [1,2^31].  Consumes exactly 1 rng slot.  This is typically used to
     195             :    randomize the timing of background task processing to avoid auto
     196             :    synchronization anomalies while providing strong lower and
     197             :    upper bounds on the interval between processing background
     198             :    tasks. */
     199             : 
     200             : static inline ulong
     201             : fd_tempo_async_reload( fd_rng_t * rng,
     202   148602125 :                        ulong      async_min ) {
     203   148602125 :   return async_min + (((ulong)fd_rng_uint( rng )) & (async_min-1UL)); /* when async_min is a power of 2, async_min-1 is a mask of its low bits, so the masked draw lies in [0,async_min) and the sum in [async_min,2*async_min) */
     204   148602125 : }
     205             : 
     206             : FD_PROTOTYPES_END
     207             : 
     208             : #endif /* HEADER_fd_src_tango_tempo_fd_tempo_h */

Generated by: LCOV version 1.14