LCOV - code coverage report
Current view: top level - tango/fctl - fd_fctl.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 73 80 91.2 %
Date: 2024-11-13 11:58:15 Functions: 46 2260 2.0 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_tango_fctl_fd_fctl_h
       2             : #define HEADER_fd_src_tango_fctl_fd_fctl_h
       3             : 
       4             : /* fctl provides a set of APIs for general purpose, ultra flexible,
       5             :    ultra low overhead credit-based flow control.  That being said,
       6             :    backpressure is the worst thing in the world for a large scale
       7             :    distributed system.  So the system should be designed to use flow
       8             :    control exceedingly sparingly and limit the number of strictly
       9             :    reliable consumers needed in the system to, ideally, zero.  That is,
      10             :    the less this API gets used, the better the overall system is. */
      11             : 
      12             : #include "../fd_tango_base.h"
      13             : 
      14             : /* FD_FCTL_RX_MAX_MAX returns the largest number of receivers a fctl can
      15             :    be sized to accommodate.  Should be in [1,65535]. */
      16             : 
      17             : #define FD_FCTL_RX_MAX_MAX (65535UL)
      18             : 
      19             : /* FD_FCTL_{ALIGN,FOOTPRINT} specify the alignment and footprint needed
      20             :    for a fctl.  ALIGN will be positive integer power of 2.  FOOTPRINT
      21             :    assumes rx_max is in [0,FD_FCTL_RX_MAX_RX_MAX]. */
      22             : 
      23           0 : #define FD_FCTL_ALIGN (8UL)
      24             : #define FD_FCTL_FOOTPRINT( rx_max )                                         \
      25           9 :   FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT,       \
      26           9 :     alignof(fd_fctl_t),                     sizeof(fd_fctl_t)            ), \
      27           9 :     alignof(fd_fctl_private_rx_t), (rx_max)*sizeof(fd_fctl_private_rx_t) ), \
      28           9 :     FD_FCTL_ALIGN )
      29             : 
      30             : /* A fd_fctl_t is an opaque handle to an flow control object that can
      31             :    manage flow control on behalf of a transmitter for a zero or more
      32             :    (potentially dynamic) reliable (i.e. backpressure allowed) receivers. */
      33             : 
      34             : struct fd_fctl_private;
      35             : typedef struct fd_fctl_private fd_fctl_t;
      36             : 
      37             : /* Private APIs *******************************************************/
      38             : 
      39             : /* For the most part, applications should not interacting with these
      40             :    directly.  They are exposed to facilitate compile time inlining of
      41             :    flow control operations in performance critical loops. */
      42             : 
      43             : struct fd_fctl_private_rx {
      44             :   long          cr_max;     /* See fd_fctl_cfg_rx_add for details, should be positive */
      45             :   ulong const * seq_laddr;  /* ", NULL indicates an inactive rx */
      46             :   ulong *       slow_laddr; /* " */
      47             : };
      48             : 
      49             : typedef struct fd_fctl_private_rx fd_fctl_private_rx_t;
      50             : 
      51             : struct fd_fctl_private {
      52             :   ushort rx_max;    /* Maximum number of receivers for this fctl, in [0,FD_FCTL_RX_MAX_MAX] */
      53             :   ushort rx_cnt;    /* Current number of receivers for this fctl, in [0,rx_max] */
      54             :   int    in_refill; /* 0 / 1 if the flow control currently in a refilling state */
      55             :   ulong  cr_burst;  /* See fd_fctl_cfg_done for details, in [1,LONG_MAX] (not ULONG_MAX) */
      56             :   ulong  cr_max;    /* ", in [cr_burst,LONG_MAX] */
      57             :   ulong  cr_resume; /* ", in [cr_burst,cr_max  ] */
      58             :   ulong  cr_refill; /* ", In [1,cr_resume      ] */
      59             :   /* rx_max fd_fctl_private_rx_t array indexed [0,rx_max) follows.  Only
      60             :      elements [0,rx_cnt) are in use.  Only elements with non-NULL
      61             :      seq_laddr are currently allowed to backpressure this fctl. */
      62             : };
      63             : 
      64             : FD_PROTOTYPES_BEGIN
      65             : 
      66             : /* fd_fctl_rx returns a pointer to the fctl's rx array.  Assumes fctl is
      67             :    valid.  fd_fctl_rx_const is a const correct version. */
      68             : 
      69             : FD_FN_CONST static inline fd_fctl_private_rx_t *
      70        1154 : fd_fctl_private_rx( fd_fctl_t * fctl ) {
      71        1154 :   return (fd_fctl_private_rx_t *)(fctl+1UL);
      72        1154 : }
      73             : 
      74             : FD_FN_CONST static inline fd_fctl_private_rx_t const *
      75       31268 : fd_fctl_private_rx_const( fd_fctl_t const * fctl ) {
      76       31268 :   return (fd_fctl_private_rx_t const *)(fctl+1UL);
      77       31268 : }
      78             : 
      79             : FD_PROTOTYPES_END
      80             : 
      81             : /* Public APIs ********************************************************/
      82             : 
      83             : FD_PROTOTYPES_BEGIN
      84             : 
      85             : /* Constructor APIs */
      86             : 
      87             : /* fd_fctl_{align,footprint} return the required alignment and footprint
      88             :    of a memory region suitable for use as fctl that can manage flow control
      89             :    for up to rx_max reliable consumers.  align returns FD_FCTL_ALIGN
      90             :    (will be a power of two).  rx_max should be in [0,FCTL_RX_MAX_MAX].
      91             :    If not, footprint will silently return 0 (and thus can be used by the
      92             :    caller to validate rx_max configuration parameters). */
      93             :    
      94             : FD_FN_CONST static inline ulong
      95           0 : fd_fctl_align( void ) {
      96           0 :   return FD_FCTL_ALIGN;
      97           0 : }
      98             : 
      99             : FD_FN_CONST static inline ulong
     100          12 : fd_fctl_footprint( ulong rx_max ) {
     101          12 :   if( FD_UNLIKELY( rx_max>FD_FCTL_RX_MAX_MAX ) ) return 0UL;
     102           9 :   return FD_FCTL_FOOTPRINT( rx_max );
     103          12 : }
     104             : 
     105             : /* fd_fctl_new takes ownership of the memory region pointed to by shmem
     106             :    (which is assumed to be non-NULL with the appropriate alignment and
     107             :    footprint) and formats it as fctl.  Typically this memory region will
     108             :    just be local to the user though it can be placed in a shared region
     109             :    to allow remote monitors to inspect / modify various portions of it,
     110             :    facilitate hotswapping, etc.  Returns mem on success and NULL on
     111             :    failure (logs details).  The fctl will be initialized to an
     112             :    unconfigured state with zero receivers attached to it on success
     113             :    return.  Reasons for failure include an obviously bad shmem region
     114             :    and too large rx_max.
     115             :    
     116             :    fd_fctl_join joins the caller to a memory region holding the state of
     117             :    a fctl.  shfctl points to a memory region in the local address space
     118             :    that holds a fctl.  Returns an opaque handle of the local join in the
     119             :    local address space to the fctl (which might not be the same thing as
     120             :    shfctl ... the separation of new and join is to facilitate
     121             :    interprocess shared memory usage patterns while supporting
     122             :    transparent upgrade of users of this to more elaborate algorithms
     123             :    where address translations under the hood may not be trivial).
     124             : 
     125             :    fd_fctl_leave leaves the current fctl join.  Returns a pointer in the
     126             :    local address space to the memory region holding the state of the
     127             :    fctl.  The join should not be used afterward.
     128             : 
     129             :    fd_fctl_delete unformats the memory region currently used to hold the
     130             :    state of a fctl and returns ownership of the underlying memory region
     131             :    to the caller.  There should be no joins in the system on the fctl.
     132             :    Returns a pointer to the underlying memory region. */
     133             : 
     134             : void *
     135             : fd_fctl_new( void * shmem,
     136             :              ulong  rx_max );
     137             : 
     138           9 : static inline fd_fctl_t * fd_fctl_join  ( void *      shfctl ) { return (fd_fctl_t *)shfctl; }
     139           9 : static inline void *      fd_fctl_leave ( fd_fctl_t * fctl   ) { return (void *)fctl;        }
     140           9 : static inline void *      fd_fctl_delete( void *      shfctl ) { return shfctl;              }
     141             : 
     142             : /* fd_fctl_cfg_rx_add adds flow control details for a receiver to the
     143             :    given fctl.  Assumes the fctl configuration is not yet complete
     144             :    (FIXME: CONSIDER EXPLICITLY VERIFYING THIS).  Each receiver is
     145             :    assigned an index starting from zero sequentially as they are added.
     146             :    This index is used to communicate diagnostic information to the user
     147             :    (e.g. see fd_fctl_cr_query rx_idx_slow).
     148             : 
     149             :    cr_max is how many credits are safe for the transmitter to burst to
     150             :    the receiver when this receiver is fully caught up.  Should be in
     151             :    [1,LONG_MAX] (not ULONG_MAX).
     152             :    
     153             :    seq_laddr is the location in the user's local address space where the
     154             :    user can query a lower bound of where this receiver is currently at
     155             :    in the underlying sequence space.  The user is guaranteed that the
     156             :    receiver has processed all sequence numbers strictly before
     157             :    *seq_laddr.  NULL is fine here (the fctl will ignore this receiver
     158             :    until it is set, potentially post configuration).
     159             : 
     160             :    slow_laddr is the location in the user's local address space where
     161             :    the fctl should accumulate statistics for which receiver is running
     162             :    the slowest.  It is fine if other events are accumulated to this
     163             :    location (e.g. the user doesn't need ultra fine grained diagnostics).
     164             : 
     165             :    Returns fctl on success and NULL on failure (logs details).  Reasons
     166             :    for failure include NULL fctl, NULL slow_laddr, too many fctl
     167             :    receivers, too small cr_max, too large cr_max. */
     168             : 
     169             : fd_fctl_t *
     170             : fd_fctl_cfg_rx_add( fd_fctl_t *   fctl,
     171             :                     ulong         cr_max,
     172             :                     ulong const * seq_laddr,
     173             :                     ulong *       slow_laddr );
     174             : 
     175             : /* fd_fctl_cfg_done completes the configuration of a flow control.
     176             :    Assumes the fctl configuration is not yet complete (FIXME: CONSIDER
     177             :    EXPLICITLY VERIFYING THIS?).
     178             : 
     179             :    cr_burst is the maximum number of credits a transmitter will use in a
     180             :    burst (i.e. deduct from its credits available before checking that it
     181             :    still had credits available again, e.g. MTU for Ethernet-like
     182             :    protocols, MSS for a byte oriented TCP-like protocols, number of
     183             :    slots in a batch for slot oriented protocols like a batch of frag
     184             :    metadata, etc).  Should be in [1,cr_burst_max] where cr_burst_max is
     185             :    min(rx[:].cr_max) and LONG_MAX when there are no receivers.
     186             : 
     187             :    cr_max is an upper bound of the number of credits a transmitter can
     188             :    have (e.g. how many credits a transmitter should get from a query
     189             :    when there are no active receivers).  Should be in
     190             :    [cr_burst,LONG_MAX].  0 indicates to pick a reasonable default for
     191             :    the configured receivers (e.g. cr_burst_max).  This limit is mostly
     192             :    for dynamic configured flow control situations (e.g. it provides an
     193             :    upper limit to how lazy the transmitter can be with flow control
     194             :    operations).
     195             : 
     196             :    cr_resume is the credit threshold (in a >= sense) for the fctl to
     197             :    stop trying to refill credits from active receivers.  Should be in
     198             :    [cr_burst,actual_cr_max] where actual_cr_max is the value determined
     199             :    above.  0 indicates to pick a reasonable default (e.g.
     200             :    cr_burst+floor((2/3)(actual_cr_max-cr_burst)).
     201             : 
     202             :    cr_refill is the credit threshold (in a < sense) for the fctl to
     203             :    start trying to refill its credits from active receivers.  Should be
     204             :    in [cr_burst,actual_cr_resume] where actual_cr_resume is the value
     205             :    determined above.  0 indicates to pick a reasonable default (e.g.
     206             :    cr_burst+floor((1/2)(actual_cr_resume-cr_burst)).
     207             : 
     208             :    Returns fctl on success (fctl configuration will be complete on
     209             :    return) and NULL on failure (logs details).  Reasons for failure
     210             :    include NULL fctl, too large cr_max, cr_resume, cr_refill.
     211             : 
     212             :    TL;DR Just say zeros for cr_{max,resume,refill} if you don't
     213             :    understand the above and this will usually do something reasonable. */
     214             : 
     215             : fd_fctl_t *
     216             : fd_fctl_cfg_done( fd_fctl_t * fctl,
     217             :                   ulong       cr_burst,
     218             :                   ulong       cr_max,
     219             :                   ulong       cr_resume,
     220             :                   ulong       cr_refill );
     221             : 
     222             : /* Accessor APIs */
     223             : 
     224             : /* fd_fctl_{rx_max,rx_cnt,
     225             :             cr_burst,cr_max,cr_resume,cr_refill,
     226             :             rx_cr_max,rx_seq_laddr,
     227             :             rx_seq_slow_laddr,rx_seq_slow_laddr_const}
     228             :    return the values configured during fctl initialization.
     229             : 
     230             :    These assume fctl is a local join to a configured fctl.  rx_cr_max,
     231             :    rx_seq_laddr, rx_slow_laddr and rx_slow_laddr_const further assume
     232             :    rx_idx is in [0,rx_cnt).  slow_laddr_const is a const-correct
     233             :    version of rx_slow_laddr.
     234             :    
     235             :    (FIXME: CONSIDER ACCESSES FOR DISTINGUISHING WHETHER CR_MAX /
     236             :    CR_RESUME / CR_REFILL WERE AUTOCONFIGURED.  EXPOSE IN_REFILL?
     237             :    GET/SET RX_SEQ_LADDR DYNAMICALLY?  GET/SET IN_REFILL?) */
     238             : 
     239           3 : FD_FN_PURE static inline ulong fd_fctl_rx_max   ( fd_fctl_t const * fctl ) { return (ulong)fctl->rx_max; }
     240           3 : FD_FN_PURE static inline ulong fd_fctl_rx_cnt   ( fd_fctl_t const * fctl ) { return (ulong)fctl->rx_cnt; }
     241          12 : FD_FN_PURE static inline ulong fd_fctl_cr_burst ( fd_fctl_t const * fctl ) { return fctl->cr_burst;      }
     242          15 : FD_FN_PURE static inline ulong fd_fctl_cr_max   ( fd_fctl_t const * fctl ) { return fctl->cr_max;        }
     243          15 : FD_FN_PURE static inline ulong fd_fctl_cr_resume( fd_fctl_t const * fctl ) { return fctl->cr_resume;     }
     244          15 : FD_FN_PURE static inline ulong fd_fctl_cr_refill( fd_fctl_t const * fctl ) { return fctl->cr_refill;     }
     245             : 
     246             : FD_FN_PURE static inline ulong
     247             : fd_fctl_rx_cr_max( fd_fctl_t const * fctl,
     248          18 :                    ulong             rx_idx ) {
     249          18 :   return (ulong)fd_fctl_private_rx_const( fctl )[rx_idx].cr_max;
     250          18 : }
     251             : 
     252             : FD_FN_PURE static inline ulong const *
     253             : fd_fctl_rx_seq_laddr( fd_fctl_t const * fctl,
     254         105 :                       ulong             rx_idx ) {
     255         105 :   return fd_fctl_private_rx_const( fctl )[rx_idx].seq_laddr;
     256         105 : }
     257             : 
     258             : FD_FN_PURE static inline ulong *
     259             : fd_fctl_rx_slow_laddr( fd_fctl_t * fctl,
     260         777 :                        ulong       rx_idx ) {
     261         777 :   return fd_fctl_private_rx( fctl )[rx_idx].slow_laddr;
     262         777 : }
     263             : 
     264             : FD_FN_PURE static inline ulong const *
     265             : fd_fctl_rx_slow_laddr_const( fd_fctl_t const * fctl,
     266           0 :                              ulong             rx_idx ) {
     267           0 :   return fd_fctl_private_rx_const( fctl )[rx_idx].slow_laddr;
     268           0 : }
     269             : 
     270             : /* fd_fctl_rx_cr_return updates users of _rx_seq flow control (e.g. from
     271             :    rx_seq_laddr above) the position of the receiver in sequence space
     272             :    (in the sense that the receiver has consumed all sequence numbers
     273             :    strictly before rx_seq cyclic).  This should be done moderately
     274             :    frequently (e.g. in background housekeeping) after the receiver has
     275             :    moved forward in sequence space since the last update and should be
     276             :    monotonically non-decreasing.  Even more aggressively is usually
     277             :    fine.  This also should be done when the receiver is shutdown to
     278             :    facilitate cleanly restarting a consumer and what not.  This also
     279             :    serves as a compiler memory fence to ensure credits are returned at a
     280             :    well defined point in the instruction stream (e.g. so that compiler
     281             :    doesn't move any loads that might be clobbered by the return to after
     282             :    the return). */
     283             : 
     284             : static inline void
     285             : fd_fctl_rx_cr_return( ulong * _rx_seq,
     286    17317027 :                       ulong   rx_seq ) {
     287    17317027 :   FD_COMPILER_MFENCE();
     288    17317027 :   FD_VOLATILE( *_rx_seq ) = rx_seq;
     289    17317027 :   FD_COMPILER_MFENCE();
     290    17317027 : }
     291             : 
     292             : /**********************************************************************/
     293             : 
     294             : /* fd_fctl_cr_query returns a lower bound of the number of credits
     295             :    available to a transmitter that has published up to but NOT INCLUDING
     296             :    tx_seq.  Will be in [0,cr_max].  On return, *_rx_idx_slow will
     297             :    contain the lowest indexed receiver that constrained the result or
     298             :    ULONG_MAX if the query result wasn't receiver constrained.
     299             : 
     300             :    This involves interthread communication so should be used sparingly.
     301             :    
     302             :    FIXME: DO AS A MULTIPLE RETURN MACRO TO FORCE INLINE AND AVOID
     303             :    RX_IDX_SLOW MEMORY USAGE?  DO AS A NON-INLINED CALL?  OPTIMUM
     304             :    PROBABLY DEPENDS ON USE CASES. */
     305             : 
     306             : FD_FN_UNUSED static ulong /* Work around -Winline */
     307             : fd_fctl_cr_query( fd_fctl_t const * fctl,
     308             :                   ulong             tx_seq,
     309       31145 :                   ulong *           _rx_idx_slow ) {
     310       31145 :   fd_fctl_private_rx_t const * rx     = fd_fctl_private_rx_const( fctl );
     311       31145 :   ulong                        rx_cnt = (ulong)fctl->rx_cnt;
     312             : 
     313       31145 :   ulong cr_query    = fctl->cr_max;
     314       31145 :   ulong rx_idx_slow = ULONG_MAX;
     315             : 
     316             :   /* Note: it is possible to do this in vectorized / in parallel */
     317             : 
     318             :   /* Note that the rx_cr_query calc is robust against overflow /
     319             :      misconfigured receivers / etc.  Let delta by the number of sequence
     320             :      numbers that that the transmitter is ahead of the receiver and note
     321             :      that the calc can be written as rx_cr_query=max(rx_cr_test,0) where
     322             :      rx_cr_test is rx_cr_max-max(delta,0).
     323             : 
     324             :      In normal operation, delta is in [0,rx_cr_max] (i.e. the
     325             :      transmitter is at or ahead of the receiver but has not overrun the
     326             :      receiver).  Then max(delta,0)==delta and is in [0,rx_cr_max].  As
     327             :      such, rx_cr_test is in [0,rx_cr_max] and the result is thus also in
     328             :      [0,rx_cr_max] (so no overflow).  The result here is a lower bound
     329             :      of the credits the transmitter can use without overrunning the
     330             :      receiver.
     331             : 
     332             :      Suppose, the transmitter has or appears to have overrun the
     333             :      receiver (e.g. the transmitter initialized with a sequence number
     334             :      well ahead of the receiver).  Then delta is in
     335             :      (rx_cr_max,LONG_MAX].  max(delta,0)==delta and is in
     336             :      (rx_cr_max,LONG_MAX].  As such, in exact arithmetic, rx_cr_test is
     337             :      in [rx_cr_max-LONG_MAX,0).  But since rx_cr_max was restricted to
     338             :      be in [1,LONG_MAX] on initialization, this result is thus in
     339             :      [-LONG_MAX+1,0) and is computed without overflow (note that
     340             :      LONG_MIN==-LONG_MAX-1).  Since this situation always produces a
     341             :      negative cr_test result, rx_cr_query will be capped at 0.  This
     342             :      correctly indicates that the transmitter cannot send anything
     343             :      because it would exacerbate the already overrun receiver.
     344             : 
     345             :      Conversely, suppose the receiver appears to be ahead of the
     346             :      transmitter (e.g. the receiver was initialized with a sequence
     347             :      number ahead of the transmitter).  Then delta is in [LONG_MIN,0).
     348             :      And max(delta,0)==0 such that rx_cr_test = rx_cr_query = rx_cr_max.
     349             :      The result here in conservative lower bound of the credits the
     350             :      transmitter can use.  Conservative in the sense that it has been
     351             :      capped by rx_cr_max even though the value advertised by the
     352             :      receiver in principle would allow more (as a receiver getting ahead
     353             :      of the transmitter is a good sign of some breakage though, this
     354             :      situation strongly suggests the rx_seq value shouldn't be trusted
     355             :      such that rx_cr_max is a fallback in this case). */
     356             : 
     357      529387 :   for( ulong rx_idx=0UL; rx_idx<rx_cnt; rx_idx++ ) {
     358      498242 :     ulong const * _rx_seq = rx[ rx_idx ].seq_laddr;
     359      498242 :     if( FD_UNLIKELY( !_rx_seq ) ) continue; /* Skip inactive rx */
     360             : 
     361      498242 :     ulong rx_seq      = FD_VOLATILE_CONST( *_rx_seq );
     362      498242 :     ulong rx_cr_query = (ulong)fd_long_max( rx[ rx_idx ].cr_max - fd_long_max( fd_seq_diff( tx_seq, rx_seq ), 0L ), 0L );
     363      498242 :     rx_idx_slow       = fd_ulong_if( rx_cr_query<cr_query, rx_idx, rx_idx_slow );
     364      498242 :     cr_query          = fd_ulong_min( rx_cr_query, cr_query );
     365      498242 :   }
     366             : 
     367       31145 :   _rx_idx_slow[0] = rx_idx_slow;
     368       31145 :   return cr_query;
     369       31145 : }
     370             : 
     371             : /* fd_fctl_tx_cr_update returns the new number of credits available to
     372             :    a transmitter given the current number of credits available to that
     373             :    transmitter and the transmitter's position in sequence space.
     374             : 
     375             :    The vast majority of flow control scenarios (even incredibly
     376             :    intricate dynamic heterogeneous multiple consumer) can typically be
     377             :    handled with just this one call in the transmitter's run loop.
     378             : 
     379             :    It assumes that reliable receivers are updating their position in
     380             :    sequence space moderately frequently, the transmitter and receivers
     381             :    have reasonably large credit maxes (to provide a lot of tolerance for
     382             :    temporary receiver lagging and enough flexibility to keep the vast
     383             :    majority of flow credit management out of the critical path) and that
     384             :    reliable receivers asymptotically can keep up with the transmitter on
     385             :    average (which is implied anyway as otherwise the overall system is
     386             :    inherently doomed).
     387             :    
     388             :    What a flow control credit means depends on usage.  Typical cases
     389             :    number of bytes for TCP streaming like protocols, number of packet
     390             :    slots for more bounded size packet oriented protocols, etc.)  Each
     391             :    flow control credit is good for the transmitter to consume exactly
     392             :    one sequence number.
     393             :    
     394             :    Typically, on initialization / after first housekeeping, the
     395             :    transmitter has a huge number of flow control credits.
     396             :    
     397             :    Usually, while the transmitter is consuming these credits, it is not
     398             :    bothering the receivers at all.
     399             : 
     400             :    When the transmitter gets to below around ~(1/3) cr_max of credits
     401             :    available, the fctl will query receivers on its behalf to refresh the
     402             :    number of credits available.  If all is well (receivers are staying
     403             :    close to caught up with the transmitters ... typically within (1/3)
     404             :    cr_max of the transmitter), this query quickly returns and the
     405             :    transmitter will be back to having a huge number of credits.  Since
     406             :    there is lots of margin between credit exhaustion and the start of
     407             :    trying to refill, this querying doesn't need to be in transmitter's
     408             :    critical path.
     409             : 
     410             :    As such, when receivers are keeping up, fctl will only rarely do
     411             :    rx->tx communications (roughly every ~2/3 cr_max sequence numbers
     412             :    sent).  And, when receivers aren't keeping up, having the actual
     413             :    update call running asynchronously out of the critical path (e.g.
     414             :    in a housekeeping loop) prevents the fctl from flooding the system
     415             :    with tons of flow control communications.  This can further degrade
     416             :    the performance of already slow receivers, exacerbating the slow down
     417             :    and causing congestion collapse like issues on modern CPU NOCs.
     418             : 
     419             :    Related, using distinct and well separated refill and resume
     420             :    thresholds prevents performance degrading start / stop situations
     421             :    (e.g. if resume and refill thresholds are very close, the situation
     422             :    can arise where the fctl falls below, triggers a query, the receivers
     423             :    are just above, triggering another round of flow control
     424             :    communications shortly thereafter and another and another ...).  This
     425             :    behavior can cause a big degradation in system performance due to the
     426             :    same flow control communications flooding.  The typical spacing here
     427             :    guarantees that transmitter only resume sending once it is sure it
     428             :    will be able to send ~1/3 cr_max sequence numbers before querying the
     429             :    receivers again.
     430             : 
     431             :    Analogously, receives don't need to do flow control sequence number
     432             :    updates in their critical paths.  But, even if done there, because it
     433             :    is infrequently queried by the transmitter, it typically will be an
     434             :    ultra fast L1 store cache hit with minimal performance implications.
     435             : 
     436             :    TL;DR
     437             : 
     438             :      Example reliable receiver init:
     439             : 
     440             :        ...
     441             :        ulong * fctl_seq = ... location in receivers address space of receiver flow control (ideally near the receiver)
     442             :        ...
     443             :        ulong   rx_seq = ... next sequence number this receiver expects from the transmitter
     444             :        ...
     445             : 
     446             :      Example reliable receiver run loop (lots of variants with different
     447             :      tradeoffs possible):
     448             : 
     449             :        ...
     450             :        if( ... time for housekeeping ... ) {
     451             :          ...
     452             :          FD_VOLATILE( fctl_seq[0] ) = rx_seq; // Update the transmitter and monitors where we are at
     453             :          // It is fine to be quite aggressive about this as this is
     454             :          // should be a L1 cache hit store the vast majority of the time.
     455             :          ...
     456             :        }
     457             :        ...
     458             : 
     459             :        rx_cnt = ... receive from transmitter starting at rx_seq
     460             : 
     461             :        // At this point, we just received [rx_seq,rx_seq+rx_cnt).  Process
     462             :        // these.  When we are ready, tell the transmitter we are fine for
     463             :        // the transmitter to move on.
     464             : 
     465             :        rx_seq = fd_seq_inc( rx_seq, rx_cnt );
     466             : 
     467             :        ...
     468             :    
     469             :      Example transmitter init:
     470             : 
     471             :        ...
     472             :        fd_fctl_t * fctl = ... setup flow control for all reliable receivers ...
     473             :        ...
     474             :        ulong tx_seq = ... first sequence number of the next transmission ...
     475             :        ...
     476             :        ulong cr_avail = 0UL; // Will learn this on the first housekeeping
     477             :        ...
     478             : 
     479             :      Example transmitter run loop (lots of variants with different
     480             :      tradeoffs possible):
     481             : 
     482             :          ...
     483             :          if( ... time for housekeeping ... ) {
     484             :            ...
     485             :            cr_avail = fd_fctl_cr_update( fctl, cr_avail, tx_seq );
     486             :            ...
     487             :          }
     488             :          ...
     489             : 
     490             :          // If we don't have enough credits to handle the the size of a
     491             :          // worst case burst, wait while still doing housekeeping in the
     492             :          // background to keep flow control credits flowing and keep
     493             :          // monitoring / command-and-control operating.
     494             : 
     495             :          if( FD_UNLIKELY( cr_avail < cr_burst ) continue;
     496             : 
     497             :          ...
     498             :          ulong tx_cnt = ... determine how much we are ready to send
     499             :                         ... will be in [0,cr_burst]
     500             :          ... send [tx_seq,tx_seq+tx_seq_cnt) to receivers
     501             :          tx_seq    = fd_seq_inc( tx_seq, tx_cnt );
     502             :          cr_avail -= tx_cnt; // guaranteed not to underflow
     503             :          ...
     504             : */
     505             : 
     506             : static inline ulong
     507             : fd_fctl_tx_cr_update( fd_fctl_t * fctl,
     508             :                       ulong       cr_avail,
     509       36127 :                       ulong       tx_seq ) {
     510             : 
     511       36127 :   int in_refill = fctl->in_refill;
     512             : 
     513       36127 :   if( FD_UNLIKELY( (cr_avail<fctl->cr_refill) | in_refill ) ) { /* Yes, strictly "<" */
     514             : 
     515             :     /* The number of credits available has just dropped below the
     516             :        transmitters refill threshold (such that we should enter the
     517             :        refilling state) or the transmitter is already in the refilling
     518             :        state ... query the receivers for the number of credits that
     519             :        might be available. */
     520             :  
     521       31142 :     ulong rx_idx_slow;
     522       31142 :     ulong cr_query = fd_fctl_cr_query( fctl, tx_seq, &rx_idx_slow );
     523             : 
     524       31142 :     if( FD_LIKELY( cr_query>=fctl->cr_resume ) ) { /* Yes, strictly ">=" */
     525             :     
     526             :       /* We got enough credits to resume.  Update the credits available
     527             :          and exit the refilling state. */
     528             : 
     529         260 :       fctl->in_refill = 0;
     530         260 :       cr_avail = cr_query;
     531             : 
     532       30882 :     } else if( FD_LIKELY( !in_refill ) ) {
     533             : 
     534             :       /* We didn't get enough credits to resume and we are just entering
     535             :          the refilling state.  Attribute this event to the slowest
     536             :          receiver (i.e. rx_idx_low is likely limiting system performance
     537             :          / potentially causing backpressure / etc).  We don't bother
     538             :          with a proper atomic increment as this is just diagnostic;
     539             :          exact precision is not required for correctness (also, if there
     540             :          is no multiplexing of the rx's slow counter with counters on
     541             :          different threads, as is often the case, it will be exact
     542             :          anyway).  Note that because the refilling is typically
     543             :          triggered well before the transmitter is actual not able to
     544             :          send, this counter should be more thought of as a "rx_idx_slow
     545             :          might be a source of stalls on the transmitter" as opposed to
     546             :          "rx_idx_slow caused a stall". */
     547             : 
     548         263 :       if( FD_LIKELY( rx_idx_slow!=ULONG_MAX ) ) {
     549         263 :         ulong * slow = fd_fctl_private_rx( fctl )[ rx_idx_slow ].slow_laddr;
     550         263 :         FD_COMPILER_MFENCE();
     551         263 :         slow[0] += 1UL;
     552         263 :         FD_COMPILER_MFENCE();
     553         263 :       }
     554         263 :       fctl->in_refill = 1;
     555             : 
     556         263 :     } /* else {
     557             : 
     558             :       // We didn't get enough credits to resume and we were already
     559             :       // in the refilling state.  So, nothing to update.
     560             : 
     561             :     } */
     562             : 
     563       31142 :   }
     564             : 
     565       36127 :   return cr_avail;
     566       36127 : }
     567             : 
     568             : FD_PROTOTYPES_END
     569             : 
     570             : #endif /* HEADER_fd_src_tango_fctl_fd_fctl_h */

Generated by: LCOV version 1.14