LCOV - code coverage report
Current view: top level - tango/cnc - fd_cnc.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 48 48 100.0 %
Date: 2025-01-08 12:08:44 Functions: 30 1090 2.8 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_tango_cnc_fd_cnc_h
       2             : #define HEADER_fd_src_tango_cnc_fd_cnc_h
       3             : 
       4             : #include "../fd_tango_base.h"
       5             : 
       6             : /* A fd_cnc_t provides APIs for out-of-band low bandwidth
       7             :    command-and-control ("cnc") signals to a high performance app thread
       8             :    ("app").  In the app thread's run loop, as part of its out-of-band
       9             :    housekeeping, it uses the fd_cnc_t object dedicated to it to send and
      10             :    receive information from command and control threads and/or monitoring
      11             :    threads.  The basic template for an fd_cnc_t state machine is:
      12             : 
      13             :                         app
      14             :            new +---------<----------+
      15             :             |  |                    |
      16             :             |  |        +---<----+  |
      17             :         cnc v  v        |  app   |  ^
      18             :             |  |        v        ^  |
      19             :             |  |  app   |  cnc   |  |
      20             :      +--<-- BOOT -->-- RUN -->-- USER -->---+
      21             :      | cnc  |  |        |        |  |       |
      22             :      |      |  ^        v cnc    |  ^       v
      23             :      |      v  | app    |        |  |       |
      24             :      |      |  +--<-- HALT       |  +---<---+
      25             :      v      |           |        v   app/cnc
      26             :      |      +---->----+ |        |
      27             :      |          app   | v app    |
      28             :      |                v |        |
      29             :      |                | |        |
      30             :    delete -----<----- FAIL ---<--+
      31             :               cnc           app
      32             : 
      33             :   That is, when a cnc is created, it is in the BOOT state and the app
      34             :   thread that uses it is not running.  When the app thread starts
      35             :   running and finishes booting up, it should transition the cnc to the
      36             :   RUN state if the thread started up successfully or the FAIL state if
      37             :   booting failed (the thread is not running and is considered to be
      38             :   unsafe to try restarting).  While in the RUN state:
      39             : 
      40             :   - If a cnc thread raises a HALT signal on the app thread's cnc, the
      41             :     app thread should cleanup after itself and, just before it stops
      42             :     running, transition its cnc to BOOT (FAIL) if the app thread can
      43             :     (cannot) be booted again safely.
      44             : 
      45             :   - If a cnc thread raises a USER defined signal, the app thread should
      46             :     process the signal.  If the app thread resumes running after
      47             :     processing the signal, it should transition its cnc to RUN.  If
      48             :     this processing results in termination of the app thread, just before
      49             :     it stops running, it should transition its cnc to BOOT (FAIL) if the
      50             :     app thread can (cannot) be booted again safely.  Note that the cnc
      51             :     state alone does not indicate if a USER defined signal was processed
      52             :     successfully (e.g. the app thread might choose to ignore a malformed
      53             :     command, log the details, and then resume running).  For such
      54             :     information, the application can encode additional inputs and
      55             :     outputs regarding commands in the cnc app region.  USER defined to
      56             :     USER defined transitions (either driven by the app thread as it
      57             :     processes a complex signal or by a back-and-forth interaction with
      58             :     the cnc thread) are fine and up to the application to define.
      59             : 
      60             :   The only thing that can be done to an app thread in the FAIL state is
      61             :   postmortem autopsies and clean up.  An app thread should not do a
      62             :   RUN->FAIL transition, even if it dies in the RUN state.  If a thread
      63             :   dies while in the RUN state, note that the cnc has a heartbeat to help
      64             :   cnc threads and/or monitor threads detect such without needing to open
      65             :   a cnc command session (specific heartbeating conventions are
      66             :   application defined).
      67             : 
      68             :   It is often useful to have one USER defined signal to be a no-op
      69             :   "ACK".  For this, a cnc thread can signal ACK to the app thread.  If
      70             :   the cnc returns to RUN reasonably promptly, the app thread has self
      71             :   reported to the cnc thread it is operating correctly.  If it doesn't
      72             :   (i.e. times out), the cnc thread can forcibly terminate the app
      73             :   thread, move the cnc post termination into the FAIL state and then
      74             :   proceed like a normal FAIL.
      75             : 
      76             :   A cnc has an application defined type field to help applications
      77             :   distinguish between what USER defined signals might be supported by a
      78             :   particular app thread. */
      79             : 
      80             : /* FD_CNC_{ALIGN,FOOTPRINT} describe the alignment and footprint of a
      81             :    fd_cnc_t.  ALIGN is a positive integer power of 2.  FOOTPRINT is a
      82             :    multiple of ALIGN.  ALIGN is recommended to be at least double cache
      83             :    line to mitigate various kinds of false sharing.  app_sz is assumed
      84             :    to be valid (e.g. will not require a footprint larger than
      85             :    ULONG_MAX).  These are provided to facilitate compile time
      86             :    declarations. */
      87             : 
      88          63 : #define FD_CNC_ALIGN (128UL)
      89             : #define FD_CNC_FOOTPRINT( app_sz )                                    \
      90         117 :   FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT, \
      91         117 :     FD_CNC_ALIGN,     64UL     ),                                     \
      92         117 :     FD_CNC_APP_ALIGN, (app_sz) ),                                     \
      93         117 :     FD_CNC_ALIGN )
      94             : 
      95             : /* FD_CNC_APP_ALIGN describes the alignment of a fd_cnc_t's
      96             :    application region.  This is a power of 2 that is at least the
      97             :    minimal malloc alignment (typically 8) and at most FD_CNC_ALIGN. */
      98             : 
      99             : #define FD_CNC_APP_ALIGN (64UL)
     100             : 
     101             : /* FD_CNC_SIGNAL_* are the standard cnc signals.  All remaining values
     102             :    ([4,ULONG_MAX]) are available to implement user defined signals.
     103             :    Details of the standard signals are provided above. */
     104             : 
     105         303 : #define FD_CNC_SIGNAL_RUN  (0UL)
     106         273 : #define FD_CNC_SIGNAL_BOOT (1UL)
     107           6 : #define FD_CNC_SIGNAL_FAIL (2UL)
     108         411 : #define FD_CNC_SIGNAL_HALT (3UL)
     109             : 
     110             : /* FD_CNC_SUCCESS, FD_CNC_ERR_* are error code return values used by
     111             :    cnc APIs.  SUCCESS must be zero, ERR_* are negative and distinct. */
     112             : 
     113         108 : #define FD_CNC_SUCCESS   (0)  /* success */
     114           3 : #define FD_CNC_ERR_UNSUP (-1) /* unsupported on this caller */
     115           3 : #define FD_CNC_ERR_INVAL (-2) /* bad inputs */
     116           3 : #define FD_CNC_ERR_AGAIN (-3) /* potentially transient failure */
     117           3 : #define FD_CNC_ERR_FAIL  (-4) /* permanent failure */
     118             : 
     119             : /* fd_cnc_t is an opaque handle of a command-and-control object.
     120             :    Details are exposed here to facilitate inlining of many cnc
     121             :    operations in performance critical app thread paths. */
     122             : 
     123          57 : #define FD_CNC_MAGIC (0xf17eda2c37c2c000UL) /* firedancer cnc ver 0 */
     124             : 
     125             : struct __attribute__((aligned(FD_CNC_ALIGN))) fd_cnc_private {
     126             :   ulong magic;     /* ==FD_CNC_MAGIC */
     127             :   ulong app_sz;
     128             :   ulong type;
     129             :   long  heartbeat0;
     130             :   long  heartbeat;
     131             :   ulong lock;
     132             :   ulong signal;
     133             :   /* Padding to FD_CNC_APP_ALIGN here */
     134             :   /* app_sz bytes here */
     135             :   /* Padding to FD_CNC_ALIGN here */
     136             : };
     137             : 
     138             : typedef struct fd_cnc_private fd_cnc_t;
     139             : 
     140             : FD_PROTOTYPES_BEGIN
     141             : 
     142             : /* fd_cnc_{align,footprint} return the required alignment and footprint
     143             :    of a memory region suitable for use as a cnc.  fd_cnc_align returns
     144             :    FD_CNC_ALIGN.  If footprint is larger than ULONG_MAX, footprint will
     145             :    silently return 0 (and thus can be used by the caller to validate the
     146             :    cnc configuration parameters). */
     147             : 
     148             : FD_FN_CONST ulong
     149             : fd_cnc_align( void );
     150             : 
     151             : FD_FN_CONST ulong
     152             : fd_cnc_footprint( ulong app_sz );
     153             : 
     154             : /* fd_cnc_new formats an unused memory region for use as a cnc.  Assumes
     155             :    shmem is a non-NULL pointer to this region in the local address space
     156             :    with the required footprint and alignment.  The cnc will be
     157             :    initialized to have the given type (should be in [0,UINT_MAX]) with
     158             :    an initial heartbeat of now.  The cnc application region will be
     159             :    initialized to zero.  Returns shmem on success (the memory region it
     160             :    points to will be formatted as a cnc, caller is not joined) and NULL
     161             :    on failure (logs details).  Reasons for failure include an obviously bad
     162             :    shmem region or app_sz. */
     163             : 
     164             : void *
     165             : fd_cnc_new( void * shmem,
     166             :             ulong  app_sz,
     167             :             ulong  type,
     168             :             long   now );
     169             : 
     170             : /* fd_cnc_join joins the caller to the cnc.  shcnc points to the first
     171             :    byte of the memory region backing the cnc in the caller's address
     172             :    space.  Returns a pointer in the local address space to the cnc on
     173             :    success (this should not be assumed to be just a cast of shcnc) or
     174             :    NULL on failure (logs details).  Reasons for failure include the
     175             :    shcnc is obviously not a local pointer to a memory region holding a
     176             :    cnc.  Every successful join should have a matching leave.  The
     177             :    lifetime of the join is until the matching leave or caller's thread
     178             :    group is terminated. */
     179             : 
     180             : fd_cnc_t *
     181             : fd_cnc_join( void * shcnc );
     182             : 
     183             : /* fd_cnc_leave leaves a current local join.  Returns a pointer to the
     184             :    underlying shared memory region on success (this should not be
     185             :    assumed to be just a cast of cnc) and NULL on failure (logs details).
     186             :    Reasons for failure include cnc is NULL. */
     187             : 
     188             : void *
     189             : fd_cnc_leave( fd_cnc_t const * cnc );
     190             : 
     191             : /* fd_cnc_delete unformats a memory region used as a cnc.  Assumes
     192             :    nobody is joined to the region.  Returns a pointer to the underlying
     193             :    shared memory region or NULL if used obviously in error (e.g. shcnc
     194             :    obviously does not point to a cnc ... logs details).  The ownership
     195             :    of the memory region is transferred to the caller on success. */
     196             : 
     197             : void *
     198             : fd_cnc_delete( void * shcnc );
     199             : 
     200             : /* fd_cnc_app_sz returns the size of the cnc's application region.
     201             :    Assumes cnc is a current local join. */
     202             : 
     203           9 : FD_FN_PURE static inline ulong fd_cnc_app_sz( fd_cnc_t const * cnc ) { return cnc->app_sz; }
     204             : 
     205             : /* fd_cnc_app_laddr returns local address of the cnc's application
     206             :    region.  This will have FD_CNC_APP_ALIGN alignment and room for at
     207             :    least fd_cnc_app_sz( cnc ) bytes.  Assumes cnc is a current local
     208             :    join.  fd_cnc_app_laddr_const is for const correctness.  The return
     209             :    values are valid for the lifetime of the local join. */
     210             : 
     211          12 : FD_FN_CONST static inline void *       fd_cnc_app_laddr      ( fd_cnc_t *       cnc ) { return (void *      )(((ulong)cnc)+64UL); }
     212           9 : FD_FN_CONST static inline void const * fd_cnc_app_laddr_const( fd_cnc_t const * cnc ) { return (void const *)(((ulong)cnc)+64UL); }
     213             : 
     214             : /* fd_cnc_type returns the application defined type of a cnc.  Assumes
     215             :    cnc is a current local join. */
     216             : 
     217           3 : FD_FN_PURE static inline ulong fd_cnc_type( fd_cnc_t const * cnc ) { return cnc->type; }
     218             : 
     219             : /* fd_cnc_heartbeat0 returns the heartbeat assigned when the cnc was
     220             :    created.  Assumes cnc is a current local join. */
     221             : 
     222           3 : FD_FN_PURE static inline long fd_cnc_heartbeat0( fd_cnc_t const * cnc ) { return cnc->heartbeat0; }
     223             : 
     224             : /* fd_cnc_heartbeat_query returns the value of the cnc's heartbeat
     225             :    as of some point in time between when this was called and when this
     226             :    returned.  Assumes cnc is a current local join.  This acts as a
     227             :    compiler memory fence. */
     228             : 
     229             : static inline long
     230          99 : fd_cnc_heartbeat_query( fd_cnc_t const * cnc ) {
     231          99 :   FD_COMPILER_MFENCE();
     232          99 :   long then = FD_VOLATILE_CONST( cnc->heartbeat );
     233          99 :   FD_COMPILER_MFENCE();
     234          99 :   return then;
     235          99 : }
     236             : 
     237             : /* fd_cnc_heartbeat is used by an app thread to update the cnc's
     238             :    heartbeat.  Heartbeat values are application defined but typical
     239             :    usage is something that monotonically increases (e.g. the host
     240             :    wallclock, host tickcounter or just a flat counter).  It is
     241             :    recommended app threads do cnc heartbeats with intervals that are
     242             :    uniform random distributed in a range like [min,2*min] nanoseconds
     243             :    for some reasonably fast to a human but slow to the computer value
     244             :    of min.  This keeps load from heartbeating low, keeps the system
     245             :    human real time responsive, prevents heartbeats from multiple cnc
     246             :    auto-synchronizing and gives a strict range in time over which a cnc
     247             :    thread should expect to see a heartbeat from a normally running app
     248             :    thread.  Assumes cnc is a current local join.  This acts as a
     249             :    compiler memory fence. */
     250             : 
     251             : static inline void
     252             : fd_cnc_heartbeat( fd_cnc_t * cnc,
     253   145596929 :                   long       now ) {
     254   145596929 :   FD_COMPILER_MFENCE();
     255   145596929 :   FD_VOLATILE( cnc->heartbeat ) = now;
     256   145596929 :   FD_COMPILER_MFENCE();
     257   145596929 : }
     258             : 
     259             : /* fd_cnc_signal_query observes the current signal posted to the cnc.
     260             :    Assumes cnc is a current local join.  This is a compiler fence.
     261             :    Returns the current signal on the cnc at some point in time between
     262             :    when this was called and this returned. */
     263             : 
     264             : static inline ulong
     265   145604274 : fd_cnc_signal_query( fd_cnc_t const * cnc ) {
     266   145604274 :   FD_COMPILER_MFENCE();
     267   145604274 :   ulong s = FD_VOLATILE_CONST( cnc->signal );
     268   145604274 :   FD_COMPILER_MFENCE();
     269   145604274 :   return s;
     270   145604274 : }
     271             : 
     272             : /* fd_cnc_signal atomically transitions the cnc to signal s.  Assumes
     273             :    cnc is a current local join and the caller is currently allowed to do
     274             :    a transition to s.  Specifically:
     275             : 
     276             :      CNC thread with open command session:
     277             : 
     278             :      - RUN->HALT: signal an app thread to shutdown
     279             : 
     280             :      - RUN->USER defined: as per application requirements
     281             : 
     282             :      - USER defined->USER defined: as per application requirements
     283             : 
     284             :      Running APP thread:
     285             : 
     286             :      - BOOT->RUN: when app thread is done booting ... should be just
     287             :        before app thread enters its run loop.
     288             : 
     289             :      - BOOT->FAIL: if app thread failed to boot ... should be just
     290             :        before app thread stops running.
     291             : 
     292             :      - HALT->BOOT: when app thread is done halting ... should be just
     293             :        before app thread stops running.
     294             : 
     295             :      - USER defined->RUN: when app thread is done processing signal and
     296             :        can resume running ... should be just before app thread resumes
     297             :        its run loop.
     298             : 
     299             :      - USER defined->BOOT: when CNC thread signal processing halted the
     300             :        app thread normally ... should be just before app thread stops
     301             :        running.
     302             : 
     303             :      - USER defined->FAIL: when CNC thread signal processing halted the
     304             :        app thread abnormally ... should be just before app thread stops
     305             :        running.
     306             : 
     307             :      - USER defined->USER defined: as per application requirements
     308             : 
     309             :    See above state machine for more details.  This function is a
     310             :    compiler memory fence (e.g. caller can populate the cnc app region
     311             :    with app signal specific details and all the memory operations to the
     312             :    app region will be issued before s is signaled). */
     313             : 
     314             : static inline void
     315             : fd_cnc_signal( fd_cnc_t * cnc,
     316         891 :                ulong      s ) {
     317         891 :   FD_COMPILER_MFENCE();
     318         891 :   FD_VOLATILE( cnc->signal ) = s;
     319         891 :   FD_COMPILER_MFENCE();
     320         891 : }
     321             : 
     322             : /* fd_cnc_open opens a new command session to an app thread.  Returns 0
     323             :    (FD_CNC_SUCCESS) on success and a negative (FD_CNC_ERR_*) on failure
     324             :    (logs details).  On successful return, caller will have an open
     325             :    command session on the cnc and the cnc will be in the RUN state.  On
     326             :    failure, caller does not have a command session on cnc.
     327             : 
     328             :    Reasons for FD_CNC_ERR_UNSUP include not running on a hosted target,
     329             :    not running on an atomic capable target and strange thread group id;
     330             :    this is a permanent failure.  Reasons for FD_CNC_ERR_INVAL include
     331             :    NULL cnc; this is a permanent failure.  Reasons for FD_CNC_ERR_FAIL
     332             :    include app thread is not running and cannot be restarted cleanly;
     333             :    this is a permanent failure.  Reasons for FD_CNC_ERR_AGAIN include
     334             :    app thread is bootable, is in the process of booting or is in the
     335             :    process of halting (and thus might be running later) or there is
     336             :    already an open command session on app thread; this failure is
     337             :    _potentially_ transient.
     338             : 
     339             :    Caller should not leave the join while it has an open command
     340             :    session.  Caller should not close an open command session while it
     341             :    has a signal pending on it.  If the caller dies with an open command
     342             :    session, the next cnc thread will try to implicitly close it to
     343             :    recover (logging details as necessary). */
     344             : 
     345             : int
     346             : fd_cnc_open( fd_cnc_t * cnc );
     347             : 
     348             : /* fd_cnc_wait waits up to dt ns for the cnc to transition to something
     349             :    other than test.  Returns the last observed cnc signal (which can be
     350             :    used to detect the result of the wait).  dt==LONG_MAX will do a blocking
     351             :    wait.  dt<=0 will poll cnc once.  If _opt_now is non-NULL, *_opt_now
     352             :    will contain the wallclock observed just before the last time the cnc
     353             :    was queried on return.  The wait is OS friendly (e.g. will not block
     354             :    other threads that might be running on the same core as the cnc
     355             :    thread as such threads are often scheduled to share a common
     356             :    administrative core). */
     357             : 
     358             : ulong
     359             : fd_cnc_wait( fd_cnc_t const * cnc,
     360             :              ulong            test,
     361             :              long             dt,
     362             :              long *           _opt_now );
     363             : 
     364             : /* fd_cnc_close ends the current command session on cnc.  Assumes caller
     365             :    has an open command session on cnc and there are no signals being
     366             :    processed by the app thread (e.g. the cnc is in the RUN, BOOT or
     367             :    FAIL state).  This function is a compiler fence. */
     368             : 
     369             : static inline void
     370         105 : fd_cnc_close( fd_cnc_t * cnc ) {
     371         105 :   FD_COMPILER_MFENCE();
     372         105 :   FD_VOLATILE( cnc->lock ) = 0UL;
     373         105 :   FD_COMPILER_MFENCE();
     374         105 : }
     375             : 
     376             : /* fd_cnc_strerror converts a FD_CNC_SUCCESS / FD_CNC_ERR_* code into
     377             :    a human readable cstr.  The lifetime of the returned pointer is
     378             :    infinite.  The returned pointer is always to a non-NULL cstr. */
     379             : 
     380             : FD_FN_CONST char const *
     381             : fd_cnc_strerror( int err );
     382             : 
     383             : /* fd_cstr_to_cnc_signal converts the cstr pointed to by cstr into a cnc
     384             :    signal value.  Return value undefined if cstr does not point to a cnc
     385             :    signal cstr. */
     386             : 
     387             : FD_FN_PURE ulong
     388             : fd_cstr_to_cnc_signal( char const * cstr );
     389             : 
     390             : /* fd_cnc_signal_cstr pretty prints the cnc signal value into buf.  buf
     391             :    must point to a character buffer with at least
     392             :    FD_CNC_SIGNAL_CSTR_BUF_MAX bytes.  Always returns buf.  If buf is
     393             :    non-NULL, the buffer pointed at will be populated with a proper '\0'
     394             :    terminated cstr on return (and one that fd_cstr_to_cnc_signal
     395             :    will properly convert back to signal). */
     396             : 
     397           3 : #define FD_CNC_SIGNAL_CSTR_BUF_MAX (21UL)
     398             : 
     399             : char *
     400             : fd_cnc_signal_cstr( ulong  signal,
     401             :                     char * buf );
     402             : 
     403             : FD_PROTOTYPES_END
     404             : 
     405             : #endif /* HEADER_fd_src_tango_cnc_fd_cnc_h */
     406             : 

Generated by: LCOV version 1.14