Line data Source code
1 : #ifndef HEADER_fd_src_tango_cnc_fd_cnc_h 2 : #define HEADER_fd_src_tango_cnc_fd_cnc_h 3 : 4 : #include "../fd_tango_base.h" 5 : 6 : /* A fd_cnc_t provides APIs for out-of-band low bandwidth 7 : command-and-control ("cnc") signals to a high performance app thread 8 : ("app"). In the app thread's run loop, as part of its out-of-band 9 : housekeeping, it uses fd_cnc_t object dedicated to it to send and 10 : receive information from command and control threads and/or monitoring 11 : threads. The basic template for an fd_cnc_t state machine is: 12 : 13 : app 14 : new +---------<----------+ 15 : | | | 16 : | | +---<----+ | 17 : cnc v v | app | ^ 18 : | | v ^ | 19 : | | app | cnc | | 20 : +--<-- BOOT -->-- RUN -->-- USER -->---+ 21 : | cnc | | | | | | 22 : | | ^ v cnc | ^ v 23 : | v | app | | | | 24 : | | +--<-- HALT | +---<---+ 25 : v | | v app/cnc 26 : | +---->----+ | | 27 : | app | v app | 28 : | v | | 29 : | | | | 30 : delete -----<----- FAIL ---<--+ 31 : cnc app 32 : 33 : That is, when a cnc is created, it is in the BOOT state and the app 34 : thread that uses it is not running. When the app thread starts 35 : running and finishes booting up, it should transition the cnc to the 36 : RUN state if the thread started up successfully or the FAIL state if 37 : booting failed (the thread is not running and is considered to be 38 : unsafe to try restarting). While in the RUN state: 39 : 40 : - If a cnc thread raises a HALT signal on the app thread's cnc, the 41 : app thread should cleanup after itself and, just before it stops 42 : running, transition its cnc to BOOT (FAIL) if the app thread can 43 : (cannot) be booted again safely. 44 : 45 : - If a cnc thread raises a USER defined signal, the app thread should 46 : process the signal. If the app thread resumes running after 47 : processing the signal, it should transition its cnc to RUN. 
If 48 : this processing results in termination of the app thread, just before 49 : it stops running, it should transition its cnc to BOOT (FAIL) if the 50 : app thread can (cannot) be booted again safely. Note that the cnc 51 : state alone does not indicate if a USER defined signal was processed 52 : successfully (e.g. the app thread might choose to ignore a malformed 53 : command, log the details, and then resume running). For such 54 : information, the application can encode additional inputs and 55 : outputs regarding commands in the cnc app region. USER defined to 56 : USER defined transitions (either driven by the app thread as it 57 : processes a complex signal or by a back-and-forth interaction with 58 : the cnc thread) are fine and up to the application to define. 59 : 60 : The only thing that can be done to an app thread in the FAIL state is 61 : postmortem autopsies and clean up. An app thread should not do a 62 : RUN->FAIL transition, even if it dies in the RUN state. If a thread 63 : dies while in the RUN state, note that the cnc has a heartbeat to help 64 : cnc threads and/or monitor threads detect such without needing to open 65 : a cnc command session (specific heartbeating conventions are 66 : application defined). 67 : 68 : It is often useful to have one USER defined signal to be a no-op 69 : "ACK". For this, a cnc thread can signal ACK to the app thread. If 70 : the cnc returns to RUN reasonably promptly, the app thread has self 71 : reported to the cnc thread it is operating correctly. If it doesn't 72 : (i.e. times out), the cnc thread can forcibly terminate the app 73 : thread, move the cnc post termination into the FAIL state and then 74 : proceed like a normal FAIL. 75 : 76 : A cnc has an application defined type field to help applications 77 : distinguish between what USER defined signals might be supported by a 78 : particular app thread. */ 79 : 80 : /* FD_CNC_{ALIGN,FOOTPRINT} describe the alignment and footprint of a 81 : fd_cnc_t. 
ALIGN is a positive integer power of 2. FOOTPRINT is a 82 : multiple of ALIGN. ALIGN is recommended to be at least double cache 83 : line to mitigate various kinds of false sharing. app_sz is assumed 84 : to be valid (e.g. will not require a footprint larger than 85 : ULONG_MAX). These are provided to facilitate compile time 86 : declarations. */ 87 : 88 345 : #define FD_CNC_ALIGN (128UL) 89 : #define FD_CNC_FOOTPRINT( app_sz ) \ 90 117 : FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT, \ 91 117 : FD_CNC_ALIGN, 64UL ), \ 92 117 : FD_CNC_APP_ALIGN, (app_sz) ), \ 93 117 : FD_CNC_ALIGN ) 94 : 95 : /* FD_CNC_APP_ALIGN describes the alignment of a fd_cnc_t's 96 : application region. This is a power of 2 that is at least the minimal malloc 97 : alignment (typically 8) and at most FD_CNC_ALIGN. */ 98 : 99 : #define FD_CNC_APP_ALIGN (64UL) 100 : 101 : /* FD_CNC_SIGNAL_* are the standard cnc signals. All remaining values 102 : ([4,ULONG_MAX]) are available to implement user defined signals. 103 : Details of the standard signals are provided above. */ 104 : 105 303 : #define FD_CNC_SIGNAL_RUN (0UL) 106 273 : #define FD_CNC_SIGNAL_BOOT (1UL) 107 6 : #define FD_CNC_SIGNAL_FAIL (2UL) 108 411 : #define FD_CNC_SIGNAL_HALT (3UL) 109 : 110 : /* FD_CNC_SUCCESS, FD_CNC_ERR_* are error code return values used by 111 : cnc APIs. SUCCESS must be zero, ERR_* are negative and distinct. */ 112 : 113 108 : #define FD_CNC_SUCCESS (0) /* success */ 114 3 : #define FD_CNC_ERR_UNSUP (-1) /* unsupported on this caller */ 115 3 : #define FD_CNC_ERR_INVAL (-2) /* bad inputs */ 116 3 : #define FD_CNC_ERR_AGAIN (-3) /* potentially transient failure */ 117 3 : #define FD_CNC_ERR_FAIL (-4) /* permanent failure */ 118 : 119 : /* fd_cnc_t is an opaque handle of a command-and-control object. 120 : Details are exposed here to facilitate inlining of many cnc 121 : operations in performance critical app thread paths. 
*/ 122 : 123 57 : #define FD_CNC_MAGIC (0xf17eda2c37c2c000UL) /* firedancer cnc ver 0 */ 124 : 125 : struct __attribute__((aligned(FD_CNC_ALIGN))) fd_cnc_private { 126 : ulong magic; /* ==FD_CNC_MAGIC */ 127 : ulong app_sz; /* size of the application region in bytes, see fd_cnc_app_sz */ 128 : ulong type; /* application defined type, see fd_cnc_type */ 129 : long heartbeat0; /* heartbeat assigned when the cnc was created, see fd_cnc_heartbeat0 */ 130 : long heartbeat; /* most recent heartbeat, see fd_cnc_heartbeat and fd_cnc_heartbeat_query */ 131 : ulong lock; /* set to 0UL by fd_cnc_close when a command session ends */ 132 : ulong signal; /* current signal, see fd_cnc_signal and fd_cnc_signal_query */ 133 : /* Padding to FD_CNC_APP_ALIGN here */ 134 : /* app_sz bytes here */ 135 : /* Padding to FD_CNC_ALIGN here */ 136 : }; 137 : 138 : typedef struct fd_cnc_private fd_cnc_t; 139 : 140 : FD_PROTOTYPES_BEGIN 141 : 142 : /* fd_cnc_{align,footprint} return the required alignment and footprint 143 : of a memory region suitable for use as a cnc. fd_cnc_align returns 144 : FD_CNC_ALIGN. If footprint is larger than ULONG_MAX, footprint will 145 : silently return 0 (and thus can be used by the caller to validate the 146 : cnc configuration parameters). */ 147 : 148 : FD_FN_CONST ulong 149 : fd_cnc_align( void ); 150 : 151 : FD_FN_CONST ulong 152 : fd_cnc_footprint( ulong app_sz ); 153 : 154 : /* fd_cnc_new formats an unused memory region for use as a cnc. Assumes 155 : shmem is a non-NULL pointer to this region in the local address space 156 : with the required footprint and alignment. The cnc will be 157 : initialized to have the given type (should be in [0,UINT_MAX]) with 158 : an initial heartbeat of now. The cnc application region will be 159 : initialized to zero. Returns shmem on success (and the memory region it points 160 : to will be formatted as a cnc, caller is not joined) and NULL on 161 : failure (logs details). Reasons for failure include an obviously bad 162 : shmem region or app_sz. */ 163 : 164 : void * 165 : fd_cnc_new( void * shmem, 166 : ulong app_sz, 167 : ulong type, 168 : long now ); 169 : 170 : /* fd_cnc_join joins the caller to the cnc. shcnc points to the first 171 : byte of the memory region backing the cnc in the caller's address 172 : space. 
Returns a pointer in the local address space to the cnc on 173 : success (this should not be assumed to be just a cast of shcnc) or 174 : NULL on failure (logs details). Reasons for failure include 175 : shcnc is obviously not a local pointer to a memory region holding a 176 : cnc. Every successful join should have a matching leave. The 177 : lifetime of the join is until the matching leave or caller's thread 178 : group is terminated. */ 179 : 180 : fd_cnc_t * 181 : fd_cnc_join( void * shcnc ); 182 : 183 : /* fd_cnc_leave leaves a current local join. Returns a pointer to the 184 : underlying shared memory region on success (this should not be 185 : assumed to be just a cast of cnc) and NULL on failure (logs details). 186 : Reasons for failure include cnc is NULL. */ 187 : 188 : void * 189 : fd_cnc_leave( fd_cnc_t const * cnc ); 190 : 191 : /* fd_cnc_delete unformats a memory region used as a cnc. Assumes 192 : nobody is joined to the region. Returns a pointer to the underlying 193 : shared memory region or NULL if used obviously in error (e.g. shcnc 194 : obviously does not point to a cnc ... logs details). The ownership 195 : of the memory region is transferred to the caller on success. */ 196 : 197 : void * 198 : fd_cnc_delete( void * shcnc ); 199 : 200 : /* fd_cnc_shmem returns a pointer in the caller's address space to the 201 : underlying shared memory region used by a cnc. Assumes cnc is a 202 : current local join. fd_cnc_shmem_const is a const-correct version. */ 203 : 204 3 : FD_FN_CONST static inline void * fd_cnc_shmem ( fd_cnc_t * cnc ) { return (void *)cnc; } 205 3 : FD_FN_CONST static inline void const * fd_cnc_shmem_const( fd_cnc_t const * cnc ) { return (void const *)cnc; } 206 : 207 : /* fd_cnc_app_sz returns the size of the cnc's application region. 208 : Assumes cnc is a current local join. 
*/ 209 : 210 9 : FD_FN_PURE static inline ulong fd_cnc_app_sz( fd_cnc_t const * cnc ) { return cnc->app_sz; } 211 : 212 : /* fd_cnc_app_laddr returns the local address of the cnc's application 213 : region. This will have FD_CNC_APP_ALIGN alignment and room for at 214 : least fd_cnc_app_sz( cnc ) bytes. Assumes cnc is a current local 215 : join. fd_cnc_app_laddr_const is for const correctness. The return 216 : values are valid for the lifetime of the local join. The address is computed as a fixed 64 byte offset from cnc (this should stay in sync with the 64UL used in FD_CNC_FOOTPRINT). */ 217 : 218 12 : FD_FN_CONST static inline void * fd_cnc_app_laddr ( fd_cnc_t * cnc ) { return (void * )(((ulong)cnc)+64UL); } 219 9 : FD_FN_CONST static inline void const * fd_cnc_app_laddr_const( fd_cnc_t const * cnc ) { return (void const *)(((ulong)cnc)+64UL); } 220 : 221 : /* fd_cnc_type returns the application defined type of a cnc. Assumes 222 : cnc is a current local join. */ 223 : 224 3 : FD_FN_PURE static inline ulong fd_cnc_type( fd_cnc_t const * cnc ) { return cnc->type; } 225 : 226 : /* fd_cnc_heartbeat0 returns the heartbeat assigned when the cnc was 227 : created. Assumes cnc is a current local join. */ 228 : 229 3 : FD_FN_PURE static inline long fd_cnc_heartbeat0( fd_cnc_t const * cnc ) { return cnc->heartbeat0; } 230 : 231 : /* fd_cnc_heartbeat_query returns the value of the cnc's heartbeat 232 : as of some point in time between when this was called and when this 233 : returned. Assumes cnc is a current local join. This acts as a 234 : compiler memory fence. */ 235 : 236 : static inline long 237 99 : fd_cnc_heartbeat_query( fd_cnc_t const * cnc ) { 238 99 : FD_COMPILER_MFENCE(); 239 99 : long then = FD_VOLATILE_CONST( cnc->heartbeat ); 240 99 : FD_COMPILER_MFENCE(); 241 99 : return then; 242 99 : } 243 : 244 : /* fd_cnc_heartbeat is used by an app thread to update the cnc's 245 : heartbeat. Heartbeat values are application defined but typical 246 : usage is something that monotonically increases (e.g. the host 247 : wallclock, host tickcounter or just a flat counter). 
It is 248 : recommended app threads do cnc heartbeats with intervals that are 249 : uniform random distributed in a range like [min,2*min] nanoseconds 250 : for some reasonably fast to a human but slow to the computer value 251 : of min. This keeps load from heartbeating low, keeps the system 252 : human real time responsive, prevents heartbeats from multiple cnc 253 : auto-synchronizing and gives a strict range in time over which a cnc 254 : thread should expect to see a heartbeat from a normally running app 255 : thread. Assumes cnc is a current local join. This acts as a 256 : compiler memory fence. */ 257 : 258 : static inline void 259 : fd_cnc_heartbeat( fd_cnc_t * cnc, 260 121600832 : long now ) { 261 121600832 : FD_COMPILER_MFENCE(); 262 121600832 : FD_VOLATILE( cnc->heartbeat ) = now; 263 121600832 : FD_COMPILER_MFENCE(); 264 121600832 : } 265 : 266 : /* fd_cnc_signal_query observes the current signal posted to the cnc. 267 : Assumes cnc is a current local join. This is a compiler fence. 268 : Returns the current signal on the cnc at some point in time between 269 : when this was called and when this returned. */ 270 : 271 : static inline ulong 272 121610032 : fd_cnc_signal_query( fd_cnc_t const * cnc ) { 273 121610032 : FD_COMPILER_MFENCE(); 274 121610032 : ulong s = FD_VOLATILE_CONST( cnc->signal ); 275 121610032 : FD_COMPILER_MFENCE(); 276 121610032 : return s; 277 121610032 : } 278 : 279 : /* fd_cnc_signal atomically transitions the cnc to signal s. Assumes 280 : cnc is a current local join and the caller is currently allowed to do 281 : a transition to s. Specifically: 282 : 283 : CNC thread with open command session: 284 : 285 : - RUN->HALT: signal an app thread to shut down 286 : 287 : - RUN->USER defined: as per application requirements 288 : 289 : - USER defined->USER defined: as per application requirements 290 : 291 : Running APP thread: 292 : 293 : - BOOT->RUN: when app thread is done booting ... 
should be just 294 : before app thread enters its run loop. 295 : 296 : - BOOT->FAIL: if app thread failed to boot ... should be just 297 : before app thread stops running. 298 : 299 : - HALT->BOOT: when app thread is done halting ... should be just 300 : before app thread stops running. - HALT->FAIL: when app thread is done halting but cannot be booted again safely ... should be just before app thread stops running. 301 : 302 : - USER defined->RUN: when app thread is done processing signal and 303 : can resume running ... should be just before app thread resumes 304 : its run loop. 305 : 306 : - USER defined->BOOT: when CNC thread signal processing halted the 307 : app thread normally ... should be just before app thread stops 308 : running. 309 : 310 : - USER defined->FAIL: when CNC thread signal processing halted the 311 : app thread abnormally ... should be just before app thread stops 312 : running. 313 : 314 : - USER defined->USER defined: as per application requirements 315 : 316 : See above state machine for more details. This function is a 317 : compiler memory fence (e.g. caller can populate the cnc app region 318 : with app signal specific details and all the memory operations to the 319 : app region will be issued before s is signaled). */ 320 : 321 : static inline void 322 : fd_cnc_signal( fd_cnc_t * cnc, 323 891 : ulong s ) { 324 891 : FD_COMPILER_MFENCE(); 325 891 : FD_VOLATILE( cnc->signal ) = s; 326 891 : FD_COMPILER_MFENCE(); 327 891 : } 328 : 329 : /* fd_cnc_open opens a new command session to an app thread. Returns 0 330 : (FD_CNC_SUCCESS) on success and a negative (FD_CNC_ERR_*) on failure 331 : (logs details). On successful return, caller will have an open 332 : command session on the cnc and the cnc will be in the RUN state. On 333 : failure, caller does not have a command session on cnc. 334 : 335 : Reasons for FD_CNC_ERR_UNSUP include not running on a hosted target, 336 : not running on an atomic capable target and strange thread group id; 337 : this is a permanent failure. Reasons for FD_CNC_ERR_INVAL include 338 : NULL cnc; this is a permanent failure. 
Reasons for FD_CNC_ERR_FAIL 339 : include app thread is not running and cannot be restarted cleanly; 340 : this is a permanent failure. Reasons for FD_CNC_ERR_AGAIN include 341 : app thread is bootable, is in the process of booting or is in the 342 : process of halting (and thus might be running later) or there is 343 : already an open command session on app thread; this failure is 344 : _potentially_ transient. 345 : 346 : Caller should not leave the join while it has an open command 347 : session. Caller should not close an open command session while it 348 : has a signal pending on it. If the caller dies with an open command 349 : session, the next cnc thread will try to implicitly close it to 350 : recover (logging details as necessary). */ 351 : 352 : int 353 : fd_cnc_open( fd_cnc_t * cnc ); 354 : 355 : /* fd_cnc_wait waits up to dt ns for the cnc to transition to something 356 : other than test. Returns the last observed cnc signal (which can be 357 : used to detect the result of the wait). dt==LONG_MAX will do a blocking 358 : wait. dt<=0 will poll cnc once. If _opt_now is non-NULL, *_opt_now 359 : will contain the wallclock observed just before the last time the cnc 360 : was queried on return. The wait is OS friendly (e.g. will not block 361 : other threads that might be running on the same core as the cnc 362 : thread as such threads are often scheduled to share a common 363 : administrative core). */ 364 : 365 : ulong 366 : fd_cnc_wait( fd_cnc_t const * cnc, 367 : ulong test, 368 : long dt, 369 : long * _opt_now ); 370 : 371 : /* fd_cnc_close ends the current command session on cnc. Assumes caller 372 : has an open command session on cnc and there are no signals being 373 : processed by the app thread (e.g. the cnc is in the RUN, BOOT or 374 : FAIL state). This function is a compiler fence. 
*/ 375 : 376 : static inline void 377 105 : fd_cnc_close( fd_cnc_t * cnc ) { 378 105 : FD_COMPILER_MFENCE(); 379 105 : FD_VOLATILE( cnc->lock ) = 0UL; 380 105 : FD_COMPILER_MFENCE(); 381 105 : } 382 : 383 : /* fd_cnc_strerror converts a FD_CNC_SUCCESS / FD_CNC_ERR_* code into 384 : a human readable cstr. The lifetime of the returned pointer is 385 : infinite. The returned pointer is always to a non-NULL cstr. */ 386 : 387 : FD_FN_CONST char const * 388 : fd_cnc_strerror( int err ); 389 : 390 : /* fd_cstr_to_cnc_signal converts the cstr pointed to by cstr into a cnc 391 : signal value. Return value undefined if cstr does not point to a cnc 392 : signal cstr. */ 393 : 394 : FD_FN_PURE ulong 395 : fd_cstr_to_cnc_signal( char const * cstr ); 396 : 397 : /* fd_cnc_signal_cstr pretty prints the cnc signal value into buf. buf 398 : must point to a character buffer with at least 399 : FD_CNC_SIGNAL_CSTR_BUF_MAX bytes. Always returns buf. If buf is 400 : non-NULL, the buffer pointed at will be populated with a proper '\0' 401 : terminated cstr on return (and one that fd_cstr_to_cnc_signal 402 : will properly convert back to signal). NOTE(review): 21 is presumably the 20 decimal digits of ULONG_MAX plus the terminating '\0' -- confirm against the implementation. */ 403 : 404 3 : #define FD_CNC_SIGNAL_CSTR_BUF_MAX (21UL) 405 : 406 : char * 407 : fd_cnc_signal_cstr( ulong signal, 408 : char * buf ); 409 : 410 : FD_PROTOTYPES_END 411 : 412 : #endif /* HEADER_fd_src_tango_cnc_fd_cnc_h */