LCOV - code coverage report
Current view: top level - discof/restore/utils - fd_ssctrl.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 1 49 2.0 %
Date: 2026-06-29 05:51:35 Functions: 0 30 0.0 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_discof_restore_utils_fd_ssctrl_h
       2             : #define HEADER_fd_src_discof_restore_utils_fd_ssctrl_h
       3             : 
       4             : #include "../../../util/net/fd_net_headers.h"
       5             : #include "../../../flamenco/runtime/fd_runtime_const.h"
       6             : 
       7             : /* The snapshot tiles have a somewhat involved state machine, which is
       8             :    controlled by snapct.  Imagine first the following sequence:
       9             : 
      10             :     1. snapct is reading a full snapshot from the network and sends some
      11             :        data to snapdc to be decompressed.
      12             :     2. snapct hits a network error, and resets the connection to a new
      13             :        peer.
      14             :     3. The decompressor fails on data from the old peer, and sends a
      15             :        malformed message to snapct.
      16             :     4. snapct receives the malformed message, and abandons the new
      17             :        connection, even though the new peer's data was not malformed.
      18             : 
      19             :    There are basically two ways to prevent this.  Option A is the tiles
      20             :    can pass not just control messages to one another, but also tag them
      21             :    with some xid indicating which "attempt" the control message is for.
      22             : 
      23             :    This is pretty hard to reason about, and the state machine can grow
      24             :    quite complicated.
      25             : 
      26             :    There's an easier way: the tiles are simply kept fully synchronized with
      27             :    snapct.  Whatever "attempt" snapct is on, we ensure all other tiles
      28             :    are on it too.  This means when any tile fails a snapshot, all tiles
      29             :    must fail it and fully flush all frags in the pipeline before snapct
      30             :    can proceed with a new attempt.
      31             : 
      32             :    The control flow then is basically,
      33             : 
      34             :      1. All tiles start in the IDLE state.
      35             :      2. snapct initializes the pipeline by sending an INIT message.
      36             :         Each tile enters the PROCESSING state and then forwards the INIT
      37             :         message down the pipeline.  When snapct receives this INIT
      38             :         message, the entire pipeline is in PROCESSING state.
      39             :      3. Tiles continue to process data / frags as applicable.  If an
      40             :         error occurs, the tile enters the ERROR state and also sends an
      41             :         ERROR message downstream.  All downstream tiles also enter the
      42             :         ERROR state and forward the message.  Note that upstream tiles
      43             :         will not be in an ERROR state and will continue producing frags.
      44             :         When snapct receives the ERROR message, it will send a FAIL
      45             :         message.  snapct then waits for this FAIL message to be
      46             :         propagated through the pipeline and received back.  snapct
      47             :         also guarantees to flush any pending load data in the
      48             :         pipeline.  It then knows that all tiles are synchronized back
      49             :         in an IDLE state and it can try again with a new INIT.
      50             :      4. Once all snapshot data has entered the pipeline, snapct sends
      51             :         a FINI message through the pipeline and waits for it to be
      52             :         received back.  This synchronizes tiles to finish any
      53             :         remaining in-flight work.  It then sends either a NEXT or a
      54             :         DONE message through the pipeline, again waiting for it to be
      55             :         received back.  NEXT means another snapshot follows, whereas
      56             :         DONE means a shutdown will follow.
      57             : 
      58             :    That keeps the tiles in lockstep, and simplifies the state machine
      59             :    to a manageable level.
      60             : 
      61             :    In more detail, all tiles in the feedback loop of the pipeline must
      62             :    comply with the following rules; snapct enforces them; tiles outside
      63             :    of the feedback loop may support a subset of these:
      64             :      - Allowed state transitions on given control message:
      65             :         IDLE       to PROCESSING: on INIT_FULL / INIT_INCR ctrl msg.
      66             :         PROCESSING to FINISHING : on FINI ctrl msg (*).
      67             :         FINISHING  to IDLE      : on NEXT / DONE ctrl msg.
      68             :         IDLE       to SHUTDOWN  : on SHUTDOWN ctrl msg.
      69             :      - Control messages that apply to all states (except SHUTDOWN):
      70             :         On ERROR msg, always transition to ERROR.
      71             :         On FAIL msg, always transition to IDLE.
      72             :      - When in SHUTDOWN state, no data or control message is allowed.
      73             :      - Error handling:
      74             :         A tile that enters ERROR state on its own must forward an
      75             :         ERROR control message and discard any incoming message.  When
      76             :         in ERROR state, data is discarded, and only a FAIL control
      77             :         message is processed and forwarded (all others are discarded).
      78             :         As a result, only one ERROR message can propagate through the
      79             :         pipeline at any given time.
      80             :      - Holding onto a control message:
      81             :         A tile may hold onto a control message and forward it later
      82             :         after performing some asynchronous routine.  The tile's state
      83             :         transition may also be deferred until that control message is
      84             :         forwarded.
      85             :      - (*) A tile may self-transition from PROCESSING to FINISHING
      86             :         early, if it can detect end-of-stream.  The FINI message is
      87             :         used to synchronize the transition.
      88             : 
      89             :    Each non-ERROR control message is only generated once in snapct
      90             :    and will not be re-sent.  The pipeline will be locked on flushing
      91             :    that control message until all tiles forward it on, or an ERROR
      92             :    message is triggered by any of the tiles and forwarded. */
      93             : 
      94           0 : #define FD_SNAPSHOT_STATE_IDLE                 (0UL) /* Performing no work and should receive no data frags */
      95           0 : #define FD_SNAPSHOT_STATE_PROCESSING           (1UL) /* Performing usual work, no errors / EoF condition encountered */
      96           0 : #define FD_SNAPSHOT_STATE_FINISHING            (2UL) /* Tile has observed EoF, expects no additional data frags */
      97           0 : #define FD_SNAPSHOT_STATE_ERROR                (3UL) /* Some error occurred, will wait for a FAIL command to reset */
      98           0 : #define FD_SNAPSHOT_STATE_SHUTDOWN             (4UL) /* All work finished, tile can perform final cleanup and exit */
      99             : 
     100           0 : #define FD_SNAPSHOT_MSG_DATA                   (0UL) /* Fragment represents some snapshot data */
     101           0 : #define FD_SNAPSHOT_MSG_META                   (1UL) /* Fragment represents a fd_ssctrl_meta_t message */
     102             : 
     103           0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_FULL         (2UL) /* Pipeline should start processing a full snapshot */
     104           0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_INCR         (3UL) /* Pipeline should start processing an incremental snapshot */
     105           0 : #define FD_SNAPSHOT_MSG_CTRL_FAIL              (4UL) /* Current snapshot failed, undo work and reset to idle state */
     106           0 : #define FD_SNAPSHOT_MSG_CTRL_NEXT              (5UL) /* Current snapshot succeeded, commit work, go idle, and expect another snapshot */
     107           0 : #define FD_SNAPSHOT_MSG_CTRL_DONE              (6UL) /* Current snapshot succeeded, commit work, go idle, and expect shutdown */
     108           0 : #define FD_SNAPSHOT_MSG_CTRL_SHUTDOWN          (7UL) /* Snapshot load successful, no work left to do, perform final cleanup and shut down */
     109           0 : #define FD_SNAPSHOT_MSG_CTRL_ERROR             (8UL) /* Some tile encountered an error with the current stream */
     110           0 : #define FD_SNAPSHOT_MSG_CTRL_FINI              (9UL) /* Current snapshot has been fully loaded, finish processing */
     111             : 
     112             : /* snapld -> snapct (via snapld_dc) */
     113          15 : #define FD_SNAPSHOT_MSG_LOAD_COMPLETE         (10UL) /* snapld finished reading/downloading all data */
     114             : 
     115             : /* Sent by snapct to tell snapld whether to load a local file or
     116             :    download from a particular external peer. */
     117             : typedef struct fd_ssctrl_init {
     118             :   int           file;
     119             :   int           zstd;
     120             :   ulong         slot; /* slot advertised by the snapshot peer */
     121             :   fd_ip4_port_t addr;
     122             :   uchar         snapshot_hash[ FD_HASH_FOOTPRINT ]; /* advertised snapshot hash from snapshot file name */
     123             :   char          hostname[ 256UL ];
     124             :   char          path[ PATH_MAX ];
     125             :   ulong         path_len;
     126             :   int           is_https;
     127             : } fd_ssctrl_init_t;
     128             : 
     129             : /* Sent by snapld to tell snapct metadata about a downloaded snapshot. */
     130             : typedef struct fd_ssctrl_meta {
     131             :   ulong total_sz;
     132             : } fd_ssctrl_meta_t;
     133             : 
     134             : struct fd_snapshot_account_hdr {
     135             :   uchar   pubkey[ FD_PUBKEY_FOOTPRINT ];
     136             :   uchar   owner[ FD_PUBKEY_FOOTPRINT ];
     137             :   ulong   lamports;
     138             :   uchar   executable;
     139             :   ulong   data_len;
     140             : };
     141             : typedef struct fd_snapshot_account_hdr fd_snapshot_account_hdr_t;
     142             : 
     143             : /* fd_snapshot_account_hdr_init initializes a fd_snapshot_account_hdr_t struct
     144             :    with the appropriate account metadata fields. */
     145             : static inline void
     146             : fd_snapshot_account_hdr_init( fd_snapshot_account_hdr_t * account,
     147             :                            uchar const                    pubkey[ FD_PUBKEY_FOOTPRINT ],
     148             :                            uchar const                    owner[ FD_PUBKEY_FOOTPRINT ],
     149             :                            ulong                          lamports,
     150             :                            uchar                          executable,
     151           0 :                            ulong                          data_len ) {
     152           0 :   fd_memcpy( account->pubkey, pubkey, FD_PUBKEY_FOOTPRINT );
     153           0 :   fd_memcpy( account->owner,  owner,  FD_PUBKEY_FOOTPRINT );
     154           0 :   account->lamports   = lamports;
     155           0 :   account->executable = executable;
     156           0 :   account->data_len   = data_len;
     157           0 : }
     158             : 
     159             : static inline const char *
     160           0 : fd_ssctrl_state_str( ulong state ) {
     161           0 :   switch( state ) {
     162           0 :     case FD_SNAPSHOT_STATE_IDLE:        return "idle";
     163           0 :     case FD_SNAPSHOT_STATE_PROCESSING:  return "processing";
     164           0 :     case FD_SNAPSHOT_STATE_FINISHING:   return "finishing";
     165           0 :     case FD_SNAPSHOT_STATE_ERROR:       return "error";
     166           0 :     case FD_SNAPSHOT_STATE_SHUTDOWN:    return "shutdown";
     167           0 :     default:                            return "unknown";
     168           0 :   }
     169           0 : }
     170             : 
     171             : static inline const char *
     172           0 : fd_ssctrl_msg_ctrl_str( ulong sig ) {
     173           0 :   switch( sig ) {
     174           0 :     case FD_SNAPSHOT_MSG_DATA:                  return "data";
     175           0 :     case FD_SNAPSHOT_MSG_META:                  return "meta";
     176           0 :     case FD_SNAPSHOT_MSG_CTRL_INIT_FULL:        return "init_full";
     177           0 :     case FD_SNAPSHOT_MSG_CTRL_INIT_INCR:        return "init_incr";
     178           0 :     case FD_SNAPSHOT_MSG_CTRL_FAIL:             return "fail";
     179           0 :     case FD_SNAPSHOT_MSG_CTRL_NEXT:             return "next";
     180           0 :     case FD_SNAPSHOT_MSG_CTRL_DONE:             return "done";
     181           0 :     case FD_SNAPSHOT_MSG_CTRL_SHUTDOWN:         return "shutdown";
     182           0 :     case FD_SNAPSHOT_MSG_CTRL_ERROR:            return "error";
     183           0 :     case FD_SNAPSHOT_MSG_CTRL_FINI:             return "fini";
     184           0 :     case FD_SNAPSHOT_MSG_LOAD_COMPLETE:         return "load_complete";
     185           0 :     default:                                    return "unknown";
     186           0 :   }
     187           0 : }
     188             : 
     189             : #endif /* HEADER_fd_src_discof_restore_utils_fd_ssctrl_h */

Generated by: LCOV version 1.14