LCOV - code coverage report
Current view: top level - discof/restore/utils - fd_ssctrl.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 26 0.0 %
Date: 2025-12-06 04:45:29 Functions: 0 16 0.0 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_discof_restore_utils_fd_ssctrl_h
       2             : #define HEADER_fd_src_discof_restore_utils_fd_ssctrl_h
       3             : 
       4             : #include "../../../util/net/fd_net_headers.h"
       5             : #include "../../../flamenco/runtime/fd_runtime_const.h"
       6             : 
       7             : /* The snapshot tiles have a somewhat involved state machine, which is
       8             :    controlled by snapct.  Imagine first the following sequence:
       9             : 
      10             :     1. snapct is reading a full snapshot from the network and sends some
      11             :        data to snapdc to be decompressed.
      12             :     2. snapct hits a network error, and resets the connection to a new
      13             :        peer.
      14             :     3. The decompressor fails on data from the old peer, and sends a
      15             :        malformed message to snapct.
      16             :     4. snapct receives the malformed message, and abandons the new
      17             :        connection, even though it was not malformed.
      18             : 
      19             :    There are basically two ways to prevent this.  Option A is the tiles
      20             :    can pass not just control messages to one another, but also tag them
      21             :    with some xid indicating which "attempt" the control message is for.
      22             : 
      23             :    This is pretty hard to reason about, and the state machine can grow
      24             :    quite complicated.
      25             : 
      26             :    There's an easier way: the tiles just are fully synchronized with
      27             :    snapct.  Whatever "attempt" snapct is on, we ensure all other tiles
      28             :    are on it too.  This means when any tile fails a snapshot, all tiles
      29             :    must fail it and fully flush all frags in the pipeline before snapct
      30             :    can proceed with a new attempt.
      31             : 
      32             :    The control flow then is basically,
      33             : 
      34             :      1. All tiles start in the IDLE state.
      35             :      2. snapct initializes the pipeline by sending an INIT message.
      36             :         Each tile enters the PROCESSING state and then forwards the INIT
      37             :         message down the pipeline.  When snapct receives this INIT
      38             :         message, the entire pipeline is in PROCESSING state.
      39             :      3. Tiles continue to process data / frags as applicable.  If an
      40             :         error occurs, the tile enters the ERROR state and also sends an
      41             :         ERROR message downstream.  All downstream tiles also enter the
      42             :         ERROR state and forward the message.  Note that upstream tiles
      43             :         will not be in an ERROR state and will continue producing frags.
      44             :         When snapct receives the ERROR message, it will send a FAIL
      45             :         message.  snapct then waits for this FAIL message to be
       46             :         propagated through the pipeline and received back.  It then
       47             :         knows that all tiles are synchronized back in an IDLE state and
      48             :         it can try again with a new INIT.
      49             :      4. Once snapct detects that the processing is finished, it sends
      50             :         a DONE message through the pipeline and waits for it to be
      51             :         received back.  We then either move on to the incremental
      52             :         snapshot, or shut down the whole pipeline.
      53             : 
       54             :    This keeps the tiles in lockstep, and simplifies the state machine to
      55             :    a manageable level. */
      56             : 
/* Per-tile state machine states.  Transitions are driven by the CTRL
   messages below; the overview comment above describes how snapct keeps
   every tile on the same state for a given snapshot attempt. */

#define FD_SNAPSHOT_STATE_IDLE                 (0UL) /* Performing no work and should receive no data frags */
#define FD_SNAPSHOT_STATE_PROCESSING           (1UL) /* Performing usual work, no errors / EoF condition encountered */
#define FD_SNAPSHOT_STATE_FINISHING            (2UL) /* Tile has observed EoF, expects no additional data frags */
#define FD_SNAPSHOT_STATE_ERROR                (3UL) /* Some error occurred, will wait for a FAIL command to reset */
#define FD_SNAPSHOT_STATE_SHUTDOWN             (4UL) /* All work finished, tile can perform final cleanup and exit */
      62             : 
/* Message type tags carried on pipeline frags.  DATA/META are the
   data-plane payloads; the CTRL_* values are the control messages that
   snapct originates and that each tile forwards downstream (see the
   state machine overview above). */

#define FD_SNAPSHOT_MSG_DATA                   (0UL) /* Fragment represents some snapshot data */
#define FD_SNAPSHOT_MSG_META                   (1UL) /* Fragment represents a fd_ssctrl_meta_t message */

#define FD_SNAPSHOT_MSG_CTRL_INIT_FULL         (2UL) /* Pipeline should start processing a full snapshot */
#define FD_SNAPSHOT_MSG_CTRL_INIT_INCR         (3UL) /* Pipeline should start processing an incremental snapshot */
#define FD_SNAPSHOT_MSG_CTRL_FAIL              (4UL) /* Current snapshot failed, undo work and reset to idle state */
#define FD_SNAPSHOT_MSG_CTRL_NEXT              (5UL) /* Current snapshot succeeded, commit work, go idle, and expect another snapshot */
#define FD_SNAPSHOT_MSG_CTRL_DONE              (6UL) /* Current snapshot succeeded, commit work, go idle, and expect shutdown */
#define FD_SNAPSHOT_MSG_CTRL_SHUTDOWN          (7UL) /* No work left to do, perform final cleanup and shut down */
#define FD_SNAPSHOT_MSG_CTRL_ERROR             (8UL) /* Some tile encountered an error with the current stream */

/* snapla -> snapls */
#define FD_SNAPSHOT_HASH_MSG_RESULT_ADD        (9UL) /* Hash result sent from snapla to snapls */

/* snapin -> snapls */
#define FD_SNAPSHOT_HASH_MSG_EXPECTED         (10UL) /* Hash result sent from snapin to snapls */

/* snapin -> snapls */
#define FD_SNAPSHOT_HASH_MSG_SUB              (11UL) /* Duplicate account sent from snapin to snapls, includes account header and data */
#define FD_SNAPSHOT_HASH_MSG_SUB_HDR          (12UL) /* Duplicate account sent from snapin to snapls, only the account header, no data */
#define FD_SNAPSHOT_HASH_MSG_SUB_DATA         (13UL) /* Duplicate account sent from snapin to snapls, only the account data, no header */
      84             : 
/* Sent by snapct to tell snapld whether to load a local file or
   download from a particular external peer. */
typedef struct fd_ssctrl_init {
  int           file;              /* nonzero: read from a local file instead of downloading -- TODO confirm flag sense against snapld */
  int           zstd;              /* nonzero: stream is zstd compressed -- presumably; verify against decompressor tile */
  fd_ip4_port_t addr;              /* IPv4 address and port of the peer to download from (network path only) */
  char          hostname[ 256UL ]; /* peer hostname, e.g. for the HTTP Host header -- NOTE(review): assumed NUL terminated; confirm */
  int           is_https;          /* nonzero: download over HTTPS rather than plain HTTP */
} fd_ssctrl_init_t;
      94             : 
/* Sent by snapld to tell snapct metadata about a downloaded snapshot. */
typedef struct fd_ssctrl_meta {
  ulong total_sz;          /* total size of the snapshot stream in bytes -- presumably the compressed/wire size; confirm in snapld */
  char  name[ PATH_MAX ];  /* snapshot file name -- NOTE(review): assumed NUL terminated; confirm producer */
} fd_ssctrl_meta_t;
     100             : 
/* Fixed-size account metadata header.  Precedes the variable-length
   account data in the SUB family of hash messages (see
   fd_snapshot_full_account below). */
struct fd_snapshot_account_hdr {
  uchar   pubkey[ FD_PUBKEY_FOOTPRINT ]; /* account address */
  uchar   owner[ FD_PUBKEY_FOOTPRINT ];  /* owning program address */
  ulong   lamports;                      /* account balance in lamports */
  uchar   executable;                    /* boolean flag stored as a byte -- presumably 0 or 1; confirm producers */
  ulong   data_len;                      /* length in bytes of the account data that accompanies this header */
};
typedef struct fd_snapshot_account_hdr fd_snapshot_account_hdr_t;
     109             : 
     110             : /* fd_snapshot_account_hdr_init initializes a fd_snapshot_account_hdr_t struct
     111             :    with the appropriate account metadata fields. */
     112             : static inline void
     113             : fd_snapshot_account_hdr_init( fd_snapshot_account_hdr_t * account,
     114             :                            uchar const                    pubkey[ FD_PUBKEY_FOOTPRINT ],
     115             :                            uchar const                    owner[ FD_PUBKEY_FOOTPRINT ],
     116             :                            ulong                          lamports,
     117             :                            uchar                          executable,
     118           0 :                            ulong                          data_len ) {
     119           0 :   fd_memcpy( account->pubkey, pubkey, FD_PUBKEY_FOOTPRINT );
     120           0 :   fd_memcpy( account->owner,  owner,  FD_PUBKEY_FOOTPRINT );
     121           0 :   account->lamports   = lamports;
     122           0 :   account->executable = executable;
     123           0 :   account->data_len   = data_len;
     124           0 : }
     125             : 
/* fd_snapshot_full_account is the contents of the
   SNAPSHOT_HASH_MSG_SUB message.  It contains a fd_snapshot_account_hdr_t
   header and the corresponding account data in a single message.

   For simplicity and conformance to burst limitations in snapin, the
   entire duplicate account is sent in one message (one frag).  Consider
   caching the lthash of the duplicate account so we do not have to
   send the entire account over. */
struct fd_snapshot_full_account {
  fd_snapshot_account_hdr_t hdr;                            /* fixed-size account metadata */
  uchar                     data[ FD_RUNTIME_ACC_SZ_MAX ];  /* account data; presumably only hdr.data_len bytes are valid -- confirm */
};
typedef struct fd_snapshot_full_account fd_snapshot_full_account_t;

/* Upper bound on the number of snapla tiles a pipeline may have.
   NOTE(review): snapla's role (hashing, per the message names above) is
   inferred -- confirm against the snapla tile implementation. */
#define FD_SNAPSHOT_MAX_SNAPLA_TILES (8UL)
     141             : 
     142             : #endif /* HEADER_fd_src_discof_restore_utils_fd_ssctrl_h */

Generated by: LCOV version 1.14