#ifndef HEADER_fd_src_discof_restore_utils_fd_ssctrl_h
#define HEADER_fd_src_discof_restore_utils_fd_ssctrl_h

#include "../../../util/net/fd_net_headers.h"
#include "../../../flamenco/runtime/fd_runtime_const.h"

/* The snapshot tiles have a somewhat involved state machine, which is
   controlled by snapct.  Imagine first the following sequence:

    1. snapct is reading a full snapshot from the network and sends some
       data to snapdc to be decompressed.
    2. snapct hits a network error, and resets the connection to a new
       peer.
    3. The decompressor fails on data from the old peer, and sends a
       malformed message to snapct.
    4. snapct receives the malformed message, and abandons the new
       connection, even though it was not malformed.

   There are basically two ways to prevent this.  Option A is that the
   tiles can pass not just control messages to one another, but also
   tag them with some xid indicating which "attempt" the control
   message is for.

   This is pretty hard to reason about, and the state machine can grow
   quite complicated.

   There's an easier way: the tiles are just fully synchronized with
   snapct.  Whatever "attempt" snapct is on, we ensure all other tiles
   are on it too.  This means when any tile fails a snapshot, all tiles
   must fail it and fully flush all frags in the pipeline before snapct
   can proceed with a new attempt.

   The control flow then is basically,

     1. All tiles start in the IDLE state.
     2. snapct initializes the pipeline by sending an INIT message.
        Each tile enters the PROCESSING state and then forwards the INIT
        message down the pipeline.  When snapct receives this INIT
        message, the entire pipeline is in the PROCESSING state.
     3. Tiles continue to process data / frags as applicable.  If an
        error occurs, the tile enters the ERROR state and also sends an
        ERROR message downstream.  All downstream tiles also enter the
        ERROR state and forward the message.  Note that upstream tiles
        will not be in an ERROR state and will continue producing frags.
        When snapct receives the ERROR message, it will send a FAIL
        message.  snapct then waits for this FAIL message to be
        propagated through the pipeline and received back.  It then
        knows that all tiles are synchronized back in an IDLE state and
        it can try again with a new INIT.
     4. Once snapct detects that the processing is finished, it sends
        a DONE message through the pipeline and waits for it to be
        received back.  We then either move on to the incremental
        snapshot, or shut down the whole pipeline.

   This keeps the tiles in lockstep, and simplifies the state machine
   to a manageable level.

   It is a strict requirement that all tiles in the pipeline eventually
   forward all control messages they receive.  Each control message is
   only generated once in snapct and will not be re-sent.  The pipeline
   will be locked on flushing that control message until all tiles
   forward it on.  If a control message is dropped, the pipeline will
   deadlock.  Note that a tile can choose to hold onto a control message
   and forward it later after performing some asynchronous routine.  (An
   illustrative handler sketch follows the control-message defines
   below.) */

#define FD_SNAPSHOT_STATE_IDLE                 (0UL) /* Performing no work and should receive no data frags */
#define FD_SNAPSHOT_STATE_PROCESSING           (1UL) /* Performing usual work, no errors / EoF condition encountered */
#define FD_SNAPSHOT_STATE_FINISHING            (2UL) /* Tile has observed EoF, expects no additional data frags */
#define FD_SNAPSHOT_STATE_ERROR                (3UL) /* Some error occurred, will wait for a FAIL command to reset */
#define FD_SNAPSHOT_STATE_SHUTDOWN             (4UL) /* All work finished, tile can perform final cleanup and exit */

#define FD_SNAPSHOT_MSG_DATA                   (0UL) /* Fragment represents some snapshot data */
#define FD_SNAPSHOT_MSG_META                   (1UL) /* Fragment represents a fd_ssctrl_meta_t message */

#define FD_SNAPSHOT_MSG_CTRL_INIT_FULL         (2UL) /* Pipeline should start processing a full snapshot */
#define FD_SNAPSHOT_MSG_CTRL_INIT_INCR         (3UL) /* Pipeline should start processing an incremental snapshot */
#define FD_SNAPSHOT_MSG_CTRL_FAIL              (4UL) /* Current snapshot failed, undo work and reset to idle state */
#define FD_SNAPSHOT_MSG_CTRL_NEXT              (5UL) /* Current snapshot succeeded, commit work, go idle, and expect another snapshot */
#define FD_SNAPSHOT_MSG_CTRL_DONE              (6UL) /* Current snapshot succeeded, commit work, go idle, and expect shutdown */
#define FD_SNAPSHOT_MSG_CTRL_SHUTDOWN          (7UL) /* No work left to do, perform final cleanup and shut down */
#define FD_SNAPSHOT_MSG_CTRL_ERROR             (8UL) /* Some tile encountered an error with the current stream */
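
/* For illustration only, not part of this header: a downstream tile's
   handling of the control messages above typically reduces to a small
   switch that updates the tile's state and then forwards the message
   unchanged.  The tile type and transmit helper below are hypothetical
   names; only the FD_SNAPSHOT_* constants come from this file.

     typedef struct { ulong state; } example_tile_t;

     // hypothetical: publish the control frag downstream
     extern void
     example_forward_ctrl( example_tile_t * tile,
                           ulong            sig );

     static void
     example_on_ctrl( example_tile_t * tile,
                      ulong            sig ) {
       switch( sig ) {
       case FD_SNAPSHOT_MSG_CTRL_INIT_FULL:
       case FD_SNAPSHOT_MSG_CTRL_INIT_INCR: tile->state = FD_SNAPSHOT_STATE_PROCESSING; break;
       case FD_SNAPSHOT_MSG_CTRL_ERROR:     tile->state = FD_SNAPSHOT_STATE_ERROR;      break;
       case FD_SNAPSHOT_MSG_CTRL_FAIL:
       case FD_SNAPSHOT_MSG_CTRL_NEXT:
       case FD_SNAPSHOT_MSG_CTRL_DONE:      tile->state = FD_SNAPSHOT_STATE_IDLE;       break;
       case FD_SNAPSHOT_MSG_CTRL_SHUTDOWN:  tile->state = FD_SNAPSHOT_STATE_SHUTDOWN;   break;
       }
       example_forward_ctrl( tile, sig );  // never drop a control message:
     }                                     // an unforwarded message deadlocks
                                           // the pipeline
*/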
      81             : 
      82             : /* snapla -> snapls */
      83             : /* snaplh -> snaplv */
      84           0 : #define FD_SNAPSHOT_HASH_MSG_RESULT_ADD        (9UL) /* Hash result sent from snapla (snaplh) to snapls (snaplv) */
      85             : 
      86             : /* snapin -> snapls */
      87             : /* snapin -> snapwm -> snaplv */
      88           0 : #define FD_SNAPSHOT_HASH_MSG_EXPECTED         (10UL) /* Hash result sent from snapin to snapls or from snapin to snapwm to snaplv */
      89             : 
      90             : /* snapin -> snapls */
      91           0 : #define FD_SNAPSHOT_HASH_MSG_SUB              (11UL) /* Duplicate account sent from snapin to snapls, includes account header and data */
      92           0 : #define FD_SNAPSHOT_HASH_MSG_SUB_HDR          (12UL) /* Duplicate account sent from snapin to snapls, only the account header, no data */
      93           0 : #define FD_SNAPSHOT_HASH_MSG_SUB_DATA         (13UL) /* Duplicate account sent from snapin to snapls, only the account data, no header */
      94             : /* snapwm -> snaplv */
      95           0 : #define FD_SNAPSHOT_HASH_MSG_RESULT_SUB       (14UL) /* Duplicate partial hash result sent from snapwm to snaplv (to subtract) */
      96             : /* snapwm -> snaplv -> snaplh */
      97           0 : #define FD_SNAPSHOT_HASH_MSG_SUB_META_BATCH   (15UL) /* Duplicate account(s) meta batch sent from snapwm to snaplv */
      98             : 
      99             : 
     100             : 
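/* For illustration only, not part of this header: the hash messages
   above support an incremental (lattice hash style) snapshot hash, in
   which per-account hash results are added into an accumulator and
   results for duplicate accounts are subtracted back out, with the
   final value checked against the hash delivered via
   FD_SNAPSHOT_HASH_MSG_EXPECTED.  The accumulator type and helpers
   below are hypothetical names:

     typedef struct { ulong limb[ 4 ]; } example_hash_t;
     extern void example_hash_add( example_hash_t * acc, example_hash_t const * h );
     extern void example_hash_sub( example_hash_t * acc, example_hash_t const * h );

     static void
     example_on_hash_result( example_hash_t *       acc,
                             ulong                  sig,
                             example_hash_t const * h ) {
       if(      sig==FD_SNAPSHOT_HASH_MSG_RESULT_ADD ) example_hash_add( acc, h );
       else if( sig==FD_SNAPSHOT_HASH_MSG_RESULT_SUB ) example_hash_sub( acc, h );
     }
*/
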
/* Sent by snapct to tell snapld whether to load a local file or
   download from a particular external peer. */
typedef struct fd_ssctrl_init {
  int           file;
  int           zstd;
  fd_ip4_port_t addr;
  char          hostname[ 256UL ];
  int           is_https;
} fd_ssctrl_init_t;
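
/* For illustration only, not part of this header: a plausible reading
   of the fields is that snapct fills the message as below when
   directing snapld to download from a remote peer (peer_addr is a
   hypothetical variable):

     fd_ssctrl_init_t init = {0};
     init.file     = 0;          // 0: download from a peer; non-zero: read a local file
     init.zstd     = 1;          // stream is zstd-compressed
     init.addr     = peer_addr;  // fd_ip4_port_t of the selected peer
     init.is_https = 0;          // plain http rather than https
     strncpy( init.hostname, "peer.example.org", sizeof(init.hostname)-1UL );
*/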

/* Sent by snapld to tell snapct metadata about a downloaded snapshot. */
typedef struct fd_ssctrl_meta {
  ulong total_sz;
  char  name[ PATH_MAX ];
} fd_ssctrl_meta_t;

struct fd_snapshot_account_hdr {
  uchar   pubkey[ FD_PUBKEY_FOOTPRINT ];
  uchar   owner[ FD_PUBKEY_FOOTPRINT ];
  ulong   lamports;
  uchar   executable;
  ulong   data_len;
};
typedef struct fd_snapshot_account_hdr fd_snapshot_account_hdr_t;

/* fd_snapshot_account_hdr_init initializes a fd_snapshot_account_hdr_t
   struct with the appropriate account metadata fields. */
static inline void
fd_snapshot_account_hdr_init( fd_snapshot_account_hdr_t * account,
                              uchar const                 pubkey[ FD_PUBKEY_FOOTPRINT ],
                              uchar const                 owner[ FD_PUBKEY_FOOTPRINT ],
                              ulong                       lamports,
                              uchar                       executable,
                              ulong                       data_len ) {
  fd_memcpy( account->pubkey, pubkey, FD_PUBKEY_FOOTPRINT );
  fd_memcpy( account->owner,  owner,  FD_PUBKEY_FOOTPRINT );
  account->lamports   = lamports;
  account->executable = executable;
  account->data_len   = data_len;
}

/* fd_snapshot_full_account is the contents of the
   FD_SNAPSHOT_HASH_MSG_SUB message.  It contains a
   fd_snapshot_account_hdr_t header and the corresponding account data
   in a single message.

   For simplicity and conformance to burst limitations in snapin, the
   entire duplicate account is sent in one message (one frag).  Consider
   caching the lthash of the duplicate account so we do not have to
   send the entire account over. */
struct fd_snapshot_full_account {
  fd_snapshot_account_hdr_t hdr;
  uchar                     data[ FD_RUNTIME_ACC_SZ_MAX ];
};
typedef struct fd_snapshot_full_account fd_snapshot_full_account_t;
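
/* For illustration only, not part of this header: a
   FD_SNAPSHOT_HASH_MSG_SUB frag could be assembled as below, with only
   the first sizeof(fd_snapshot_account_hdr_t)+data_len bytes of the
   struct actually meaningful.  The output buffer and account inputs
   are hypothetical:

     fd_snapshot_full_account_t * msg = (fd_snapshot_full_account_t *)out_buf;
     fd_snapshot_account_hdr_init( &msg->hdr, pubkey, owner, lamports, executable, data_len );
     fd_memcpy( msg->data, data, data_len );
     ulong sz = sizeof(fd_snapshot_account_hdr_t) + data_len;  // presumed frag size
*/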

#define FD_SNAPSHOT_MAX_SNAPLA_TILES (8UL)
#define FD_SNAPSHOT_MAX_SNAPLH_TILES (8UL)

#endif /* HEADER_fd_src_discof_restore_utils_fd_ssctrl_h */