LCOV - code coverage report
Current view: top level - discof/restore/utils - fd_ssctrl.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 8 0.0 %
Date: 2025-08-05 05:04:49 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_discof_restore_utils_fd_ssctrl_h
       2             : #define HEADER_fd_src_discof_restore_utils_fd_ssctrl_h
       3             : 
       4             : /* The snapshot tiles have a somewhat involved state machine, which is
       5             :    controlled by snaprd.  Imagine first the following sequence:
       6             : 
       7             :     1. snaprd is reading a full snapshot from the network and sends some
       8             :        data to snapdc to be decompressed.
       9             :     2. snaprd hits a network error, and resets the connection to a new
      10             :        peer.
      11             :     3. The decompressor fails on data from the old peer, and sends a
      12             :        MALFORMED control message to snaprd.
      13             :     4. snaprd receives the MALFORMED message, and abandons the new
      14             :        connection, even though the new peer's data was not malformed.
      15             : 
      16             :    There are basically two ways to prevent this.  Option A is the tiles
      17             :    can pass not just control messages to one another, but also tag them
      18             :    with some xid indicating which "attempt" the control message is for.
      19             : 
      20             :    This is pretty hard to reason about, and the state machine can grow
      21             :    quite complicated.
      22             : 
      23             :    There's an easier way: the tiles are simply kept fully synchronized with
      24             :    snaprd.  Whatever "attempt" snaprd is on, we ensure all other tiles
      25             :    are on it too.  This means when any tile fails a snapshot, all tiles
      26             :    must fail it and fully flush all frags in the pipeline before snaprd
      27             :    can proceed with a new attempt.
      28             : 
      29             :    The control flow then is basically,
      30             : 
      31             :      1. All tiles start assuming we are reading the full snapshot.
      32             :      2. If any tile fails the snapshot, it sends a MALFORMED message
      33             :         to snaprd.  Snaprd then sends a RESET message to all tiles.
      34             :      3. Any control message, including a RESET, sent by snaprd must be
      35             :         acknowledged by all other tiles in the snapshot pipeline before
      36             :         snaprd can proceed with the next step.
      37             : 
      38             :    This keeps the tiles in lockstep, and simplifies the state machine to
      39             :    a manageable level. */
      40             : 
      41           0 : #define FD_SNAPSHOT_MSG_DATA                   (0UL) /* Fragment represents some snapshot data */
      42             : 
      43           0 : #define FD_SNAPSHOT_MSG_CTRL_RESET_FULL        (1UL) /* Reset to start loading a fresh full snapshot */
      44           0 : #define FD_SNAPSHOT_MSG_CTRL_EOF_FULL          (2UL) /* Full snapshot data is done, incremental data starting now */
      45           0 : #define FD_SNAPSHOT_MSG_CTRL_RESET_INCREMENTAL (3UL) /* Incremental data being retried, start incremental over */
      46           0 : #define FD_SNAPSHOT_MSG_CTRL_DONE              (4UL) /* Snapshot load is over, data is finished for this tile */
      47           0 : #define FD_SNAPSHOT_MSG_CTRL_SHUTDOWN          (5UL) /* All tiles have acknowledged snapshot load is done, can now shutdown */
      48             : 
      49           0 : #define FD_SNAPSHOT_MSG_CTRL_ACK               (6UL) /* Sent from tiles back to snaprd, meaning they ACK whatever control message was pending */
      50           0 : #define FD_SNAPSHOT_MSG_CTRL_MALFORMED         (7UL) /* Sent from tiles back to snaprd, meaning they consider the current snapshot malformed */
      51             : 
      52             : #endif /* HEADER_fd_src_discof_restore_utils_fd_ssctrl_h */

Generated by: LCOV version 1.14