Line data Source code
1 : #ifndef HEADER_fd_src_discof_restore_utils_fd_ssctrl_h 2 : #define HEADER_fd_src_discof_restore_utils_fd_ssctrl_h 3 : 4 : #include "../../../util/net/fd_net_headers.h" 5 : #include "../../../flamenco/runtime/fd_runtime_const.h" 6 : 7 : /* The snapshot tiles have a somewhat involved state machine, which is 8 : controlled by snapct. Imagine first the following sequence: 9 : 10 : 1. snapct is reading a full snapshot from the network and sends some 11 : data to snapdc to be decompressed. 12 : 2. snapct hits a network error, and resets the connection to a new 13 : peer. 14 : 3. The decompressor fails on data from the old peer, and sends a 15 : malformed message to snapct. 16 : 4. snapct receives the malformed message, and abandons the new 17 : connection, even though it was not malformed. 18 : 19 : There are basically two ways to prevent this. Option A is the tiles 20 : can pass not just control messages to one another, but also tag them 21 : with some xid indicating which "attempt" the control message is for. 22 : 23 : This is pretty hard to reason about, and the state machine can grow 24 : quite complicated. 25 : 26 : There's an easier way: the tiles just are fully synchronized with 27 : snapct. Whatever "attempt" snapct is on, we ensure all other tiles 28 : are on it too. This means when any tile fails a snapshot, all tiles 29 : must fail it and fully flush all frags in the pipeline before snapct 30 : can proceed with a new attempt. 31 : 32 : The control flow then is basically, 33 : 34 : 1. All tiles start in the IDLE state. 35 : 2. snapct initializes the pipeline by sending an INIT message. 36 : Each tile enters the PROCESSING state and then forwards the INIT 37 : message down the pipeline. When snapct receives this INIT 38 : message, the entire pipeline is in PROCESSING state. 39 : 3. Tiles continue to process data / frags as applicable. If an 40 : error occurs, the tile enters the ERROR state and also sends an 41 : ERROR message downstream. All downstream tiles also enter the 42 : ERROR state and forward the message. Note that upstream tiles 43 : will not be in an ERROR state and will continue producing frags. 44 : When snapct receives the ERROR message, it will send a FAIL 45 : message. snapct then waits for this FAIL message to be 46 : progagated through the pipeline and received back. It then 47 : knows that all tiles are synchonized back in an IDLE state and 48 : it can try again with a new INIT. 49 : 4. Once snapct detects that the processing is finished, it sends 50 : a DONE message through the pipeline and waits for it to be 51 : received back. We then either move on to the incremental 52 : snapshot, or shut down the whole pipeline. 53 : 54 : The keeps the tiles in lockstep, and simplifies the state machine to 55 : a manageable level. */ 56 : 57 0 : #define FD_SNAPSHOT_STATE_IDLE (0UL) /* Performing no work and should receive no data frags */ 58 0 : #define FD_SNAPSHOT_STATE_PROCESSING (1UL) /* Performing usual work, no errors / EoF condition encountered */ 59 0 : #define FD_SNAPSHOT_STATE_FINISHING (2UL) /* Tile has observed EoF, expects no additional data frags */ 60 0 : #define FD_SNAPSHOT_STATE_ERROR (3UL) /* Some error occurred, will wait for a FAIL command to reset */ 61 0 : #define FD_SNAPSHOT_STATE_SHUTDOWN (4UL) /* All work finished, tile can perform final cleanup and exit */ 62 : 63 0 : #define FD_SNAPSHOT_MSG_DATA (0UL) /* Fragment represents some snapshot data */ 64 0 : #define FD_SNAPSHOT_MSG_META (1UL) /* Fragment represents a fd_ssctrl_meta_t message */ 65 : 66 0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_FULL (2UL) /* Pipeline should start processing a full snapshot */ 67 0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_INCR (3UL) /* Pipeline should start processing an incremental snapshot */ 68 0 : #define FD_SNAPSHOT_MSG_CTRL_FAIL (4UL) /* Current snapshot failed, undo work and reset to idle state */ 69 0 : #define FD_SNAPSHOT_MSG_CTRL_NEXT (5UL) /* Current snapshot succeeded, commit work, go idle, and expect another snapshot */ 70 0 : #define FD_SNAPSHOT_MSG_CTRL_DONE (6UL) /* Current snapshot succeeded, commit work, go idle, and expect shutdown */ 71 0 : #define FD_SNAPSHOT_MSG_CTRL_SHUTDOWN (7UL) /* No work left to do, perform final cleanup and shut down */ 72 0 : #define FD_SNAPSHOT_MSG_CTRL_ERROR (8UL) /* Some tile encountered an error with the current stream */ 73 : 74 : /* snapla -> snapls */ 75 0 : #define FD_SNAPSHOT_HASH_MSG_RESULT_ADD (9UL) /* Hash result sent from snapla to snapls */ 76 : 77 : /* snapin -> snapls */ 78 0 : #define FD_SNAPSHOT_HASH_MSG_EXPECTED (10UL) /* Hash result sent from snapin to snapls */ 79 : 80 : /* snapin -> snapls */ 81 0 : #define FD_SNAPSHOT_HASH_MSG_SUB (11UL) /* Duplicate account sent from snapin to snapls, includes account header and data */ 82 0 : #define FD_SNAPSHOT_HASH_MSG_SUB_HDR (12UL) /* Duplicate account sent from snapin to snapls, only the account header, no data */ 83 0 : #define FD_SNAPSHOT_HASH_MSG_SUB_DATA (13UL) /* Duplicate account sent from snapin to snapls, only the account data, no header */ 84 : 85 : /* Sent by snapct to tell snapld whether to load a local file or 86 : download from a particular external peer. */ 87 : typedef struct fd_ssctrl_init { 88 : int file; 89 : int zstd; 90 : fd_ip4_port_t addr; 91 : char hostname[ 256UL ]; 92 : int is_https; 93 : } fd_ssctrl_init_t; 94 : 95 : /* Sent by snapld to tell snapct metadata about a downloaded snapshot. */ 96 : typedef struct fd_ssctrl_meta { 97 : ulong total_sz; 98 : char name[ PATH_MAX ]; 99 : } fd_ssctrl_meta_t; 100 : 101 : struct fd_snapshot_account_hdr { 102 : uchar pubkey[ FD_PUBKEY_FOOTPRINT ]; 103 : uchar owner[ FD_PUBKEY_FOOTPRINT ]; 104 : ulong lamports; 105 : uchar executable; 106 : ulong data_len; 107 : }; 108 : typedef struct fd_snapshot_account_hdr fd_snapshot_account_hdr_t; 109 : 110 : /* fd_snapshot_account_hdr_init initializes a fd_snapshot_account_hdr_t struct 111 : with the appropriate account metadata fields. */ 112 : static inline void 113 : fd_snapshot_account_hdr_init( fd_snapshot_account_hdr_t * account, 114 : uchar const pubkey[ FD_PUBKEY_FOOTPRINT ], 115 : uchar const owner[ FD_PUBKEY_FOOTPRINT ], 116 : ulong lamports, 117 : uchar executable, 118 0 : ulong data_len ) { 119 0 : fd_memcpy( account->pubkey, pubkey, FD_PUBKEY_FOOTPRINT ); 120 0 : fd_memcpy( account->owner, owner, FD_PUBKEY_FOOTPRINT ); 121 0 : account->lamports = lamports; 122 0 : account->executable = executable; 123 0 : account->data_len = data_len; 124 0 : } 125 : 126 : /* fd_snapshot_full_account is the contents of the 127 : SNAPSHOT_HASH_MSG_SUB message. It contains a fd_snapshot_account_hdr_t 128 : header and the corresponding account data in a single message. 129 : 130 : For simplicity and conformance to burst limitations in snapin, the 131 : entire duplicate account is sent in one message (one frag). Consider 132 : caching the lthash of the duplicate account so we do not have to 133 : send the entire account over. */ 134 : struct fd_snapshot_full_account { 135 : fd_snapshot_account_hdr_t hdr; 136 : uchar data[ FD_RUNTIME_ACC_SZ_MAX ]; 137 : }; 138 : typedef struct fd_snapshot_full_account fd_snapshot_full_account_t; 139 : 140 : #define FD_SNAPSHOT_MAX_SNAPLA_TILES (8UL) 141 : 142 : #endif /* HEADER_fd_src_discof_restore_utils_fd_ssctrl_h */