Line data Source code
1 : #ifndef HEADER_fd_src_discof_restore_utils_fd_ssctrl_h 2 : #define HEADER_fd_src_discof_restore_utils_fd_ssctrl_h 3 : 4 : #include "../../../util/net/fd_net_headers.h" 5 : #include "../../../flamenco/runtime/fd_runtime_const.h" 6 : 7 : /* The snapshot tiles have a somewhat involved state machine, which is 8 : controlled by snapct. Imagine first the following sequence: 9 : 10 : 1. snapct is reading a full snapshot from the network and sends some 11 : data to snapdc to be decompressed. 12 : 2. snapct hits a network error, and resets the connection to a new 13 : peer. 14 : 3. The decompressor fails on data from the old peer, and sends a 15 : malformed message to snapct. 16 : 4. snapct receives the malformed message, and abandons the new 17 : connection, even though it was not malformed. 18 : 19 : There are basically two ways to prevent this. Option A is the tiles 20 : can pass not just control messages to one another, but also tag them 21 : with some xid indicating which "attempt" the control message is for. 22 : 23 : This is pretty hard to reason about, and the state machine can grow 24 : quite complicated. 25 : 26 : There's an easier way: the tiles just are fully synchronized with 27 : snapct. Whatever "attempt" snapct is on, we ensure all other tiles 28 : are on it too. This means when any tile fails a snapshot, all tiles 29 : must fail it and fully flush all frags in the pipeline before snapct 30 : can proceed with a new attempt. 31 : 32 : The control flow then is basically, 33 : 34 : 1. All tiles start in the IDLE state. 35 : 2. snapct initializes the pipeline by sending an INIT message. 36 : Each tile enters the PROCESSING state and then forwards the INIT 37 : message down the pipeline. When snapct receives this INIT 38 : message, the entire pipeline is in PROCESSING state. 39 : 3. Tiles continue to process data / frags as applicable. If an 40 : error occurs, the tile enters the ERROR state and also sends an 41 : ERROR message downstream. All downstream tiles also enter the 42 : ERROR state and forward the message. Note that upstream tiles 43 : will not be in an ERROR state and will continue producing frags. 44 : When snapct receives the ERROR message, it will send a FAIL 45 : message. snapct then waits for this FAIL message to be 46 : progagated through the pipeline and received back. It then 47 : knows that all tiles are synchonized back in an IDLE state and 48 : it can try again with a new INIT. 49 : 4. Once snapct detects that the processing is finished, it sends 50 : a DONE message through the pipeline and waits for it to be 51 : received back. We then either move on to the incremental 52 : snapshot, or shut down the whole pipeline. 53 : 54 : The keeps the tiles in lockstep, and simplifies the state machine to 55 : a manageable level. 56 : 57 : It is a strict requirement that all tiles in the pipeline eventually 58 : forward all control messages they receive. Each control message is 59 : only generated once in snapct and will not be re-sent. The pipeline 60 : will be locked on flushing that control message until all tiles 61 : forward it on. If a control message is dropped, the pipeline will 62 : deadlock. Note that a tile can choose to hold onto a control message 63 : and forward it later after performing some asynchronous routine. */ 64 : 65 0 : #define FD_SNAPSHOT_STATE_IDLE (0UL) /* Performing no work and should receive no data frags */ 66 0 : #define FD_SNAPSHOT_STATE_PROCESSING (1UL) /* Performing usual work, no errors / EoF condition encountered */ 67 0 : #define FD_SNAPSHOT_STATE_FINISHING (2UL) /* Tile has observed EoF, expects no additional data frags */ 68 0 : #define FD_SNAPSHOT_STATE_ERROR (3UL) /* Some error occurred, will wait for a FAIL command to reset */ 69 0 : #define FD_SNAPSHOT_STATE_SHUTDOWN (4UL) /* All work finished, tile can perform final cleanup and exit */ 70 : 71 0 : #define FD_SNAPSHOT_MSG_DATA (0UL) /* Fragment represents some snapshot data */ 72 0 : #define FD_SNAPSHOT_MSG_META (1UL) /* Fragment represents a fd_ssctrl_meta_t message */ 73 : 74 0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_FULL (2UL) /* Pipeline should start processing a full snapshot */ 75 0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_INCR (3UL) /* Pipeline should start processing an incremental snapshot */ 76 0 : #define FD_SNAPSHOT_MSG_CTRL_FAIL (4UL) /* Current snapshot failed, undo work and reset to idle state */ 77 0 : #define FD_SNAPSHOT_MSG_CTRL_NEXT (5UL) /* Current snapshot succeeded, commit work, go idle, and expect another snapshot */ 78 0 : #define FD_SNAPSHOT_MSG_CTRL_DONE (6UL) /* Current snapshot succeeded, commit work, go idle, and expect shutdown */ 79 0 : #define FD_SNAPSHOT_MSG_CTRL_SHUTDOWN (7UL) /* No work left to do, perform final cleanup and shut down */ 80 0 : #define FD_SNAPSHOT_MSG_CTRL_ERROR (8UL) /* Some tile encountered an error with the current stream */ 81 : 82 : /* snapla -> snapls */ 83 : /* snaplh -> snaplv */ 84 0 : #define FD_SNAPSHOT_HASH_MSG_RESULT_ADD (9UL) /* Hash result sent from snapla (snaplh) to snapls (snaplv) */ 85 : 86 : /* snapin -> snapls */ 87 : /* snapin -> snapwm -> snaplv */ 88 0 : #define FD_SNAPSHOT_HASH_MSG_EXPECTED (10UL) /* Hash result sent from snapin to snapls or from snapin to snapwm to snaplv */ 89 : 90 : /* snapin -> snapls */ 91 0 : #define FD_SNAPSHOT_HASH_MSG_SUB (11UL) /* Duplicate account sent from snapin to snapls, includes account header and data */ 92 0 : #define FD_SNAPSHOT_HASH_MSG_SUB_HDR (12UL) /* Duplicate account sent from snapin to snapls, only the account header, no data */ 93 0 : #define FD_SNAPSHOT_HASH_MSG_SUB_DATA (13UL) /* Duplicate account sent from snapin to snapls, only the account data, no header */ 94 : /* snapwm -> snaplv */ 95 0 : #define FD_SNAPSHOT_HASH_MSG_RESULT_SUB (14UL) /* Duplicate partial hash result sent from snapwm to snaplv (to subtract) */ 96 : /* snapwm -> snaplv -> snaplh */ 97 0 : #define FD_SNAPSHOT_HASH_MSG_SUB_META_BATCH (15UL) /* Duplicate account(s) meta batch sent from snapwm to snaplv */ 98 : 99 : 100 : 101 : /* Sent by snapct to tell snapld whether to load a local file or 102 : download from a particular external peer. */ 103 : typedef struct fd_ssctrl_init { 104 : int file; 105 : int zstd; 106 : fd_ip4_port_t addr; 107 : char hostname[ 256UL ]; 108 : int is_https; 109 : } fd_ssctrl_init_t; 110 : 111 : /* Sent by snapld to tell snapct metadata about a downloaded snapshot. */ 112 : typedef struct fd_ssctrl_meta { 113 : ulong total_sz; 114 : char name[ PATH_MAX ]; 115 : } fd_ssctrl_meta_t; 116 : 117 : struct fd_snapshot_account_hdr { 118 : uchar pubkey[ FD_PUBKEY_FOOTPRINT ]; 119 : uchar owner[ FD_PUBKEY_FOOTPRINT ]; 120 : ulong lamports; 121 : uchar executable; 122 : ulong data_len; 123 : }; 124 : typedef struct fd_snapshot_account_hdr fd_snapshot_account_hdr_t; 125 : 126 : /* fd_snapshot_account_hdr_init initializes a fd_snapshot_account_hdr_t struct 127 : with the appropriate account metadata fields. */ 128 : static inline void 129 : fd_snapshot_account_hdr_init( fd_snapshot_account_hdr_t * account, 130 : uchar const pubkey[ FD_PUBKEY_FOOTPRINT ], 131 : uchar const owner[ FD_PUBKEY_FOOTPRINT ], 132 : ulong lamports, 133 : uchar executable, 134 0 : ulong data_len ) { 135 0 : fd_memcpy( account->pubkey, pubkey, FD_PUBKEY_FOOTPRINT ); 136 0 : fd_memcpy( account->owner, owner, FD_PUBKEY_FOOTPRINT ); 137 0 : account->lamports = lamports; 138 0 : account->executable = executable; 139 0 : account->data_len = data_len; 140 0 : } 141 : 142 : /* fd_snapshot_full_account is the contents of the 143 : SNAPSHOT_HASH_MSG_SUB message. It contains a fd_snapshot_account_hdr_t 144 : header and the corresponding account data in a single message. 145 : 146 : For simplicity and conformance to burst limitations in snapin, the 147 : entire duplicate account is sent in one message (one frag). Consider 148 : caching the lthash of the duplicate account so we do not have to 149 : send the entire account over. */ 150 : struct fd_snapshot_full_account { 151 : fd_snapshot_account_hdr_t hdr; 152 : uchar data[ FD_RUNTIME_ACC_SZ_MAX ]; 153 : }; 154 : typedef struct fd_snapshot_full_account fd_snapshot_full_account_t; 155 : 156 : #define FD_SNAPSHOT_MAX_SNAPLA_TILES (8UL) 157 : #define FD_SNAPSHOT_MAX_SNAPLH_TILES (8UL) 158 : 159 : #endif /* HEADER_fd_src_discof_restore_utils_fd_ssctrl_h */