Line data Source code
1 : #ifndef HEADER_fd_src_discof_restore_utils_fd_ssctrl_h 2 : #define HEADER_fd_src_discof_restore_utils_fd_ssctrl_h 3 : 4 : #include "../../../util/net/fd_net_headers.h" 5 : #include "../../../flamenco/runtime/fd_runtime_const.h" 6 : 7 : /* The snapshot tiles have a somewhat involved state machine, which is 8 : controlled by snapct. Imagine first the following sequence: 9 : 10 : 1. snapct is reading a full snapshot from the network and sends some 11 : data to snapdc to be decompressed. 12 : 2. snapct hits a network error, and resets the connection to a new 13 : peer. 14 : 3. The decompressor fails on data from the old peer, and sends a 15 : malformed message to snapct. 16 : 4. snapct receives the malformed message, and abandons the new 17 : connection, even though it was not malformed. 18 : 19 : There are basically two ways to prevent this. Option A is the tiles 20 : can pass not just control messages to one another, but also tag them 21 : with some xid indicating which "attempt" the control message is for. 22 : 23 : This is pretty hard to reason about, and the state machine can grow 24 : quite complicated. 25 : 26 : There's an easier way: the tiles just are fully synchronized with 27 : snapct. Whatever "attempt" snapct is on, we ensure all other tiles 28 : are on it too. This means when any tile fails a snapshot, all tiles 29 : must fail it and fully flush all frags in the pipeline before snapct 30 : can proceed with a new attempt. 31 : 32 : The control flow then is basically, 33 : 34 : 1. All tiles start in the IDLE state. 35 : 2. snapct initializes the pipeline by sending an INIT message. 36 : Each tile enters the PROCESSING state and then forwards the INIT 37 : message down the pipeline. When snapct receives this INIT 38 : message, the entire pipeline is in PROCESSING state. 39 : 3. Tiles continue to process data / frags as applicable. If an 40 : error occurs, the tile enters the ERROR state and also sends an 41 : ERROR message downstream. All downstream tiles also enter the 42 : ERROR state and forward the message. Note that upstream tiles 43 : will not be in an ERROR state and will continue producing frags. 44 : When snapct receives the ERROR message, it will send a FAIL 45 : message. snapct then waits for this FAIL message to be 46 : progagated through the pipeline and received back. It then 47 : knows that all tiles are synchonized back in an IDLE state and 48 : it can try again with a new INIT. 49 : 4. Once snapct detects that the processing is finished, it sends 50 : a DONE message through the pipeline and waits for it to be 51 : received back. We then either move on to the incremental 52 : snapshot, or shut down the whole pipeline. 53 : 54 : The keeps the tiles in lockstep, and simplifies the state machine to 55 : a manageable level. 56 : 57 : It is a strict requirement that all tiles in the pipeline eventually 58 : forward all control messages they receive. Each control message is 59 : only generated once in snapct and will not be re-sent. The pipeline 60 : will be locked on flushing that control message until all tiles 61 : forward it on. If a control message is dropped, the pipeline will 62 : deadlock. Note that a tile can choose to hold onto a control message 63 : and forward it later after performing some asynchronous routine. */ 64 : 65 0 : #define FD_SNAPSHOT_STATE_IDLE (0UL) /* Performing no work and should receive no data frags */ 66 0 : #define FD_SNAPSHOT_STATE_PROCESSING (1UL) /* Performing usual work, no errors / EoF condition encountered */ 67 0 : #define FD_SNAPSHOT_STATE_FINISHING (2UL) /* Tile has observed EoF, expects no additional data frags */ 68 0 : #define FD_SNAPSHOT_STATE_ERROR (3UL) /* Some error occurred, will wait for a FAIL command to reset */ 69 0 : #define FD_SNAPSHOT_STATE_SHUTDOWN (4UL) /* All work finished, tile can perform final cleanup and exit */ 70 : 71 0 : #define FD_SNAPSHOT_MSG_DATA (0UL) /* Fragment represents some snapshot data */ 72 0 : #define FD_SNAPSHOT_MSG_META (1UL) /* Fragment represents a fd_ssctrl_meta_t message */ 73 : 74 0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_FULL (2UL) /* Pipeline should start processing a full snapshot */ 75 0 : #define FD_SNAPSHOT_MSG_CTRL_INIT_INCR (3UL) /* Pipeline should start processing an incremental snapshot */ 76 0 : #define FD_SNAPSHOT_MSG_CTRL_FAIL (4UL) /* Current snapshot failed, undo work and reset to idle state */ 77 0 : #define FD_SNAPSHOT_MSG_CTRL_NEXT (5UL) /* Current snapshot succeeded, commit work, go idle, and expect another snapshot */ 78 0 : #define FD_SNAPSHOT_MSG_CTRL_DONE (6UL) /* Current snapshot succeeded, commit work, go idle, and expect shutdown */ 79 0 : #define FD_SNAPSHOT_MSG_CTRL_SHUTDOWN (7UL) /* Snapshot load successful, no work left to do, perform final cleanup and shut down*/ 80 0 : #define FD_SNAPSHOT_MSG_CTRL_ERROR (8UL) /* Some tile encountered an error with the current stream */ 81 0 : #define FD_SNAPSHOT_MSG_CTRL_FINI (9UL) /* Current snapshot has been fully loaded, finish processing */ 82 : 83 : /* snapin -> snapls */ 84 : /* snapin -> snapwm -> snaplv */ 85 0 : #define FD_SNAPSHOT_HASH_MSG_EXPECTED (10UL) /* Hash result sent from snapin to snapls or from snapin to snapwm to snaplv */ 86 : 87 : /* snapin -> snapls */ 88 0 : #define FD_SNAPSHOT_HASH_MSG_SUB (11UL) /* Duplicate account sent from snapin to snapls, includes account header and data */ 89 0 : #define FD_SNAPSHOT_HASH_MSG_SUB_HDR (12UL) /* Duplicate account sent from snapin to snapls, only the account header, no data */ 90 0 : #define FD_SNAPSHOT_HASH_MSG_SUB_DATA (13UL) /* Duplicate account sent from snapin to snapls, only the account data, no header */ 91 : /* snapwm -> snaplv */ 92 0 : #define FD_SNAPSHOT_HASH_MSG_RESULT_SUB (14UL) /* Duplicate partial hash result sent from snapwm to snaplv (to subtract) */ 93 : /* snapwm -> snaplv -> snaplh */ 94 0 : #define FD_SNAPSHOT_HASH_MSG_SUB_META_BATCH (15UL) /* Duplicate account(s) meta batch sent from snapwm to snaplv */ 95 : 96 : /* snapla -> snapls */ 97 : /* snaplh -> snaplv */ 98 0 : #define FD_SNAPSHOT_HASH_MSG_RESULT_ADD (16UL) /* Hash result sent from snapla (snaplh) to snapls (snaplv) */ 99 : 100 : 101 : /* Sent by snapct to tell snapld whether to load a local file or 102 : download from a particular external peer. */ 103 : typedef struct fd_ssctrl_init { 104 : int file; 105 : int zstd; 106 : ulong slot; /* slot advertised by the snapshot peer */ 107 : fd_ip4_port_t addr; 108 : char hostname[ 256UL ]; 109 : char path[ PATH_MAX ]; 110 : ulong path_len; 111 : int is_https; 112 : } fd_ssctrl_init_t; 113 : 114 : /* Sent by snapld to tell snapct metadata about a downloaded snapshot. */ 115 : typedef struct fd_ssctrl_meta { 116 : ulong total_sz; 117 : } fd_ssctrl_meta_t; 118 : 119 : struct fd_snapshot_account_hdr { 120 : uchar pubkey[ FD_PUBKEY_FOOTPRINT ]; 121 : uchar owner[ FD_PUBKEY_FOOTPRINT ]; 122 : ulong lamports; 123 : uchar executable; 124 : ulong data_len; 125 : }; 126 : typedef struct fd_snapshot_account_hdr fd_snapshot_account_hdr_t; 127 : 128 : /* fd_snapshot_account_hdr_init initializes a fd_snapshot_account_hdr_t struct 129 : with the appropriate account metadata fields. */ 130 : static inline void 131 : fd_snapshot_account_hdr_init( fd_snapshot_account_hdr_t * account, 132 : uchar const pubkey[ FD_PUBKEY_FOOTPRINT ], 133 : uchar const owner[ FD_PUBKEY_FOOTPRINT ], 134 : ulong lamports, 135 : uchar executable, 136 0 : ulong data_len ) { 137 0 : fd_memcpy( account->pubkey, pubkey, FD_PUBKEY_FOOTPRINT ); 138 0 : fd_memcpy( account->owner, owner, FD_PUBKEY_FOOTPRINT ); 139 0 : account->lamports = lamports; 140 0 : account->executable = executable; 141 0 : account->data_len = data_len; 142 0 : } 143 : 144 : /* fd_snapshot_full_account is the contents of the 145 : SNAPSHOT_HASH_MSG_SUB message. It contains a fd_snapshot_account_hdr_t 146 : header and the corresponding account data in a single message. 147 : 148 : For simplicity and conformance to burst limitations in snapin, the 149 : entire duplicate account is sent in one message (one frag). Consider 150 : caching the lthash of the duplicate account so we do not have to 151 : send the entire account over. */ 152 : struct fd_snapshot_full_account { 153 : fd_snapshot_account_hdr_t hdr; 154 : uchar data[ FD_RUNTIME_ACC_SZ_MAX ]; 155 : }; 156 : typedef struct fd_snapshot_full_account fd_snapshot_full_account_t; 157 : 158 : #define FD_SNAPSHOT_MAX_SNAPLA_TILES (8UL) 159 : #define FD_SNAPSHOT_MAX_SNAPLH_TILES (8UL) 160 : 161 : #endif /* HEADER_fd_src_discof_restore_utils_fd_ssctrl_h */