Line data Source code
1 : #ifndef HEADER_fd_src_discof_restore_utils_fd_ssctrl_h 2 : #define HEADER_fd_src_discof_restore_utils_fd_ssctrl_h 3 : 4 : /* The snapshot tiles have a somewhat involved state machine, which is 5 : controlled by snaprd. Imagine first the following sequence: 6 : 7 : 1. snaprd is reading a full snapshot from the network and sends some 8 : data to snapdc to be decompressed. 9 : 2. snaprd hits a network error, and resets the connection to a new 10 : peer. 11 : 3. The decompressor fails on data from the old peer, and sends a 12 : malformed message to snaprd. 13 : 4. snaprd receives the malformed message, and abandons the new 14 : connection, even though it was not malformed. 15 : 16 : There are basically two ways to prevent this. Option A is that the 17 : tiles not only pass control messages to one another, but also tag them 18 : with some xid indicating which "attempt" the control message is for. 19 : 20 : This is pretty hard to reason about, and the state machine can grow 21 : quite complicated. 22 : 23 : There's an easier way (Option B): the tiles are just kept fully synchronized with 24 : snaprd. Whatever "attempt" snaprd is on, we ensure all other tiles 25 : are on it too. This means when any tile fails a snapshot, all tiles 26 : must fail it and fully flush all frags in the pipeline before snaprd 27 : can proceed with a new attempt. 28 : 29 : The control flow then is basically, 30 : 31 : 1. All tiles start assuming we are reading the full snapshot. 32 : 2. If any tile fails the snapshot, it sends a MALFORMED message 33 : to snaprd. Snaprd then sends a RESET message to all tiles. 34 : 3. Any control message, including a RESET, sent by snaprd must be 35 : acknowledged by all other tiles in the snapshot pipeline before 36 : snaprd can proceed with the next step. 37 : 38 : This keeps the tiles in lockstep, and simplifies the state machine to 39 : a manageable level. 
*/ 40 : 41 0 : #define FD_SNAPSHOT_MSG_DATA (0UL) /* Fragment represents some snapshot data */ 42 : 43 0 : #define FD_SNAPSHOT_MSG_CTRL_RESET_FULL (1UL) /* Reset to start loading a fresh full snapshot */ 44 0 : #define FD_SNAPSHOT_MSG_CTRL_EOF_FULL (2UL) /* Full snapshot data is done, incremental data starting now */ 45 0 : #define FD_SNAPSHOT_MSG_CTRL_RESET_INCREMENTAL (3UL) /* Incremental data is being retried, start the incremental load over */ 46 0 : #define FD_SNAPSHOT_MSG_CTRL_DONE (4UL) /* Snapshot load is over, data is finished for this tile */ 47 0 : #define FD_SNAPSHOT_MSG_CTRL_SHUTDOWN (5UL) /* All tiles have acknowledged snapshot load is done, can now shut down */ 48 : 49 0 : #define FD_SNAPSHOT_MSG_CTRL_ACK (6UL) /* Sent from tiles back to snaprd, meaning they ACK whatever control message was pending */ 50 0 : #define FD_SNAPSHOT_MSG_CTRL_MALFORMED (7UL) /* Sent from tiles back to snaprd, meaning they consider the current snapshot malformed */ 51 : 52 : #endif /* HEADER_fd_src_discof_restore_utils_fd_ssctrl_h */