Line data Source code
1 : #ifndef HEADER_fd_src_choreo_restart_fd_restart_h 2 : #define HEADER_fd_src_choreo_restart_fd_restart_h 3 : 4 : /* fd_restart implements Solana's SIMD-0046, Optimistic cluster restart 5 : automation, which is also known as wen-restart. See protocol details at 6 : https://github.com/solana-foundation/solana-improvement-documents/pull/46 7 : */ 8 : 9 : #include "../../choreo/tower/fd_tower.h" 10 : #include "../../flamenco/types/fd_types.h" 11 : 12 : #define RESTART_MAGIC_TAG 128UL 13 : 14 : /* Protocol parameters of wen-restart */ 15 0 : #define RESTART_EPOCHS_MAX 2UL 16 0 : #define HEAVIEST_FORK_THRESHOLD_DELTA_PERCENT 38UL 17 : #define WAIT_FOR_NEXT_EPOCH_THRESHOLD_PERCENT 33UL 18 0 : #define WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT 80UL 19 0 : #define LAST_VOTED_FORK_MAX_SLOTS 0xFFFFUL 20 : 21 : /* Implementation-specific parameters */ 22 : #define FD_RESTART_MAX_PEERS 40200UL 23 : #define FD_RESTART_MSG_PUBLISH_PERIOD_NS 10e9L 24 0 : #define FD_RESTART_RAW_BITMAP_BYTES_MAX 8192UL /* 0xFFFF/8+1 */ 25 0 : #define FD_RESTART_PACKET_BITMAP_BYTES_MAX 824UL /* PACKET_DATA_SIZE is 1232, and the rest of LAST_VOTED_FORK_SLOT needs 1232-824 bytes */ 26 : #define FD_RESTART_LINK_BYTES_MAX ( sizeof(fd_gossip_restart_last_voted_fork_slots_t)+FD_RESTART_RAW_BITMAP_BYTES_MAX ) 27 : 28 : typedef enum { 29 : WR_STAGE_WAIT_FOR_INIT = 0, 30 : WR_STAGE_FIND_HEAVIEST_FORK_SLOT_NUM = 1, 31 : WR_STAGE_FIND_HEAVIEST_FORK_BANK_HASH = 2, 32 : WR_STAGE_GENERATE_SNAPSHOT = 3, 33 : WR_STAGE_DONE = 4 34 : } fd_wen_restart_stage_t; 35 : 36 : /* fd_restart_t contains all the states maintained by wen-restart. 37 : It is allocated within the `unprivileged_init` of the replay tile. */ 38 : struct fd_restart { 39 : fd_wen_restart_stage_t stage; 40 : 41 : /* States initialized at the beginning */ 42 : ulong funk_root; 43 : ulong root_epoch; 44 : fd_hash_t root_bank_hash; 45 : fd_epoch_schedule_t * epoch_schedule; 46 : ulong total_stake[ RESTART_EPOCHS_MAX ]; 47 : ulong num_vote_accts[ RESTART_EPOCHS_MAX ]; 48 : fd_stake_weight_t stake_weights[ RESTART_EPOCHS_MAX ][ FD_RESTART_MAX_PEERS ]; 49 : 50 : /* States maintained by the FIND_HEAVIEST_FORK_SLOT_NUM stage */ 51 : ulong total_stake_received[ RESTART_EPOCHS_MAX ]; 52 : ulong total_stake_received_and_voted[ RESTART_EPOCHS_MAX ]; 53 : uchar last_voted_fork_slots_received[ RESTART_EPOCHS_MAX ][ FD_RESTART_MAX_PEERS ]; 54 : ulong slot_to_stake[ LAST_VOTED_FORK_MAX_SLOTS ]; /* the index is an offset from funk_root */ 55 : 56 : /* States maintained by the FIND_HEAVIEST_FORK_BANK_HASH stage */ 57 : fd_pubkey_t my_pubkey; 58 : ulong heaviest_fork_slot; 59 : fd_hash_t heaviest_fork_bank_hash; 60 : ulong heaviest_fork_ready; 61 : 62 : fd_pubkey_t coordinator_pubkey; 63 : ulong coordinator_heaviest_fork_slot; 64 : fd_hash_t coordinator_heaviest_fork_bank_hash; 65 : ulong coordinator_heaviest_fork_ready; 66 : }; 67 : typedef struct fd_restart fd_restart_t; 68 : 69 : /* fd_restart_{align,footprint} return the required alignment and 70 : footprint of a memory region suitable for use as the wen-restart state. */ 71 : FD_FN_CONST static inline ulong 72 0 : fd_restart_align( void ) { 73 0 : return alignof(fd_restart_t); 74 0 : } 75 : 76 : FD_FN_CONST static inline ulong 77 0 : fd_restart_footprint( void ) { 78 0 : return sizeof(fd_restart_t); 79 0 : } 80 : 81 : /* fd_restart_new formats an unused memory region for use as the state of 82 : wen-restart. mem is a non-NULL pointer to this region in the local address 83 : space with the required footprint and alignment. */ 84 : void * 85 : fd_restart_new( void * mem ); 86 : 87 : /* fd_restart_join joins the caller to the wen-restart state. restart points 88 : to the first byte of the memory region backing the wen-restart state in the 89 : caller's address space. 90 : 91 : Returns a pointer in the local address space to wen-restart state on success. */ 92 : fd_restart_t * 93 : fd_restart_join( void * restart ); 94 : 95 : /* fd_restart_init is called in the replay tile after a snapshot is loaded. 96 : The arguments of this function come from the loaded snapshot and provide 97 : the first few fields in fd_restart_t. This function fills out_buf 98 : and out_buf_len with a gossip message -- the first gossip message sent 99 : in the wen-restart protocol (fd_gossip_restart_last_voted_fork_slots_t). */ 100 : void 101 : fd_restart_init( fd_restart_t * restart, 102 : ulong funk_root, 103 : fd_hash_t * root_bank_hash, 104 : fd_vote_accounts_t const * epoch_stakes[], 105 : fd_epoch_schedule_t * epoch_schedule, 106 : int tower_checkpt_fileno, 107 : fd_slot_history_t const * slot_history, 108 : fd_pubkey_t * my_pubkey, 109 : fd_pubkey_t * coordinator_pubkey, 110 : uchar * out_buf, 111 : ulong * out_buf_len ); 112 : 113 : /* fd_restart_recv_gossip_msg is invoked for each gossip message received. 114 : 115 : In case of a last_voted_fork_slots message, the function would check 116 : whether we have received such messages from more than 80% stake where 117 : 80% is specified as WAIT_FOR_SUPERMAJORITY_THRESHOLD_PERCENT. If so, 118 : out_heaviest_fork_found would be set to 1, and the stage will be set 119 : to WR_STAGE_FIND_HEAVIEST_FORK_BANK_HASH. 120 : 121 : In case of a heaviest_fork message, the function would check whether 122 : this message comes from the wen-restart coordinator, and if so, record 123 : the heaviest fork information in this message for later verification. */ 124 : void 125 : fd_restart_recv_gossip_msg( fd_restart_t * restart, 126 : void * gossip_msg, 127 : ulong * out_heaviest_fork_found ); 128 : 129 : /* fd_restart_find_heaviest_fork_bank_hash will check whether the funk 130 : root happens to be the chosen heaviest fork slot. If so, it simply 131 : copies the funk root bank hash into the heaviest fork hash field of 132 : fd_restart_t. If not, it will set out_need_repair to 1, which will 133 : trigger a repair and repaly process from the funk root to the chosen 134 : heaviest fork slot in order to get the bank hash. */ 135 : void 136 : fd_restart_find_heaviest_fork_bank_hash( fd_restart_t * restart, 137 : fd_funk_t * funk, 138 : ulong * out_need_repair ); 139 : 140 : /* fd_restart_verify_heaviest_fork is invoked repeatedly by the replay 141 : tile. It is a no-op if either the coordinator heaviest fork hash or 142 : the local heaviest fork hash is not ready. When both are ready, this 143 : function checks whether the two bank hashes match, and print an error 144 : message if the two mismatch. 145 : 146 : If we are the wen-restart coordinator, out_send will be set to 1 and 147 : out_buf will hold a message of type fd_gossip_restart_heaviest_fork_t, 148 : which will be sent out by the gossip tile. */ 149 : void 150 : fd_restart_verify_heaviest_fork( fd_restart_t * restart, 151 : uchar * out_buf, 152 : ulong * out_send ); 153 : 154 : /* fd_restart_convert_runlength_to_raw_bitmap converts the bitmap in 155 : a last_voted_fork_slots message from the run length encoding into 156 : raw encoding. It is invoked in the gossip tile before forwarding 157 : this gossip message to the replay tile. Therefore, the replay tile 158 : could assume raw encoding of bitmap when processing the message. 159 : 160 : fd_restart_convert_raw_bitmap_to_runlength, reversely, converts a 161 : raw bitmap into run length encoding, which happens right before the 162 : gossip tile tries to send out a last_voted_fork_slots message. */ 163 : void 164 : fd_restart_convert_runlength_to_raw_bitmap( fd_gossip_restart_last_voted_fork_slots_t * msg, 165 : uchar * out_bitmap, 166 : ulong * out_bitmap_len ); 167 : 168 : void 169 : fd_restart_convert_raw_bitmap_to_runlength( fd_gossip_restart_last_voted_fork_slots_t * msg, 170 : fd_restart_run_length_encoding_inner_t * out_encoding ); 171 : 172 : /* fd_restart_tower_checkpt checkpoints the latest sent tower into a 173 : local file and it is invoked every time the replay tile sends out 174 : a tower vote; fd_restart_tower_restore reads this checkpoint file 175 : in fd_restart_init for the last_voted_fork_slot message sent out */ 176 : void 177 : fd_restart_tower_checkpt( fd_hash_t const * vote_bank_hash, 178 : fd_tower_t * tower, 179 : int tower_checkpt_fileno ); 180 : 181 : void 182 : fd_restart_tower_restore( fd_hash_t * vote_bank_hash, 183 : ulong * tower_slots, 184 : ulong * tower_height, 185 : int tower_checkpt_fileno ); 186 : #endif