Line data Source code
1 : #ifndef HEADER_fd_src_discof_replay_fd_replay_h 2 : #define HEADER_fd_src_discof_replay_fd_replay_h 3 : 4 : #include "../../ballet/reedsol/fd_reedsol.h" 5 : 6 : /* This provides APIs for orchestrating replay of blocks as they are 7 : received from the cluster. 8 : 9 : Concepts: 10 : 11 : - Blocks are aggregations of entries aka. microblocks which are 12 : groupings of txns and are constructed by the block producer (see 13 : fd_pack). 14 : 15 : - Entries are grouped into entry batches by the block producer (see 16 : fd_pack / fd_shredder). 17 : 18 : - Entry batches are divided into chunks known as shreds by the block 19 : producer (see fd_shredder). 20 : 21 : - Shreds are grouped into forward-error-correction sets (FEC sets) by 22 : the block producer (see fd_shredder). 23 : 24 : - Shreds are transmitted to the rest of the cluster via the Turbine 25 : protocol (see fd_shredder / fd_shred). 26 : 27 : - Once enough shreds within a FEC set are received to recover the 28 : entirety of the shred data encoded by that FEC set, the receiver 29 : can "complete" the FEC set (see fd_fec_resolver). 30 : 31 : - If shreds in the FEC set are missing such that it can't complete, 32 : the receiver can use the Repair protocol to request missing shreds 33 : in FEC set (see fd_repair). 34 : 35 : - Once all the FEC sets complete within an entry batch is now 36 : replayable (see fd_replay). 37 : 38 : - Replay describes a grouping of completed entry batches as a block 39 : slice. Because shreds are received over the network and multiple 40 : entry batches may be completed at once, an entire slice is queued 41 : for replay (vs. individual entry batches) 42 : 43 : - Replay uses the frontier to determine which of the fork heads aka. 44 : banks the slice should be replayed from. This is required because 45 : each fork head has an independent state ie. different set of txns 46 : that have been executed (see fd_replay / fd_forks). 47 : 48 : - This process is repeated for every slice in the block at which 49 : point the block is executed (see fd_replay). 50 : 51 : - Replay of all the slices in a block must happen in order, as well 52 : as the entry batches within the slice and the entries within the 53 : entry batch. However, replay of the txns within an entry can 54 : happen out-of-order (see fd_replay). */ 55 : 56 : #include "../../disco/fd_disco_base.h" 57 : #include "../../ballet/reedsol/fd_reedsol.h" 58 : #include "../../flamenco/runtime/fd_blockstore.h" 59 : #include "../../tango/fseq/fd_fseq.h" 60 : 61 : 62 : /* FD_REPLAY_USE_HANDHOLDING: Define this to non-zero at compile time 63 : to turn on additional runtime checks and logging. */ 64 : 65 : #ifndef FD_REPLAY_USE_HANDHOLDING 66 : #define FD_REPLAY_USE_HANDHOLDING 1 67 : #endif 68 : 69 : /* fd_replay_fec_idxs is a bit vec that tracks the received data shred 70 : idxs in the FEC set. */ 71 : 72 : #define SET_NAME fd_replay_fec_idxs 73 : #define SET_MAX FD_REEDSOL_DATA_SHREDS_MAX 74 : #define SET_WORD_CNT (SET_MAX / sizeof(ulong) + 1) 75 : FD_STATIC_ASSERT( FD_REEDSOL_DATA_SHREDS_MAX % sizeof(ulong) != 0, fd_replay_fec_idxs ); 76 : #include "../../util/tmpl/fd_set.c" 77 : 78 : /* fd_replay_fec_t tracks in-progress FEC sets. It's synchronized with 79 : fd_fec_resolver, so the FEC sets fd_replay tracks should roughly 80 : match fd_fec_resolver's in-progress FEC sets. This state might be 81 : slightly delayed, because the replay tile is a downstream consumer of 82 : the shred tile, and therefore fd_replay_fec lags fd_fec_resolver. */ 83 : 84 : struct fd_replay_fec { 85 : ulong key; /* map key. 32 msb = slot, 32 lsb = fec_set_idx */ 86 : ulong prev; /* internal use by dlist */ 87 : uint hash; /* internal use by map */ 88 : 89 : ulong slot; /* slot of the block this fec set is part of */ 90 : ulong parent_slot; /* parent slot of `slot` */ 91 : uint fec_set_idx; /* index of the first data shred */ 92 : long ts; /* timestamp upon receiving the first shred */ 93 : ulong recv_cnt; /* count of shreds received so far data + coding */ 94 : ulong data_cnt; /* count of total data shreds in the FEC set */ 95 : 96 : /* This set is used to track which data shred indices to request if 97 : needing repairs. */ 98 : 99 : fd_replay_fec_idxs_t idxs[FD_REEDSOL_DATA_SHREDS_MAX / sizeof(ulong) + 1]; 100 : }; 101 : typedef struct fd_replay_fec fd_replay_fec_t; 102 : 103 : #define DEQUE_NAME fd_replay_fec_deque 104 : #define DEQUE_T fd_replay_fec_t 105 : #include "../../util/tmpl/fd_deque_dynamic.c" 106 : 107 : #define MAP_NAME fd_replay_fec_map 108 0 : #define MAP_T fd_replay_fec_t 109 : #include "../../util/tmpl/fd_map_dynamic.c" 110 : 111 : /* fd_replay_slice_t describes a replayable slice of a block which is 112 : a group of one or more completed entry batches. */ 113 : 114 : static inline FD_FN_CONST 115 0 : uint fd_replay_slice_start_idx( ulong key ){ 116 0 : return (uint)fd_ulong_extract( key, 32, 63 ); 117 0 : } 118 : 119 : static inline FD_FN_CONST 120 0 : uint fd_replay_slice_end_idx( ulong key ){ 121 0 : return (uint)fd_ulong_extract( key, 0, 31 ); 122 0 : } 123 : 124 : static inline 125 0 : ulong fd_replay_slice_key( uint start_idx, uint end_idx ) { 126 0 : return (ulong)start_idx << 32 | (ulong)end_idx; 127 0 : } 128 : 129 : struct fd_replay_slice { 130 : ulong slot; 131 : ulong * deque; 132 : }; 133 : typedef struct fd_replay_slice fd_replay_slice_t; 134 : 135 : #define DEQUE_NAME fd_replay_slice_deque 136 0 : #define DEQUE_T ulong 137 : #include "../../util/tmpl/fd_deque_dynamic.c" 138 : 139 : #define MAP_NAME fd_replay_slice_map 140 0 : #define MAP_T fd_replay_slice_t 141 0 : #define MAP_KEY slot 142 0 : #define MAP_KEY_NULL ULONG_MAX 143 0 : #define MAP_KEY_INVAL(k) ((k)==ULONG_MAX) 144 : #define MAP_MEMOIZE 0 145 : #include "../../util/tmpl/fd_map_dynamic.c" 146 : 147 0 : #define FD_REPLAY_MAGIC (0xf17eda2ce77e91a7UL) /* firedancer replay version 0 */ 148 : 149 : /* fd_replay_t is the top-level structure that maintains an LRU cache 150 : (pool, dlist, map) of the outstanding block slices that need replay. 151 : 152 : The replay order is FIFO so the first slice to go into the LRU will 153 : also be the first to attempt replay. */ 154 : 155 : struct __attribute__((aligned(128UL))) fd_replay { 156 : ulong fec_max; 157 : ulong slice_max; 158 : ulong block_max; 159 : 160 : /* Track in-progress FEC sets to repair if they don't complete in a 161 : timely way. */ 162 : 163 : fd_replay_fec_t * fec_map; 164 : fd_replay_fec_t * fec_deque; /* FIFO */ 165 : 166 : /* Track block slices to be replayed. */ 167 : 168 : fd_replay_slice_t * slice_map; 169 : 170 : /* Buffer to hold the block slice. */ 171 : 172 : uchar * slice_buf; 173 : 174 : /* Magic number to verify the replay structure. */ 175 : 176 : ulong magic; 177 : }; 178 : typedef struct fd_replay fd_replay_t; 179 : 180 : FD_PROTOTYPES_BEGIN 181 : 182 : /* Constructors */ 183 : 184 : /* fd_replay_{align,footprint} return the required alignment and 185 : footprint of a memory region suitable for use as replay with up to 186 : slice_max slices and block_max blocks. */ 187 : 188 : FD_FN_CONST static inline ulong 189 0 : fd_replay_align( void ) { 190 0 : return alignof(fd_replay_t); 191 0 : } 192 : 193 : FD_FN_CONST static inline ulong 194 0 : fd_replay_footprint( ulong fec_max, ulong slice_max, ulong block_max ) { 195 0 : int lg_fec_max = fd_ulong_find_msb( fd_ulong_pow2_up( fec_max ) ); 196 0 : int lg_block_max = fd_ulong_find_msb( fd_ulong_pow2_up( block_max ) ); 197 0 : ulong footprint = 198 0 : FD_LAYOUT_APPEND( 199 0 : FD_LAYOUT_APPEND( 200 0 : FD_LAYOUT_APPEND( 201 0 : FD_LAYOUT_APPEND( 202 0 : FD_LAYOUT_APPEND( 203 0 : FD_LAYOUT_INIT, 204 0 : alignof(fd_replay_t), sizeof(fd_replay_t) ), 205 0 : fd_replay_fec_map_align(), fd_replay_fec_map_footprint( lg_fec_max ) ), 206 0 : fd_replay_fec_deque_align(), fd_replay_fec_deque_footprint( fec_max ) ), 207 0 : 128UL, FD_SLICE_MAX ), 208 0 : fd_replay_slice_map_align(), fd_replay_slice_map_footprint( lg_block_max) ); 209 : 210 0 : for( ulong i = 0UL; i < block_max; i++ ) { 211 0 : footprint = FD_LAYOUT_APPEND( footprint, fd_replay_slice_deque_align(), fd_replay_slice_deque_footprint( slice_max ) ); 212 0 : } 213 0 : return FD_LAYOUT_FINI(footprint, fd_replay_align()); 214 0 : } 215 : 216 : /* fd_replay_new formats an unused memory region for use as a replay. 217 : mem is a non-NULL pointer to this region in the local address space 218 : with the required footprint and alignment. */ 219 : 220 : void * 221 : fd_replay_new( void * shmem, ulong fec_max, ulong slice_max, ulong block_max ); 222 : 223 : /* fd_replay_join joins the caller to the replay. replay points to the 224 : first byte of the memory region backing the replay in the caller's 225 : address space. 226 : 227 : Returns a pointer in the local address space to replay on success. */ 228 : 229 : fd_replay_t * 230 : fd_replay_join( void * replay ); 231 : 232 : /* fd_replay_leave leaves a current local join. Returns a pointer to the 233 : underlying shared memory region on success and NULL on failure (logs 234 : details). Reasons for failure include replay is NULL. */ 235 : 236 : void * 237 : fd_replay_leave( fd_replay_t const * replay ); 238 : 239 : /* fd_replay_delete unformats a memory region used as a replay. 240 : Assumes only the nobody is joined to the region. Returns a 241 : pointer to the underlying shared memory region or NULL if used 242 : obviously in error (e.g. replay is obviously not a replay ... logs 243 : details). The ownership of the memory region is transferred to the 244 : caller. */ 245 : 246 : void * 247 : fd_replay_delete( void * replay ); 248 : 249 : /* fd_replay_fec_query returns a pointer to the in-progress FEC keyed 250 : by slot and fec_set_idx. Returns NULL if not found. */ 251 : 252 : FD_FN_PURE static inline fd_replay_fec_t * 253 0 : fd_replay_fec_query( fd_replay_t * replay, ulong slot, uint fec_set_idx ) { 254 0 : ulong key = slot << 32 | (ulong)fec_set_idx; 255 0 : return fd_replay_fec_map_query( replay->fec_map, key, NULL ); 256 0 : } 257 : 258 : /* fd_replay_fec_insert inserts and returns a new in-progress FEC set 259 : keyed by slot and fec_set_idx into the map. Returns NULL if the map 260 : is full. */ 261 : 262 : static inline fd_replay_fec_t * 263 0 : fd_replay_fec_insert( fd_replay_t * replay, ulong slot, uint fec_set_idx ) { 264 0 : if( FD_UNLIKELY( fd_replay_fec_map_key_cnt( replay->fec_map ) == fd_replay_fec_map_key_max( replay->fec_map ) ) ) return NULL; 265 0 : ulong key = slot << 32 | (ulong)fec_set_idx; 266 0 : fd_replay_fec_t * fec = fd_replay_fec_map_insert( replay->fec_map, key ); /* cannot fail */ 267 0 : fec->slot = slot; 268 0 : fec->fec_set_idx = fec_set_idx; 269 0 : fec->ts = fd_log_wallclock(); 270 0 : fec->recv_cnt = 0; 271 0 : fec->data_cnt = 0; 272 0 : fd_replay_fec_idxs_null( fec->idxs ); 273 0 : return fec; 274 0 : } 275 : 276 : /* fd_replay_fec_query removes an in-progress FEC set from the map. 277 : Returns NULL if no fec set keyed by slot and fec_set_idx is found. */ 278 : 279 : static inline void 280 0 : fd_replay_fec_remove( fd_replay_t * replay, ulong slot, uint fec_set_idx ) { 281 0 : ulong key = slot << 32 | (ulong)fec_set_idx; 282 0 : fd_replay_fec_t * fec = fd_replay_fec_map_query( replay->fec_map, key, NULL ); 283 0 : FD_TEST( fec ); 284 0 : fd_replay_fec_map_remove( replay->fec_map, fec ); /* cannot fail */ 285 0 : } 286 : 287 : FD_PROTOTYPES_END 288 : 289 : #endif /* HEADER_fd_src_discof_replay_fd_replay_h */