Line data Source code
1 : #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_h 2 : #define HEADER_fd_src_ballet_reedsol_fd_reedsol_h 3 : 4 : /* fd_reedsol provides APIs for producing Reed-Solomon encoded parity 5 : data and for reconstructing missing data from parity data. The 6 : encoding process is optimized, and highly optimized for Turbine's 7 : typical case. 8 : 9 : Reed-Solomon works in GF(2^8), i.e. the codeword size is 1 byte, but 10 : is typically used on each byte of larger pieces of data called 11 : shreds (a Solana-specific term, often called shards elsewhere in the 12 : literature). Mathematically, the encoding process forms a vector 13 : from the input data, taking one byte from each shred, and 14 : left-multiplies the vector by a constant matrix in GF(2^8). The 15 : resulting vector contains one byte for each of the parity shreds. 16 : Solana also calls parity shreds "code" shreds, but due to the naming 17 : collision with executable code, we have opted for "parity." This 18 : mathematical structure thus forces each shred to be of identical size 19 : but doesn't otherwise impose any size restrictions.*/ 20 : 21 : #include "../fd_ballet_base.h" 22 : 23 : /* FD_REEDSOL_{DATA,PARITY}_SHREDS_MAX describe the inclusive maximum 24 : number of data and parity shreds that this implementation supports. 25 : These limits are not mathematical limits, but limits based on current 26 : Solana needs and performance. The common case for both shred counts 27 : to be set to 32. */ 28 : 29 17268 : #define FD_REEDSOL_DATA_SHREDS_MAX (67UL) 30 16947 : #define FD_REEDSOL_PARITY_SHREDS_MAX (67UL) 31 0 : #define FD_REEDSOL_FEC_SHRED_CNT (32UL) 32 : 33 : 34 : #define FD_REEDSOL_ALIGN (128UL) 35 : #define FD_REEDSOL_FOOTPRINT (2304UL) /* 18*ALIGN */ 36 : 37 : /* FD_REEDSOL_SUCCESS, FD_REEDSOL_ERR_* are error code return values used 38 : by the recover operation, which is the only part that can fail for 39 : non-bug reasons. Their meaning is documented with 40 : fd_reedsol_recover_fini. SUCCESS must be zero, ERR_* are negative 41 : and distinct. */ 42 : 43 270 : #define FD_REEDSOL_SUCCESS (0) 44 0 : #define FD_REEDSOL_ERR_CORRUPT (-1) 45 0 : #define FD_REEDSOL_ERR_PARTIAL (-2) 46 : 47 : struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private { 48 : uchar scratch[ 1024 ]; // Used for the ultra high performance implementation 49 : 50 : ulong shred_sz; // shred_sz: the size of each shred in bytes (all shreds must be the same size) 51 : ulong data_shred_cnt; // {data,parity}_shred_cnt: the number of data or parity shreds 52 : ulong parity_shred_cnt; // (respectively) have been added to the in-process operation 53 : 54 : union { 55 : 56 : struct { 57 : uchar const * data_shred [ FD_REEDSOL_DATA_SHREDS_MAX ]; // {data,parity}_shred: pointers to the 1st byte of each shred 58 : uchar * parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ]; 59 : } encode; 60 : 61 : struct { 62 : uchar * shred[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ]; 63 : /* erased: whether the shred at the corresponding index is an 64 : erasure (i.e. wasn't received or was corrupted). Used only for 65 : decoding operations. TODO: Is this the right data type? Should 66 : it use a fd_smallset instead? */ 67 : uchar erased[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ]; 68 : } recover; 69 : 70 : }; 71 : }; 72 : 73 : typedef struct fd_reedsol_private fd_reedsol_t; 74 : 75 : FD_PROTOTYPES_BEGIN 76 : 77 : /* fd_reedsol_{align,footprint} return the alignment and footprint 78 : required in bytes for a fd_reedsol_t. */ 79 : 80 0 : static inline FD_FN_CONST ulong fd_reedsol_align( void ) { return FD_REEDSOL_ALIGN; } 81 0 : static inline FD_FN_CONST ulong fd_reedsol_footprint( void ) { return FD_REEDSOL_FOOTPRINT; } 82 : 83 : /* fd_reedsol_encode_init: starts a Reed-Solomon encoding operation that 84 : will encode shreds of size shred_sz. mem is assumed to be a piece of 85 : memory that meets the alignment and size constraints specified above. 86 : Takes a write interest in mem that persists until the operation is 87 : aborted or finalized. shred_sz must be >= 32. If the provided size is 88 : inadequate, NULL is returned. Returns mem as a a newly initialized encoder. 89 : Every call to fd_reedsol_encode_init should be paired with a call to 90 : fd_reedsol_encode_fini (normal execution) or fd_reedsol_encode_abort 91 : (abnormal execution). */ 92 : 93 : static inline fd_reedsol_t * 94 : fd_reedsol_encode_init( void * mem, 95 1430574 : ulong shred_sz ) { 96 1430574 : if( FD_UNLIKELY( shred_sz < 32 ) ) return NULL; 97 : 98 1430574 : fd_reedsol_t * rs = (fd_reedsol_t *)mem; 99 1430574 : rs->shred_sz = shred_sz; 100 1430574 : rs->data_shred_cnt = 0UL; 101 1430574 : rs->parity_shred_cnt = 0UL; 102 1430574 : return rs; 103 1430574 : } 104 : 105 : /* fd_reedsol_encode_add_data_shred: adds a shred consisting of the 106 : memory [ptr,ptr+shred_sz) to the in-process Reed-Solomon encoding 107 : operation. Takes a read interest in the shred that persists for the 108 : lifetime of the operation (i.e. until finalized or aborted). Data 109 : shreds have no alignment restrictions and can overlap with each other 110 : but should not overlap with any parity shreds in the same encoding 111 : operation. 112 : 113 : Note: The order in which data shreds are added relative to other data 114 : shreds matters. It impacts the parity data produced by the encoding 115 : operation. 116 : 117 : Assumes rs is initialized as an encoder and returns rs (still 118 : initialized as an encoder). */ 119 : 120 : static inline fd_reedsol_t * 121 : fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs, 122 51705990 : void const * ptr ) { 123 51705990 : rs->encode.data_shred[ rs->data_shred_cnt++ ] = (uchar const*) ptr; 124 51705990 : return rs; 125 51705990 : } 126 : 127 : /* fd_reedsol_encode_add_parity_shred: adds the block of memory 128 : [ptr,ptr+shred_sz) to the in-process Reed-Solomon encoding operation 129 : as the destination of a parity shred. Takes a write interest in the 130 : memory that persists for the lifetime of the operation (i.e. until 131 : finalized or aborted). Parity shreds have no alignment 132 : restrictions but must not overlap with each other or with data shreds 133 : in the same operation (U.B. if they overlap). 134 : 135 : Note: The order in which parity shreds are added matters only insofar 136 : as which data will be written to which location. 137 : 138 : Assumes rs is initialized as an encoder and returns rs (still 139 : initialized as an encoder). */ 140 : 141 : static inline fd_reedsol_t * 142 : fd_reedsol_encode_add_parity_shred( fd_reedsol_t * rs, 143 54254862 : void * ptr ) { 144 54254862 : rs->encode.parity_shred[ rs->parity_shred_cnt++ ] = (uchar *)ptr; 145 54254862 : return rs; 146 54254862 : } 147 : 148 : /* fd_reedsol_encode_abort aborts an in-progress encoding operation. 149 : Releases any read or write interests in any shreds that were added to 150 : the operation. Upon return, the contents of the parity shreds are 151 : undefined. Assumes rs is initialized as an encoder, rs will not be 152 : initialized on return. */ 153 : 154 : static inline void 155 0 : fd_reedsol_encode_abort( fd_reedsol_t * rs ) { 156 0 : rs->data_shred_cnt = 0UL; 157 0 : rs->parity_shred_cnt = 0UL; 158 0 : } 159 : 160 : /* fd_reedsol_encode_fini finishes the in-progress encoding operation. 161 : Upon return, the parity shreds will be filled with the correct 162 : Reed-Solomon encoded parity data. Upon return, this will no longer 163 : have any read or write interest in any of the provided shreds. 164 : Assumes rs is initialized as an encoder, rs will not be initialized 165 : on return. */ 166 : 167 : void 168 : fd_reedsol_encode_fini( fd_reedsol_t * rs ); 169 : 170 : /* fd_reedsol_recover_init: starts a Reed-Solomon recover/decode 171 : operation that will recover shreds of size shred_sz. mem is assumed 172 : to be an unused piece of memory that meets the alignment and size 173 : constraints specified above. Takes a write interest in mem that 174 : persists until the operation is aborted or finalized. shred_sz must 175 : be >= 32. If the provided size is inadequate, NULL is returned. Returns mem 176 : as a newly initialized recoverer. Every call to fd_reedsol_recover_init 177 : should be paired with a call to fd_reedsol_recover_fini (normal execution) or 178 : fd_reedsol_recover_abort (abnormal execution). */ 179 : 180 : static inline fd_reedsol_t * 181 270 : fd_reedsol_recover_init( void * mem, ulong shred_sz ) { 182 270 : if( FD_UNLIKELY( shred_sz < 32 ) ) return NULL; 183 : 184 270 : fd_reedsol_t * rs = (fd_reedsol_t *)mem; 185 270 : rs->shred_sz = shred_sz; 186 270 : rs->data_shred_cnt = 0UL; 187 270 : rs->parity_shred_cnt = 0UL; 188 270 : return rs; 189 270 : } 190 : 191 : /* fd_reedsol_recover_add_rcvd_shred adds the shred consisting of the of 192 : memory [ptr, ptr+shred_sz) to the in-process Reed-Solomon recover 193 : operation as a source of data. Takes a read interest in the shred 194 : that persists for the lifetime of the operation (i.e. until finalized 195 : or aborted). Received shreds have no alignment restrictions and can 196 : overlap with each other (if necessary, but there's no known use case 197 : for doing so), but should not overlap with any erased shreds in the 198 : same recovery operation. 199 : 200 : The shred is treated as a data shred if is_data_shred is non-zero and 201 : as a parity shred if not. Data shreds and parity shreds are mostly 202 : treated identically in the recover operation, but having the right 203 : number of data shreds is important for validating the shreds are 204 : correct. 205 : 206 : Note: The order in which shreds are added (using this function and 207 : fd_reedsol_recover_add_erased_shred) is very important for recovery. 208 : Shreds must be added in the natural index order or the recover 209 : operation will almost certainly fail. In particular, all data shreds 210 : must be added before any parity shreds are added. 211 : 212 : Assumes rs is initialized as a recoverer, returns rs (still 213 : initialized as a recoverer). */ 214 : 215 : static inline fd_reedsol_t * 216 : fd_reedsol_recover_add_rcvd_shred( fd_reedsol_t * rs, 217 : int is_data_shred, 218 8604 : void const * ptr ) { 219 : 220 : /* Assumes is_data_shred==1 implies rs->parity_shred_cnt==0 and 221 : data_shred_cnt, parity_shred_cnt won't go over the max */ 222 : 223 : /* For performance reasons, we need to store all the shred pointers in 224 : one flat array, which means the array needs to be non-const. The 225 : const in the function signature signals that this operation won't 226 : modify the shred. */ 227 : 228 8604 : rs->recover.shred [ rs->data_shred_cnt + rs->parity_shred_cnt ] = (void *)ptr; 229 8604 : rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)0; 230 8604 : rs->data_shred_cnt += !!is_data_shred; 231 8604 : rs->parity_shred_cnt += !is_data_shred; 232 : 233 8604 : return rs; 234 8604 : } 235 : 236 : /* fd_reedsol_recover_add_erased_shred adds the block of memory 237 : [ptr,ptr+shred_sz) to the in-process Reed-Solomon recover operation 238 : as the destination for a shred that will be recovered. Takes a write 239 : interest in the shred that persists for the lifetime of the operation 240 : (i.e. until finalized or aborted). Erased shreds have no alignment 241 : restrictions but should not overlap with any other shreds in the same 242 : recover operation. The contents of the block of memory are 243 : ignored and will be overwritten by the time the operation is 244 : finished. 245 : 246 : The shred is treated as a data shred if is_data_shred is non-zero and 247 : as a parity shred if not. Data shreds and parity shreds are mostly 248 : treated identically in the recover operation, but having the right 249 : number of data shreds is important for validating the shreds are 250 : correct. 251 : 252 : Note: The order in which shreds are added (using this function and 253 : fd_reedsol_recover_add_rcvd_shred) is very important for recovery. 254 : Shreds must be added in the natural index order or the recover 255 : operation will almost certainly fail. In particular, all data shreds 256 : must be added before any parity shreds are added. 257 : 258 : Assumes rs is initialized as a recoverer, returns rs (still 259 : initialized as a recoverer). */ 260 : 261 : static inline fd_reedsol_t * 262 : fd_reedsol_recover_add_erased_shred( fd_reedsol_t * rs, 263 : int is_data_shred, 264 8670 : void * ptr ) { 265 : 266 : /* Assumes assert is_data_shred==1 implies rs->parity_shred_cnt==0 and 267 : data_shred_cnt, parity_shred_cnt won't go over the max */ 268 : 269 8670 : rs->recover.shred [ rs->data_shred_cnt + rs->parity_shred_cnt ] = ptr; 270 8670 : rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)1; 271 8670 : rs->data_shred_cnt += !!is_data_shred; 272 8670 : rs->parity_shred_cnt += !is_data_shred; 273 : 274 8670 : return rs; 275 8670 : } 276 : 277 : /* fd_reedsol_recover_abort aborts an in-progress encoding operation. 278 : Releases any read or write interests in any shreds that were added to 279 : the operation. Upon return, the contents of the erased shreds are 280 : undefined. Assumes rs is initialized and rs will not be initialized 281 : on return. */ 282 : 283 : static inline void 284 0 : fd_reedsol_recover_abort( fd_reedsol_t * rs ) { 285 0 : rs->data_shred_cnt = 0UL; 286 0 : rs->parity_shred_cnt = 0UL; 287 0 : } 288 : 289 : /* fd_reedsol_recover_fini finishes the in-progress recover operation. 290 : If successful, upon return, any erased shreds will be filled with the 291 : correct data as recovered by the Reed-Solomon recovery algorithm. If 292 : the recover operation fails with FD_REEDSOL_ERR_{CORRUPT,PARTIAL}, 293 : the contents of any erased shreds are undefined. 294 : 295 : Upon return, this will no longer have any read or write interest in 296 : any of the provided shreds. 297 : 298 : Returns one of: 299 : 300 : FD_REEDSOL_SUCCESS if the recover operation was successful 301 : 302 : FD_REEDSOL_ERR_CORRUPT if the shreds are not consistent with having 303 : come from a Reed-Solomon encoding with the provided number of data 304 : shreds 305 : 306 : FD_REEDSOL_ERR_PARTIAL if there's not enough un-erased data to 307 : recover data_shred_cnt data shreds. There must be at least one 308 : un-erased shred (data or parity) for each data shred in the 309 : operation. 310 : 311 : It's worth pointing out that the recovery process differs from 312 : typical network coding theory by making no effort to correct data 313 : corruption. The shred signature verification process should detect 314 : any data corruption, and any shred that fails signature verification 315 : can be treated as an erasure. This prevents the network from forking 316 : if the leader (maliciously) creates data shreds from one version of 317 : the block and parity shreds from another version of the block. 318 : 319 : Assumes rs is initialized as a recoverer, rs will not be initialized 320 : on return. */ 321 : 322 : int 323 : fd_reedsol_recover_fini( fd_reedsol_t * rs ); 324 : 325 : /* Misc APIs */ 326 : 327 : /* fd_reedsol_strerror converts a FD_REEDSOL_SUCCESS / FD_REEDSOL_ERR_* 328 : code into a human readable cstr. The lifetime of the returned 329 : pointer is infinite. The returned pointer is always to a non-NULL 330 : cstr. */ 331 : 332 : FD_FN_CONST char const * 333 : fd_reedsol_strerror( int err ); 334 : 335 : FD_PROTOTYPES_END 336 : 337 : #endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_h */