Line data Source code
1 : #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_h 2 : #define HEADER_fd_src_ballet_reedsol_fd_reedsol_h 3 : 4 : /* fd_reedsol provides APIs for producing Reed-Solomon encoded parity 5 : data and for reconstructing missing data from parity data. The 6 : encoding process is optimized, and highly optimized for Turbine's 7 : typical case. 8 : 9 : Reed-Solomon works in GF(2^8), i.e. the codeword size is 1 byte, but 10 : is typically used on each byte of larger pieces of data called 11 : shreds (a Solana-specific term, often called shards elsewhere in the 12 : literature). Mathematically, the encoding process forms a vector 13 : from the input data, taking one byte from each shred, and 14 : left-multiplies the vector by a constant matrix in GF(2^8). The 15 : resulting vector contains one byte for each of the parity shreds. 16 : Solana also calls parity shreds "code" shreds, but due to the naming 17 : collision with executable code, we have opted for "parity." This 18 : mathematical structure thus forces each shred to be of identical size 19 : but doesn't otherwise impose any size restrictions.*/ 20 : 21 : #include "../fd_ballet_base.h" 22 : 23 : /* FD_REEDSOL_{DATA,PARITY}_SHREDS_MAX describe the inclusive maximum 24 : number of data and parity shreds that this implementation supports. 25 : These limits are not mathematical limits, but limits based on current 26 : Solana needs and performance. The common case for both shred counts 27 : to be set to 32. */ 28 : 29 17271 : #define FD_REEDSOL_DATA_SHREDS_MAX (67UL) 30 16947 : #define FD_REEDSOL_PARITY_SHREDS_MAX (67UL) 31 : 32 : #define FD_REEDSOL_ALIGN (128UL) 33 : #define FD_REEDSOL_FOOTPRINT (2304UL) /* 18*ALIGN */ 34 : 35 : /* FD_REEDSOL_SUCCESS, FD_REEDSOL_ERR_* are error code return values used 36 : by the recover operation, which is the only part that can fail for 37 : non-bug reasons. Their meaning is documented with 38 : fd_reedsol_recover_fini. SUCCESS must be zero, ERR_* are negative 39 : and distinct. */ 40 : 41 270 : #define FD_REEDSOL_SUCCESS (0) 42 0 : #define FD_REEDSOL_ERR_CORRUPT (-1) 43 0 : #define FD_REEDSOL_ERR_PARTIAL (-2) 44 : 45 : struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private { 46 : uchar scratch[ 1024 ]; // Used for the ultra high performance implementation 47 : 48 : ulong shred_sz; // shred_sz: the size of each shred in bytes (all shreds must be the same size) 49 : ulong data_shred_cnt; // {data,parity}_shred_cnt: the number of data or parity shreds 50 : ulong parity_shred_cnt; // (respectively) have been added to the in-process operation 51 : 52 : union { 53 : 54 : struct { 55 : uchar const * data_shred [ FD_REEDSOL_DATA_SHREDS_MAX ]; // {data,parity}_shred: pointers to the 1st byte of each shred 56 : uchar * parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ]; 57 : } encode; 58 : 59 : struct { 60 : uchar * shred[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ]; 61 : /* erased: whether the shred at the corresponding index is an 62 : erasure (i.e. wasn't received or was corrupted). Used only for 63 : decoding operations. TODO: Is this the right data type? Should 64 : it use a fd_smallset instead? */ 65 : uchar erased[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ]; 66 : } recover; 67 : 68 : }; 69 : }; 70 : 71 : typedef struct fd_reedsol_private fd_reedsol_t; 72 : 73 : FD_PROTOTYPES_BEGIN 74 : 75 : /* fd_reedsol_{align,footprint} return the alignment and footprint 76 : required in bytes for a fd_reedsol_t. */ 77 : 78 0 : static inline FD_FN_CONST ulong fd_reedsol_align( void ) { return FD_REEDSOL_ALIGN; } 79 0 : static inline FD_FN_CONST ulong fd_reedsol_footprint( void ) { return FD_REEDSOL_FOOTPRINT; } 80 : 81 : /* fd_reedsol_encode_init: starts a Reed-Solomon encoding operation that 82 : will encode shreds of size shred_sz. mem is assumed to be a piece of 83 : memory that meets the alignment and size constraints specified above. 84 : Takes a write interest in mem that persists until the operation is 85 : aborted or finalized. shred_sz must be >= 32. If the provided size is 86 : inadequate, NULL is returned. Returns mem as a a newly initialized encoder. 87 : Every call to fd_reedsol_encode_init should be paired with a call to 88 : fd_reedsol_encode_fini (normal execution) or fd_reedsol_encode_abort 89 : (abnormal execution). */ 90 : 91 : static inline fd_reedsol_t * 92 : fd_reedsol_encode_init( void * mem, 93 1430574 : ulong shred_sz ) { 94 1430574 : if( FD_UNLIKELY( shred_sz < 32 ) ) return NULL; 95 : 96 1430574 : fd_reedsol_t * rs = (fd_reedsol_t *)mem; 97 1430574 : rs->shred_sz = shred_sz; 98 1430574 : rs->data_shred_cnt = 0UL; 99 1430574 : rs->parity_shred_cnt = 0UL; 100 1430574 : return rs; 101 1430574 : } 102 : 103 : /* fd_reedsol_encode_add_data_shred: adds a shred consisting of the 104 : memory [ptr,ptr+shred_sz) to the in-process Reed-Solomon encoding 105 : operation. Takes a read interest in the shred that persists for the 106 : lifetime of the operation (i.e. until finalized or aborted). Data 107 : shreds have no alignment restrictions and can overlap with each other 108 : but should not overlap with any parity shreds in the same encoding 109 : operation. 110 : 111 : Note: The order in which data shreds are added relative to other data 112 : shreds matters. It impacts the parity data produced by the encoding 113 : operation. 114 : 115 : Assumes rs is initialized as an encoder and returns rs (still 116 : initialized as an encoder). */ 117 : 118 : static inline fd_reedsol_t * 119 : fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs, 120 51705990 : void const * ptr ) { 121 51705990 : rs->encode.data_shred[ rs->data_shred_cnt++ ] = (uchar const*) ptr; 122 51705990 : return rs; 123 51705990 : } 124 : 125 : /* fd_reedsol_encode_add_parity_shred: adds the block of memory 126 : [ptr,ptr+shred_sz) to the in-process Reed-Solomon encoding operation 127 : as the destination of a parity shred. Takes a write interest in the 128 : memory that persists for the lifetime of the operation (i.e. until 129 : finalized or aborted). Parity shreds have no alignment 130 : restrictions but must not overlap with each other or with data shreds 131 : in the same operation (U.B. if they overlap). 132 : 133 : Note: The order in which parity shreds are added matters only insofar 134 : as which data will be written to which location. 135 : 136 : Assumes rs is initialized as an encoder and returns rs (still 137 : initialized as an encoder). */ 138 : 139 : static inline fd_reedsol_t * 140 : fd_reedsol_encode_add_parity_shred( fd_reedsol_t * rs, 141 54254862 : void * ptr ) { 142 54254862 : rs->encode.parity_shred[ rs->parity_shred_cnt++ ] = (uchar *)ptr; 143 54254862 : return rs; 144 54254862 : } 145 : 146 : /* fd_reedsol_encode_abort aborts an in-progress encoding operation. 147 : Releases any read or write interests in any shreds that were added to 148 : the operation. Upon return, the contents of the parity shreds are 149 : undefined. Assumes rs is initialized as an encoder, rs will not be 150 : initialized on return. */ 151 : 152 : static inline void 153 0 : fd_reedsol_encode_abort( fd_reedsol_t * rs ) { 154 0 : rs->data_shred_cnt = 0UL; 155 0 : rs->parity_shred_cnt = 0UL; 156 0 : } 157 : 158 : /* fd_reedsol_encode_fini finishes the in-progress encoding operation. 159 : Upon return, the parity shreds will be filled with the correct 160 : Reed-Solomon encoded parity data. Upon return, this will no longer 161 : have any read or write interest in any of the provided shreds. 162 : Assumes rs is initialized as an encoder, rs will not be initialized 163 : on return. */ 164 : 165 : void 166 : fd_reedsol_encode_fini( fd_reedsol_t * rs ); 167 : 168 : /* fd_reedsol_recover_init: starts a Reed-Solomon recover/decode 169 : operation that will recover shreds of size shred_sz. mem is assumed 170 : to be an unused piece of memory that meets the alignment and size 171 : constraints specified above. Takes a write interest in mem that 172 : persists until the operation is aborted or finalized. shred_sz must 173 : be >= 32. If the provided size is inadequate, NULL is returned. Returns mem 174 : as a newly initialized recoverer. Every call to fd_reedsol_recover_init 175 : should be paired with a call to fd_reedsol_recover_fini (normal execution) or 176 : fd_reedsol_recover_abort (abnormal execution). */ 177 : 178 : static inline fd_reedsol_t * 179 270 : fd_reedsol_recover_init( void * mem, ulong shred_sz ) { 180 270 : if( FD_UNLIKELY( shred_sz < 32 ) ) return NULL; 181 : 182 270 : fd_reedsol_t * rs = (fd_reedsol_t *)mem; 183 270 : rs->shred_sz = shred_sz; 184 270 : rs->data_shred_cnt = 0UL; 185 270 : rs->parity_shred_cnt = 0UL; 186 270 : return rs; 187 270 : } 188 : 189 : /* fd_reedsol_recover_add_rcvd_shred adds the shred consisting of the of 190 : memory [ptr, ptr+shred_sz) to the in-process Reed-Solomon recover 191 : operation as a source of data. Takes a read interest in the shred 192 : that persists for the lifetime of the operation (i.e. until finalized 193 : or aborted). Received shreds have no alignment restrictions and can 194 : overlap with each other (if necessary, but there's no known use case 195 : for doing so), but should not overlap with any erased shreds in the 196 : same recovery operation. 197 : 198 : The shred is treated as a data shred if is_data_shred is non-zero and 199 : as a parity shred if not. Data shreds and parity shreds are mostly 200 : treated identically in the recover operation, but having the right 201 : number of data shreds is important for validating the shreds are 202 : correct. 203 : 204 : Note: The order in which shreds are added (using this function and 205 : fd_reedsol_recover_add_erased_shred) is very important for recovery. 206 : Shreds must be added in the natural index order or the recover 207 : operation will almost certainly fail. In particular, all data shreds 208 : must be added before any parity shreds are added. 209 : 210 : Assumes rs is initialized as a recoverer, returns rs (still 211 : initialized as a recoverer). */ 212 : 213 : static inline fd_reedsol_t * 214 : fd_reedsol_recover_add_rcvd_shred( fd_reedsol_t * rs, 215 : int is_data_shred, 216 8604 : void const * ptr ) { 217 : 218 : /* Assumes is_data_shred==1 implies rs->parity_shred_cnt==0 and 219 : data_shred_cnt, parity_shred_cnt won't go over the max */ 220 : 221 : /* For performance reasons, we need to store all the shred pointers in 222 : one flat array, which means the array needs to be non-const. The 223 : const in the function signature signals that this operation won't 224 : modify the shred. */ 225 : 226 8604 : rs->recover.shred [ rs->data_shred_cnt + rs->parity_shred_cnt ] = (void *)ptr; 227 8604 : rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)0; 228 8604 : rs->data_shred_cnt += !!is_data_shred; 229 8604 : rs->parity_shred_cnt += !is_data_shred; 230 : 231 8604 : return rs; 232 8604 : } 233 : 234 : /* fd_reedsol_recover_add_erased_shred adds the block of memory 235 : [ptr,ptr+shred_sz) to the in-process Reed-Solomon recover operation 236 : as the destination for a shred that will be recovered. Takes a write 237 : interest in the shred that persists for the lifetime of the operation 238 : (i.e. until finalized or aborted). Erased shreds have no alignment 239 : restrictions but should not overlap with any other shreds in the same 240 : recover operation. The contents of the block of memory are 241 : ignored and will be overwritten by the time the operation is 242 : finished. 243 : 244 : The shred is treated as a data shred if is_data_shred is non-zero and 245 : as a parity shred if not. Data shreds and parity shreds are mostly 246 : treated identically in the recover operation, but having the right 247 : number of data shreds is important for validating the shreds are 248 : correct. 249 : 250 : Note: The order in which shreds are added (using this function and 251 : fd_reedsol_recover_add_rcvd_shred) is very important for recovery. 252 : Shreds must be added in the natural index order or the recover 253 : operation will almost certainly fail. In particular, all data shreds 254 : must be added before any parity shreds are added. 255 : 256 : Assumes rs is initialized as a recoverer, returns rs (still 257 : initialized as a recoverer). */ 258 : 259 : static inline fd_reedsol_t * 260 : fd_reedsol_recover_add_erased_shred( fd_reedsol_t * rs, 261 : int is_data_shred, 262 8670 : void * ptr ) { 263 : 264 : /* Assumes assert is_data_shred==1 implies rs->parity_shred_cnt==0 and 265 : data_shred_cnt, parity_shred_cnt won't go over the max */ 266 : 267 8670 : rs->recover.shred [ rs->data_shred_cnt + rs->parity_shred_cnt ] = ptr; 268 8670 : rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)1; 269 8670 : rs->data_shred_cnt += !!is_data_shred; 270 8670 : rs->parity_shred_cnt += !is_data_shred; 271 : 272 8670 : return rs; 273 8670 : } 274 : 275 : /* fd_reedsol_recover_abort aborts an in-progress encoding operation. 276 : Releases any read or write interests in any shreds that were added to 277 : the operation. Upon return, the contents of the erased shreds are 278 : undefined. Assumes rs is initialized and rs will not be initialized 279 : on return. */ 280 : 281 : static inline void 282 0 : fd_reedsol_recover_abort( fd_reedsol_t * rs ) { 283 0 : rs->data_shred_cnt = 0UL; 284 0 : rs->parity_shred_cnt = 0UL; 285 0 : } 286 : 287 : /* fd_reedsol_recover_fini finishes the in-progress recover operation. 288 : If successful, upon return, any erased shreds will be filled with the 289 : correct data as recovered by the Reed-Solomon recovery algorithm. If 290 : the recover operation fails with FD_REEDSOL_ERR_{CORRUPT,PARTIAL}, 291 : the contents of any erased shreds are undefined. 292 : 293 : Upon return, this will no longer have any read or write interest in 294 : any of the provided shreds. 295 : 296 : Returns one of: 297 : 298 : FD_REEDSOL_SUCCESS if the recover operation was successful 299 : 300 : FD_REEDSOL_ERR_CORRUPT if the shreds are not consistent with having 301 : come from a Reed-Solomon encoding with the provided number of data 302 : shreds 303 : 304 : FD_REEDSOL_ERR_PARTIAL if there's not enough un-erased data to 305 : recover data_shred_cnt data shreds. There must be at least one 306 : un-erased shred (data or parity) for each data shred in the 307 : operation. 308 : 309 : It's worth pointing out that the recovery process differs from 310 : typical network coding theory by making no effort to correct data 311 : corruption. The shred signature verification process should detect 312 : any data corruption, and any shred that fails signature verification 313 : can be treated as an erasure. This prevents the network from forking 314 : if the leader (maliciously) creates data shreds from one version of 315 : the block and parity shreds from another version of the block. 316 : 317 : Assumes rs is initialized as a recoverer, rs will not be initialized 318 : on return. */ 319 : 320 : int 321 : fd_reedsol_recover_fini( fd_reedsol_t * rs ); 322 : 323 : /* Misc APIs */ 324 : 325 : /* fd_reedsol_strerror converts a FD_REEDSOL_SUCCESS / FD_REEDSOL_ERR_* 326 : code into a human readable cstr. The lifetime of the returned 327 : pointer is infinite. The returned pointer is always to a non-NULL 328 : cstr. */ 329 : 330 : FD_FN_CONST char const * 331 : fd_reedsol_strerror( int err ); 332 : 333 : FD_PROTOTYPES_END 334 : 335 : #endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_h */