LCOV - code coverage report
Current view: top level - ballet/reedsol - fd_reedsol.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 41 53 77.4 %
Date: 2025-10-13 04:42:14 Functions: 6 1300 0.5 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_h
       2             : #define HEADER_fd_src_ballet_reedsol_fd_reedsol_h
       3             : 
       4             : /* fd_reedsol provides APIs for producing Reed-Solomon encoded parity
       5             :    data and for reconstructing missing data from parity data.  The
       6             :    encoding process is optimized, and highly optimized for Turbine's
       7             :    typical case.
       8             : 
       9             :    Reed-Solomon works in GF(2^8), i.e. the codeword size is 1 byte, but
      10             :    is typically used on each byte of larger pieces of data called
      11             :    shreds (a Solana-specific term, often called shards elsewhere in the
      12             :    literature).  Mathematically, the encoding process forms a vector
      13             :    from the input data, taking one byte from each shred, and
      14             :    left-multiplies the vector by a constant matrix in GF(2^8).  The
      15             :    resulting vector contains one byte for each of the parity shreds.
      16             :    Solana also calls parity shreds "code" shreds, but due to the naming
      17             :    collision with executable code, we have opted for "parity."  This
      18             :    mathematical structure thus forces each shred to be of identical size
      19             :    but doesn't otherwise impose any size restrictions.*/
      20             : 
      21             : #include "../fd_ballet_base.h"
      22             : 
      23             : /* FD_REEDSOL_{DATA,PARITY}_SHREDS_MAX describe the inclusive maximum
      24             :    number of data and parity shreds that this implementation supports.
      25             :    These limits are not mathematical limits, but limits based on current
      26             :    Solana needs and performance.  The common case for both shred counts
      27             :    to be set to 32. */
      28             : 
      29       17271 : #define FD_REEDSOL_DATA_SHREDS_MAX   (67UL)
      30       16947 : #define FD_REEDSOL_PARITY_SHREDS_MAX (67UL)
      31             : 
      32             : #define FD_REEDSOL_ALIGN     (128UL)
      33             : #define FD_REEDSOL_FOOTPRINT (2304UL) /* 18*ALIGN */
      34             : 
      35             : /* FD_REEDSOL_SUCCESS, FD_REEDSOL_ERR_* are error code return values used
      36             :    by the recover operation, which is the only part that can fail for
      37             :    non-bug reasons.  Their meaning is documented with
      38             :    fd_reedsol_recover_fini.  SUCCESS must be zero, ERR_* are negative
      39             :    and distinct. */
      40             : 
      41         270 : #define FD_REEDSOL_SUCCESS     (0)
      42           0 : #define FD_REEDSOL_ERR_CORRUPT (-1)
      43           0 : #define FD_REEDSOL_ERR_PARTIAL (-2)
      44             : 
      45             : struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private {
      46             :   uchar scratch[ 1024 ];  // Used for the ultra high performance implementation
      47             : 
      48             :   ulong shred_sz;         // shred_sz: the size of each shred in bytes (all shreds must be the same size)
      49             :   ulong data_shred_cnt;   // {data,parity}_shred_cnt: the number of data or parity shreds
      50             :   ulong parity_shred_cnt; // (respectively) have been added to the in-process operation
      51             : 
      52             :   union {
      53             : 
      54             :     struct {
      55             :       uchar const * data_shred  [ FD_REEDSOL_DATA_SHREDS_MAX   ]; // {data,parity}_shred: pointers to the 1st byte of each shred
      56             :       uchar *       parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ];
      57             :     } encode;
      58             : 
      59             :     struct {
      60             :       uchar * shred[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ];
      61             :       /* erased: whether the shred at the corresponding index is an
      62             :          erasure (i.e. wasn't received or was corrupted).  Used only for
      63             :          decoding operations.  TODO: Is this the right data type? Should
      64             :          it use a fd_smallset instead? */
      65             :       uchar erased[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ];
      66             :     } recover;
      67             : 
      68             :   };
      69             : };
      70             : 
      71             : typedef struct fd_reedsol_private fd_reedsol_t;
      72             : 
      73             : FD_PROTOTYPES_BEGIN
      74             : 
      75             : /* fd_reedsol_{align,footprint} return the alignment and footprint
      76             :    required in bytes for a fd_reedsol_t. */
      77             : 
      78           0 : static inline FD_FN_CONST ulong fd_reedsol_align(     void ) { return FD_REEDSOL_ALIGN;     }
      79           0 : static inline FD_FN_CONST ulong fd_reedsol_footprint( void ) { return FD_REEDSOL_FOOTPRINT; }
      80             : 
      81             : /* fd_reedsol_encode_init: starts a Reed-Solomon encoding operation that
      82             :    will encode shreds of size shred_sz.  mem is assumed to be a piece of
      83             :    memory that meets the alignment and size constraints specified above.
      84             :    Takes a write interest in mem that persists until the operation is
      85             :    aborted or finalized.  shred_sz must be >= 32.  If the provided size is
      86             :    inadequate, NULL is returned.  Returns mem as a a newly initialized encoder.
      87             :    Every call to fd_reedsol_encode_init should be paired with a call to
      88             :    fd_reedsol_encode_fini (normal execution) or fd_reedsol_encode_abort
      89             :    (abnormal execution). */
      90             : 
      91             : static inline fd_reedsol_t *
      92             : fd_reedsol_encode_init( void * mem,
      93     1430574 :                         ulong  shred_sz ) {
      94     1430574 :   if( FD_UNLIKELY( shred_sz < 32 ) ) return NULL;
      95             : 
      96     1430574 :   fd_reedsol_t * rs = (fd_reedsol_t *)mem;
      97     1430574 :   rs->shred_sz         = shred_sz;
      98     1430574 :   rs->data_shred_cnt   = 0UL;
      99     1430574 :   rs->parity_shred_cnt = 0UL;
     100     1430574 :   return rs;
     101     1430574 : }
     102             : 
     103             : /* fd_reedsol_encode_add_data_shred: adds a shred consisting of the
     104             :    memory [ptr,ptr+shred_sz) to the in-process Reed-Solomon encoding
     105             :    operation.  Takes a read interest in the shred that persists for the
     106             :    lifetime of the operation (i.e. until finalized or aborted).  Data
     107             :    shreds have no alignment restrictions and can overlap with each other
     108             :    but should not overlap with any parity shreds in the same encoding
     109             :    operation.
     110             : 
     111             :    Note: The order in which data shreds are added relative to other data
     112             :    shreds matters.  It impacts the parity data produced by the encoding
     113             :    operation.
     114             : 
     115             :    Assumes rs is initialized as an encoder and returns rs (still
     116             :    initialized as an encoder). */
     117             : 
     118             : static inline fd_reedsol_t *
     119             : fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs,
     120    51705990 :                                   void const *   ptr ) {
     121    51705990 :   rs->encode.data_shred[ rs->data_shred_cnt++ ] = (uchar const*) ptr;
     122    51705990 :   return rs;
     123    51705990 : }
     124             : 
     125             : /* fd_reedsol_encode_add_parity_shred: adds the block of memory
     126             :    [ptr,ptr+shred_sz) to the in-process Reed-Solomon encoding operation
     127             :    as the destination of a parity shred.  Takes a write interest in the
     128             :    memory that persists for the lifetime of the operation (i.e. until
     129             :    finalized or aborted).  Parity shreds have no alignment
     130             :    restrictions but must not overlap with each other or with data shreds
     131             :    in the same operation (U.B. if they overlap).
     132             : 
     133             :    Note: The order in which parity shreds are added matters only insofar
     134             :    as which data will be written to which location.
     135             : 
     136             :    Assumes rs is initialized as an encoder and returns rs (still
     137             :    initialized as an encoder). */
     138             : 
     139             : static inline fd_reedsol_t *
     140             : fd_reedsol_encode_add_parity_shred( fd_reedsol_t * rs,
     141    54254862 :                                     void *         ptr ) {
     142    54254862 :   rs->encode.parity_shred[ rs->parity_shred_cnt++ ] = (uchar *)ptr;
     143    54254862 :   return rs;
     144    54254862 : }
     145             : 
     146             : /* fd_reedsol_encode_abort aborts an in-progress encoding operation.
     147             :    Releases any read or write interests in any shreds that were added to
     148             :    the operation.  Upon return, the contents of the parity shreds are
     149             :    undefined.  Assumes rs is initialized as an encoder, rs will not be
     150             :    initialized on return. */
     151             : 
     152             : static inline void
     153           0 : fd_reedsol_encode_abort( fd_reedsol_t * rs ) {
     154           0 :   rs->data_shred_cnt   = 0UL;
     155           0 :   rs->parity_shred_cnt = 0UL;
     156           0 : }
     157             : 
     158             : /* fd_reedsol_encode_fini finishes the in-progress encoding operation.
     159             :    Upon return, the parity shreds will be filled with the correct
     160             :    Reed-Solomon encoded parity data.  Upon return, this will no longer
     161             :    have any read or write interest in any of the provided shreds.
     162             :    Assumes rs is initialized as an encoder, rs will not be initialized
     163             :    on return. */
     164             : 
     165             : void
     166             : fd_reedsol_encode_fini( fd_reedsol_t * rs );
     167             : 
     168             : /* fd_reedsol_recover_init: starts a Reed-Solomon recover/decode
     169             :    operation that will recover shreds of size shred_sz.  mem is assumed
     170             :    to be an unused piece of memory that meets the alignment and size
     171             :    constraints specified above.  Takes a write interest in mem that
     172             :    persists until the operation is aborted or finalized.  shred_sz must
     173             :    be >= 32.  If the provided size is inadequate, NULL is returned.  Returns mem
     174             :    as a newly initialized recoverer.  Every call to fd_reedsol_recover_init
     175             :    should be paired with a call to fd_reedsol_recover_fini (normal execution) or
     176             :    fd_reedsol_recover_abort (abnormal execution). */
     177             : 
     178             : static inline fd_reedsol_t *
     179         270 : fd_reedsol_recover_init( void * mem, ulong shred_sz ) {
     180         270 :   if( FD_UNLIKELY( shred_sz < 32 ) ) return NULL;
     181             : 
     182         270 :   fd_reedsol_t * rs = (fd_reedsol_t *)mem;
     183         270 :   rs->shred_sz         = shred_sz;
     184         270 :   rs->data_shred_cnt   = 0UL;
     185         270 :   rs->parity_shred_cnt = 0UL;
     186         270 :   return rs;
     187         270 : }
     188             : 
     189             : /* fd_reedsol_recover_add_rcvd_shred adds the shred consisting of the of
     190             :    memory [ptr, ptr+shred_sz) to the in-process Reed-Solomon recover
     191             :    operation as a source of data.  Takes a read interest in the shred
     192             :    that persists for the lifetime of the operation (i.e. until finalized
     193             :    or aborted).  Received shreds have no alignment restrictions and can
     194             :    overlap with each other (if necessary, but there's no known use case
     195             :    for doing so), but should not overlap with any erased shreds in the
     196             :    same recovery operation.
     197             : 
     198             :    The shred is treated as a data shred if is_data_shred is non-zero and
     199             :    as a parity shred if not.  Data shreds and parity shreds are mostly
     200             :    treated identically in the recover operation, but having the right
     201             :    number of data shreds is important for validating the shreds are
     202             :    correct.
     203             : 
     204             :    Note: The order in which shreds are added (using this function and
     205             :    fd_reedsol_recover_add_erased_shred) is very important for recovery.
     206             :    Shreds must be added in the natural index order or the recover
     207             :    operation will almost certainly fail.  In particular, all data shreds
     208             :    must be added before any parity shreds are added.
     209             : 
     210             :    Assumes rs is initialized as a recoverer, returns rs (still
     211             :    initialized as a recoverer). */
     212             : 
     213             : static inline fd_reedsol_t *
     214             : fd_reedsol_recover_add_rcvd_shred( fd_reedsol_t * rs,
     215             :                                    int            is_data_shred,
     216        8604 :                                    void const *   ptr ) {
     217             : 
     218             :   /* Assumes is_data_shred==1 implies rs->parity_shred_cnt==0 and
     219             :      data_shred_cnt, parity_shred_cnt won't go over the max */
     220             : 
     221             :   /* For performance reasons, we need to store all the shred pointers in
     222             :      one flat array, which means the array needs to be non-const.  The
     223             :      const in the function signature signals that this operation won't
     224             :      modify the shred. */
     225             : 
     226        8604 :   rs->recover.shred [ rs->data_shred_cnt + rs->parity_shred_cnt ] = (void *)ptr;
     227        8604 :   rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)0;
     228        8604 :   rs->data_shred_cnt   += !!is_data_shred;
     229        8604 :   rs->parity_shred_cnt +=  !is_data_shred;
     230             : 
     231        8604 :   return rs;
     232        8604 : }
     233             : 
     234             : /* fd_reedsol_recover_add_erased_shred adds the block of memory
     235             :    [ptr,ptr+shred_sz) to the in-process Reed-Solomon recover operation
     236             :    as the destination for a shred that will be recovered.  Takes a write
     237             :    interest in the shred that persists for the lifetime of the operation
     238             :    (i.e. until finalized or aborted).  Erased shreds have no alignment
     239             :    restrictions but should not overlap with any other shreds in the same
     240             :    recover operation.  The contents of the block of memory are
     241             :    ignored and will be overwritten by the time the operation is
     242             :    finished.
     243             : 
     244             :    The shred is treated as a data shred if is_data_shred is non-zero and
     245             :    as a parity shred if not.  Data shreds and parity shreds are mostly
     246             :    treated identically in the recover operation, but having the right
     247             :    number of data shreds is important for validating the shreds are
     248             :    correct.
     249             : 
     250             :    Note: The order in which shreds are added (using this function and
     251             :    fd_reedsol_recover_add_rcvd_shred) is very important for recovery.
     252             :    Shreds must be added in the natural index order or the recover
     253             :    operation will almost certainly fail.  In particular, all data shreds
     254             :    must be added before any parity shreds are added.
     255             : 
     256             :    Assumes rs is initialized as a recoverer, returns rs (still
     257             :    initialized as a recoverer). */
     258             : 
     259             : static inline fd_reedsol_t *
     260             : fd_reedsol_recover_add_erased_shred( fd_reedsol_t * rs,
     261             :                                      int            is_data_shred,
     262        8670 :                                      void *         ptr ) {
     263             : 
     264             :   /* Assumes assert is_data_shred==1 implies rs->parity_shred_cnt==0 and
     265             :      data_shred_cnt, parity_shred_cnt won't go over the max */
     266             : 
     267        8670 :   rs->recover.shred [ rs->data_shred_cnt + rs->parity_shred_cnt ] = ptr;
     268        8670 :   rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)1;
     269        8670 :   rs->data_shred_cnt   += !!is_data_shred;
     270        8670 :   rs->parity_shred_cnt +=  !is_data_shred;
     271             : 
     272        8670 :   return rs;
     273        8670 : }
     274             : 
     275             : /* fd_reedsol_recover_abort aborts an in-progress encoding operation.
     276             :    Releases any read or write interests in any shreds that were added to
     277             :    the operation.  Upon return, the contents of the erased shreds are
     278             :    undefined.  Assumes rs is initialized and rs will not be initialized
     279             :    on return. */
     280             : 
     281             : static inline void
     282           0 : fd_reedsol_recover_abort( fd_reedsol_t * rs ) {
     283           0 :   rs->data_shred_cnt   = 0UL;
     284           0 :   rs->parity_shred_cnt = 0UL;
     285           0 : }
     286             : 
     287             : /* fd_reedsol_recover_fini finishes the in-progress recover operation.
     288             :    If successful, upon return, any erased shreds will be filled with the
     289             :    correct data as recovered by the Reed-Solomon recovery algorithm.  If
     290             :    the recover operation fails with FD_REEDSOL_ERR_{CORRUPT,PARTIAL},
     291             :    the contents of any erased shreds are undefined.
     292             : 
     293             :    Upon return, this will no longer have any read or write interest in
     294             :    any of the provided shreds.
     295             : 
     296             :    Returns one of:
     297             : 
     298             :    FD_REEDSOL_SUCCESS if the recover operation was successful
     299             : 
     300             :    FD_REEDSOL_ERR_CORRUPT if the shreds are not consistent with having
     301             :    come from a Reed-Solomon encoding with the provided number of data
     302             :    shreds
     303             : 
     304             :    FD_REEDSOL_ERR_PARTIAL if there's not enough un-erased data to
     305             :    recover data_shred_cnt data shreds.  There must be at least one
     306             :    un-erased shred (data or parity) for each data shred in the
     307             :    operation.
     308             : 
     309             :    It's worth pointing out that the recovery process differs from
     310             :    typical network coding theory by making no effort to correct data
     311             :    corruption.  The shred signature verification process should detect
     312             :    any data corruption, and any shred that fails signature verification
     313             :    can be treated as an erasure.  This prevents the network from forking
     314             :    if the leader (maliciously) creates data shreds from one version of
     315             :    the block and parity shreds from another version of the block.
     316             : 
     317             :    Assumes rs is initialized as a recoverer, rs will not be initialized
     318             :    on return. */
     319             : 
     320             : int
     321             : fd_reedsol_recover_fini( fd_reedsol_t * rs );
     322             : 
     323             : /* Misc APIs */
     324             : 
     325             : /* fd_reedsol_strerror converts a FD_REEDSOL_SUCCESS / FD_REEDSOL_ERR_*
     326             :    code into a human readable cstr.  The lifetime of the returned
     327             :    pointer is infinite.  The returned pointer is always to a non-NULL
     328             :    cstr. */
     329             : 
     330             : FD_FN_CONST char const *
     331             : fd_reedsol_strerror( int err );
     332             : 
     333             : FD_PROTOTYPES_END
     334             : 
     335             : #endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_h */

Generated by: LCOV version 1.14