LCOV - code coverage report
Current view: top level - vinyl/io - fd_vinyl_io.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 45 50 90.0 %
Date: 2025-12-07 04:58:33 Functions: 40 858 4.7 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_vinyl_io_fd_vinyl_io_h
       2             : #define HEADER_fd_src_vinyl_io_fd_vinyl_io_h
       3             : 
       4             : /* A fd_vinyl_io_t reads from / appends to a bstream stored in some
       5             :    physical layer (typically slow and non-volatile).  Supports massive
       6             :    numbers of async concurrent reads and appends and the ability to
       7             :    recover from unexpected interrupts (Ctrl-C, power failures, etc).  To
       8             :    accommodate the myriad of different styles of physical layers and
       9             :    interfaces, the API is run time plugin friendly.  Summary of
      10             :    operations:
      11             : 
      12             :      read_imm: blocking read a contiguous range of blocks in the
      13             :      bstream's past.  Mostly used for iterating over a bstream's past.
      14             : 
      15             :      read: start reading a contiguous range of blocks in the bstream's
      16             :      past.  The caller promises the range to read is contiguous in the
      17             :      underlying physical storage.
      18             : 
      19             :      poll: finish an outstanding read.  Outstanding reads can complete
      20             :      in an arbitrary order.  All reads must be finished by poll but note
      21             :      that it is possible to detect a read is complete out-of-band too
      22             :      (for speculative processing).
      23             : 
      24             :      append: start appending a set of blocks to the end of the bstream's
      25             :      present (moving blocks from the bstream's future to the bstream's
      26             :      present).  The blocks will be contiguous in the underlying storage.
      27             :      The blocks must be suitably aligned and with a lifetime until the
      28             :      next commit.
      29             : 
      30             :      commit: finish all outstanding appends, moving all blocks in the
      31             :      bstream's present to the bstream's past.  This will empty the io's
      32             :      append scratch pad.  The underlying implementation is free to
      33             :      process outstanding appends in any order (and free to interleave
      34             :      them arbitrarily with outstanding reads).
      35             : 
      36             :      hint: indicates the next sz worth of blocks appended to the bstream
      37             :      must be contiguous in the physical storage.
      38             : 
      39             :      alloc: allocate memory from the io's append scratch pad.  These
      40             :      allocations will have a suitable alignment for append and a
      41             :      lifetime until the next commit.  This may trigger a commit of
      42             :      outstanding appends if there isn't enough scratch pad free.
      43             : 
      44             :      copy: append a contiguous range of blocks from the bstream's past
      45             :      to the end of the bstream's present.  May commit outstanding
      46             :      appends.
      47             : 
      48             :      forget: forget all blocks before a given sequence number, moving
      49             :      blocks from the bstream's past to the bstream's antiquity.  The
      50             :      caller can only forget up to the bstream's present.
      51             : 
      52             :      rewind: move blocks from the bstream's past (and potentially
      53             :      antiquity) to the bstream's future.  The bstream must have an empty
      54             :      present (i.e. no appends in progress) and no reads in progress.
      55             :      This allows, for example, on recovery, a multi-block pair that was
      56             :      incompletely written to be cleaned up.
      57             : 
      58             :      sync: update the range for the bstream past where recovery will
      59             :      resume.  This moves all blocks in the bstream's antiquity to the end
      60             :      of the bstream's future. */
      61             : 
      62             : /* FIXME: consider a query to get how many reads are outstanding? (with
      63             :    this, rewind and forget could be completely generic). */
      64             : 
      65             : #include "../bstream/fd_vinyl_bstream.h"
      66             : 
      67             : /* FD_VINYL_IO_TYPE_* identifies which IO implementation is in use. */
      68             : 
      69           9 : #define FD_VINYL_IO_TYPE_MM (0)  /* memory mapped */
      70           9 : #define FD_VINYL_IO_TYPE_BD (1)  /* synchronous blocking */
      71             : #define FD_VINYL_IO_TYPE_WD (2)  /* async O_DIRECT write (specialized) */
      72           0 : #define FD_VINYL_IO_TYPE_UR (3)  /* async io_uring */
      73             : 
      74             : /* FD_VINYL_IO_FLAG_* are flags used by various vinyl IO APIs */
      75             : 
      76     1497918 : #define FD_VINYL_IO_FLAG_BLOCKING (1) /* Okay to block the caller */
      77             : 
      78             : /* A fd_vinyl_io_rd_t describes a read request to the underlying I/O
      79             :    implementation to read [seq,seq+sz) (cyclic) from the bstream's past
      80             :    into dst.  seq, dst and sz should be FD_VINYL_BSTREAM_BLOCK_SZ
      81             :    aligned.  Any failure encountered while reading should FD_LOG_CRIT
      82             :    (just like reading an invalid memory address will seg fault).
      83             :    Underlying I/O implementations can add other information to this
      84             :    structure as necessary.  ctx is an arbitrary user defined value. */
      85             : 
      86             : #define FD_VINYL_IO_READ_SZ (64UL)
      87             : 
      88             : struct fd_vinyl_io_rd {
      89             :   ulong  ctx;
      90             :   ulong  seq;
      91             :   void * dst;
      92             :   ulong  sz;
      93             :   uchar  _[ FD_VINYL_IO_READ_SZ - 32UL ];
      94             : };
      95             : 
      96             : typedef struct fd_vinyl_io_rd fd_vinyl_io_rd_t;
      97             : 
      98             : /* fd_vinyl_io_t is an opaque handle of a fd_vinyl_io instance.  Some
      99             :    details are exposed to facilitate inlining in high performance
     100             :    contexts. */
     101             : 
     102             : struct fd_vinyl_io_private;
     103             : typedef struct fd_vinyl_io_private fd_vinyl_io_t;
     104             : 
     105             : typedef void   (*fd_vinyl_io_func_read_imm_t)( fd_vinyl_io_t * io, ulong seq, void * dst, ulong sz );
     106             : typedef void   (*fd_vinyl_io_func_read_t    )( fd_vinyl_io_t * io, fd_vinyl_io_rd_t * rd );
     107             : typedef int    (*fd_vinyl_io_func_poll_t    )( fd_vinyl_io_t * io, fd_vinyl_io_rd_t ** _rd, int flags );
     108             : typedef ulong  (*fd_vinyl_io_func_append_t  )( fd_vinyl_io_t * io, void const * src, ulong sz );
     109             : typedef int    (*fd_vinyl_io_func_commit_t  )( fd_vinyl_io_t * io, int flags );
     110             : typedef ulong  (*fd_vinyl_io_func_hint_t    )( fd_vinyl_io_t * io, ulong sz );
     111             : typedef void * (*fd_vinyl_io_func_alloc_t   )( fd_vinyl_io_t * io, ulong sz, int flags );
     112             : typedef ulong  (*fd_vinyl_io_func_copy_t    )( fd_vinyl_io_t * io, ulong seq, ulong sz );
     113             : typedef void   (*fd_vinyl_io_func_forget_t  )( fd_vinyl_io_t * io, ulong seq );
     114             : typedef void   (*fd_vinyl_io_func_rewind_t  )( fd_vinyl_io_t * io, ulong seq );
     115             : typedef int    (*fd_vinyl_io_func_sync_t    )( fd_vinyl_io_t * io, int flags );
     116             : typedef void * (*fd_vinyl_io_func_fini_t    )( fd_vinyl_io_t * io );
     117             : 
     118             : struct fd_vinyl_io_impl {
     119             :   fd_vinyl_io_func_read_imm_t read_imm;
     120             :   fd_vinyl_io_func_read_t     read;
     121             :   fd_vinyl_io_func_poll_t     poll;
     122             :   fd_vinyl_io_func_append_t   append;
     123             :   fd_vinyl_io_func_commit_t   commit;
     124             :   fd_vinyl_io_func_hint_t     hint;
     125             :   fd_vinyl_io_func_alloc_t    alloc;
     126             :   fd_vinyl_io_func_copy_t     copy;
     127             :   fd_vinyl_io_func_forget_t   forget;
     128             :   fd_vinyl_io_func_rewind_t   rewind;
     129             :   fd_vinyl_io_func_sync_t     sync;
     130             :   fd_vinyl_io_func_fini_t     fini;
     131             : };
     132             : 
     133             : typedef struct fd_vinyl_io_impl fd_vinyl_io_impl_t;
     134             : 
     135             : struct fd_vinyl_io_private {
     136             :   int                  type;
     137             :   ulong                seed;
     138             :   ulong                seq_ancient;  /* FD_VINYL_BSTREAM_BLOCK_SZ multiple */
     139             :   ulong                seq_past;     /* " */
     140             :   ulong                seq_present;  /* " */
     141             :   ulong                seq_future;   /* " */
     142             :   ulong                spad_max;     /* " */
     143             :   ulong                spad_used;    /* " */
     144             :   fd_vinyl_io_impl_t * impl;         /* implementation specific funcs */
     145             :   /* io implementation specific details follow */
     146             : };
     147             : 
     148             : FD_PROTOTYPES_BEGIN
     149             : 
     150             : /* fd_vinyl_io_* return the current value of the eponymous io field.
     151             :    Assumes io is valid.  For all but type and seed, the return value is
     152             :    a FD_VINYL_BSTREAM_BLOCK_SZ multiple.  Note that we don't have a
     153             :    generic notion of dev_max or dev_free as such is not a well defined
     154             :    concept.  Individual IO implementations can provide them as
     155             :    appropriate though. */
     156             : 
     157          12 : FD_FN_PURE static inline int   fd_vinyl_io_type( fd_vinyl_io_t const * io ) { return io->type; }
     158             : 
     159          12 : FD_FN_PURE static inline ulong fd_vinyl_io_seed( fd_vinyl_io_t const * io ) { return io->seed; }
     160             : 
     161    12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_ancient( fd_vinyl_io_t const * io ) { return io->seq_ancient; }
     162    12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_past   ( fd_vinyl_io_t const * io ) { return io->seq_past;    }
     163    12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_present( fd_vinyl_io_t const * io ) { return io->seq_present; }
     164    12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_future ( fd_vinyl_io_t const * io ) { return io->seq_future;  }
     165             : 
     166         732 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_max ( fd_vinyl_io_t const * io ) { return io->spad_max;                 }
     167         732 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_used( fd_vinyl_io_t const * io ) { return io->spad_used;                }
     168         732 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_free( fd_vinyl_io_t const * io ) { return io->spad_max - io->spad_used; }
     169             : 
     170           0 : FD_FN_PURE static inline ulong fd_vinyl_io_dev_used( fd_vinyl_io_t const * io ) { return io->seq_future - io->seq_ancient; }
     171             : 
     172             : /* fd_vinyl_io_read_imm does an immediate (blocking) read of
     173             :    [seq,seq+sz) (cyclic) from io's bstream's past into dst.  Assumes
     174             :    there are no reads currently posted on io.  Retains no interest in
     175             :    dst.  seq, dst and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned.
     176             :    This is used mostly for sequential iterating over a bstream's past
     177             :    (i.e. serial recovery and discovering partitions for parallel
     178             :    recovery). */
     179             : 
     180             : static inline void
     181             : fd_vinyl_io_read_imm( fd_vinyl_io_t * io,
     182             :                       ulong           seq,
     183             :                       void *          dst,
     184     3000948 :                       ulong           sz ) {
     185     3000948 :   io->impl->read_imm( io, seq, dst, sz );
     186     3000948 : }
     187             : 
     188             : /* fd_vinyl_io_read starts executing the read command rd.  That is,
     189             :    start reading bstream bytes [seq,seq+sz) (cyclic) into dst.  seq, dst
     190             :    and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned.  Further,
     191             :    [seq,seq+sz) should be in the bstream's past and the region to read
     192             :    should be stored contiguously in the underlying storage.
     193             : 
     194             :    On entry, the caller should have ownership of rd and rd->dst.  The io
     195             :    has ownership of these on return and a read interest in bstream bytes
     196             :    [seq,seq+sz) (cyclic).  The ownership of these will be returned to
     197             :    the caller and the read interest will end when poll returns the
     198             :    request. */
     199             : 
     200             : static inline void
     201             : fd_vinyl_io_read( fd_vinyl_io_t *    io,
     202     3747414 :                   fd_vinyl_io_rd_t * rd ) {
     203     3747414 :   io->impl->read( io, rd );
     204     3747414 : }
     205             : 
     206             : /* fd_vinyl_io_poll checks if any outstanding reads are complete.  Reads
     207             :    can complete in any order by the I/O layer.  flags is a bit-or of
     208             :    FD_VINYL_IO_FLAGs.  BLOCKING indicates the call is allowed to block
     209             :    the caller (the io layer promises the call cannot fail from the
     210             :    caller's point of view).  Returns FD_VINYL_SUCCESS if a read completed
     211             :    (*_rd will point to the read command ended with the ownership and
     212             :    read interest as described above), FD_VINYL_ERR_EMPTY if there are
     213             :    no commands pending (*_rd will be NULL) and FD_VINYL_ERR_AGAIN if
     214             :    none of the posted commands are ready (*_rd will be NULL).  AGAIN is
     215             :    only possible for a non-blocking call. */
     216             : 
     217             : static inline int
     218             : fd_vinyl_io_poll( fd_vinyl_io_t *     io,
     219             :                   fd_vinyl_io_rd_t ** _rd,
     220     7494828 :                   int                 flags ) {
     221     7494828 :   return io->impl->poll( io, _rd, flags );
     222     7494828 : }
     223             : 
     224             : /* fd_vinyl_io_append starts appending sz bytes at src to the bstream.
     225             :    src and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned.  Returns
     226             :    bstream sequence number seq_append where the data is being appended.
     227             :    io will have a read interest in src until the next commit.  This
     228             :    moves blocks from the bstream's future to the bstream's present.  On
     229             :    commit, the region [seq_future_before,seq_append) (cyclic) will be
     230             :    filled with zero padding if the I/O implementation requires it to
     231             :    keep the append contiguous in the physical store (this region will be
     232             :    empty if covered by a previous hint or if this is an append of a
     233             :    single block) and the region [seq_append,seq_future_after) (cyclic)
     234             :    will be filled with the appended info.
     235             : 
     236             :    fd_vinyl_io_commit moves all blocks in the bstream's present to the
     237             :    bstream's past (i.e. sets seq_present to seq_future).  flags is a
     238             :    bit-or of FD_VINYL_IO_FLAGs.  If BLOCKING is set, this is allowed to
     239             :    block the caller.  Returns FD_VINYL_SUCCESS (0) on success and
     240             :    FD_VINYL_ERR_AGAIN (negative) if commit could not be completed
     241             :    immediately (only possible for a non-blocking call).  commit empties
     242             :    the io append scratch pad on success.
     243             : 
     244             :    fd_vinyl_io_hint indicates the next sz bytes to append must be
     245             :    contiguous in the bstream.  This can move blocks from the bstream's
     246             :    future to the bstream's present.  Returns (the potentially updated)
     247             :    seq_future.  On commit, the region
     248             :    [seq_future_before,seq_future_after) (cyclic) will be filled with
     249             :    zero padding (this region will be empty if covered by a previous
     250             :    hint) and the region [seq_future_after,seq_future_after+sz) (cyclic)
     251             :    will be contiguous in the physical storage.  This is useful for grouping
     252             :    sets of blocks from different memory regions on the host that must be
     253             :    written contiguously from a protocol point of view (e.g. a move
     254             :    control block and the pair that follows it).
     255             : 
     256             :    fd_vinyl_io_alloc returns a pointer to sz bytes of
     257             :    FD_VINYL_BSTREAM_BLOCK_SZ aligned memory suitably allocated from io's
     258             :    append scratch pad.  flags is a bit-or of FD_VINYL_IO_FLAG_*.  BLOCKING
     259             :    indicates the call is allowed to block the caller.  If a non-blocking
     260             :    call, will return NULL if there is no suitable memory at this time.
     261             :    Will never return NULL for a blocking call.  The lifetime of the
     262             :    returned pointer is the lesser of the next append, next commit, the
     263             :    next alloc or the io.  sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned
     264             :    and at most io's spad_max.  This may do a commit to free up scratch
     265             :    pad memory if necessary (moving blocks from the present to the past).
     266             : 
     267             :    fd_vinyl_io_trim trims sz bytes from the end of the most recent
     268             :    fd_vinyl_io_alloc.  sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned
     269             :    and at most the size of the most recent alloc.
     270             : 
     271             :    fd_vinyl_io_copy starts appending a copy of the sz bytes at seq in
     272             :    the bstream's past to the bstream.  seq and sz should be
     273             :    FD_VINYL_BSTREAM_BLOCK_SZ aligned.  [seq,seq+sz) (cyclic) should be
     274             :    in the bstream's past.  io will have a read interest in this region
     275             :    until the next commit.  This will do a _blocking_ commit to free up
     276             :    scratch pad memory if necessary (moving blocks from the present to
     277             :    the past).  FIXME: consider non-blocking copy support? (copy would
     278             :    need a flags args).
     279             : 
     280             :    None of these can fail from the caller's perspective (they will all
     281             :    FD_LOG_CRIT if anything goes wrong ... much like accessing invalid
     282             :    memory will seg fault). */
     283             : 
     284             : static inline ulong
     285             : fd_vinyl_io_append( fd_vinyl_io_t * io,
     286             :                     void const *    src,
     287      749736 :                     ulong           sz ) {
     288      749736 :   return io->impl->append( io, src, sz );
     289      749736 : }
     290             : 
     291             : static inline int
     292             : fd_vinyl_io_commit( fd_vinyl_io_t * io,
     293      749064 :                     int             flags ) {
     294      749064 :   return io->impl->commit( io, flags );
     295      749064 : }
     296             : 
     297             : static inline ulong
     298             : fd_vinyl_io_hint( fd_vinyl_io_t * io,
     299      752724 :                   ulong           sz ) {
     300      752724 :   return io->impl->hint( io, sz );
     301      752724 : }
     302             : 
     303             : static inline void *
     304             : fd_vinyl_io_alloc( fd_vinyl_io_t * io,
     305             :                    ulong           sz,
     306         726 :                    int             flags ) {
     307         726 :   return io->impl->alloc( io, sz, flags );
     308         726 : }
     309             : 
     310             : static inline void
     311             : fd_vinyl_io_trim( fd_vinyl_io_t * io,
     312           0 :                   ulong           sz ) {
     313           0 :   io->spad_used -= sz;
     314           0 : }
     315             : 
     316             : static inline ulong
     317             : fd_vinyl_io_copy( fd_vinyl_io_t * io,
     318             :                   ulong           seq,
     319      752454 :                   ulong           sz ) {
     320      752454 :   return io->impl->copy( io, seq, sz );
     321      752454 : }
     322             : 
     323             : /* fd_vinyl_io_forget moves [seq_past,seq) (cyclic) from the bstream's
     324             :    past to the bstream's antiquity, setting seq_past to seq.  As such,
     325             :    seq should be in [seq_past,seq_present] (cyclic) and
     326             :    FD_VINYL_BSTREAM_BLOCK_SZ aligned.  There should be no reads, copies
     327             :    or appends in progress.  Cannot fail from the caller's perspective
     328             :    (will FD_LOG_CRIT if anything goes wrong).
     329             : 
     330             :    IMPORTANT SAFETY TIP!  Though the bstream has been updated from the
     331             :    caller's point of view, the bstream needs to be sync'd for recovery to
     332             :    start from the new seq_past. */
     333             : 
     334             : static inline void
     335             : fd_vinyl_io_forget( fd_vinyl_io_t * io,
     336      266526 :                     ulong           seq ) {
     337      266526 :   io->impl->forget( io, seq );
     338      266526 : }
     339             : 
     340             : /* fd_vinyl_io_rewind moves blocks [seq,seq_present) (cyclic) from the
     341             :    bstream's past to the bstream's future (updating seq_ancient and
     342             :    seq_past as necessary).  There should be no reads, copies or appends
     343             :    in progress.  seq should be at most seq_present (cyclic) and
     344             :    FD_VINYL_BSTREAM_BLOCK_SZ aligned.  Cannot fail from the caller's
     345             :    perspective (will FD_LOG_CRIT if anything goes wrong).
     346             : 
     347             :    IMPORTANT SAFETY TIP!  Though the bstream has been updated from the
     348             :    caller's point of view, the bstream needs to be sync'd for recovery
     349             :    to account for the rewind (and this is probably more critical than
     350             :    forget because appends will start modifying the bstream blocks that
     351             :    recovery would be expecting to be in the pre-rewind state). */
     352             : 
     353             : static inline void
     354             : fd_vinyl_io_rewind( fd_vinyl_io_t * io,
     355      264216 :                     ulong           seq ) {
     356      264216 :   io->impl->rewind( io, seq );
     357      264216 : }
     358             : 
     359             : /* fd_vinyl_io_sync moves [seq_ancient,seq_past) (cyclic) from the
     360             :    bstream's antiquity to the end of the bstream's future, setting
     361             :    seq_ancient to seq_past.  It promises the caller the bstream's past
     362             :    is fully written and that the bstream's past region is what recovery
     363             :    will use to recover the bstream's key-val state at seq_present.
     364             :    flags is a bit-or of FD_VINYL_IO_FLAGs.  BLOCKING indicates the call
     365             :    is allowed to block the caller.  Returns FD_VINYL_SUCCESS (0) on
     366             :    success and a FD_VINYL_ERR_AGAIN (negative) if the call would block
     367             :    the caller (only possible for a non-blocking call). */
     368             : /* FIXME: consider allowing new user info to be passed? */
     369             : 
     370             : static inline int
     371             : fd_vinyl_io_sync( fd_vinyl_io_t * io,
     372      748878 :                   int             flags ) {
     373      748878 :   return io->impl->sync( io, flags );
     374      748878 : }
     375             : 
     376             : /* fd_vinyl_io_fini tears down io, returning the memory region used to
     377             :    hold the I/O implementation state.  Implicitly completes any
     378             :    in-progress reads and cancels any in-progress appends (and thus can
     379             :    block the caller).
     380             : 
     381             :    IMPORTANT SAFETY TIP!  This does _not_ sync the bstream first (e.g.
     382             :    if an application is tearing down due to an anomalous condition, it
     383             :    may not want to sync on fini so that it can recover from a known good
     384             :    point). */
     385             : 
     386             : void *
     387             : fd_vinyl_io_fini( fd_vinyl_io_t * io );
     388             : 
     389             : /* Helpers ************************************************************/
     390             : 
     391             : /* fd_vinyl_io_spad_est() returns an estimate of the smallest scratch
     392             :    pad size required by most applications.  Specifically, this returns:
     393             : 
     394             :      2 pair_sz( LZ4_COMPRESSBOUND( VAL_MAX ) )
     395             : 
     396             :    so that it is possible to load an object footprint into the scratch
     397             :    pad and then have a worst case scratch memory for compression to
     398             :    re-encode the object. */
     399             : 
     400             : FD_FN_CONST ulong fd_vinyl_io_spad_est( void );
     401             : 
     402             : /* fd_vinyl_io_append_* are helper functions that start appending the
     403             :    given info, appropriately formatted and hashed, to io's bstream.
     404             :    There are no excess requirements for alignment.  They do no input
     405             :    argument checking.  On return, io retains no interest in the given
     406             :    info (that is, they use io's scratch memory and thus can trigger an
     407             :    io commit to move blocks from the bstream's present to the bstream's
     408             :    past if there isn't enough scratch pad free).  They return the
     409             :    bstream sequence number where the data is being appended.  They
     410             :    cannot fail from the caller's perspective (they will FD_LOG_CRIT if
     411             :    anything goes awry). */
     412             : 
     413             : ulong
     414             : fd_vinyl_io_append_pair_raw( fd_vinyl_io_t *         io,
     415             :                              fd_vinyl_key_t const *  key,   /* pair key */
     416             :                              fd_vinyl_info_t const * info,  /* pair info */
     417             :                              void const *            val ); /* contains info->val_sz bytes, in [0,FD_VINYL_VAL_MAX] */
     418             : 
     419             : ulong
     420             : fd_vinyl_io_append_dead( fd_vinyl_io_t *                 io,
     421             :                          fd_vinyl_bstream_phdr_t const * phdr,      /* pair header of erased pair */
     422             :                          void const *                    info,      /* contains info_sz bytes, info_sz treated as 0 if NULL */
     423             :                          ulong                           info_sz ); /* in [0,FD_VINYL_BSTREAM_DEAD_INFO_MAX] */
     424             : 
     425             : ulong
     426             : fd_vinyl_io_append_move( fd_vinyl_io_t *                 io,
     427             :                          fd_vinyl_bstream_phdr_t const * src,       /* pair header of src pair */
     428             :                          fd_vinyl_key_t const *          dst,       /* src pair getting renamed to dst or is replacing dst */
     429             :                          void const *                    info,      /* contains info_sz bytes, info_sz treated as 0 if NULL */
     430             :                          ulong                           info_sz ); /* in [0,FD_VINYL_BSTREAM_MOVE_INFO_MAX] */
     431             : 
     432             : ulong
     433             : fd_vinyl_io_append_part( fd_vinyl_io_t * io,
     434             :                          ulong           seq_prev,  /* should be a part before seq or seq */
     435             :                          ulong           dead_cnt,  /* number of dead blocks in the partition */
     436             :                          ulong           move_cnt,  /* number of move blocks in the partition */
     437             :                          void const *    info,      /* contains info_sz bytes, info_sz treated as 0 if NULL */
     438             :                          ulong           info_sz ); /* in [0,FD_VINYL_BSTREAM_PART_INFO_MAX] */
     439             : 
     440             : /* fd_vinyl_io_append_pair_inplace appends the style RAW pair at phdr
     441             :    to the bstream.  This will preferentially append the pair in the
     442             :    given style.  Returns the location where the pair was appended.  On
     443             :    return, *_style holds the actual style used and *_val_esz contains
     444             :    the pair encoded value byte size.
     445             : 
     446             :    Note that if the requested style is RAW or if the pair could not be
     447             :    usefully encoded in the requested style (e.g. the compressed size
     448             :    ended up larger than the uncompressed size), this will append from
     449             :    phdr in-place zero copy.  When appending a pair in-place, this will
     450             :    clear the zero padding region and insert the appropriate data
     451             :    integrity footers at the end of the pair.  In other cases, this will
     452             :    append from the io append scratch memory the encoded pair and the
     453             :    pair will be untouched.
     454             : 
     455             :    As such, the caller should assume the io has a read interest on the
     456             :    pair's header region and value region and a write interest on the
     457             :    pair zero padding region and footer region until the next append or
     458             :    commit and the pair's zero padding and footer regions may be
     459             :    clobbered by this call. */
     460             : 
     461             : ulong
     462             : fd_vinyl_io_append_pair_inplace( fd_vinyl_io_t *           io,
     463             :                                  int                       style,      /* preferred style for the appended pair */
     464             :                                  fd_vinyl_bstream_phdr_t * phdr,       /* style RAW pair to append, zero padding and footer regions may be clobbered */
     465             :                                  int *                     _style,     /* on return, *_style holds the actual style used */
     466             :                                  ulong *                   _val_esz ); /* on return, *_val_esz holds the pair encoded value byte size */
     467             : 
     468             : /* fd_vinyl_io_bd *****************************************************/
     469             : 
     470             : /* fd_vinyl_io_bd_{align,footprint} specify the alignment and footprint
     471             :    needed for a bstream stored on a block device / large file with a
     472             :    spad_max append scratch pad.  align will be a reasonable power-of-2
     473             :    and footprint will be a multiple of align.  Returns 0 for an invalid
     474             :    spad_max.
     475             : 
     476             :    fd_vinyl_io_bd_init starts using a file as a bstream store.  lmem
     477             :    points to a local memory region with suitable alignment and footprint
     478             :    to hold the bstream's state.  spad_max gives the size of the append
     479             :    scratch pad (should be a FD_VINYL_BSTREAM_BLOCK_SZ multiple).  dev_fd
     480             :    is a file descriptor for the block device / large file.  The file
     481             :    should already exist and be sized to the appropriate capacity.
     482             : 
     483             :    FIXME: allow user to specify a subrange of dev_fd to use for the
     484             :    store?
     485             : 
     486             :    If reset is non-zero, ignores any existing file contents and will
     487             :    start a new bstream.  The bstream metadata user info will be set to
     488             :    the info_sz bytes at info and the bstream will use io_seed for its
     489             :    data integrity hashing seed.
     490             : 
     491             :    Otherwise, this will attempt to resume at the point the bstream was
     492             :    last synchronized.  info, info_sz and io_seed will be ignored.
     493             : 
     494             :    IMPORTANT SAFETY TIP!  The io_seed is not the same thing as the meta
     495             :    seed.  The io_seed is a property of the bstream (with a lifetime of
     496             :    the bstream and is shared among all users of the bstream).  The meta
     497             :    seed is a property of the meta (and ideally uniquely and randomly set
     498             :    per vinyl tile run).
     499             : 
     500             :    Returns a handle to the bstream on success (has ownership of lmem and
     501             :    dev_fd, ownership returned on fini) and NULL on failure (logs
     502             :    details, no ownership changed).  Retains no interest in info. */
     503             : 
     504             : ulong fd_vinyl_io_bd_align    ( void );           /* a reasonable power-of-2 */
     505             : ulong fd_vinyl_io_bd_footprint( ulong spad_max ); /* a multiple of align, 0 for an invalid spad_max */
     506             : 
     507             : fd_vinyl_io_t *
     508             : fd_vinyl_io_bd_init( void *       lmem,      /* local memory region with suitable alignment and footprint for the bstream's state */
     509             :                      ulong        spad_max,  /* append scratch pad size (should be a FD_VINYL_BSTREAM_BLOCK_SZ multiple) */
     510             :                      int          dev_fd,    /* file descriptor for the block device / large file (should exist and be sized already) */
     511             :                      int          reset,     /* non-zero: ignore existing contents and start a new bstream, zero: resume at last sync */
     512             :                      void const * info,      /* bstream metadata user info (info_sz bytes), ignored if reset is zero */
     513             :                      ulong        info_sz,   /* byte size of info, ignored if reset is zero */
     514             :                      ulong        io_seed ); /* bstream data integrity hashing seed, ignored if reset is zero */
     515             : 
     516             : /* fd_vinyl_io_mm *****************************************************/
     517             : 
     518             : /* fd_vinyl_io_mm_* is the same as fd_vinyl_io_bd_* but uses dev_sz byte
     519             :    sized memory region dev as the "block device".  The result is
     520             :    bit-level identical to fd_vinyl_io_bd (and vice versa).  This is
     521             :    primarily for testing purposes but, as dev could also be a memory
     522             :    mapped file / block device, this could be useful in general
     523             :    (especially for concurrent read access, e.g. parallel recovery).
     524             :    Note that "sync" only guarantees appends to the dev memory region
     525             :    happened.  If the memory region is backed by a file, when the actual
     526             :    blocks are written to the physical storage is controlled by the
     527             :    kernel / driver / physical device (it is up to the caller of sync to
     528             :    do any additional context specific control here). */
     529             : 
     530             : ulong fd_vinyl_io_mm_align    ( void );           /* same contract as fd_vinyl_io_bd_align */
     531             : ulong fd_vinyl_io_mm_footprint( ulong spad_max ); /* same contract as fd_vinyl_io_bd_footprint */
     532             : 
     533             : fd_vinyl_io_t *
     534             : fd_vinyl_io_mm_init( void *       lmem,      /* as in fd_vinyl_io_bd_init */
     535             :                      ulong        spad_max,  /* as in fd_vinyl_io_bd_init */
     536             :                      void *       dev,       /* memory region used as the "block device" */
     537             :                      ulong        dev_sz,    /* byte size of the dev memory region */
     538             :                      int          reset,     /* as in fd_vinyl_io_bd_init */
     539             :                      void const * info,      /* as in fd_vinyl_io_bd_init */
     540             :                      ulong        info_sz,   /* as in fd_vinyl_io_bd_init */
     541             :                      ulong        io_seed ); /* as in fd_vinyl_io_bd_init */
     542             : 
     543             : /* fd_vinyl_{mmio,mmio_sz} return {a pointer in the caller's address
     544             :    space to the raw bstream storage,the raw bstream storage byte size}.
     545             :    These are a _subset_ of the dev / dev_sz region passed to mm_init and
     546             :    these will be FD_VINYL_BSTREAM_BLOCK_SZ aligned.  If a byte seq is in
     547             :    the store, it will be at mmio[ seq % mmio_sz ].  Note that mmio_sz is
     548             :    not necessarily a power of two.  Note also that the bstream's past is
     549             :    guaranteed to be in the store.  The lifetime of the returned region
     550             :    is the lifetime of the io.  Returns NULL and 0 if io does not support
     551             :    memory mapped io.  These exist to support thread parallel recovery. */
     552             : 
     553             : void * fd_vinyl_mmio   ( fd_vinyl_io_t * io ); /* NULL if io does not support memory mapped io */
     554             : ulong  fd_vinyl_mmio_sz( fd_vinyl_io_t * io ); /* 0 if io does not support memory mapped io */
     555             : 
     556             : FD_PROTOTYPES_END
     557             : 
     558             : #endif /* HEADER_fd_src_vinyl_io_fd_vinyl_io_h */

Generated by: LCOV version 1.14