LCOV - code coverage report
Current view: top level - vinyl/io - fd_vinyl_io.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 45 50 90.0 %
Date: 2026-02-14 05:50:46 Functions: 40 1034 3.9 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_vinyl_io_fd_vinyl_io_h
       2             : #define HEADER_fd_src_vinyl_io_fd_vinyl_io_h
       3             : 
       4             : /* A fd_vinyl_io_t reads from / appends to a bstream stored in some
       5             :    physical layer (typically slow and non-volatile).  Supports massive
       6             :    numbers of async concurrent reads and appends and the ability to
       7             :    recover from unexpected interrupts (Ctrl-C, power failures, etc).  To
       8             :    accommodate the myriad of different styles of physical layers and
       9             :    interfaces, the API is run time plugin friendly.  Summary of
      10             :    operations:
      11             : 
      12             :      read_imm: blocking read a contiguous range of blocks in the
      13             :      bstream's past.  Mostly used for iterating over a bstream's past.
      14             : 
      15             :      read: start reading a contiguous range of blocks in the bstream's
      16             :      past.  The caller promises the range to read is contiguous in the
      17             :      underlying physical storage.
      18             : 
      19             :      poll: finish an outstanding read.  Outstanding reads can complete
      20             :      in an arbitrary order.  All reads must be finished by poll but note
      21             :      that it is possible to detect a read is complete out-of-band too
      22             :      (for speculative processing).
      23             : 
      24             :      append: start appending a set of blocks to the end of the bstream's
      25             :      present (moving blocks from the bstream's future to the bstream's
      26             :      present).  The blocks will be contiguous in the underlying storage.
      27             :      The blocks must be suitably aligned and with a lifetime until the
      28             :      next commit.
      29             : 
      30             :      commit: finish all outstanding appends, moving all blocks in the
      31             :      bstream's present to the bstream's past.  This will empty the io's
      32             :      append scratch pad.  The underlying implementation is free to
      33             :      process outstanding appends in any order (and free to interleave
      34             :      them arbitrarily with outstanding reads).
      35             : 
      36             :      hint: indicates the next sz worth of blocks appended to the bstream
      37             :      must be contiguous in the physical storage.
      38             : 
      39             :      alloc: allocate memory from the io's append scratch pad.  These
      40             :      allocations will have a suitable alignment for append and a
      41             :      lifetime until the next commit.  This may trigger a commit of
      42             :      outstanding appends if there isn't enough scratch pad free.
      43             : 
      44             :      copy: append a contiguous range of blocks from the bstream's past
      45             :      to the end of the bstream's present.  May commit outstanding
      46             :      appends.
      47             : 
      48             :      forget: forget all blocks before a given sequence number, moving
      49             :      blocks from the bstream's past to the bstream's antiquity.  The
      50             :      caller can only forget up to the bstream's present.
      51             : 
      52             :      rewind: move blocks from the bstream's past (and potentially
      53             :      antiquity) to the bstream's future.  The bstream must have an empty
      54             :      present (i.e. no appends in progress) and no reads in progress.
      55             :      This allows, for example, on recovery, a multi-block pair that was
      56             :      incompletely written to be cleaned up.
      57             : 
      58             :      sync: update the range for the bstream past where recovery will
      59             :      resume.  This moves all blocks in the bstream's antiquity to end of
      60             :      the bstream's future. */
      61             : 
      62             : /* FIXME: consider a query to get how many reads are outstanding? (with
      63             :    this, rewind and forget could be completely generic). */
      64             : 
      65             : #include "../bstream/fd_vinyl_bstream.h"
      66             : 
      67             : /* FD_VINYL_IO_TYPE_* identifies which IO implementation is in use. */
      68             : 
      69           9 : #define FD_VINYL_IO_TYPE_MM (0)  /* memory mapped */
      70           9 : #define FD_VINYL_IO_TYPE_BD (1)  /* synchronous blocking */
      71             : #define FD_VINYL_IO_TYPE_WD (2)  /* async O_DIRECT write (specialized) */
      72           0 : #define FD_VINYL_IO_TYPE_UR (3)  /* async io_uring */
      73             : 
      74             : /* FD_VINYL_IO_FLAG_* are flags used by various vinyl IO APIs */
      75             : 
      76     1497300 : #define FD_VINYL_IO_FLAG_BLOCKING (1) /* Okay to block the caller */
      77             : 
      78             : /* A fd_vinyl_io_rd_t describes a read request to the underlying I/O
      79             :    implementation to read [seq,seq+sz) (cyclic) from the bstream's past
      80             :    into dst.  seq, dst and sz should be FD_VINYL_BSTREAM_BLOCK_SZ
      81             :    aligned.  Any failure encountered while reading should FD_LOG_CRIT
      82             :    (just like reading an invalid memory address will seg fault).
      83             :    Underlying I/O implementations can add other information to this
      84             :    structure as necessary.  ctx is an arbitrary user defined value. */
      85             : 
      86             : #define FD_VINYL_IO_READ_SZ (64UL)
      87             : 
      88             : struct fd_vinyl_io_rd {
      89             :   ulong  ctx;                             /* arbitrary user defined value */
      90             :   ulong  seq;                             /* start of the bstream range to read, should be FD_VINYL_BSTREAM_BLOCK_SZ aligned */
      91             :   void * dst;                             /* where to store the bytes read, should be FD_VINYL_BSTREAM_BLOCK_SZ aligned */
      92             :   ulong  sz;                              /* number of bytes to read, should be FD_VINYL_BSTREAM_BLOCK_SZ aligned */
      93             :   uchar  _[ FD_VINYL_IO_READ_SZ - 32UL ]; /* room for implementation specific info (32UL presumably is the size of the 4 fields above, assuming 8 byte ulong / pointers -- confirm) */
      94             : };
      95             : 
      96             : typedef struct fd_vinyl_io_rd fd_vinyl_io_rd_t;
      97             : 
      98             : /* fd_vinyl_io_t is an opaque handle of a fd_vinyl_io instance.  Some
      99             :    details are exposed to facilitate inlining in high performance
     100             :    contexts. */
     101             : 
     102             : struct fd_vinyl_io_private;
     103             : typedef struct fd_vinyl_io_private fd_vinyl_io_t;
     104             : 
     105             : typedef void   (*fd_vinyl_io_func_read_imm_t)( fd_vinyl_io_t * io, ulong seq, void * dst, ulong sz );
     106             : typedef void   (*fd_vinyl_io_func_read_t    )( fd_vinyl_io_t * io, fd_vinyl_io_rd_t * rd );
     107             : typedef int    (*fd_vinyl_io_func_poll_t    )( fd_vinyl_io_t * io, fd_vinyl_io_rd_t ** _rd, int flags );
     108             : typedef ulong  (*fd_vinyl_io_func_append_t  )( fd_vinyl_io_t * io, void const * src, ulong sz );
     109             : typedef int    (*fd_vinyl_io_func_commit_t  )( fd_vinyl_io_t * io, int flags );
     110             : typedef ulong  (*fd_vinyl_io_func_hint_t    )( fd_vinyl_io_t * io, ulong sz );
     111             : typedef void * (*fd_vinyl_io_func_alloc_t   )( fd_vinyl_io_t * io, ulong sz, int flags );
     112             : typedef ulong  (*fd_vinyl_io_func_copy_t    )( fd_vinyl_io_t * io, ulong seq, ulong sz );
     113             : typedef void   (*fd_vinyl_io_func_forget_t  )( fd_vinyl_io_t * io, ulong seq );
     114             : typedef void   (*fd_vinyl_io_func_rewind_t  )( fd_vinyl_io_t * io, ulong seq );
     115             : typedef int    (*fd_vinyl_io_func_sync_t    )( fd_vinyl_io_t * io, int flags );
     116             : typedef void * (*fd_vinyl_io_func_fini_t    )( fd_vinyl_io_t * io );
     117             : 
     118             : struct fd_vinyl_io_impl {
     119             :   fd_vinyl_io_func_read_imm_t read_imm; /* blocking read of a range in the bstream's past */
     120             :   fd_vinyl_io_func_read_t     read;     /* start a read of a range in the bstream's past */
     121             :   fd_vinyl_io_func_poll_t     poll;     /* finish an outstanding read */
     122             :   fd_vinyl_io_func_append_t   append;   /* start appending blocks to the bstream's present */
     123             :   fd_vinyl_io_func_commit_t   commit;   /* finish all outstanding appends */
     124             :   fd_vinyl_io_func_hint_t     hint;     /* indicate the next sz worth of appends must be contiguous */
     125             :   fd_vinyl_io_func_alloc_t    alloc;    /* allocate from the io's append scratch pad */
     126             :   fd_vinyl_io_func_copy_t     copy;     /* append a copy of a range in the bstream's past */
     127             :   fd_vinyl_io_func_forget_t   forget;   /* move blocks from the bstream's past to antiquity */
     128             :   fd_vinyl_io_func_rewind_t   rewind;   /* move blocks from the bstream's past to the future */
     129             :   fd_vinyl_io_func_sync_t     sync;     /* update the range where recovery will resume */
     130             :   fd_vinyl_io_func_fini_t     fini;     /* tear down the io */
     131             : };
     132             : 
     133             : typedef struct fd_vinyl_io_impl fd_vinyl_io_impl_t;
     134             : 
     135             : struct fd_vinyl_io_private {
     136             :   int                  type;         /* presumably a FD_VINYL_IO_TYPE_* -- confirm against the io constructors */
     137             :   ulong                seed;
     138             :   ulong                seq_ancient;  /* FD_VINYL_BSTREAM_BLOCK_SZ multiple */
     139             :   ulong                seq_past;     /* FD_VINYL_BSTREAM_BLOCK_SZ multiple */
     140             :   ulong                seq_present;  /* FD_VINYL_BSTREAM_BLOCK_SZ multiple */
     141             :   ulong                seq_future;   /* FD_VINYL_BSTREAM_BLOCK_SZ multiple */
     142             :   ulong                spad_max;     /* FD_VINYL_BSTREAM_BLOCK_SZ multiple */
     143             :   ulong                spad_used;    /* FD_VINYL_BSTREAM_BLOCK_SZ multiple */
     144             :   fd_vinyl_io_impl_t * impl;         /* implementation specific funcs */
     145             : 
     146             :   ulong cache_read_cnt;     /* Cache read request count */
     147             :   ulong cache_read_tot_sz;  /* Cache bytes read total */
     148             :   ulong cache_write_cnt;    /* Cache write request count */
     149             :   ulong cache_write_tot_sz; /* Cache bytes written total */
     150             :   ulong file_read_cnt;      /* File read request count  */
     151             :   ulong file_read_tot_sz;   /* File bytes read total */
     152             :   ulong file_write_cnt;     /* File write request count */
     153             :   ulong file_write_tot_sz;  /* File bytes written total */
     154             : 
     155             :   /* io implementation specific details follow */
     156             : };
     157             : 
     158             : FD_PROTOTYPES_BEGIN
     159             : 
     160             : /* fd_vinyl_io_* return the current value of the eponymous io field.
     161             :    Assumes io is valid.  For all but type and seed, the return value is
     162             :    a FD_VINYL_BSTREAM_BLOCK_SZ multiple.  Note that we don't have a
     163             :    generic notion of dev_max or dev_free as such is not a well defined
     164             :    concept.  Individual IO implementations can provide them as
     165             :    appropriate though. */
     166             : 
     167          12 : FD_FN_PURE static inline int   fd_vinyl_io_type( fd_vinyl_io_t const * io ) { return io->type; }
     168             : 
     169          12 : FD_FN_PURE static inline ulong fd_vinyl_io_seed( fd_vinyl_io_t const * io ) { return io->seed; }
     170             : 
     171    12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_ancient( fd_vinyl_io_t const * io ) { return io->seq_ancient; }
     172    12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_past   ( fd_vinyl_io_t const * io ) { return io->seq_past;    }
     173    12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_present( fd_vinyl_io_t const * io ) { return io->seq_present; }
     174    12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_future ( fd_vinyl_io_t const * io ) { return io->seq_future;  }
     175             : 
     176        2970 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_max ( fd_vinyl_io_t const * io ) { return io->spad_max;                 }
     177        2970 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_used( fd_vinyl_io_t const * io ) { return io->spad_used;                }
     178        2970 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_free( fd_vinyl_io_t const * io ) { return io->spad_max - io->spad_used; } /* derived value, not a stored field */
     179             : 
     180           0 : FD_FN_PURE static inline ulong fd_vinyl_io_dev_used( fd_vinyl_io_t const * io ) { return io->seq_future - io->seq_ancient; } /* derived value, not a stored field */
     181             : 
     182             : /* fd_vinyl_io_read_imm does an immediate (blocking) read of
     183             :    [seq,seq+sz) (cyclic) from io's bstream's past into dst.  Assumes
     184             :    there are no reads currently posted on io.  Retains no interest in
     185             :    dst.  seq and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned.  This
     186             :    is used mostly for sequential iterating over a bstream's past (i.e.
     187             :    serial recovery and discovering partitions for parallel recovery). */
     188             : 
     189             : static inline void
     190             : fd_vinyl_io_read_imm( fd_vinyl_io_t * io,
     191             :                       ulong           seq,
     192             :                       void *          dst,
     193     2998734 :                       ulong           sz ) {
     194     2998734 :   io->impl->read_imm( io, seq, dst, sz );
     195     2998734 : }
     196             : 
     197             : /* fd_vinyl_io_read starts executing the read command rd.  That is,
     198             :    start reading bstream bytes [seq,seq+sz) (cyclic) into dst.  seq and
     199             :    sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned.  Further,
     200             :    [seq,seq+sz) should be in the bstream's past and the region to read
     201             :    should be stored contiguously in the underlying storage.
     202             : 
     203             :    On entry, the caller should have ownership of rd and rd->dst.  The io
     204             :    has ownership of these on return and a read interest in bstream bytes
     205             :    [seq,seq+sz) (cyclic).  The ownership of these will be returned to
     206             :    the caller and the read interest will end when poll returns the
     207             :    request. */
     208             : 
     209             : static inline void
     210             : fd_vinyl_io_read( fd_vinyl_io_t *    io,
     211     3748140 :                   fd_vinyl_io_rd_t * rd ) {
     212     3748140 :   io->impl->read( io, rd );
     213     3748140 : }
     214             : 
     215             : /* fd_vinyl_io_poll checks if any outstanding reads are complete.  Reads
     216             :    can complete in any order by the I/O layer.  flags is a bit-or of
     217             :    FD_VINYL_IO_FLAGs.  BLOCKING indicates the call is allowed to block
     218             :    the caller (the io layer promises the call cannot fail from the
     219             :    caller's point of view).  Returns FD_VINYL_SUCCESS if a read completed
     220             :    (*_rd will point to the read command ended with the ownership and
     221             :    read interest as described above), FD_VINYL_ERR_EMPTY if there are
     222             :    no commands pending (*_rd will be NULL) and FD_VINYL_ERR_AGAIN if
     223             :    none of the posted commands are ready (*_rd will be NULL).  AGAIN is
     224             :    only possible for a non-blocking call. */
     225             : 
     226             : static inline int
     227             : fd_vinyl_io_poll( fd_vinyl_io_t *     io,
     228             :                   fd_vinyl_io_rd_t ** _rd,
     229     7496280 :                   int                 flags ) {
     230     7496280 :   return io->impl->poll( io, _rd, flags );
     231     7496280 : }
     232             : 
     233             : /* fd_vinyl_io_append starts appending sz bytes at src to the bstream.
     234             :    src and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned.  Returns
     235             :    bstream sequence number seq_append where the data is being appended.
     236             :    io will have a read interest in src until the next commit.  This
     237             :    moves blocks from the bstream's future to the bstream's present.  On
     238             :    commit, the region [seq_future_before,seq_append) (cyclic) will be
     239             :    filled with zero padding if the I/O implementation requires it to
     240             :    keep the append contiguous in the physical store (this region will be
     241             :    empty if covered by a previous hint or if this is an append of a
     242             :    single block) and the region [seq_append,seq_future_after) (cyclic)
     243             :    will be filled with the appended info.
     244             : 
     245             :    fd_vinyl_io_commit moves all blocks in the bstream's present to the
     246             :    bstream's past (i.e. sets seq_present to seq_future).  flags is a
     247             :    bit-or of FD_VINYL_IO_FLAGs.  If BLOCKING is set, this is allowed to
     248             :    block the caller.  Returns FD_VINYL_SUCCESS (0) on success and
     249             :    FD_VINYL_ERR_AGAIN (negative) if commit could not be completed
     250             :    immediately (only possible for a non-blocking call).  commit empties
     251             :    the io append scratch pad on success.
     252             : 
     253             :    fd_vinyl_io_hint indicates the next sz bytes to append must be
     254             :    contiguous in the bstream.  This can move blocks from the bstream's
     255             :    future to the bstream's present.  Returns (the potentially updated)
     256             :    seq_future.  On commit, the region
     257             :    [seq_future_before,seq_future_after) (cyclic) will be filled with
     258             :    zero padding (this region will be empty if covered by a previous
     259             :    hint) and the region [seq_future_after,seq_future_after+sz) (cyclic)
     260             :    will be contiguous in the physical storage.  This is useful for grouping
     261             :    sets of blocks from different memory regions on the host that must be
     262             :    written contiguously from a protocol point of view (e.g. a move
     263             :    control block and the pair that follows it).
     264             : 
     265             :    fd_vinyl_io_alloc returns a pointer to sz bytes of
     266             :    FD_VINYL_BSTREAM_BLOCK_SZ aligned memory suitably allocated from io's
     267             :    append scratch pad.  flags is a bit-or of FD_VINYL_IO_FLAG_*.  BLOCKING
     268             :    indicates the call is allowed to block the caller.  If a non-blocking
     269             :    call, will return NULL if there is no suitable memory at this time.
     270             :    Will never return NULL for a blocking call.  The lifetime of the
     271             :    returned pointer is the lesser of the next append, next commit, the
     272             :    next alloc or the io.  sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned
     273             :    and at most io's spad_max.  This may do a commit to free up scratch
     274             :    pad memory if necessary (moving blocks from the present to the past).
     275             : 
     276             :    fd_vinyl_io_trim trims sz bytes from the end of the most recent
     277             :    fd_vinyl_io_alloc.  sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned
     278             :    and at most the size of the most recent alloc.
     279             : 
     280             :    fd_vinyl_io_copy starts appending a copy of the sz bytes at seq in
     281             :    the bstream's past to the bstream.  seq and sz should be
     282             :    FD_VINYL_BSTREAM_BLOCK_SZ aligned.  [seq,seq+sz) (cyclic) should be
     283             :    in the bstream's past.  io will have a read interest in this region
     284             :    until the next commit.  This will do a _blocking_ commit to free up
     285             :    scratch pad memory if necessary (moving blocks from the present to
     286             :    the past).  FIXME: consider non-blocking copy support? (copy would
     287             :    need a flags args).
     288             : 
     289             :    None of these can fail from the caller's perspective (they will all
     290             :    FD_LOG_CRIT if anything goes wrong ... much like accessing invalid
     291             :    memory will seg fault). */
     292             : 
     293             : static inline ulong
     294             : fd_vinyl_io_append( fd_vinyl_io_t * io,
     295             :                     void const *    src,
     296      750210 :                     ulong           sz ) {
     297      750210 :   return io->impl->append( io, src, sz ); /* dispatch to io's implementation specific append */
     298      750210 : }
     299             : 
     300             : static inline int
     301             : fd_vinyl_io_commit( fd_vinyl_io_t * io,
     302      749004 :                     int             flags ) {
     303      749004 :   return io->impl->commit( io, flags ); /* dispatch to io's implementation specific commit */
     304      749004 : }
     305             : 
     306             : static inline ulong
     307             : fd_vinyl_io_hint( fd_vinyl_io_t * io,
     308      753444 :                   ulong           sz ) {
     309      753444 :   return io->impl->hint( io, sz ); /* dispatch to io's implementation specific hint */
     310      753444 : }
     311             : 
     312             : static inline void *
     313             : fd_vinyl_io_alloc( fd_vinyl_io_t * io,
     314             :                    ulong           sz,
     315        2964 :                    int             flags ) {
     316        2964 :   return io->impl->alloc( io, sz, flags ); /* dispatch to io's implementation specific alloc */
     317        2964 : }
     318             : 
     319             : static inline void
     320             : fd_vinyl_io_trim( fd_vinyl_io_t * io,
     321           0 :                   ulong           sz ) {
     322           0 :   io->spad_used -= sz; /* done directly on the generic state (no impl dispatch); sz is not range checked here */
     323           0 : }
     324             : 
     325             : static inline ulong
     326             : fd_vinyl_io_copy( fd_vinyl_io_t * io,
     327             :                   ulong           seq,
     328      752298 :                   ulong           sz ) {
     329      752298 :   return io->impl->copy( io, seq, sz ); /* dispatch to io's implementation specific copy */
     330      752298 : }
     331             : 
     332             : /* fd_vinyl_io_forget moves [seq_past,seq) (cyclic) from the bstream's
     333             :    past to the bstream's antiquity, setting seq_past to seq.  As such,
     334             :    seq should be in [seq_past,seq_present] (cyclic) and
     335             :    FD_VINYL_BSTREAM_BLOCK_SZ aligned.  There should be no reads, copies
     336             :    or appends in progress.  Cannot fail from the caller's perspective
     337             :    (will FD_LOG_CRIT if anything goes wrong).
     338             : 
     339             :    IMPORTANT SAFETY TIP!  Though the bstream has been updated from the
     340             :    caller's point of view, the bstream needs to be sync'd for recovery
     341             :    to start from the new seq_past. */
     342             : 
     343             : static inline void
     344             : fd_vinyl_io_forget( fd_vinyl_io_t * io,
     345      263526 :                     ulong           seq ) {
     346      263526 :   io->impl->forget( io, seq );
     347      263526 : }
     348             : 
     349             : /* fd_vinyl_io_rewind moves blocks [seq,seq_present) (cyclic) from the
     350             :    bstream's past to the bstream's future (updating seq_ancient and
     351             :    seq_past as necessary).  There should be no reads, copies or appends
     352             :    in progress.  seq should be at most seq_present (cyclic) and
     353             :    FD_VINYL_BSTREAM_BLOCK_SZ aligned.  Cannot fail from the caller's
     354             :    perspective (will FD_LOG_CRIT if anything goes wrong).
     355             : 
     356             :    IMPORTANT SAFETY TIP!  Though the bstream has been updated from the
     357             :    caller's point of view, the bstream needs to be sync'd for recovery
     358             :    to account for the rewind (and this is probably more critical than
     359             :    forget because appends will start modifying the bstream blocks that
     360             :    recovery would be expecting to be in the pre-rewind state). */
     361             : 
     362             : static inline void
     363             : fd_vinyl_io_rewind( fd_vinyl_io_t * io,
     364      213468 :                     ulong           seq ) {
     365      213468 :   io->impl->rewind( io, seq );
     366      213468 : }
     367             : 
     368             : /* fd_vinyl_io_sync moves [seq_ancient,seq_past) (cyclic) from the
     369             :    bstream's antiquity to the end of the bstream's future, setting
     370             :    seq_ancient to seq_past.  It promises the caller the bstream's past
     371             :    is fully written and that the bstream's past region is what recovery
     372             :    will use to recover the bstream's key-val state at seq_present.
     373             :    flags is a bit-or of FD_VINYL_IO_FLAGs.  BLOCKING indicates the call
     374             :    is allowed to block the caller.  Returns FD_VINYL_SUCCESS (0) on
     375             :    success and FD_VINYL_ERR_AGAIN (negative) if the call would block
     376             :    the caller (only possible for a non-blocking call). */
     377             : /* FIXME: consider allowing new user info to be passed? */
     378             : 
     379             : static inline int
     380             : fd_vinyl_io_sync( fd_vinyl_io_t * io,
     381      748320 :                   int             flags ) {
     382      748320 :   return io->impl->sync( io, flags );
     383      748320 : }
     384             : 
     385             : /* fd_vinyl_io_fini tears down io, returning the memory region used to
     386             :    hold the I/O implementation state.  Implicitly completes any
     387             :    in-progress reads and cancels any in-progress appends (and thus can
     388             :    block the caller).
     389             : 
     390             :    IMPORTANT SAFETY TIP!  This does _not_ sync the bstream first (e.g.
     391             :    if an application is tearing down due to an anomalous condition, it
     392             :    may not want to sync on fini so that it can recover from a known good
     393             :    point). */
     394             : 
     395             : void *
     396             : fd_vinyl_io_fini( fd_vinyl_io_t * io );
     397             : 
     398             : /* Helpers ************************************************************/
     399             : 
     400             : /* fd_vinyl_io_spad_est() returns an estimate of the smallest scratch pad
     401             :    size required for most applications.  Specifically, this returns:
     402             : 
     403             :      2 pair_sz( LZ4_COMPRESSBOUND( VAL_MAX ) )
     404             : 
     405             :    so that it is possible to load an object footprint into the scratch
     406             :    pad and then have a worst case scratch memory for compression to
     407             :    re-encode the object. */
     408             : 
     409             : FD_FN_CONST ulong fd_vinyl_io_spad_est( void );
     410             : 
     411             : /* fd_vinyl_io_append_* are helper functions that start appending the
     412             :    given info, appropriately formatted and hashed, to io's bstream.
     413             :    There are no excess requirements for alignment.  They do no input
     414             :    argument checking.  On return, io retains no interest in the given
     415             :    info (that is, they use io's scratch memory and thus can trigger an
     416             :    io commit to move blocks from the bstream's present to the bstream's
     417             :    past if there isn't enough scratch pad free).  They return the
     418             :    bstream sequence number where the data is being appended.  They
     419             :    cannot fail from the caller's perspective (they will FD_LOG_CRIT if
     420             :    anything goes awry). */
     421             : 
     422             : ulong
     423             : fd_vinyl_io_append_pair_raw( fd_vinyl_io_t *         io,
     424             :                              fd_vinyl_key_t const *  key,   /* pair key */
     425             :                              fd_vinyl_info_t const * info,  /* pair info */
     426             :                              void const *            val ); /* contains info->val_sz bytes, in [0,FD_VINYL_VAL_MAX] */
     427             : 
     428             : ulong
     429             : fd_vinyl_io_append_dead( fd_vinyl_io_t *                 io,
     430             :                          fd_vinyl_bstream_phdr_t const * phdr,      /* pair header of erased pair */
     431             :                          void const *                    info,      /* contains info_sz bytes, info_sz treated as 0 if NULL */
     432             :                          ulong                           info_sz ); /* in [0,FD_VINYL_BSTREAM_DEAD_INFO_MAX] */
     433             : 
     434             : ulong
     435             : fd_vinyl_io_append_move( fd_vinyl_io_t *                 io,
     436             :                          fd_vinyl_bstream_phdr_t const * src,       /* pair header of src pair */
     437             :                          fd_vinyl_key_t const *          dst,       /* src pair getting renamed to dst or is replacing dst */
     438             :                          void const *                    info,      /* contains info_sz bytes, info_sz treated as 0 if NULL */
     439             :                          ulong                           info_sz ); /* in [0,FD_VINYL_BSTREAM_MOVE_INFO_MAX] */
     440             : 
     441             : ulong
     442             : fd_vinyl_io_append_part( fd_vinyl_io_t * io,
     443             :                          ulong           seq_prev,  /* should be a part before seq or seq */
     444             :                          ulong           dead_cnt,  /* number of dead blocks in the partition */
     445             :                          ulong           move_cnt,  /* number of move blocks in the partition */
     446             :                          void const *    info,      /* contains info_sz bytes, info_sz treated as 0 if NULL */
     447             :                          ulong           info_sz ); /* in [0,FD_VINYL_BSTREAM_PART_INFO_MAX] */
     448             : 
     449             : /* fd_vinyl_io_append_pair_inplace appends the style RAW pair at phdr
     450             :    to the bstream.  This will preferentially append the pair in the
     451             :    given style.  Returns the location where the pair was appended.  On
     452             :    return, *_style holds the actual style used and *_val_esz contains
     453             :    the pair encoded value byte size.
     454             : 
     455             :    Note that if the requested style is RAW or if the pair could not be
     456             :    usefully encoded in the requested style (e.g. the compressed size
     457             :    ended up larger than the uncompressed size), this will append from
     458             :    phdr in-place zero copy.  When appending a pair in-place, this will
     459             :    clear the zero padding region and insert the appropriate data
     460             :    integrity footers at the end of the pair.  In other cases, this will
     461             :    append from the io append scratch memory the encoded pair and the
     462             :    pair will be untouched.
     463             : 
     464             :    As such, the caller should assume the io has a read interest on the
     465             :    pair's header region and value region and a write interest on the
     466             :    pair zero padding region and footer region until the next append or
     467             :    commit and the pair's zero padding and footer regions may be
     468             :    clobbered by this call. */
     469             : 
     470             : ulong
     471             : fd_vinyl_io_append_pair_inplace( fd_vinyl_io_t *           io,         /* io whose bstream is appended to */
     472             :                                  int                       style,      /* requested (preferred) style for the appended pair */
     473             :                                  fd_vinyl_bstream_phdr_t * phdr,       /* style RAW pair to append, zero padding / footer regions may be clobbered */
     474             :                                  int *                     _style,     /* on return, holds the actual style used */
     475             :                                  ulong *                   _val_esz ); /* on return, holds the pair encoded value byte size */
     476             : 
     477             : /* fd_vinyl_io_bd *****************************************************/
     478             : 
     479             : /* fd_vinyl_io_bd_{align,footprint} specify the alignment and footprint
     480             :    needed for a bstream stored on a block device / large file with a
     481             :    spad_max append scratch pad.  align will be a reasonable power-of-2
     482             :    and footprint will be a multiple of align.  Returns 0 for an invalid
     483             :    spad_max.
     484             : 
     485             :    fd_vinyl_io_bd_init starts using a file as a bstream store.  lmem
     486             :    points to a local memory region with suitable alignment and footprint
     487             :    to hold the bstream's state.  spad_max gives the size of the append
     488             :    scratch pad (should be a FD_VINYL_BSTREAM_BLOCK_SZ multiple).  dev_fd
     489             :    is a file descriptor for the block device / large file.  The file
     490             :    should already exist and be sized to the appropriate capacity.
     491             : 
     492             :    FIXME: allow user to specify a subrange of dev_fd to use for the
     493             :    store?
     494             : 
     495             :    If reset is non-zero, ignores any existing file contents and will
     496             :    start a new bstream.  The bstream metadata user info will be set to
     497             :    the info_sz bytes at info and the bstream will use io_seed for its
     498             :    data integrity hashing seed.
     499             : 
     500             :    Otherwise, this will attempt to resume at the point the bstream was
     501             :    last synchronized.  info, info_sz and io_seed will be ignored.
     502             : 
     503             :    IMPORTANT SAFETY TIP!  The io_seed is not the same thing as the meta
     504             :    seed.  The io_seed is a property of the bstream (with a lifetime of
     505             :    the bstream and is shared among all users of the bstream).  The meta
     506             :    seed is a property of the meta (and ideally uniquely and randomly set
     507             :    per vinyl tile run).
     508             : 
     509             :    Returns a handle to the bstream on success (has ownership of lmem and
     510             :    dev_fd, ownership returned on fini) and NULL on failure (logs
     511             :    details, no ownership changed).  Retains no interest in info. */
     512             : 
     513             : ulong fd_vinyl_io_bd_align    ( void );           /* a reasonable power-of-2 */
     514             : ulong fd_vinyl_io_bd_footprint( ulong spad_max ); /* a multiple of align, 0 if spad_max is invalid */
     515             : 
     516             : fd_vinyl_io_t *
     517             : fd_vinyl_io_bd_init( void *       lmem,       /* local memory region with suitable alignment and footprint for the bstream's state */
     518             :                      ulong        spad_max,   /* append scratch pad size (should be a FD_VINYL_BSTREAM_BLOCK_SZ multiple) */
     519             :                      int          dev_fd,     /* file descriptor for the block device / large file (should exist and be sized already) */
     520             :                      int          reset,      /* non-zero: ignore existing contents and start a new bstream, zero: resume at last sync */
     521             :                      void const * info,       /* bstream metadata user info, ignored if not resetting, no interest retained */
     522             :                      ulong        info_sz,    /* byte size of info, ignored if not resetting */
     523             :                      ulong        io_seed );  /* bstream data integrity hashing seed, ignored if not resetting */
     524             : 
     525             : /* fd_vinyl_io_mm *****************************************************/
     526             : 
     527             : /* fd_vinyl_io_mm_* is the same as fd_vinyl_io_bd_* but uses dev_sz byte
     528             :    sized memory region dev as the "block device".  The result is
     529             :    bit-level identical to fd_vinyl_io_bd (and vice versa).  This is
     530             :    primarily for testing purposes but, as dev could also be a memory
     531             :    mapped file / block device, this could be useful in general
     532             :    (especially for concurrent read access, e.g. parallel recovery).
     533             :    Note that "sync" only guarantees appends to the dev memory region
     534             :    happened.  If the memory region is backed by a file, when the actual
     535             :    blocks are written to the physical storage is controlled by the
     536             :    kernel / driver / physical device (it is up to the caller of sync to
     537             :    do any additional context specific control here). */
     538             : 
     539             : ulong fd_vinyl_io_mm_align    ( void );           /* same as fd_vinyl_io_bd_align */
     540             : ulong fd_vinyl_io_mm_footprint( ulong spad_max ); /* same as fd_vinyl_io_bd_footprint */
     541             : 
     542             : fd_vinyl_io_t *
     543             : fd_vinyl_io_mm_init( void *       lmem,       /* same as fd_vinyl_io_bd_init */
     544             :                      ulong        spad_max,   /* same as fd_vinyl_io_bd_init */
     545             :                      void *       dev,        /* memory region used as the "block device" */
     546             :                      ulong        dev_sz,     /* byte size of the dev memory region */
     547             :                      int          reset,      /* same as fd_vinyl_io_bd_init */
     548             :                      void const * info,       /* same as fd_vinyl_io_bd_init */
     549             :                      ulong        info_sz,    /* same as fd_vinyl_io_bd_init */
     550             :                      ulong        io_seed );  /* same as fd_vinyl_io_bd_init */
     551             : 
     552             : /* fd_vinyl_{mmio,mmio_sz} return {a pointer in the caller's address
     553             :    space to the raw bstream storage, the raw bstream storage byte size}.
     554             :    These are a _subset_ of the dev / dev_sz region passed to mm_init and
     555             :    these will be FD_VINYL_BSTREAM_BLOCK_SZ aligned.  If a byte seq is in
     556             :    the store, it will be at mmio[ seq % mmio_sz ].  Note that mmio_sz is
     557             :    not necessarily a power of two.  Note also that the bstream's past is
     558             :    guaranteed to be in the store.  The lifetime of the returned region
     559             :    is the lifetime of the io.  Returns NULL and 0 if io does not support
     560             :    memory mapped io.  These exist to support thread parallel recovery. */
     561             : 
     562             : void * fd_vinyl_mmio   ( fd_vinyl_io_t * io ); /* NULL if io does not support memory mapped io */
     563             : ulong  fd_vinyl_mmio_sz( fd_vinyl_io_t * io ); /* 0 if io does not support memory mapped io */
     564             : 
     565             : FD_PROTOTYPES_END
     566             : 
     567             : #endif /* HEADER_fd_src_vinyl_io_fd_vinyl_io_h */

Generated by: LCOV version 1.14