LCOV - code coverage report
Current view: top level - util/archive - fd_tar.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 6 28 21.4 %
Date: 2025-07-01 05:00:49 Functions: 1 91 1.1 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_archive_fd_tar_h
       2             : #define HEADER_fd_src_archive_fd_tar_h
       3             : 
       4             : /* fd_tar implements the ustar and old-GNU versions of the TAR file
       5             :    format. This is not a general-purpose TAR implementation.  It is
       6             :    currently only intended for loading and writing Solana snapshots. */
       7             : 
       8             : #include "../io/fd_io.h"
       9             : 
      10             : /* File Format ********************************************************/
      11             : 
      12             : /* The high level format of a tar archive/ball is a set of 512 byte blocks.
      13             :    Each file will be described by a tar header (fd_tar_meta_t) and will be
      14             :    followed by the raw bytes of the file. The last block that is used for
      15             :    the file will be padded to fit into a tar block. When the archive is
      16             :    completed, it will be trailed by two EOF blocks which are populated with
      17             :    zero bytes. */
      18             : 
      19             : /* fd_tar_meta_t is the ustar/OLDGNU version of the TAR header. */
      20             : 
      21           0 : #define FD_TAR_BLOCK_SZ (512UL)
      22             : 
      23             : struct __attribute__((packed)) fd_tar_meta {
      24           0 : # define FD_TAR_NAME_SZ (100)
      25             :   /* 0x000 */ char name    [ FD_TAR_NAME_SZ ];
      26             :   /* 0x064 */ char mode    [   8 ];
      27             :   /* 0x06c */ char uid     [   8 ];
      28             :   /* 0x074 */ char gid     [   8 ];
      29             :   /* 0x07c */ char size    [  12 ];
      30             :   /* 0x088 */ char mtime   [  12 ];
      31             :   /* 0x094 */ char chksum  [   8 ];
      32             :   /* 0x09c */ char typeflag;
      33             :   /* 0x09d */ char linkname[ 100 ];
      34             :   /* 0x101 */ char magic   [   6 ];
      35             :   /* 0x107 */ char version [   2 ];
      36             :   /* 0x109 */ char uname   [  32 ];
      37             :   /* 0x129 */ char gname   [  32 ];
      38             :   /* 0x149 */ char devmajor[   8 ];
      39             :   /* 0x151 */ char devminor[   8 ];
      40             :   /* 0x159 */ char prefix  [ 155 ];
      41             :   /* 0x1f4 */ char padding [  12 ];
      42             : };
      43             : 
      44             : typedef struct fd_tar_meta fd_tar_meta_t;
      45             : 
      46             : /* FD_TAR_MAGIC is the only value of fd_tar_meta::magic supported by
      47             :    fd_tar. */
      48             : 
      49             : #define FD_TAR_MAGIC "ustar"
      50             : 
      51             : /* Known file types */
      52             : 
      53          54 : #define FD_TAR_TYPE_NULL      ('\0')  /* implies FD_TAR_TYPE_REGULAR */
      54         108 : #define FD_TAR_TYPE_REGULAR   ('0')
      55             : #define FD_TAR_TYPE_HARD_LINK ('1')
      56             : #define FD_TAR_TYPE_SYM_LINK  ('2')
      57             : #define FD_TAR_TYPE_CHAR_DEV  ('3')
      58             : #define FD_TAR_TYPE_BLOCK_DEV ('4')
      59             : #define FD_TAR_TYPE_DIR       ('5')
      60             : #define FD_TAR_TYPE_FIFO      ('6')
      61             : 
      62             : FD_PROTOTYPES_BEGIN
      63             : 
      64             : /* fd_tar_meta_is_reg returns 1 if the file type is 'regular', and 0
      65             :    otherwise. */
      66             : 
      67             : FD_FN_PURE static inline int
      68          54 : fd_tar_meta_is_reg( fd_tar_meta_t const * meta ) {
      69          54 :   return ( meta->typeflag == FD_TAR_TYPE_NULL    )
      70          54 :        | ( meta->typeflag == FD_TAR_TYPE_REGULAR );
      71          54 : }
      72             : 
      73             : /* fd_tar_meta_get_size parses the size field of the TAR header.
      74             :    Returns ULONG_MAX if parsing failed. */
      75             : 
      76             : FD_FN_PURE ulong
      77             : fd_tar_meta_get_size( fd_tar_meta_t const * meta );
      78             : 
      79             : /* fd_tar_set_octal is a helper function to write 12-byte octal fields */
      80             : 
      81             : int
      82             : fd_tar_set_octal( char  buf[ static 12 ],
      83             :                   ulong val );
      84             : 
      85             : /* fd_tar_meta_set_size sets the size field.  Returns 1 on success, 0
      86             :    if sz is too large to be represented in TAR header. Set size using the
      87             :    OLDGNU size extension to allow for unlimited file sizes. The first byte
      88             :    must be 0x80 followed by 0s and then the size in binary. */
      89             : 
      90             : static inline int
      91             : fd_tar_meta_set_size( fd_tar_meta_t * meta,
      92           0 :                       ulong           sz ) {
      93           0 :   meta->size[ 0 ] = (char)0x80;
      94           0 :   FD_STORE( ulong, meta->size + 4UL, fd_ulong_bswap( sz ) );
      95           0 :   return 1;
      96           0 : }
      97             : 
      98             : /* fd_tar_meta_set_mtime sets the modification time field.  Returns 1
      99             :    on success, 0 if time cannot be represented in TAR header. */
     100             : 
     101             : static inline int
     102             : fd_tar_meta_set_mtime( fd_tar_meta_t * meta,
     103           0 :                        ulong           mtime ) {
     104           0 :   return fd_tar_set_octal( meta->mtime, mtime );
     105           0 : }
     106             : 
     107             : FD_PROTOTYPES_END
     108             : 
     109             : /* Streaming reader ***************************************************/
     110             : 
     111             : typedef struct fd_tar_reader fd_tar_reader_t;
     112             : 
     113             : /* fd_tar_file_fn_t is called by fd_tar when a new file was encountered.
     114             :    cb_arg is the callback context value. meta is the file header
     115             :    (lifetime until return).  sz is the expected file size that follows
     116             :    (via read callbacks).  The actual read size might differ in case of
     117             :    errors (e.g. unexpected EOF).  Returns 0 on success and non-zero if
     118             :    tar reader should stop. */
     119             : 
     120             : typedef int
     121             : (* fd_tar_file_fn_t)( void *                cb_arg,
     122             :                       fd_tar_meta_t const * meta,
     123             :                       ulong                 sz );
     124             : 
     125             : /* fd_tar_read_fn_t is called by fd_tar when a new chunk of data has
     126             :    been read.  Each read callback is associated with the last file
     127             :    callback.  Read callbacks are issued in order such that concatenating
     128             :    all buffers results in the correct file content.  Returns 0 on
     129             :    success and non-zero if tar reader should stop.
     130             : 
     131             :    cb_arg is the callback context value.  buf points to the first byte
     132             :    of the chunk.  bufsz is the byte count.  The lifetime of buf is until
     133             :    the callback returns. */
     134             : 
     135             : typedef int
     136             : (* fd_tar_read_fn_t)( void *       cb_arg,
     137             :                       void const * buf,
     138             :                       ulong        bufsz );
     139             : 
     140             : /* fd_tar_read_vtable_t is the virtual function table of the
     141             :    fd_tar_reader_t consumer object. */
     142             : 
     143             : struct fd_tar_read_vtable {
     144             :   fd_tar_file_fn_t file;
     145             :   fd_tar_read_fn_t read;
     146             : };
     147             : 
     148             : typedef struct fd_tar_read_vtable fd_tar_read_vtable_t;
     149             : 
     150             : /* fd_tar_reader_t is a streaming TAR reader using a callback API for
     151             :    delivering data.  To use, feed it the chunks of the TAR stream via
     152             :    fd_tar_read.  There is no restriction on the size and alignment of
     153             :    these chunks, other than that the chunks are supplied in order and
     154             :    gapless.  The resulting callback sequence is (1x file, Nx read, 1x
     155             :    file, Nx read ...).  As in: Each new file encountered creates a file
     156             :    callback and a variable number of read callbacks. */
     157             : 
     158             : struct fd_tar_reader {
     159             : 
     160             :   /* Buffered file header.  Required because a file header might be
     161             :      split across multiple fd_tar_read calls. */
     162             :   union {
     163             :     uchar         buf[ sizeof(fd_tar_meta_t) ];
     164             :     fd_tar_meta_t header;
     165             :   };
     166             : 
     167             :   ulong pos;      /* Number of bytes consumed */
     168             :   ulong buf_ctr;  /* Write cursor in file header */
     169             :   ulong file_sz;  /* Number of file bytes left */
     170             : 
     171             :   /* Callback parameters */
     172             :   fd_tar_read_vtable_t cb_vt;
     173             :   void *               cb_arg;
     174             : 
     175             : };
     176             : 
     177             : FD_PROTOTYPES_BEGIN
     178             : 
     179             : /* fd_tar_reader_{align,footprint} return parameters for the memory
     180             :    region backing a fd_tar_reader_t. */
     181             : 
     182             : FD_FN_CONST static inline ulong
     183           0 : fd_tar_reader_align( void ) {
     184           0 :   return alignof(fd_tar_reader_t);
     185           0 : }
     186             : 
     187             : FD_FN_CONST static inline ulong
     188           0 : fd_tar_reader_footprint( void ) {
     189           0 :   return sizeof(fd_tar_reader_t);
     190           0 : }
     191             : 
     192             : /* fd_tar_reader_new creates a new TAR reader.  mem is the memory region
     193             :    that will hold the fd_tar_reader_t (matches above align/ footprint
     194             :    requirements).  cb_vt contains the callback function pointers of
     195             :    the recipient.  cb_vt pointer is borrowed until this function
     196             :    returns.  cb_arg is the callback context value (usually a pointer to
     197             :    the recipient object).  Returns a qualified handle to the reader
     198             :    object in mem on success.  On failure, returns NULL and writes reason
     199             :    to warning log.  Reasons for failure include invalid memory region or
     200             :    NULL callback. */
     201             : 
     202             : fd_tar_reader_t *
     203             : fd_tar_reader_new( void *                       mem,
     204             :                    fd_tar_read_vtable_t const * cb_vt,
     205             :                    void *                       cb_arg );
     206             : 
     207             : /* fd_tar_reader_delete destroys a .tar reader and frees any allocated
     208             :    resources.  Returns the underlying memory region back to the caller. */
     209             : 
     210             : void *
     211             : fd_tar_reader_delete( fd_tar_reader_t * reader );
     212             : 
     213             : /* fd_tar_read processes a chunk of the TAR stream.  Issues callbacks
     214             :    when file headers or content are read.  reader is an fd_tar_reader_t
     215             :    pointer.  data points to the first byte of the data chunk.  data_sz
     216             :    is the byte count.  data_sz==0UL is a no-op.  Returns 0 on success.
     217             :    Returns -1 on end-of-file.  On failure, returns positive errno
     218             :    compatible error code.  In case of error, caller should delete reader
     219             :    and must not issue any more fd_tar_read calls.  Suitable as a
     220             :    fd_decompress_cb_t callback. If an underlying function returns track_err
     221             :    at any point, after fd_tar_read has processed the end of the data buffer,
     222             :    we will proceed to return track_err assuming no other errors have been
     223             :    thrown. Pass in 0 to not use this functionality. */
     224             : 
     225             : int
     226             : fd_tar_read( void *        reader,
     227             :              uchar const * data,
     228             :              ulong         data_sz,
     229             :              int           track_err );
     230             : 
     231             : /* Streaming writer ***************************************************/
     232             : 
     233             : /* TL;DR. I didn't read the code. How do I use this?
     234             : 
     235             :    Init with fd_tar_writer_new( mem, fd ).
     236             : 
     237             :    For each file you want to add to the archive:
     238             :     1. Write out tar header with fd_tar_writer_new_file( writer, file_name )
     239             :     2. Write out file data with fd_tar_writer_write_file_data( writer, data, data_sz ).
     240             :        This can be done as many times as you want.
     241             :     3. Finish the current file with fd_tar_writer_fini_file( writer ).
     242             : 
     243             :    When you are done, call fd_tar_writer_delete( writer ) to write out the
     244             :    tar archive trailer and close out the file descriptor.
     245             : 
     246             :    If you want to reserve space for an existing file and write back to it
     247             :    at some point in the future see the below comments for
     248             :    fd_tar_writer_{make,fill}_space().
     249             : 
     250             :    */
     251             : 
     252             : struct fd_tar_writer {
     253             :   int                      fd;         /* The file descriptor for the tar archive. */
     254             :   ulong                    header_pos; /* The position in the file for the current file's header.
     255             :                                           If there is no current file that is being streamed out,
     256             :                                           the header_pos will be equal to ULONG_MAX. */
     257             :   ulong                    data_sz;    /* The size of the current file's data. If there is no
     258             :                                           current file that is being streamed out, the data_sz
     259             :                                           will be equal to ULONG_MAX. */
     260             :   ulong                    wb_pos;     /* If this value is not equal to ULONG_MAX that means that
     261             :                                           this is the position at which to write back to with a
     262             :                                           call to fd_tar_writer_fill_space. */
     263             :   /* TODO: Right now, the stream to the tar writer just uses fd_io_write.
     264             :      This can eventually be abstracted to use write callbacks that use
     265             :      fd_io streaming under the hood. This adds some additional complexity
     266             :      that's related to writing back into the header: if the header is still
     267             :      in the ostream buf, modify the buffer. Otherwise, read the header
     268             :      directly from the file. */
     269             : 
     270             : };
     271             : typedef struct fd_tar_writer fd_tar_writer_t;
     272             : 
     273             : FD_FN_CONST static inline ulong
     274           0 : fd_tar_writer_align( void ) {
     275           0 :   return alignof(fd_tar_writer_t);
     276           0 : }
     277             : 
     278             : FD_FN_CONST static inline ulong
     279           0 : fd_tar_writer_footprint( void ) {
     280           0 :   return sizeof(fd_tar_writer_t);
     281           0 : }
     282             : 
     283             : /* fd_tar_writer_new creates a new TAR writer. mem is the memory region
     284             :    that will hold the fd_tar_writer_t (matches above align/footprint
     285             :    requirements). Returns a qualified handle to the tar writer
     286             :    object in mem on success. On failure, returns NULL and writes reason
     287             :    to warning log. Reasons for failure include invalid memory region.
     288             :    The writer will enable the user to write/stream out files of variable
     289             :    size into a continual stream. The writer should persist for the span of
     290             :    a single tar archive. The user is responsible for passing in an open, valid
     291             :    file descriptor. */
     292             : 
     293             : fd_tar_writer_t *
     294             : fd_tar_writer_new( void * mem, int fd );
     295             : 
     296             : /* fd_tar_writer_delete destroys a tar writer and frees any allocated
     297             :    resources. Returns the underlying memory region back to the caller.
     298             :    This writer will also handle cleanup for the tar archive: it will write
     299             :    out the tar archive trailer and will close the underlying file descriptor. */
     300             : 
     301             : void *
     302             : fd_tar_writer_delete( fd_tar_writer_t * writer );
     303             : 
     304             : /* fd_tar_writer_new_file writes out a file header. It will leave certain
     305             :    fields blank to allow for writing back of header metadata that is unknown
     306             :    until the file is done streaming out. The user must enforce the invariant that
     307             :    this can only be called after fd_tar_writer_fini_file() or fd_tar_writer_new(). */
     308             : 
     309             : int
     310             : fd_tar_writer_new_file( fd_tar_writer_t * writer,
     311             :                         char const *      file_name );
     312             : 
     313             : /* fd_tar_writer_write_file_data will write out a variable amount of bytes to the
     314             :    writer's tarball. This can be called multiple times for a single file.
     315             :    The user must enforce the invariant that this function follows a call
     316             :    to fd_tar_writer_new_file and precedes a call to fd_tar_writer_fini_file. If this
     317             :    invariant isn't enforced, then the tar writer will silently produce an
     318             :    invalid file. */
     319             : 
     320             : int
     321             : fd_tar_writer_write_file_data( fd_tar_writer_t * writer,
     322             :                                void const *      data,
     323             :                                ulong             data_sz );
     324             : 
     325             : /* fd_tar_writer_fini_file will write out any alignment bytes to the current file's
     326             :    data. It will then write back to the file header with the file size and
     327             :    the checksum. */
     328             : 
     329             : int
     330             : fd_tar_writer_fini_file( fd_tar_writer_t * writer );
     331             : 
     332             : /* fd_tar_writer_make_space and fd_tar_writer_fill_space allow for writing
     333             :    back to a specific place in the tar stream. This can be used by first
     334             :    making a call to fd_tar_writer_new_file, fd_tar_writer_make_space, and
     335             :    fd_tar_writer_fini_file. This will populate the header and write out
     336             :    random bytes. The start of this data file will be saved by the tar writer.
     337             :    Up to n data files can be appended to the tar archive before a call to
     338             :    fd_tar_writer_fill_space. fd_tar_writer_fill_space should only be called
     339             :    after an unpaired call to fd_tar_writer_make_space and it requires a valid
     340             :    fd_tar_writer_t handle. It allows the user to write back to the point at
     341             :    which they made space. _make_space and _fill_space should be paired together.
     342             :    There can only be one outstanding call to make_space at a time.
     343             : 
     344             :    TODO: This can be extended to support multiple write backs. */
     345             : 
     346             : int
     347             : fd_tar_writer_make_space( fd_tar_writer_t * writer, ulong sz );
     348             : 
     349             : int
     350             : fd_tar_writer_fill_space( fd_tar_writer_t * writer, void const * data, ulong sz );
     351             : 
     352             : FD_PROTOTYPES_END
     353             : 
     354             : #endif /* HEADER_fd_src_archive_fd_tar_h */

Generated by: LCOV version 1.14