LCOV - code coverage report
Current view: top level - util/archive - fd_tar.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 6 28 21.4 %
Date: 2025-01-08 12:08:44 Functions: 1 105 1.0 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_archive_fd_tar_h
       2             : #define HEADER_fd_src_archive_fd_tar_h
       3             : 
       4             : /* fd_tar implements the ustar and old-GNU versions of the TAR file
       5             :    format. This is not a general-purpose TAR implementation.  It is
       6             :    currently only intended for loading and writing Solana snapshots. */
       7             : 
       8             : #include "../fd_util_base.h"
       9             : #include "../io/fd_io.h"
      10             : 
      11             : /* File Format ********************************************************/
      12             : 
      13             : /* The high level format of a tar archive/ball is a set of 512 byte blocks.
      14             :    Each file will be described by a tar header (fd_tar_meta_t) and will be
      15             :    followed by the raw bytes of the file. The last block that is used for
      16             :    the file will be padded to fit into a tar block. When the archive is
      17             :    completed, it will be trailed by two EOF blocks which are populated with
      18             :    zero bytes. */
      19             : 
      20             : /* fd_tar_meta_t is the ustar/OLDGNU version of the TAR header. */
      21             : 
      22           0 : #define FD_TAR_BLOCK_SZ (512UL)
      23             : 
      24             : struct __attribute__((packed)) fd_tar_meta {
      25           0 : # define FD_TAR_NAME_SZ (100)
      26             :   /* 0x000 */ char name    [ FD_TAR_NAME_SZ ];
      27             :   /* 0x064 */ char mode    [   8 ];
      28             :   /* 0x06c */ char uid     [   8 ];
      29             :   /* 0x074 */ char gid     [   8 ];
      30             :   /* 0x07c */ char size    [  12 ]; /* read via fd_tar_meta_get_size, written via fd_tar_meta_set_size */
      31             :   /* 0x088 */ char mtime   [  12 ]; /* written via fd_tar_meta_set_mtime */
      32             :   /* 0x094 */ char chksum  [   8 ];
      33             :   /* 0x09c */ char typeflag;        /* compared against the FD_TAR_TYPE_* constants */
      34             :   /* 0x09d */ char linkname[ 100 ];
      35             :   /* 0x101 */ char magic   [   6 ]; /* FD_TAR_MAGIC is the only supported value */
      36             :   /* 0x107 */ char version [   2 ];
      37             :   /* 0x109 */ char uname   [  32 ];
      38             :   /* 0x129 */ char gname   [  32 ];
      39             :   /* 0x149 */ char devmajor[   8 ];
      40             :   /* 0x151 */ char devminor[   8 ];
      41             :   /* 0x159 */ char prefix  [ 155 ];
      42             :   /* 0x1f4 */ char padding [  12 ]; /* packed struct ends at 0x200, i.e. FD_TAR_BLOCK_SZ bytes */
      43             : };
      44             : 
      45             : typedef struct fd_tar_meta fd_tar_meta_t;
      46             : 
      47             : /* FD_TAR_MAGIC is the only value of fd_tar_meta::magic supported by
      48             :    fd_tar. */
      49             : 
      50             : #define FD_TAR_MAGIC "ustar"
      51             : 
      52             : /* Known file types */
      53             : 
      54          54 : #define FD_TAR_TYPE_NULL      ('\0')  /* implies FD_TAR_TYPE_REGULAR */
      55         108 : #define FD_TAR_TYPE_REGULAR   ('0')
      56             : #define FD_TAR_TYPE_HARD_LINK ('1')
      57             : #define FD_TAR_TYPE_SYM_LINK  ('2')
      58             : #define FD_TAR_TYPE_CHAR_DEV  ('3')
      59             : #define FD_TAR_TYPE_BLOCK_DEV ('4')
      60             : #define FD_TAR_TYPE_DIR       ('5')
      61             : #define FD_TAR_TYPE_FIFO      ('6')
      62             : 
      63             : FD_PROTOTYPES_BEGIN
      64             : 
      65             : /* fd_tar_meta_is_reg returns 1 if meta->typeflag is FD_TAR_TYPE_NULL
      66             :    or FD_TAR_TYPE_REGULAR (i.e. a 'regular' file), and 0 otherwise. */
      67             : 
      68             : FD_FN_PURE static inline int
      69          54 : fd_tar_meta_is_reg( fd_tar_meta_t const * meta ) {
      70          54 :   return ( meta->typeflag == FD_TAR_TYPE_NULL    )
      71          54 :        | ( meta->typeflag == FD_TAR_TYPE_REGULAR ); /* bitwise OR of two 0/1 results; both comparisons are always evaluated */
      72          54 : }
      73             : 
      74             : /* fd_tar_meta_get_size parses the size field of the TAR header.
      75             :    Returns ULONG_MAX if parsing failed. */
      76             : 
      77             : FD_FN_PURE ulong
      78             : fd_tar_meta_get_size( fd_tar_meta_t const * meta );
      79             : 
      80             : /* fd_tar_set_octal is a helper function to write 12-byte octal fields */
      81             : 
      82             : int
      83             : fd_tar_set_octal( char  buf[ static 12 ],
      84             :                   ulong val );
      85             : 
      86             : /* fd_tar_meta_set_size sets the size field using the OLDGNU size
      87             :    extension, which allows for larger file sizes than the octal form.
      88             :    Byte 0 is set to 0x80 and bytes 4..11 receive the byte-swapped sz
      89             :    (big-endian on a little-endian host).  As written, always returns 1. */
      90             : 
      91             : static inline int
      92             : fd_tar_meta_set_size( fd_tar_meta_t * meta,
      93           0 :                       ulong           sz ) {
      94           0 :   meta->size[ 0 ] = (char)0x80; /* NOTE(review): size[1..3] are not written here; the format wants them zero -- presumably the caller zeroed the header, confirm */
      95           0 :   FD_STORE( ulong, meta->size + 4UL, fd_ulong_bswap( sz ) ); /* 8-byte store fills size[4..11], the tail of the 12-byte field */
      96           0 :   return 1; /* no failure path exists in this implementation */
      97           0 : }
      98             : 
      99             : /* fd_tar_meta_set_mtime sets the modification time field.  Returns 1
     100             :    on success, 0 if time cannot be represented in TAR header. */
     101             : 
     102             : static inline int
     103             : fd_tar_meta_set_mtime( fd_tar_meta_t * meta,
     104           0 :                        ulong           mtime ) {
     105           0 :   return fd_tar_set_octal( meta->mtime, mtime ); /* meta->mtime is 12 bytes, matching buf[ static 12 ]; the helper's result is forwarded unchanged */
     106           0 : }
     107             : 
     108             : FD_PROTOTYPES_END
     109             : 
     110             : /* Streaming reader ***************************************************/
     111             : 
     112             : typedef struct fd_tar_reader fd_tar_reader_t;
     113             : 
     114             : /* fd_tar_file_fn_t is called by fd_tar when a new file was encountered.
     115             :    cb_arg is the callback context value. meta is the file header
     116             :    (lifetime until return).  sz is the expected file size that follows
     117             :    (via read callbacks).  The actual read size might differ in case of
     118             :    errors (e.g. unexpected EOF).  Returns 0 on success and non-zero if
     119             :    tar reader should stop. */
     120             : 
     121             : typedef int
     122             : (* fd_tar_file_fn_t)( void *                cb_arg,
     123             :                       fd_tar_meta_t const * meta,
     124             :                       ulong                 sz );
     125             : 
     126             : /* fd_tar_read_fn_t is called by fd_tar when a new chunk of data has
     127             :    been read.  Each read callback is associated with the last file
     128             :    callback.  Read callbacks are issued in order such that concatenating
     129             :    all buffers results in the correct file content.  Returns 0 on
     130             :    success and non-zero if tar reader should stop.
     131             : 
     132             :    cb_arg is the callback context value.  buf points to the first byte
     133             :    of the chunk.  bufsz is the byte count.  The lifetime of buf is until
     134             :    the callback returns. */
     135             : 
     136             : typedef int
     137             : (* fd_tar_read_fn_t)( void *       cb_arg,
     138             :                       void const * buf,
     139             :                       ulong        bufsz );
     140             : 
     141             : /* fd_tar_read_vtable_t is the virtual function table of the
     142             :    fd_tar_reader_t consumer object. */
     143             : 
     144             : struct fd_tar_read_vtable {
     145             :   fd_tar_file_fn_t file; /* called when a new file (header) is encountered */
     146             :   fd_tar_read_fn_t read; /* called for each chunk of file data that has been read */
     147             : };
     148             : 
     149             : typedef struct fd_tar_read_vtable fd_tar_read_vtable_t;
     150             : 
     151             : /* fd_tar_reader_t is a streaming TAR reader using a callback API for
     152             :    delivering data.  To use, feed it the chunks of the TAR stream via
     153             :    fd_tar_read.  There is no restriction on the size and alignment of
     154             :    these chunks, other than that the chunks are supplied in order and
     155             :    gapless.  The resulting callback sequence is (1x file, Nx read, 1x
     156             :    file, Nx read ...).  As in: Each new file encountered creates a file
     157             :    callback and a variable number of read callbacks. */
     158             : 
     159             : struct fd_tar_reader {
     160             : 
     161             :   /* Buffered file header.  Required because a file header might be
     162             :      split across multiple fd_tar_read calls.  buf and header alias the same sizeof(fd_tar_meta_t) bytes. */
     163             :   union {
     164             :     uchar         buf[ sizeof(fd_tar_meta_t) ];
     165             :     fd_tar_meta_t header;
     166             :   };
     167             : 
     168             :   ulong pos;      /* Number of bytes consumed */
     169             :   ulong buf_ctr;  /* Write cursor in file header */
     170             :   ulong file_sz;  /* Number of file bytes left */
     171             : 
     172             :   /* Callback parameters */
     173             :   fd_tar_read_vtable_t cb_vt;  /* callback function table, held by value (not a pointer to the caller's table) */
     174             :   void *               cb_arg; /* callback context value; presumably passed as cb_arg to file/read callbacks */
     175             : 
     176             : };
     177             : 
     178             : FD_PROTOTYPES_BEGIN
     179             : 
     180             : /* fd_tar_reader_{align,footprint} return the alignment and the size
     181             :    in bytes required of the memory region backing a fd_tar_reader_t. */
     182             : 
     183             : FD_FN_CONST static inline ulong
     184           0 : fd_tar_reader_align( void ) {
     185           0 :   return alignof(fd_tar_reader_t); /* alignment of the struct itself */
     186           0 : }
     187             : 
     188             : FD_FN_CONST static inline ulong
     189           0 : fd_tar_reader_footprint( void ) {
     190           0 :   return sizeof(fd_tar_reader_t); /* size of the struct itself; no trailing storage */
     191           0 : }
     192             : 
     193             : /* fd_tar_reader_new creates a new TAR reader.  mem is the memory region
     194             :    that will hold the fd_tar_reader_t (matches above align/ footprint
     195             :    requirements).  cb_vt contains the callback function pointers of
     196             :    the recipient.  cb_vt pointer is borrowed until this function
     197             :    returns.  cb_arg is the callback context value (usually a pointer to
     198             :    the recipient object).  Returns a qualified handle to the reader
     199             :    object in mem on success.  On failure, returns NULL and writes reason
     200             :    to warning log.  Reasons for failure include invalid memory region or
     201             :    NULL callback. */
     202             : 
     203             : fd_tar_reader_t *
     204             : fd_tar_reader_new( void *                       mem,
     205             :                    fd_tar_read_vtable_t const * cb_vt,
     206             :                    void *                       cb_arg );
     207             : 
     208             : /* fd_tar_reader_delete destroys a .tar reader and frees any allocated
     209             :    resources.  Returns the underlying memory region back to the caller. */
     210             : 
     211             : void *
     212             : fd_tar_reader_delete( fd_tar_reader_t * reader );
     213             : 
     214             : /* fd_tar_read processes a chunk of the TAR stream.  Issues callbacks
     215             :    when file headers or content are read.  reader is an fd_tar_reader_t
     216             :    pointer.  data points to the first byte of the data chunk.  data_sz
     217             :    is the byte count.  data_sz==0UL is a no-op.  Returns 0 on success.
     218             :    Returns -1 on end-of-file.  On failure, returns positive errno
     219             :    compatible error code.  In case of error, caller should delete reader
     220             :    and must not issue any more fd_tar_read calls.  Suitable as a
     221             :    fd_decompress_cb_t callback. */
     222             : 
     223             : int
     224             : fd_tar_read( void *        reader,
     225             :              uchar const * data,
     226             :              ulong         data_sz );
     227             : 
     228             : /* Streaming writer ***************************************************/
     229             : 
     230             : /* TL;DR. I didn't read the code. How do I use this?
     231             : 
     232             :    Init with fd_tar_writer_new( mem, fd ), where fd is an open file descriptor.
     233             : 
     234             :    For each file you want to add to the archive:
     235             :     1. Write out tar header with fd_tar_writer_new_file( writer, file_name )
     236             :     2. Write out file data with fd_tar_writer_write_file_data( writer, data, data_sz ).
     237             :        This can be done as many times as you want.
     238             :     3. Finish the current file with fd_tar_writer_fini_file( writer ).
     239             :   
     240             :    When you are done, call fd_tar_writer_delete( writer ) to write out the 
     241             :    tar archive trailer and close out the file descriptor.
     242             : 
     243             :    If you want to reserve space for an existing file and write back to it 
     244             :    at some point in the future see the below comments for
     245             :    fd_tar_writer_{make,fill}_space().
     246             :    
     247             :    */
     248             : 
     249             : struct fd_tar_writer {
     250             :   int                      fd;         /* The file descriptor for the tar archive. */
     251             :   ulong                    header_pos; /* The position in the file of the current file's header.
     252             :                                           If there is no current file that is being streamed out,
     253             :                                           the header_pos will be equal to ULONG_MAX. */
     254             :   ulong                    data_sz;    /* The size of the current file's data. If there is no
     255             :                                           current file that is being streamed out, the data_sz
     256             :                                           will be equal to ULONG_MAX. */
     257             :   ulong                    wb_pos;     /* If this value is not equal to ULONG_MAX, it is the
     258             :                                           position to write back to with a call to
     259             :                                           fd_tar_writer_fill_space. */
     260             :   /* TODO: Right now, the stream to the tar writer just uses fd_io_write.
     261             :      This can eventually be abstracted to use write callbacks that use
     262             :      fd_io streaming under the hood. This adds some additional complexity
     263             :      that's related to writing back into the header: if the header is still
     264             :      in the ostream buf, modify the buffer. Otherwise, read the header
     265             :      directly from the file. */
     266             : 
     267             : };
     268             : typedef struct fd_tar_writer fd_tar_writer_t;
     269             : 
     270             : FD_FN_CONST static inline ulong
     271           0 : fd_tar_writer_align( void ) { /* alignment required of the memory region backing a fd_tar_writer_t */
     272           0 :   return alignof(fd_tar_writer_t);
     273           0 : }
     274             : 
     275             : FD_FN_CONST static inline ulong
     276           0 : fd_tar_writer_footprint( void ) { /* size in bytes required of the memory region backing a fd_tar_writer_t */
     277           0 :   return sizeof(fd_tar_writer_t);
     278           0 : }
     279             : 
     280             : /* fd_tar_writer_new creates a new TAR writer. mem is the memory region
     281             :    that will hold the fd_tar_writer_t (matches above align/footprint
     282             :    requirements). Returns a qualified handle to the tar writer
     283             :    object in mem on success. On failure, returns NULL and writes reason
     284             :    to warning log. Reasons for failure include invalid memory region.
     285             :    The writer will enable the user to write/stream out files of variable
     286             :    size into a continual stream. The writer should persist for the span of
     287             :    a single tar archive. The user is responsible for passing in an open, valid
     288             :    file descriptor. */
     289             : 
     290             : fd_tar_writer_t *
     291             : fd_tar_writer_new( void * mem, int fd );
     292             : 
     293             : /* fd_tar_writer_delete destroys a tar writer and frees any allocated
     294             :    resources. Returns the underlying memory region back to the caller.
     295             :    This writer will also handle cleanup for the tar archive: it will write
     296             :    out the tar archive trailer and will close the underlying file descriptor. */
     297             : 
     298             : void *
     299             : fd_tar_writer_delete( fd_tar_writer_t * writer );
     300             : 
     301             : /* fd_tar_writer_new_file writes out a file header, it will leave certain
     302             :    fields blank to allow for writing back of header metadata that is unknown
     303             :    until the file is done streaming out. The user must enforce the invariant that
     304             :    this can only be called after fd_tar_writer_fini_file() or fd_tar_writer_new() */
     305             : 
     306             : int
     307             : fd_tar_writer_new_file( fd_tar_writer_t * writer,
     308             :                         char const *      file_name );
     309             : 
     310             : /* fd_tar_writer_write_file_data will write out a variable amount of bytes to the
     311             :    writer's tarball. This can be called multiple times for a single file.
     312             :    The user must enforce the invariant that this function follows a call
     313             :    to fd_tar_writer_new_file and precedes a call to fd_tar_writer_fini_file. If this
     314             :    invariant isn't enforced, then the tar writer will silently produce an
     315             :    invalid file. */
     316             : 
     317             : int
     318             : fd_tar_writer_write_file_data( fd_tar_writer_t * writer,
     319             :                                void const *      data,
     320             :                                ulong             data_sz );
     321             : 
     322             : /* fd_tar_writer_fini_file will write out any alignment bytes to the current file's
     323             :    data. It will then write back to the file header with the file size and
     324             :    the checksum. */
     325             : 
     326             : int
     327             : fd_tar_writer_fini_file( fd_tar_writer_t * writer );
     328             : 
     329             : /* fd_tar_writer_make_space and fd_tar_writer_fill_space, allow for writing
     330             :    back to a specific place in the tar stream. This can be used by first
     331             :    making a call to fd_tar_writer_new_file, fd_tar_writer_make_space, and
     332             :    fd_tar_writer_fini_file. This will populate the header and write out 
     333             :    random bytes. The start of this data file will be saved by the tar writer.
     334             :    Up to n data files can be appended to the tar archive before a call to 
     335             :    fd_tar_writer_fill_space. fd_tar_writer_fill_space should only be called
     336             :    after an unpaired call to fd_tar_writer_make_space and it requires a valid
     337             :    fd_tar_writer_t handle. It allows the user to write back to the point at
     338             :    which they made space. _make_space and _fill_space should be paired together.
     339             :    There can only be one outstanding call to make_space at a time.
     340             :    
     341             :    TODO: This can be extended to support multiple write backs. */
     342             : 
     343             : int
     344             : fd_tar_writer_make_space( fd_tar_writer_t * writer, ulong sz );
     345             : 
     346             : int
     347             : fd_tar_writer_fill_space( fd_tar_writer_t * writer, void const * data, ulong sz );
     348             : 
     349             : FD_PROTOTYPES_END
     350             : 
     351             : #endif /* HEADER_fd_src_archive_fd_tar_h */

Generated by: LCOV version 1.14