LCOV - code coverage report
Current view: top level - util/archive - fd_tar_writer.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 0 210 0.0 %
Date: 2025-01-08 12:08:44 Functions: 0 7 0.0 %

          Line data    Source code
       1             : #include "fd_tar.h"
       2             : #include "../fd_util.h"
       3             : 
       4             : #include <errno.h>
       5             : #include <fcntl.h>
       6             : #include <unistd.h>
       7             : #include <stdio.h>
       8             : 
       9             : static char null_tar_block[ FD_TAR_BLOCK_SZ ] = {0};
      10             : 
      11           0 : #define FD_TAR_PERM           ("0000644\0")
      12           0 : #define FD_TAR_MAGIC_VERSION  ("ustar  \0")
      13           0 : #define FD_TAR_DEFAULT_CHKSUM ("        " )
      14             : 
      15             : fd_tar_writer_t *
      16           0 : fd_tar_writer_new( void * mem, int fd ) {
      17             : 
      18             :   /* Validate the caller-provided memory region for the writer. */
      19             : 
      20           0 :   if( FD_UNLIKELY( !mem ) ) {
      21           0 :     FD_LOG_WARNING(( "NULL mem" ));
      22           0 :     return NULL;
      23           0 :   }
      24             : 
      25           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, fd_tar_writer_align() ) ) ) {
      26           0 :     FD_LOG_WARNING(( "unaligned mem" ));
      27           0 :     return NULL;
      28           0 :   }
      29             : 
      30           0 :   fd_tar_writer_t * writer = (fd_tar_writer_t *)mem;
      31             : 
      32             :   /* Make sure that the file descriptor is valid. */
      33             : 
      34           0 :   if( FD_UNLIKELY( fd<=0 ) ) {
      35           0 :     FD_LOG_WARNING(( "Invalid file descriptor" ));
      36           0 :     return NULL;
      37           0 :   }
      38             : 
      39             :   /* If the file already exists, truncate its length to zero. */
      40             : 
      41           0 :   int err = ftruncate( fd, 0UL );
      42           0 :   if( FD_UNLIKELY( err==-1 ) ) {
      43           0 :     FD_LOG_WARNING(( "Failed to truncate tarball (%i-%s)", errno, fd_io_strerror( errno ) ));
      44           0 :     return NULL;
      45           0 :   }
      46             : 
      47           0 :   writer->fd         = fd;
      48           0 :   writer->header_pos = ULONG_MAX;
      49           0 :   writer->data_sz    = ULONG_MAX;
      50           0 :   writer->wb_pos     = ULONG_MAX;
      51             : 
      52           0 :   return writer;
      53           0 : }
      54             : 
      55             : void *
      56           0 : fd_tar_writer_delete( fd_tar_writer_t * writer ) {
      57             : 
      58             :   /* The end of a tar archive is marked with two EOF 512 byte blocks that are
      59             :      filled with zeros. These must be written out. */
      60             : 
      61           0 :   ulong out_sz = 0UL;
      62           0 :   int err = fd_io_write( writer->fd, null_tar_block, FD_TAR_BLOCK_SZ, FD_TAR_BLOCK_SZ, &out_sz );
      63           0 :   if( FD_UNLIKELY( err ) ) {
      64           0 :     FD_LOG_WARNING(( "Failed to write out the first tar trailer (%i-%s)", errno, fd_io_strerror( errno ) ));
      65           0 :     return NULL;
      66           0 :   }
      67           0 :   err = fd_io_write( writer->fd, null_tar_block, FD_TAR_BLOCK_SZ, FD_TAR_BLOCK_SZ, &out_sz );
      68           0 :   if( FD_UNLIKELY( err ) ) {
      69           0 :     FD_LOG_WARNING(( "Failed to write out the second tar trailer (%i-%s)", errno, fd_io_strerror( errno ) ));
      70           0 :     return NULL;
      71           0 :   }
      72             : 
      73           0 :   return (void*)writer;
      74           0 : }
      75             : 
      76             : int
      77             : fd_tar_writer_new_file( fd_tar_writer_t * writer,
      78           0 :                         char const *      file_name ) {
      79             : 
      80             :   /* TODO: This function currently fills in the bare minimum to get processed 
      81             :      by Agave, Firedancer, and most tar command line tools. To make this tool
      82             :      more robust and generalizable, it may make sense to populate some of the
      83             :      other fields in the tar header. */
      84             : 
      85             :   /* Save position of the header in the file and do simple sanity checks. */
      86             : 
      87           0 :   long header_pos = lseek( writer->fd, 0, SEEK_CUR );
      88           0 :   if( FD_UNLIKELY( header_pos==-1L ) ) {
      89           0 :     FD_LOG_WARNING(( "Failed to get the current file position" ));
      90           0 :     return -1;
      91           0 :   }
      92             : 
      93             : 
      94           0 :   writer->header_pos = (ulong)header_pos;
      95             : 
      96           0 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( writer->header_pos, FD_TAR_BLOCK_SZ ) ) ) {
      97           0 :     FD_LOG_WARNING(( "Unaligned header position %lu", writer->header_pos ));
      98           0 :     return -1;
      99           0 :   }
     100             :   
     101             :   /* Populate what fields you can in the header */
     102             : 
     103           0 :   fd_tar_meta_t meta = {0};
     104             : 
     105             :   /* Copy in file name */
     106             : 
     107           0 :   fd_memcpy( &meta.name, file_name, strlen( file_name ) );
     108             : 
     109             :   /* Copy in the mode: it is always 0644, left padded with zeros.
     110             :      TODO: make this mode configurable in the future. */
     111             : 
     112           0 :   fd_memcpy( &meta.mode, FD_TAR_PERM, sizeof(FD_TAR_PERM) );
     113             :   
     114             :   /* Copy in the magic and version */
     115             : 
     116           0 :   fd_memcpy( &meta.magic, FD_TAR_MAGIC_VERSION, sizeof(FD_TAR_MAGIC_VERSION) );
     117             : 
     118             :   /* Write in the temporary value for the checksum. The tar format dictates
     119             :      that the checksum bytes should be spaces when it is calculated. */
     120             : 
     121           0 :   fd_memcpy( &meta.chksum, FD_TAR_DEFAULT_CHKSUM, sizeof(FD_TAR_DEFAULT_CHKSUM) );
     122             : 
     123           0 :   ulong out_sz = 0UL;
     124           0 :   int   err    = fd_io_write( writer->fd, &meta, FD_TAR_BLOCK_SZ, FD_TAR_BLOCK_SZ, &out_sz );
     125           0 :   if( FD_UNLIKELY( err ) ) {
     126           0 :     FD_LOG_WARNING(( "Failed to write out the header (%i-%s)", errno, fd_io_strerror( errno ) ));
     127           0 :     return -1;
     128           0 :   }
     129             : 
     130           0 :   if( FD_UNLIKELY( out_sz!=FD_TAR_BLOCK_SZ ) ) {
     131           0 :     FD_LOG_WARNING(( "Failed to write out correct size header (%lu)", out_sz ));
     132           0 :     return -1;
     133           0 :   }
     134             : 
     135             :   /* Now that the header is written out, reset the data size to prepare
     136             :      for the file to be written out. */
     137             : 
     138           0 :   writer->data_sz  = 0UL;
     139             : 
     140           0 :   return 0;
     141           0 : }
     142             : 
     143             : int
     144             : fd_tar_writer_write_file_data( fd_tar_writer_t * writer,
     145             :                                void const *      data,
     146           0 :                                ulong             data_sz ) {
     147             :   
     148           0 :   if( FD_UNLIKELY( writer->header_pos==ULONG_MAX ) ) {
     149           0 :     FD_LOG_WARNING(( "There is no corresponding tar header for the tar write" ));
     150           0 :     return -1;
     151           0 :   }
     152             : 
     153             :   /* Simply write out the data and update the data_sz field. */
     154             : 
     155           0 :   ulong out_sz = 0UL;
     156           0 :   int   err    = fd_io_write( writer->fd, data, data_sz, data_sz, &out_sz );
     157           0 :   if( FD_UNLIKELY( err ) ) {
     158           0 :     FD_LOG_WARNING(( "Failed to write out the data (%i-%s)", errno, fd_io_strerror( errno ) ));
     159           0 :     return -1;
     160           0 :   }
     161           0 :   if( FD_UNLIKELY( out_sz!=data_sz ) ) {
     162           0 :     FD_LOG_WARNING(( "Failed to write out the data (%lu)", out_sz ));
     163           0 :     return -1;
     164           0 :   }
     165             : 
     166           0 :   writer->data_sz += data_sz;
     167             : 
     168           0 :   return 0;
     169           0 : }
     170             : 
     171             : int
     172           0 : fd_tar_writer_fini_file( fd_tar_writer_t * writer ) {
     173             : 
     174             :   /* Pad the current file's data out to the tar block alignment (512) if
     175             :      it is not already aligned, then update the header with the file size
     176             :      and checksum. */
     177             : 
     178           0 :   ulong out_sz   = 0UL;
     179           0 :   ulong align_sz = fd_ulong_align_up( writer->data_sz, FD_TAR_BLOCK_SZ ) - writer->data_sz;
     180           0 :   int   err      = fd_io_write( writer->fd, null_tar_block, align_sz, align_sz, &out_sz );
     181           0 :   if( FD_UNLIKELY( err ) ) {
     182           0 :     FD_LOG_WARNING(( "Failed to write out the padding (%i-%s)", errno, fd_io_strerror( errno ) ));
     183           0 :     return -1;
     184           0 :   }
     185           0 :   if( FD_UNLIKELY( out_sz!=align_sz ) ) {
     186           0 :     FD_LOG_WARNING(( "Failed to write out the correct size padding (%lu)", out_sz ));
     187           0 :     return -1;
     188           0 :   }
     189             : 
     190             :   /* Now we need to write back to the header of the file. This involves
     191             :      first setting the file pointer to where we expect the header to be.  */
     192             : 
     193           0 :   long eof_pos = lseek( writer->fd, 0L, SEEK_CUR );
     194           0 :   if( FD_UNLIKELY( eof_pos==-1L ) ) {
     195           0 :     FD_LOG_WARNING(( "Failed to get the current file position" ));
     196           0 :     return -1;
     197           0 :   }
     198           0 :   long seek = lseek( writer->fd, (long)writer->header_pos, SEEK_SET );
     199           0 :   if( FD_UNLIKELY( (ulong)seek!=writer->header_pos ) ) {
     200           0 :     FD_LOG_WARNING(( "Failed to seek to the header position (%ld)", seek ));
     201           0 :     return -1;
     202           0 :   }
     203             : 
     204           0 :   fd_tar_meta_t meta = {0};
     205           0 :   err = fd_io_read( writer->fd, &meta, FD_TAR_BLOCK_SZ, FD_TAR_BLOCK_SZ, &out_sz );
     206           0 :   if( FD_UNLIKELY( err ) ) {
     207           0 :     FD_LOG_WARNING(( "Failed to write out the header (%i-%s)", errno, fd_io_strerror( errno ) ));
     208           0 :     return -1;
     209           0 :   }
     210           0 :   if( FD_UNLIKELY( out_sz!=FD_TAR_BLOCK_SZ ) ) {
     211           0 :     FD_LOG_WARNING(( "Failed to write out the correct size header (%lu)", out_sz ));
     212           0 :     return -1;
     213           0 :   }
     214             : 
     215             :   /* The file pointer is now at the start of the file data and should be 
     216             :      moved back to the start of the file header. */
     217             : 
     218           0 :   seek = lseek( writer->fd, (long)writer->header_pos, SEEK_SET );
     219           0 :   if( FD_UNLIKELY( (ulong)seek!=writer->header_pos ) ) {
     220           0 :     FD_LOG_WARNING(( "Failed to seek to the header position (%ld)", seek ));
     221           0 :     return -1;
     222           0 :   }
     223             : 
     224             :   /* Now that the tar header is read in, update the size in the header. */
     225             : 
     226           0 :   err = fd_tar_meta_set_size( &meta, writer->data_sz );
     227           0 :   if( FD_UNLIKELY( !err ) ) {
     228           0 :     FD_LOG_WARNING(( "Failed to set the size in the header" ));
     229           0 :     return -1;
     230           0 :   }
     231             : 
     232             :   /* Write in the checksum which is left padded with zeros */
     233             : 
     234           0 :   uint checksum = 0UL;
     235           0 :   for( ulong i=0UL; i<FD_TAR_BLOCK_SZ; i++ ) {
     236           0 :     checksum += ((uchar *)&meta)[i];
     237           0 :   }
     238           0 :   snprintf( meta.chksum, sizeof(meta.chksum), "%07o", checksum );
     239             : 
     240             :   /* Now write out the updated header */
     241             : 
     242           0 :   err = fd_io_write( writer->fd, &meta, FD_TAR_BLOCK_SZ, FD_TAR_BLOCK_SZ, &out_sz );
     243           0 :   if( FD_UNLIKELY( err ) ) {
     244           0 :     FD_LOG_WARNING(( "Failed to write out the header (%i-%s)", errno, fd_io_strerror( errno ) ));
     245           0 :     return -1;
     246           0 :   }
     247           0 :   if( FD_UNLIKELY( out_sz!=FD_TAR_BLOCK_SZ ) ) {
     248           0 :     FD_LOG_WARNING(( "Failed to write out the correct size header (%lu)", out_sz ));
     249           0 :     return -1;
     250           0 :   }
     251             : 
     252             :   /* Reset the file pointer to the end of the file so that we can continue 
     253             :      writing out the next file. */
     254             : 
     255           0 :   seek = lseek( writer->fd, 0L, SEEK_END );
     256           0 :   if( FD_UNLIKELY( seek!=eof_pos ) ) {
     257           0 :     return -1;
     258           0 :   }
     259             : 
     260             :   /* Reset the data_sz/header pointers as there is no outstanding write. */
     261             :   
     262           0 :   writer->header_pos = ULONG_MAX;
     263           0 :   writer->data_sz    = ULONG_MAX;
     264             :  
     265           0 :   return 0;
     266           0 : }
     267             : 
     268             : int
     269           0 : fd_tar_writer_make_space( fd_tar_writer_t * writer, ulong data_sz ) {
     270             : 
     271           0 :   if( FD_UNLIKELY( writer->wb_pos!=ULONG_MAX )) {
     272           0 :     FD_LOG_WARNING(( "There is an outstanding write back position" ));
     273           0 :     return -1;
     274           0 :   }
     275             : 
     276             :   /* Extend the size of the file to make space that can be written back to.
     277             :      TODO: In the future, this can be made into a hole to avoid preallocating
     278             :      space. */
     279             : 
     280           0 :   long file_sz = lseek( writer->fd, 0L, SEEK_END );
     281           0 :   if( FD_UNLIKELY( file_sz==-1L ) ) {
     282           0 :     FD_LOG_WARNING(( "Failed to get the size of the tarball" ));
     283           0 :     return -1;
     284           0 :   }
     285             : 
     286           0 :   int err = ftruncate( writer->fd, file_sz + (long)data_sz );
     287           0 :   if( FD_UNLIKELY( err ) ) {
     288           0 :     FD_LOG_WARNING(( "Failed to make space in the tarball (%i-%s)", errno, fd_io_strerror( errno ) ));
     289           0 :     return -1;
     290           0 :   }
     291             :   
     292             :   /* Seek to the new end of the file. */
     293             : 
     294           0 :   long new_sz = lseek( writer->fd, 0, SEEK_END );
     295           0 :   if( FD_UNLIKELY( new_sz!=file_sz+(long)data_sz ) ) {
     296           0 :     FD_LOG_WARNING(( "Failed to make space in the tarball" ));
     297           0 :     return -1;
     298           0 :   }
     299             : 
     300           0 :   writer->data_sz = data_sz;
     301           0 :   writer->wb_pos  = (ulong)file_sz;
     302             : 
     303           0 :   return 0;
     304           0 : }
     305             : 
     306             : int
     307           0 : fd_tar_writer_fill_space( fd_tar_writer_t * writer, void const * data, ulong data_sz ) {
     308             : 
     309           0 :   if( FD_UNLIKELY( writer->wb_pos==ULONG_MAX ) ) {
     310           0 :     FD_LOG_WARNING(( "There is no outstanding write back position" ));
     311           0 :     return -1;
     312           0 :   }
     313             : 
     314           0 :   long eof_pos = lseek( writer->fd, 0, SEEK_END );
     315           0 :   if( FD_UNLIKELY( eof_pos==-1L ) ) {
     316           0 :     FD_LOG_WARNING(( "Failed to seek to the end of the file" ));
     317           0 :     return -1;
     318           0 :   }
     319             : 
     320           0 :   long seek = lseek( writer->fd, (long)writer->wb_pos, SEEK_SET );
     321           0 :   if( FD_UNLIKELY( (ulong)seek!=writer->wb_pos ) ) {
     322           0 :     FD_LOG_WARNING(( "Failed to seek to the write back position (%ld %lu)", seek, writer->wb_pos ));
     323           0 :     return -1;
     324           0 :   }
     325             : 
     326             :   /* Write back to the specified location. Once again, this is unsafe and
     327             :      you can overwrite the rest of the tar archive, making it invalid. */
     328             : 
     329           0 :   ulong out_sz = 0UL;
     330           0 :   int err = fd_io_write( writer->fd, data, data_sz, data_sz, &out_sz );
     331           0 :   if( FD_UNLIKELY( err ) ) {
     332           0 :     FD_LOG_WARNING(( "Failed to write out the data (%i-%s)", errno, fd_io_strerror( errno ) ));
     333           0 :     return -1;
     334           0 :   }
     335           0 :   if( FD_UNLIKELY( out_sz!=data_sz ) ) {
     336           0 :     FD_LOG_WARNING(( "Failed to write out the data (%lu)", out_sz ));
     337           0 :     return -1;
     338           0 :   }
     339             : 
     340           0 :   writer->wb_pos = ULONG_MAX;
     341             : 
     342           0 :   seek = lseek( writer->fd, 0, SEEK_END );
     343           0 :   if( FD_UNLIKELY( seek!=eof_pos ) ) {
     344           0 :     FD_LOG_WARNING(( "Failed to seek to the end of the file (%ld)", seek ));
     345           0 :     return -1;
     346           0 :   }
     347             : 
     348           0 :   return 0;
     349           0 : }

Generated by: LCOV version 1.14