Line data Source code
1 : #ifndef HEADER_fd_src_archive_fd_tar_h 2 : #define HEADER_fd_src_archive_fd_tar_h 3 : 4 : /* fd_tar implements the ustar and old-GNU versions of the TAR file 5 : format. This is not a general-purpose TAR implementation. It is 6 : currently only intended for loading and writing Solana snapshots. */ 7 : 8 : #include "../fd_util_base.h" 9 : #include "../io/fd_io.h" 10 : 11 : /* File Format ********************************************************/ 12 : 13 : /* The high level format of a tar archive/ball is a set of 512 byte blocks. 14 : Each file will be described a tar header (fd_tar_meta_t) and will be 15 : followed by the raw bytes of the file. The last block that is used for 16 : the file will be padded to fit into a tar block. When the archive is 17 : completed, it will be trailed by two EOF blocks which are populated with 18 : zero bytes. */ 19 : 20 : /* fd_tar_meta_t is the ustar/OLDGNU version of the TAR header. */ 21 : 22 0 : #define FD_TAR_BLOCK_SZ (512UL) 23 : 24 : struct __attribute__((packed)) fd_tar_meta { 25 0 : # define FD_TAR_NAME_SZ (100) 26 : /* 0x000 */ char name [ FD_TAR_NAME_SZ ]; 27 : /* 0x064 */ char mode [ 8 ]; 28 : /* 0x06c */ char uid [ 8 ]; 29 : /* 0x074 */ char gid [ 8 ]; 30 : /* 0x07c */ char size [ 12 ]; 31 : /* 0x088 */ char mtime [ 12 ]; 32 : /* 0x094 */ char chksum [ 8 ]; 33 : /* 0x09c */ char typeflag; 34 : /* 0x09d */ char linkname[ 100 ]; 35 : /* 0x101 */ char magic [ 6 ]; 36 : /* 0x107 */ char version [ 2 ]; 37 : /* 0x109 */ char uname [ 32 ]; 38 : /* 0x129 */ char gname [ 32 ]; 39 : /* 0x149 */ char devmajor[ 8 ]; 40 : /* 0x151 */ char devminor[ 8 ]; 41 : /* 0x159 */ char prefix [ 155 ]; 42 : /* 0x1f4 */ char padding [ 12 ]; 43 : }; 44 : 45 : typedef struct fd_tar_meta fd_tar_meta_t; 46 : 47 : /* FD_TAR_MAGIC is the only value of fd_tar_meta::magic supported by 48 : fd_tar. */ 49 : 50 : #define FD_TAR_MAGIC "ustar" 51 : 52 : /* Known file types */ 53 : 54 54 : #define FD_TAR_TYPE_NULL ('\0') /* implies FD_TAR_TYPE_REGULAR */ 55 108 : #define FD_TAR_TYPE_REGULAR ('0') 56 : #define FD_TAR_TYPE_HARD_LINK ('1') 57 : #define FD_TAR_TYPE_SYM_LINK ('2') 58 : #define FD_TAR_TYPE_CHAR_DEV ('3') 59 : #define FD_TAR_TYPE_BLOCK_DEV ('4') 60 : #define FD_TAR_TYPE_DIR ('5') 61 : #define FD_TAR_TYPE_FIFO ('6') 62 : 63 : FD_PROTOTYPES_BEGIN 64 : 65 : /* fd_tar_meta_is_reg returns 1 if the file type is 'regular', and 0 66 : otherwise. */ 67 : 68 : FD_FN_PURE static inline int 69 54 : fd_tar_meta_is_reg( fd_tar_meta_t const * meta ) { 70 54 : return ( meta->typeflag == FD_TAR_TYPE_NULL ) 71 54 : | ( meta->typeflag == FD_TAR_TYPE_REGULAR ); 72 54 : } 73 : 74 : /* fd_tar_meta_get_size parses the size field of the TAR header. 75 : Returns ULONG_MAX if parsing failed. */ 76 : 77 : FD_FN_PURE ulong 78 : fd_tar_meta_get_size( fd_tar_meta_t const * meta ); 79 : 80 : /* fd_tar_set_octal is a helper function to write 12-byte octal fields */ 81 : 82 : int 83 : fd_tar_set_octal( char buf[ static 12 ], 84 : ulong val ); 85 : 86 : /* fd_tar_meta_set_size sets the size field. Returns 1 on success, 0 87 : if sz is too large to be represented in TAR header. Set size using the 88 : OLDGNU size extension to allow for unlimited file sizes. The first byte 89 : must be 0x80 followed by 0s and then the size in binary. */ 90 : 91 : static inline int 92 : fd_tar_meta_set_size( fd_tar_meta_t * meta, 93 0 : ulong sz ) { 94 0 : meta->size[ 0 ] = (char)0x80; 95 0 : FD_STORE( ulong, meta->size + 4UL, fd_ulong_bswap( sz ) ); 96 0 : return 1; 97 0 : } 98 : 99 : /* fd_tar_meta_set_mtime sets the modification time field. Returns 1 100 : on success, 0 if time cannot be represented in TAR header. */ 101 : 102 : static inline int 103 : fd_tar_meta_set_mtime( fd_tar_meta_t * meta, 104 0 : ulong mtime ) { 105 0 : return fd_tar_set_octal( meta->mtime, mtime ); 106 0 : } 107 : 108 : FD_PROTOTYPES_END 109 : 110 : /* Streaming reader ***************************************************/ 111 : 112 : typedef struct fd_tar_reader fd_tar_reader_t; 113 : 114 : /* fd_tar_file_fn_t is called by fd_tar when a new file was encountered. 115 : cb_arg is the callback context value. meta is the file header 116 : (lifetime until return). sz is the expected file size that follows 117 : (via read callbacks). The actual read size might differ in case of 118 : errors (e.g. unexpected EOF). Returns 0 on success and non-zero if 119 : tar reader should stop. */ 120 : 121 : typedef int 122 : (* fd_tar_file_fn_t)( void * cb_arg, 123 : fd_tar_meta_t const * meta, 124 : ulong sz ); 125 : 126 : /* fd_tar_read_cb_t is called by fd_tar when a new chunk of data has 127 : been read. Each read callback is associated with the last file 128 : callback. Read callbacks are issued in order such that concatenating 129 : all buffers results in the correct file content. Returns 0 on 130 : success and non-zero if tar reader should stop. 131 : 132 : cb_arg is the callback context value. buf points to the first byte 133 : of the chunk. bufsz is the byte count. The lifetime of buf is until 134 : the callback returns. */ 135 : 136 : typedef int 137 : (* fd_tar_read_fn_t)( void * cb_arg, 138 : void const * buf, 139 : ulong bufsz ); 140 : 141 : /* fd_tar_read_vtable_t is the virtual function table of the 142 : fd_tar_reader_t consumer object. */ 143 : 144 : struct fd_tar_read_vtable { 145 : fd_tar_file_fn_t file; 146 : fd_tar_read_fn_t read; 147 : }; 148 : 149 : typedef struct fd_tar_read_vtable fd_tar_read_vtable_t; 150 : 151 : /* fd_tar_reader_t is a streaming TAR reader using a callback API for 152 : delivering data. To use, feed it the chunks of the TAR stream via 153 : fd_tar_read. There is no restriction on the size and alignment of 154 : these chunks, other than that the chunks are supplied in order and 155 : gapless. The resulting callback sequence is (1x file, Nx read, 1x 156 : file, Nx read ...). As in: Each new file encountered creates a file 157 : callback and a variable number of read callbacks. */ 158 : 159 : struct fd_tar_reader { 160 : 161 : /* Buffered file header. Required because a file header might be 162 : split across multiple fd_tar_read calls. */ 163 : union { 164 : uchar buf[ sizeof(fd_tar_meta_t) ]; 165 : fd_tar_meta_t header; 166 : }; 167 : 168 : ulong pos; /* Number of bytes consumed */ 169 : ulong buf_ctr; /* Write cursor in file header */ 170 : ulong file_sz; /* Number of file bytes left */ 171 : 172 : /* Callback parameters */ 173 : fd_tar_read_vtable_t cb_vt; 174 : void * cb_arg; 175 : 176 : }; 177 : 178 : FD_PROTOTYPES_BEGIN 179 : 180 : /* fd_tar_reader_{align,footprint} return parameters for the memory 181 : region backing a fd_tar_reader_t. */ 182 : 183 : FD_FN_CONST static inline ulong 184 0 : fd_tar_reader_align( void ) { 185 0 : return alignof(fd_tar_reader_t); 186 0 : } 187 : 188 : FD_FN_CONST static inline ulong 189 0 : fd_tar_reader_footprint( void ) { 190 0 : return sizeof(fd_tar_reader_t); 191 0 : } 192 : 193 : /* fd_tar_reader_new creates a new TAR reader. mem is the memory region 194 : that will hold the fd_tar_reader_t (matches above align/ footprint 195 : requirements). cb_vt contains the callback function pointers of 196 : the recipient. cb_vt pointer is borrowed until this function 197 : returns. cb_arg is the callback context value (usually a pointer to 198 : the recipient object). Returns a qualified handle to the reader 199 : object in mem on success. On failure, returns NULL and writes reason 200 : to warning log. Reasons for failure include invalid memory region or 201 : NULL callback. */ 202 : 203 : fd_tar_reader_t * 204 : fd_tar_reader_new( void * mem, 205 : fd_tar_read_vtable_t const * cb_vt, 206 : void * cb_arg ); 207 : 208 : /* fd_tar_reader_delete destroys a .tar reader and frees any allocated 209 : resources. Returns the underlying memory region back to the caller. */ 210 : 211 : void * 212 : fd_tar_reader_delete( fd_tar_reader_t * reader ); 213 : 214 : /* fd_tar_read processes a chunk of the TAR stream. Issues callbacks 215 : when file headers or content are read. reader is an fd_tar_reader_t 216 : pointer. data points to the first byte of the data chunk. data_sz 217 : is the byte count. data_sz==0UL is a no-op. Returns 0 on success. 218 : Returns -1 on end-of-file. On failure, returns positive errno 219 : compatible error code. In case of error, caller should delete reader 220 : and must not issue any more fd_tar_read calls. Suitable as a 221 : fd_decompress_cb_t callback. */ 222 : 223 : int 224 : fd_tar_read( void * reader, 225 : uchar const * data, 226 : ulong data_sz ); 227 : 228 : /* Streaming writer ***************************************************/ 229 : 230 : /* TL;DR. I didn't read the code. How do I use this? 231 : 232 : Init with fd_tar_writer_new( mem, tarball_name ). 233 : 234 : For each file you want to add to the archive: 235 : 1. Write out tar header with fd_tar_writer_new_file( writer, file_name ) 236 : 2. Write out file data with fd_tar_writer_write_file_data( writer, data, data_sz ). 237 : This can be done as many times as you want. 238 : 3. Finish the current file with fd_tar_writer_fini_file( writer ). 239 : 240 : When you are done, call fd_tar_writer_delete( writer ) to write out the 241 : tar archive trailer and close otu the file descriptor. 242 : 243 : If you want to reserve space for an existing file and write back to it 244 : at some point in the future see the below comments for 245 : fd_tar_writer_{make,fill}_space(). 246 : 247 : */ 248 : 249 : struct fd_tar_writer { 250 : int fd; /* The file descriptor for the tar archive. */ 251 : ulong header_pos; /* The position in the file for the current files header. 252 : If there is no current file that is being streamed out, 253 : the header_pos will be equal to ULONG_MAX. */ 254 : ulong data_sz; /* The size of the current files data. If there is no 255 : current file that is being streamed out, the data_sz 256 : will be equal to ULONG_MAX. */ 257 : ulong wb_pos; /* If this value is not equal to ULONG_MAX that means that 258 : this is the position at which to write back to with a 259 : call to fd_tar_writer_fill_space. */ 260 : /* TODO: Right now, the stream to the tar writer just uses fd_io_write. 261 : This can eventually be abstracted to use write callbacks that use 262 : fd_io streaming under the hood. This adds some additional complexity 263 : that's related to writing back into the header: if the header is still 264 : in the ostream buf, modify the buffer. Otherwise, read the header 265 : directly from the file. */ 266 : 267 : }; 268 : typedef struct fd_tar_writer fd_tar_writer_t; 269 : 270 : FD_FN_CONST static inline ulong 271 0 : fd_tar_writer_align( void ) { 272 0 : return alignof(fd_tar_writer_t); 273 0 : } 274 : 275 : FD_FN_CONST static inline ulong 276 0 : fd_tar_writer_footprint( void ) { 277 0 : return sizeof(fd_tar_writer_t); 278 0 : } 279 : 280 : /* fd_tar_writer_new creates a new TAR writer. mem is the memory region 281 : that will hold the fd_tar_writer_t (matches above align/footprint 282 : requirements). Returns a qualified handle to the tar writer 283 : object in mem on success. On failure, returns NULL and writes reason 284 : to warning log. Reasons for failure include invalid memory region. 285 : The writer will enable the user to write/stream out files of variable 286 : size into a continual stream. The writer should persist for the span of 287 : a single tar archive. The user is repsonsible for passing in an open, valid 288 : file descriptor. */ 289 : 290 : fd_tar_writer_t * 291 : fd_tar_writer_new( void * mem, int fd ); 292 : 293 : /* fd_tar_writer_delete destroys a tar writer and frees any allocated 294 : resources. Returns the underlying memory region back to the caller. 295 : This writer will also handle cleanup for the tar archive: it will write 296 : out the tar archive trailer and will close the underlying file descriptor. */ 297 : 298 : void * 299 : fd_tar_writer_delete( fd_tar_writer_t * writer ); 300 : 301 : /* fd_tar_write_new_file writes out a file header, it will leave certain 302 : fields blank to allow for writing back of header metadata that is unknown 303 : until the file done streaming out. The user must enforce the invariant that 304 : this can only be called after fd_tar_fini_file() orfd_tar_writer_new() */ 305 : 306 : int 307 : fd_tar_writer_new_file( fd_tar_writer_t * writer, 308 : char const * file_name ); 309 : 310 : /* fd_tar_writer_write_file_data will write out a variable amount of bytes to the 311 : writer's tarball. This can be called multiple times for a single file. 312 : The user must enforce the invariant that this function succeeded a call 313 : to fd_tar_new_file and should precede a call to fd_tar_fini_file. If this 314 : invariant isn't enforced, then the tar writer will silently produce an 315 : invalid file. */ 316 : 317 : int 318 : fd_tar_writer_write_file_data( fd_tar_writer_t * writer, 319 : void const * data, 320 : ulong data_sz ); 321 : 322 : /* fd_tar_fini_file will write out any alignment bytes to the current file's 323 : data. It will then write back to the file header with the file size and 324 : the checksum. */ 325 : 326 : int 327 : fd_tar_writer_fini_file( fd_tar_writer_t * writer ); 328 : 329 : /* fd_tar_writer_make_space and fd_tar_writer_fill_space, allow for writing 330 : back to a specific place in the tar stream. This can be used by first 331 : making a call to fd_tar_write_new_file, fd_tar_writer_make_space, and 332 : fd_tar_writer_fini_file. This will populate the header and write out 333 : random bytes. The start of this data file will be saved by the tar writer. 334 : Up to n data files can be appended to the tar archive before a call to 335 : fd_tar_writer_fill_space. fd_tar_writer_fill_space should only be called 336 : after an unpaired call to fd_tar_writer_make_space and it requires a valid 337 : fd_tar_writer_t handle. It allows the user to write back to the point at 338 : which they made space. _make_space and _fill_space should be paired together. 339 : There can only be one oustanding call to make_space at a time. 340 : 341 : TODO: This can be extended to support multiple write backs. */ 342 : 343 : int 344 : fd_tar_writer_make_space( fd_tar_writer_t * writer, ulong sz ); 345 : 346 : int 347 : fd_tar_writer_fill_space( fd_tar_writer_t * writer, void const * data, ulong sz ); 348 : 349 : FD_PROTOTYPES_END 350 : 351 : #endif /* HEADER_fd_src_archive_fd_tar_h */