Line data Source code
1 : #ifndef HEADER_fd_src_archive_fd_tar_h
2 : #define HEADER_fd_src_archive_fd_tar_h
3 :
4 : /* fd_tar implements the ustar and old-GNU versions of the TAR file
5 : format. This is not a general-purpose TAR implementation. It is
6 : currently only intended for loading and writing Solana snapshots. */
7 :
8 : #include "../fd_util_base.h"
9 : #include "../io/fd_io.h"
10 :
11 : /* File Format ********************************************************/
12 :
/* The high level format of a tar archive/ball is a set of 512 byte blocks.
   Each file will be described by a tar header (fd_tar_meta_t) and will be
   followed by the raw bytes of the file. The last block that is used for
   the file will be padded to fit into a tar block. When the archive is
   completed, it will be trailed by two EOF blocks which are populated with
   zero bytes. */
19 :
/* FD_TAR_BLOCK_SZ is the size in bytes of a single TAR block.  The
   header struct below occupies exactly one block: its last member
   starts at 0x1f4 and is 12 bytes long, i.e. it ends at byte 512. */

#define FD_TAR_BLOCK_SZ (512UL)

/* FD_TAR_NAME_SZ is the capacity in bytes of fd_tar_meta::name. */

#define FD_TAR_NAME_SZ (100)

/* fd_tar_meta_t is the ustar/OLDGNU version of the TAR header.  The
   struct is packed and made up of char members only; the byte offset
   of each member is given in the trailing comment. */

typedef struct fd_tar_meta fd_tar_meta_t;

struct __attribute__((packed)) fd_tar_meta {
  char name    [ FD_TAR_NAME_SZ ];  /* 0x000 */
  char mode    [   8 ];             /* 0x064 */
  char uid     [   8 ];             /* 0x06c */
  char gid     [   8 ];             /* 0x074 */
  char size    [  12 ];             /* 0x07c, see fd_tar_meta_{get,set}_size */
  char mtime   [  12 ];             /* 0x088, see fd_tar_meta_set_mtime */
  char chksum  [   8 ];             /* 0x094 */
  char typeflag;                    /* 0x09c, one of FD_TAR_TYPE_* */
  char linkname[ 100 ];             /* 0x09d */
  char magic   [   6 ];             /* 0x101, see FD_TAR_MAGIC */
  char version [   2 ];             /* 0x107 */
  char uname   [  32 ];             /* 0x109 */
  char gname   [  32 ];             /* 0x129 */
  char devmajor[   8 ];             /* 0x149 */
  char devminor[   8 ];             /* 0x151 */
  char prefix  [ 155 ];             /* 0x159 */
  char padding [  12 ];             /* 0x1f4 */
};
46 :
/* FD_TAR_MAGIC is the one and only fd_tar_meta::magic value that fd_tar
   supports. */

#define FD_TAR_MAGIC "ustar"

/* Known values of fd_tar_meta::typeflag, one per file type. */

#define FD_TAR_TYPE_NULL      ('\0') /* handled like FD_TAR_TYPE_REGULAR */
#define FD_TAR_TYPE_REGULAR   ('0')  /* regular file */
#define FD_TAR_TYPE_HARD_LINK ('1')  /* hard link */
#define FD_TAR_TYPE_SYM_LINK  ('2')  /* symbolic link */
#define FD_TAR_TYPE_CHAR_DEV  ('3')  /* character device */
#define FD_TAR_TYPE_BLOCK_DEV ('4')  /* block device */
#define FD_TAR_TYPE_DIR       ('5')  /* directory */
#define FD_TAR_TYPE_FIFO      ('6')  /* FIFO */
62 :
63 : FD_PROTOTYPES_BEGIN
64 :
65 : /* fd_tar_meta_is_reg returns 1 if the file type is 'regular', and 0
66 : otherwise. */
67 :
68 : FD_FN_PURE static inline int
69 54 : fd_tar_meta_is_reg( fd_tar_meta_t const * meta ) {
70 54 : return ( meta->typeflag == FD_TAR_TYPE_NULL )
71 54 : | ( meta->typeflag == FD_TAR_TYPE_REGULAR );
72 54 : }
73 :
74 : /* fd_tar_meta_get_size parses the size field of the TAR header.
75 : Returns ULONG_MAX if parsing failed. */
76 :
77 : FD_FN_PURE ulong
78 : fd_tar_meta_get_size( fd_tar_meta_t const * meta );
79 :
80 : /* fd_tar_set_octal is a helper function to write 12-byte octal fields */
81 :
82 : int
83 : fd_tar_set_octal( char buf[ static 12 ],
84 : ulong val );
85 :
86 : /* fd_tar_meta_set_size sets the size field. Returns 1 on success, 0
87 : if sz is too large to be represented in TAR header. Set size using the
88 : OLDGNU size extension to allow for unlimited file sizes. The first byte
89 : must be 0x80 followed by 0s and then the size in binary. */
90 :
91 : static inline int
92 : fd_tar_meta_set_size( fd_tar_meta_t * meta,
93 0 : ulong sz ) {
94 0 : meta->size[ 0 ] = (char)0x80;
95 0 : FD_STORE( ulong, meta->size + 4UL, fd_ulong_bswap( sz ) );
96 0 : return 1;
97 0 : }
98 :
99 : /* fd_tar_meta_set_mtime sets the modification time field. Returns 1
100 : on success, 0 if time cannot be represented in TAR header. */
101 :
102 : static inline int
103 : fd_tar_meta_set_mtime( fd_tar_meta_t * meta,
104 0 : ulong mtime ) {
105 0 : return fd_tar_set_octal( meta->mtime, mtime );
106 0 : }
107 :
108 : FD_PROTOTYPES_END
109 :
110 : /* Streaming reader ***************************************************/
111 :
112 : typedef struct fd_tar_reader fd_tar_reader_t;
113 :
114 : /* fd_tar_file_fn_t is called by fd_tar when a new file was encountered.
115 : cb_arg is the callback context value. meta is the file header
116 : (lifetime until return). sz is the expected file size that follows
117 : (via read callbacks). The actual read size might differ in case of
118 : errors (e.g. unexpected EOF). Returns 0 on success and non-zero if
119 : tar reader should stop. */
120 :
121 : typedef int
122 : (* fd_tar_file_fn_t)( void * cb_arg,
123 : fd_tar_meta_t const * meta,
124 : ulong sz );
125 :
126 : /* fd_tar_read_cb_t is called by fd_tar when a new chunk of data has
127 : been read. Each read callback is associated with the last file
128 : callback. Read callbacks are issued in order such that concatenating
129 : all buffers results in the correct file content. Returns 0 on
130 : success and non-zero if tar reader should stop.
131 :
132 : cb_arg is the callback context value. buf points to the first byte
133 : of the chunk. bufsz is the byte count. The lifetime of buf is until
134 : the callback returns. */
135 :
136 : typedef int
137 : (* fd_tar_read_fn_t)( void * cb_arg,
138 : void const * buf,
139 : ulong bufsz );
140 :
141 : /* fd_tar_read_vtable_t is the virtual function table of the
142 : fd_tar_reader_t consumer object. */
143 :
144 : struct fd_tar_read_vtable {
145 : fd_tar_file_fn_t file;
146 : fd_tar_read_fn_t read;
147 : };
148 :
149 : typedef struct fd_tar_read_vtable fd_tar_read_vtable_t;
150 :
151 : /* fd_tar_reader_t is a streaming TAR reader using a callback API for
152 : delivering data. To use, feed it the chunks of the TAR stream via
153 : fd_tar_read. There is no restriction on the size and alignment of
154 : these chunks, other than that the chunks are supplied in order and
155 : gapless. The resulting callback sequence is (1x file, Nx read, 1x
156 : file, Nx read ...). As in: Each new file encountered creates a file
157 : callback and a variable number of read callbacks. */
158 :
159 : struct fd_tar_reader {
160 :
161 : /* Buffered file header. Required because a file header might be
162 : split across multiple fd_tar_read calls. */
163 : union {
164 : uchar buf[ sizeof(fd_tar_meta_t) ];
165 : fd_tar_meta_t header;
166 : };
167 :
168 : ulong pos; /* Number of bytes consumed */
169 : ulong buf_ctr; /* Write cursor in file header */
170 : ulong file_sz; /* Number of file bytes left */
171 :
172 : /* Callback parameters */
173 : fd_tar_read_vtable_t cb_vt;
174 : void * cb_arg;
175 :
176 : };
177 :
178 : FD_PROTOTYPES_BEGIN
179 :
180 : /* fd_tar_reader_{align,footprint} return parameters for the memory
181 : region backing a fd_tar_reader_t. */
182 :
183 : FD_FN_CONST static inline ulong
184 0 : fd_tar_reader_align( void ) {
185 0 : return alignof(fd_tar_reader_t);
186 0 : }
187 :
188 : FD_FN_CONST static inline ulong
189 0 : fd_tar_reader_footprint( void ) {
190 0 : return sizeof(fd_tar_reader_t);
191 0 : }
192 :
193 : /* fd_tar_reader_new creates a new TAR reader. mem is the memory region
194 : that will hold the fd_tar_reader_t (matches above align/ footprint
195 : requirements). cb_vt contains the callback function pointers of
196 : the recipient. cb_vt pointer is borrowed until this function
197 : returns. cb_arg is the callback context value (usually a pointer to
198 : the recipient object). Returns a qualified handle to the reader
199 : object in mem on success. On failure, returns NULL and writes reason
200 : to warning log. Reasons for failure include invalid memory region or
201 : NULL callback. */
202 :
203 : fd_tar_reader_t *
204 : fd_tar_reader_new( void * mem,
205 : fd_tar_read_vtable_t const * cb_vt,
206 : void * cb_arg );
207 :
208 : /* fd_tar_reader_delete destroys a .tar reader and frees any allocated
209 : resources. Returns the underlying memory region back to the caller. */
210 :
211 : void *
212 : fd_tar_reader_delete( fd_tar_reader_t * reader );
213 :
214 : /* fd_tar_read processes a chunk of the TAR stream. Issues callbacks
215 : when file headers or content are read. reader is an fd_tar_reader_t
216 : pointer. data points to the first byte of the data chunk. data_sz
217 : is the byte count. data_sz==0UL is a no-op. Returns 0 on success.
218 : Returns -1 on end-of-file. On failure, returns positive errno
219 : compatible error code. In case of error, caller should delete reader
220 : and must not issue any more fd_tar_read calls. Suitable as a
221 : fd_decompress_cb_t callback. If the underlying functions returns track_err
222 : at any point, after fd_tar_read has processed the end of the data buffer,
223 : we will proceed to return track_err assuming no other errors have been
224 : thrown. Pass in 0 to not use this functionality. */
225 :
226 : int
227 : fd_tar_read( void * reader,
228 : uchar const * data,
229 : ulong data_sz,
230 : int track_err );
231 :
232 : /* Streaming writer ***************************************************/
233 :
/* TL;DR. I didn't read the code. How do I use this?

   Init with fd_tar_writer_new( mem, fd ), where fd is an open file
   descriptor for the tar archive.

   For each file you want to add to the archive:
   1. Write out tar header with fd_tar_writer_new_file( writer, file_name )
   2. Write out file data with fd_tar_writer_write_file_data( writer, data, data_sz ).
      This can be done as many times as you want.
   3. Finish the current file with fd_tar_writer_fini_file( writer ).

   When you are done, call fd_tar_writer_delete( writer ) to write out the
   tar archive trailer and close out the file descriptor.

   If you want to reserve space for an existing file and write back to it
   at some point in the future see the below comments for
   fd_tar_writer_{make,fill}_space().

*/
252 :
253 : struct fd_tar_writer {
254 : int fd; /* The file descriptor for the tar archive. */
255 : ulong header_pos; /* The position in the file for the current files header.
256 : If there is no current file that is being streamed out,
257 : the header_pos will be equal to ULONG_MAX. */
258 : ulong data_sz; /* The size of the current files data. If there is no
259 : current file that is being streamed out, the data_sz
260 : will be equal to ULONG_MAX. */
261 : ulong wb_pos; /* If this value is not equal to ULONG_MAX that means that
262 : this is the position at which to write back to with a
263 : call to fd_tar_writer_fill_space. */
264 : /* TODO: Right now, the stream to the tar writer just uses fd_io_write.
265 : This can eventually be abstracted to use write callbacks that use
266 : fd_io streaming under the hood. This adds some additional complexity
267 : that's related to writing back into the header: if the header is still
268 : in the ostream buf, modify the buffer. Otherwise, read the header
269 : directly from the file. */
270 :
271 : };
272 : typedef struct fd_tar_writer fd_tar_writer_t;
273 :
274 : FD_FN_CONST static inline ulong
275 0 : fd_tar_writer_align( void ) {
276 0 : return alignof(fd_tar_writer_t);
277 0 : }
278 :
279 : FD_FN_CONST static inline ulong
280 0 : fd_tar_writer_footprint( void ) {
281 0 : return sizeof(fd_tar_writer_t);
282 0 : }
283 :
284 : /* fd_tar_writer_new creates a new TAR writer. mem is the memory region
285 : that will hold the fd_tar_writer_t (matches above align/footprint
286 : requirements). Returns a qualified handle to the tar writer
287 : object in mem on success. On failure, returns NULL and writes reason
288 : to warning log. Reasons for failure include invalid memory region.
289 : The writer will enable the user to write/stream out files of variable
290 : size into a continual stream. The writer should persist for the span of
291 : a single tar archive. The user is repsonsible for passing in an open, valid
292 : file descriptor. */
293 :
294 : fd_tar_writer_t *
295 : fd_tar_writer_new( void * mem, int fd );
296 :
297 : /* fd_tar_writer_delete destroys a tar writer and frees any allocated
298 : resources. Returns the underlying memory region back to the caller.
299 : This writer will also handle cleanup for the tar archive: it will write
300 : out the tar archive trailer and will close the underlying file descriptor. */
301 :
302 : void *
303 : fd_tar_writer_delete( fd_tar_writer_t * writer );
304 :
305 : /* fd_tar_write_new_file writes out a file header, it will leave certain
306 : fields blank to allow for writing back of header metadata that is unknown
307 : until the file done streaming out. The user must enforce the invariant that
308 : this can only be called after fd_tar_fini_file() orfd_tar_writer_new() */
309 :
310 : int
311 : fd_tar_writer_new_file( fd_tar_writer_t * writer,
312 : char const * file_name );
313 :
314 : /* fd_tar_writer_write_file_data will write out a variable amount of bytes to the
315 : writer's tarball. This can be called multiple times for a single file.
316 : The user must enforce the invariant that this function succeeded a call
317 : to fd_tar_new_file and should precede a call to fd_tar_fini_file. If this
318 : invariant isn't enforced, then the tar writer will silently produce an
319 : invalid file. */
320 :
321 : int
322 : fd_tar_writer_write_file_data( fd_tar_writer_t * writer,
323 : void const * data,
324 : ulong data_sz );
325 :
326 : /* fd_tar_fini_file will write out any alignment bytes to the current file's
327 : data. It will then write back to the file header with the file size and
328 : the checksum. */
329 :
330 : int
331 : fd_tar_writer_fini_file( fd_tar_writer_t * writer );
332 :
333 : /* fd_tar_writer_make_space and fd_tar_writer_fill_space, allow for writing
334 : back to a specific place in the tar stream. This can be used by first
335 : making a call to fd_tar_write_new_file, fd_tar_writer_make_space, and
336 : fd_tar_writer_fini_file. This will populate the header and write out
337 : random bytes. The start of this data file will be saved by the tar writer.
338 : Up to n data files can be appended to the tar archive before a call to
339 : fd_tar_writer_fill_space. fd_tar_writer_fill_space should only be called
340 : after an unpaired call to fd_tar_writer_make_space and it requires a valid
341 : fd_tar_writer_t handle. It allows the user to write back to the point at
342 : which they made space. _make_space and _fill_space should be paired together.
343 : There can only be one oustanding call to make_space at a time.
344 :
345 : TODO: This can be extended to support multiple write backs. */
346 :
347 : int
348 : fd_tar_writer_make_space( fd_tar_writer_t * writer, ulong sz );
349 :
350 : int
351 : fd_tar_writer_fill_space( fd_tar_writer_t * writer, void const * data, ulong sz );
352 :
353 : FD_PROTOTYPES_END
354 :
355 : #endif /* HEADER_fd_src_archive_fd_tar_h */
|