Line data Source code
1 : #ifndef HEADER_fd_src_archive_fd_tar_h
2 : #define HEADER_fd_src_archive_fd_tar_h
3 :
4 : /* fd_tar implements the ustar and old-GNU versions of the TAR file
5 : format. This is not a general-purpose TAR implementation. It is
6 : currently only intended for loading and writing Solana snapshots. */
7 :
8 : #include "../io/fd_io.h"
9 :
10 : /* File Format ********************************************************/
11 :
12 : /* The high level format of a tar archive/ball is a set of 512 byte blocks.
13 : Each file will be described by a tar header (fd_tar_meta_t) and will be
14 : followed by the raw bytes of the file. The last block that is used for
15 : the file will be padded to fit into a tar block. When the archive is
16 : completed, it will be trailed by two EOF blocks which are populated with
17 : zero bytes. */
18 :
19 : /* fd_tar_meta_t is the ustar/OLDGNU version of the TAR header. */
20 :
21 0 : #define FD_TAR_BLOCK_SZ (512UL)
22 :
/* FD_TAR_NAME_SZ is the byte size of fd_tar_meta::name. */

#define FD_TAR_NAME_SZ (100)

/* Packed layout of a single TAR header.  The hex value in front of
   each member is that member's byte offset from the start of the
   header.  The member sizes add up to 512 bytes. */

struct __attribute__((packed)) fd_tar_meta {
  /* 0x000 */ char name    [ FD_TAR_NAME_SZ ];
  /* 0x064 */ char mode    [   8 ];
  /* 0x06c */ char uid     [   8 ];
  /* 0x074 */ char gid     [   8 ];
  /* 0x07c */ char size    [  12 ];
  /* 0x088 */ char mtime   [  12 ];
  /* 0x094 */ char chksum  [   8 ];
  /* 0x09c */ char typeflag;
  /* 0x09d */ char linkname[ 100 ];
  /* 0x101 */ char magic   [   6 ];
  /* 0x107 */ char version [   2 ];
  /* 0x109 */ char uname   [  32 ];
  /* 0x129 */ char gname   [  32 ];
  /* 0x149 */ char devmajor[   8 ];
  /* 0x151 */ char devminor[   8 ];
  /* 0x159 */ char prefix  [ 155 ];
  /* 0x1f4 */ char padding [  12 ];
};

typedef struct fd_tar_meta fd_tar_meta_t;
45 :
46 : /* FD_TAR_MAGIC is the only value of fd_tar_meta::magic supported by
47 : fd_tar. */
48 :
49 : #define FD_TAR_MAGIC "ustar"
50 :
51 : /* Known file types */
52 :
53 54 : #define FD_TAR_TYPE_NULL ('\0') /* implies FD_TAR_TYPE_REGULAR */
54 108 : #define FD_TAR_TYPE_REGULAR ('0')
55 : #define FD_TAR_TYPE_HARD_LINK ('1')
56 : #define FD_TAR_TYPE_SYM_LINK ('2')
57 : #define FD_TAR_TYPE_CHAR_DEV ('3')
58 : #define FD_TAR_TYPE_BLOCK_DEV ('4')
59 : #define FD_TAR_TYPE_DIR ('5')
60 : #define FD_TAR_TYPE_FIFO ('6')
61 :
62 : FD_PROTOTYPES_BEGIN
63 :
64 : /* fd_tar_meta_is_reg returns 1 if the file type is 'regular', and 0
65 : otherwise. */
66 :
67 : FD_FN_PURE static inline int
68 54 : fd_tar_meta_is_reg( fd_tar_meta_t const * meta ) {
69 54 : return ( meta->typeflag == FD_TAR_TYPE_NULL )
70 54 : | ( meta->typeflag == FD_TAR_TYPE_REGULAR );
71 54 : }
72 :
73 : /* fd_tar_meta_get_size parses the size field of the TAR header.
74 : Returns ULONG_MAX if parsing failed. */
75 :
76 : FD_FN_PURE ulong
77 : fd_tar_meta_get_size( fd_tar_meta_t const * meta );
78 :
79 : /* fd_tar_set_octal is a helper function to write 12-byte octal fields */
80 :
81 : int
82 : fd_tar_set_octal( char buf[ static 12 ],
83 : ulong val );
84 :
85 : /* fd_tar_meta_set_size sets the size field. Returns 1 on success, 0
86 : if sz is too large to be represented in TAR header. Set size using the
87 : OLDGNU size extension to allow for unlimited file sizes. The first byte
88 : must be 0x80 followed by 0s and then the size in binary. */
89 :
90 : static inline int
91 : fd_tar_meta_set_size( fd_tar_meta_t * meta,
92 0 : ulong sz ) {
93 0 : meta->size[ 0 ] = (char)0x80;
94 0 : FD_STORE( ulong, meta->size + 4UL, fd_ulong_bswap( sz ) );
95 0 : return 1;
96 0 : }
97 :
98 : /* fd_tar_meta_set_mtime sets the modification time field. Returns 1
99 : on success, 0 if time cannot be represented in TAR header. */
100 :
101 : static inline int
102 : fd_tar_meta_set_mtime( fd_tar_meta_t * meta,
103 0 : ulong mtime ) {
104 0 : return fd_tar_set_octal( meta->mtime, mtime );
105 0 : }
106 :
107 : FD_PROTOTYPES_END
108 :
109 : /* Streaming reader ***************************************************/
110 :
111 : typedef struct fd_tar_reader fd_tar_reader_t;
112 :
113 : /* fd_tar_file_fn_t is called by fd_tar when a new file was encountered.
114 : cb_arg is the callback context value. meta is the file header
115 : (lifetime until return). sz is the expected file size that follows
116 : (via read callbacks). The actual read size might differ in case of
117 : errors (e.g. unexpected EOF). Returns 0 on success and non-zero if
118 : tar reader should stop. */
119 :
120 : typedef int
121 : (* fd_tar_file_fn_t)( void * cb_arg,
122 : fd_tar_meta_t const * meta,
123 : ulong sz );
124 :
125 : /* fd_tar_read_fn_t is called by fd_tar when a new chunk of data has
126 : been read. Each read callback is associated with the last file
127 : callback. Read callbacks are issued in order such that concatenating
128 : all buffers results in the correct file content. Returns 0 on
129 : success and non-zero if tar reader should stop.
130 :
131 : cb_arg is the callback context value. buf points to the first byte
132 : of the chunk. bufsz is the byte count. The lifetime of buf is until
133 : the callback returns. */
134 :
135 : typedef int
136 : (* fd_tar_read_fn_t)( void * cb_arg,
137 : void const * buf,
138 : ulong bufsz );
139 :
140 : /* fd_tar_read_vtable_t is the virtual function table of the
141 : fd_tar_reader_t consumer object. */
142 :
143 : struct fd_tar_read_vtable {
144 : fd_tar_file_fn_t file;
145 : fd_tar_read_fn_t read;
146 : };
147 :
148 : typedef struct fd_tar_read_vtable fd_tar_read_vtable_t;
149 :
150 : /* fd_tar_reader_t is a streaming TAR reader using a callback API for
151 : delivering data. To use, feed it the chunks of the TAR stream via
152 : fd_tar_read. There is no restriction on the size and alignment of
153 : these chunks, other than that the chunks are supplied in order and
154 : gapless. The resulting callback sequence is (1x file, Nx read, 1x
155 : file, Nx read ...). As in: Each new file encountered creates a file
156 : callback and a variable number of read callbacks. */
157 :
158 : struct fd_tar_reader {
159 :
160 : /* Buffered file header. Required because a file header might be
161 : split across multiple fd_tar_read calls. */
162 : union {
163 : uchar buf[ sizeof(fd_tar_meta_t) ];
164 : fd_tar_meta_t header;
165 : };
166 :
167 : ulong pos; /* Number of bytes consumed */
168 : ulong buf_ctr; /* Write cursor in file header */
169 : ulong file_sz; /* Number of file bytes left */
170 :
171 : /* Callback parameters */
172 : fd_tar_read_vtable_t cb_vt;
173 : void * cb_arg;
174 :
175 : };
176 :
177 : FD_PROTOTYPES_BEGIN
178 :
179 : /* fd_tar_reader_{align,footprint} return parameters for the memory
180 : region backing a fd_tar_reader_t. */
181 :
182 : FD_FN_CONST static inline ulong
183 0 : fd_tar_reader_align( void ) {
184 0 : return alignof(fd_tar_reader_t);
185 0 : }
186 :
187 : FD_FN_CONST static inline ulong
188 0 : fd_tar_reader_footprint( void ) {
189 0 : return sizeof(fd_tar_reader_t);
190 0 : }
191 :
192 : /* fd_tar_reader_new creates a new TAR reader. mem is the memory region
193 : that will hold the fd_tar_reader_t (matches above align/ footprint
194 : requirements). cb_vt contains the callback function pointers of
195 : the recipient. cb_vt pointer is borrowed until this function
196 : returns. cb_arg is the callback context value (usually a pointer to
197 : the recipient object). Returns a qualified handle to the reader
198 : object in mem on success. On failure, returns NULL and writes reason
199 : to warning log. Reasons for failure include invalid memory region or
200 : NULL callback. */
201 :
202 : fd_tar_reader_t *
203 : fd_tar_reader_new( void * mem,
204 : fd_tar_read_vtable_t const * cb_vt,
205 : void * cb_arg );
206 :
207 : /* fd_tar_reader_delete destroys a .tar reader and frees any allocated
208 : resources. Returns the underlying memory region back to the caller. */
209 :
210 : void *
211 : fd_tar_reader_delete( fd_tar_reader_t * reader );
212 :
213 : /* fd_tar_read processes a chunk of the TAR stream. Issues callbacks
214 : when file headers or content are read. reader is an fd_tar_reader_t
215 : pointer. data points to the first byte of the data chunk. data_sz
216 : is the byte count. data_sz==0UL is a no-op. Returns 0 on success.
217 : Returns -1 on end-of-file. On failure, returns positive errno
218 : compatible error code. In case of error, caller should delete reader
219 : and must not issue any more fd_tar_read calls. Suitable as a
220 : fd_decompress_cb_t callback. If an underlying function returns track_err
221 : at any point, after fd_tar_read has processed the end of the data buffer,
222 : we will proceed to return track_err assuming no other errors have been
223 : thrown. Pass in 0 to not use this functionality. */
224 :
225 : int
226 : fd_tar_read( void * reader,
227 : uchar const * data,
228 : ulong data_sz,
229 : int track_err );
230 :
231 : /* Streaming writer ***************************************************/
232 :
233 : /* TL;DR. I didn't read the code. How do I use this?
234 :
235 : Init with fd_tar_writer_new( mem, fd ).
236 :
237 : For each file you want to add to the archive:
238 : 1. Write out tar header with fd_tar_writer_new_file( writer, file_name )
239 : 2. Write out file data with fd_tar_writer_write_file_data( writer, data, data_sz ).
240 : This can be done as many times as you want.
241 : 3. Finish the current file with fd_tar_writer_fini_file( writer ).
242 :
243 : When you are done, call fd_tar_writer_delete( writer ) to write out the
244 : tar archive trailer and close out the file descriptor.
245 :
246 : If you want to reserve space for an existing file and write back to it
247 : at some point in the future see the below comments for
248 : fd_tar_writer_{make,fill}_space().
249 :
250 : */
251 :
252 : struct fd_tar_writer {
253 : int fd; /* The file descriptor for the tar archive. */
254 : ulong header_pos; /* The position in the file for the current files header.
255 : If there is no current file that is being streamed out,
256 : the header_pos will be equal to ULONG_MAX. */
257 : ulong data_sz; /* The size of the current files data. If there is no
258 : current file that is being streamed out, the data_sz
259 : will be equal to ULONG_MAX. */
260 : ulong wb_pos; /* If this value is not equal to ULONG_MAX that means that
261 : this is the position at which to write back to with a
262 : call to fd_tar_writer_fill_space. */
263 : /* TODO: Right now, the stream to the tar writer just uses fd_io_write.
264 : This can eventually be abstracted to use write callbacks that use
265 : fd_io streaming under the hood. This adds some additional complexity
266 : that's related to writing back into the header: if the header is still
267 : in the ostream buf, modify the buffer. Otherwise, read the header
268 : directly from the file. */
269 :
270 : };
271 : typedef struct fd_tar_writer fd_tar_writer_t;
272 :
273 : FD_FN_CONST static inline ulong
274 0 : fd_tar_writer_align( void ) {
275 0 : return alignof(fd_tar_writer_t);
276 0 : }
277 :
278 : FD_FN_CONST static inline ulong
279 0 : fd_tar_writer_footprint( void ) {
280 0 : return sizeof(fd_tar_writer_t);
281 0 : }
282 :
283 : /* fd_tar_writer_new creates a new TAR writer. mem is the memory region
284 : that will hold the fd_tar_writer_t (matches above align/footprint
285 : requirements). Returns a qualified handle to the tar writer
286 : object in mem on success. On failure, returns NULL and writes reason
287 : to warning log. Reasons for failure include invalid memory region.
288 : The writer will enable the user to write/stream out files of variable
289 : size into a continual stream. The writer should persist for the span of
290 : a single tar archive. The user is responsible for passing in an open, valid
291 : file descriptor. */
292 :
293 : fd_tar_writer_t *
294 : fd_tar_writer_new( void * mem, int fd );
295 :
296 : /* fd_tar_writer_delete destroys a tar writer and frees any allocated
297 : resources. Returns the underlying memory region back to the caller.
298 : This writer will also handle cleanup for the tar archive: it will write
299 : out the tar archive trailer and will close the underlying file descriptor. */
300 :
301 : void *
302 : fd_tar_writer_delete( fd_tar_writer_t * writer );
303 :
304 : /* fd_tar_writer_new_file writes out a file header, it will leave certain
305 : fields blank to allow for writing back of header metadata that is unknown
306 : until the file is done streaming out. The user must enforce the invariant that
307 : this can only be called after fd_tar_writer_fini_file() or fd_tar_writer_new() */
308 :
309 : int
310 : fd_tar_writer_new_file( fd_tar_writer_t * writer,
311 : char const * file_name );
312 :
313 : /* fd_tar_writer_write_file_data will write out a variable amount of bytes to the
314 : writer's tarball. This can be called multiple times for a single file.
315 : The user must enforce the invariant that this function follows a call
316 : to fd_tar_writer_new_file and precedes a call to fd_tar_writer_fini_file. If this
317 : invariant isn't enforced, then the tar writer will silently produce an
318 : invalid file. */
319 :
320 : int
321 : fd_tar_writer_write_file_data( fd_tar_writer_t * writer,
322 : void const * data,
323 : ulong data_sz );
324 :
325 : /* fd_tar_writer_fini_file will write out any alignment bytes to the current file's
326 : data. It will then write back to the file header with the file size and
327 : the checksum. */
328 :
329 : int
330 : fd_tar_writer_fini_file( fd_tar_writer_t * writer );
331 :
332 : /* fd_tar_writer_make_space and fd_tar_writer_fill_space, allow for writing
333 : back to a specific place in the tar stream. This can be used by first
334 : making a call to fd_tar_writer_new_file, fd_tar_writer_make_space, and
335 : fd_tar_writer_fini_file. This will populate the header and write out
336 : random bytes. The start of this data file will be saved by the tar writer.
337 : Up to n data files can be appended to the tar archive before a call to
338 : fd_tar_writer_fill_space. fd_tar_writer_fill_space should only be called
339 : after an unpaired call to fd_tar_writer_make_space and it requires a valid
340 : fd_tar_writer_t handle. It allows the user to write back to the point at
341 : which they made space. _make_space and _fill_space should be paired together.
342 : There can only be one outstanding call to make_space at a time.
343 :
344 : TODO: This can be extended to support multiple write backs. */
345 :
346 : int
347 : fd_tar_writer_make_space( fd_tar_writer_t * writer, ulong sz );
348 :
349 : int
350 : fd_tar_writer_fill_space( fd_tar_writer_t * writer, void const * data, ulong sz );
351 :
352 : FD_PROTOTYPES_END
353 :
354 : #endif /* HEADER_fd_src_archive_fd_tar_h */
|