Line data Source code
1 : #include "fd_checkpt.h"
2 :
3 : #if FD_HAS_LZ4
4 : #include <lz4.h>
5 :
6 : /* fd_restore_private_lz4 decompresses the cbuf_max memory region
7 : pointed to by cbuf into the ubuf_usz memory region pointed to by ubuf
8 : using the given lz4 decompressor. Assumes lz4, ubuf and cbuf are
9 : valid and assumes ubuf_usz matches the corresponding
10 : fd_checkpt_private_lz4 call and cbuf is valid. On success, returns
11 : the number of leading bytes cbuf bytes that were used for the
12 : decompression (will be in [4,cbuf_max]) and the ubuf should not
13 : be modified until the stream is reset, closed or an additional 64 KiB
14 : has been decompressed. On failure, returns 0 and retains no interest
15 : in ubuf. In either case, this retains no interest in cbuf on return.
16 :
17 : _sbuf, sbuf_sz, sbuf_thresh, _sbuf_cursor specify the small buf
18 : scatter ring state. See fd_checkpt_private_lz4 for more details. */
19 :
20 : static ulong
21 : fd_restore_private_lz4( LZ4_streamDecode_t * lz4,
22 : void * _ubuf,
23 : ulong ubuf_usz,
24 : void const * _cbuf,
25 : ulong cbuf_max,
26 : void * _sbuf,
27 : ulong sbuf_sz,
28 : ulong sbuf_thresh,
29 1679433 : ulong * _sbuf_cursor ) {
30 1679433 : char * ubuf = (char *) _ubuf;
31 1679433 : char const * cbuf = (char const *)_cbuf;
32 :
33 : /* Verify ubuf_usz is in [1,LZ4_MAX_INPUT_SIZE] and cbuf_max is large
34 : enough to store a header and a non-trivial compressed body. */
35 :
36 1679433 : if( FD_UNLIKELY( !((1UL<=ubuf_usz) & (ubuf_usz<=(ulong)LZ4_MAX_INPUT_SIZE)) ) ) {
37 0 : FD_LOG_WARNING(( "bad ubuf_usz" ));
38 0 : return 0UL;
39 0 : }
40 :
41 1679433 : if( FD_UNLIKELY( cbuf_max<4UL ) ) {
42 0 : FD_LOG_WARNING(( "not enough room to compress" ));
43 0 : return 0UL;
44 0 : }
45 :
46 : /* Restore and validate header */
47 :
48 1679433 : if( FD_UNLIKELY( cbuf_max<4UL ) ) { /* 3 bytes for header, 1 byte minimum for body */
49 0 : FD_LOG_WARNING(( "truncated header" ));
50 0 : return 0UL;
51 0 : }
52 :
53 1679433 : ulong ubuf_csz = (((ulong)(uchar)cbuf[0]) )
54 1679433 : | (((ulong)(uchar)cbuf[1]) << 8)
55 1679433 : | (((ulong)(uchar)cbuf[2]) << 16); /* In [1,2^24) */
56 :
57 1679433 : ulong cbuf_sz = ubuf_csz + 3UL;
58 1679433 : if( FD_UNLIKELY( !((4UL<=cbuf_sz) | (cbuf_sz<=FD_CHECKPT_PRIVATE_CSZ_MAX( ubuf_usz ))) ) ) {
59 0 : FD_LOG_WARNING(( "corrupt header" ));
60 0 : return 0UL;
61 0 : }
62 :
63 1679433 : if( FD_UNLIKELY( cbuf_sz>cbuf_max ) ) {
64 0 : FD_LOG_WARNING(( "truncated checkpt" ));
65 0 : return 0UL;
66 0 : }
67 :
68 : /* Small ubuf scatter optimization. See note in
69 : fd_checkpt_private_lz4 for details. */
70 :
71 1679433 : int is_small = ubuf_usz<sbuf_thresh;
72 1679433 : if( is_small ) { /* app dependent branch prob */
73 1585413 : ulong sbuf_cursor = *_sbuf_cursor;
74 1585413 : if( (sbuf_sz-sbuf_cursor)<ubuf_usz ) sbuf_cursor = 0UL; /* cmov */
75 1585413 : ubuf = (char *)_sbuf + sbuf_cursor;
76 1585413 : *_sbuf_cursor = sbuf_cursor + ubuf_usz;
77 1585413 : }
78 :
79 : /* Restore the buffer */
80 :
81 1679433 : int res = LZ4_decompress_safe_continue( lz4, cbuf+3UL, ubuf, (int)ubuf_csz, (int)ubuf_usz );
82 1679433 : if( FD_UNLIKELY( res<=0 ) ) {
83 0 : FD_LOG_WARNING(( "LZ4_decompress_safe_continue error (%i)", res ));
84 0 : return 0UL;
85 0 : }
86 :
87 : /* Small ubuf scatter optimization */
88 :
89 1679433 : if( is_small ) memcpy( _ubuf, ubuf, ubuf_usz ); /* app dependent branch prob */
90 :
91 1679433 : return cbuf_sz;
92 1679433 : }
93 : #endif
94 :
95 : fd_restore_t *
96 : fd_restore_init_stream( void * mem,
97 : int fd,
98 : void * rbuf,
99 15048 : ulong rbuf_sz ) {
100 :
101 : /* Check input args */
102 :
103 15048 : if( FD_UNLIKELY( !mem ) ) {
104 3 : FD_LOG_WARNING(( "NULL mem" ));
105 3 : return NULL;
106 3 : }
107 :
108 15045 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, FD_RESTORE_ALIGN ) ) ) {
109 3 : FD_LOG_WARNING(( "misaligned mem" ));
110 3 : return NULL;
111 3 : }
112 :
113 15042 : if( FD_UNLIKELY( fd<0 ) ) {
114 3 : FD_LOG_WARNING(( "bad fd" ));
115 3 : return NULL;
116 3 : }
117 :
118 15039 : if( FD_UNLIKELY( !rbuf ) ) {
119 3 : FD_LOG_WARNING(( "NULL rbuf" ));
120 3 : return NULL;
121 3 : }
122 :
123 15036 : if( FD_UNLIKELY( rbuf_sz<FD_RESTORE_RBUF_MIN ) ) {
124 3 : FD_LOG_WARNING(( "rbuf_sz too small" ));
125 3 : return NULL;
126 3 : }
127 :
128 : /* Create decompressor */
129 :
130 15033 : # if FD_HAS_LZ4
131 15033 : LZ4_streamDecode_t * lz4 = LZ4_createStreamDecode();
132 15033 : if( FD_UNLIKELY( !lz4 ) ) {
133 0 : FD_LOG_WARNING(( "lz4 error" ));
134 0 : return NULL;
135 0 : }
136 : # else
137 : void * lz4 = NULL;
138 : # endif
139 :
140 : /* Init restore */
141 :
142 15033 : fd_restore_t * restore = (fd_restore_t *)mem;
143 :
144 15033 : restore->fd = fd; /* streaming mode */
145 15033 : restore->frame_style = 0; /* not in frame */
146 15033 : restore->lz4 = (void *)lz4;
147 15033 : restore->sbuf_cursor = 0UL;
148 15033 : restore->rbuf.mem = (uchar *)rbuf;
149 15033 : restore->rbuf.sz = rbuf_sz;
150 15033 : restore->rbuf.lo = 0UL;
151 15033 : restore->rbuf.ready = 0UL;
152 :
153 15033 : return restore;
154 15033 : }
155 :
156 : fd_restore_t *
157 : fd_restore_init_mmio( void * mem,
158 : void const * mmio,
159 15042 : ulong mmio_sz ) {
160 :
161 : /* Check input args */
162 :
163 15042 : if( FD_UNLIKELY( !mem ) ) {
164 3 : FD_LOG_WARNING(( "NULL mem" ));
165 3 : return NULL;
166 3 : }
167 :
168 15039 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)mem, FD_RESTORE_ALIGN ) ) ) {
169 3 : FD_LOG_WARNING(( "misaligned mem" ));
170 3 : return NULL;
171 3 : }
172 :
173 15036 : if( FD_UNLIKELY( (!mmio) & (!!mmio_sz) ) ) {
174 3 : FD_LOG_WARNING(( "NULL mmio with non-zero mmio_sz" ));
175 3 : return NULL;
176 3 : }
177 :
178 : /* Create decompressor */
179 :
180 15033 : # if FD_HAS_LZ4
181 15033 : LZ4_streamDecode_t * lz4 = LZ4_createStreamDecode();
182 15033 : if( FD_UNLIKELY( !lz4 ) ) {
183 0 : FD_LOG_WARNING(( "lz4 error" ));
184 0 : return NULL;
185 0 : }
186 : # else
187 : void * lz4 = NULL;
188 : # endif
189 :
190 : /* Init restore */
191 :
192 15033 : fd_restore_t * restore = (fd_restore_t *)mem;
193 :
194 15033 : restore->fd = -1; /* mmio mode */
195 15033 : restore->frame_style = 0; /* not in frame */
196 15033 : restore->lz4 = (void *)lz4;
197 15033 : restore->mmio.mem = (uchar const *)mmio;
198 15033 : restore->mmio.sz = mmio_sz;
199 15033 : restore->mmio.off = 0UL;
200 :
201 15033 : return restore;
202 15033 : }
203 :
204 : void *
205 30084 : fd_restore_fini( fd_restore_t * restore ) {
206 :
207 30084 : if( FD_UNLIKELY( !restore ) ) {
208 6 : FD_LOG_WARNING(( "NULL restore" ));
209 6 : return NULL;
210 6 : }
211 :
212 30078 : if( FD_UNLIKELY( fd_restore_private_in_frame( restore ) ) ) {
213 12 : FD_LOG_WARNING(( "in a frame" ));
214 12 : restore->frame_style = -1; /* failed */
215 12 : return NULL;
216 12 : }
217 :
218 30066 : # if FD_HAS_LZ4
219 :
220 : /* Note: Though this this doesn't seem to be officially documented,
221 : the lz4-1.9.4@lz4/lib/lz4.c:2575 suggests that this always returns
222 : 0. That is, 0 is success and non-zero is failure. */
223 :
224 30066 : if( FD_UNLIKELY( LZ4_freeStreamDecode( (LZ4_streamDecode_t *)restore->lz4 ) ) )
225 0 : FD_LOG_WARNING(( "LZ4 freeStreamDecode error, attempting to continue" ));
226 :
227 30066 : # endif
228 :
229 30066 : return restore;
230 30078 : }
231 :
232 : int
233 : fd_restore_frame_open( fd_restore_t * restore,
234 407355 : int frame_style ) {
235 :
236 407355 : if( FD_UNLIKELY( !restore ) ) {
237 6 : FD_LOG_WARNING(( "NULL restore" ));
238 6 : return FD_CHECKPT_ERR_INVAL;
239 6 : }
240 :
241 407349 : if( FD_UNLIKELY( !fd_restore_private_can_open( restore ) ) ) {
242 6 : FD_LOG_WARNING(( "in a frame or failed" ));
243 6 : restore->frame_style = -1; /* failed */
244 6 : return FD_CHECKPT_ERR_INVAL;
245 6 : }
246 :
247 407343 : frame_style = fd_int_if( !!frame_style, frame_style, FD_CHECKPT_FRAME_STYLE_DEFAULT );
248 :
249 407343 : switch( frame_style ) {
250 :
251 202812 : case FD_CHECKPT_FRAME_STYLE_RAW: {
252 202812 : break;
253 0 : }
254 :
255 0 : # if FD_HAS_LZ4
256 204525 : case FD_CHECKPT_FRAME_STYLE_LZ4: {
257 204525 : if( FD_UNLIKELY( !LZ4_setStreamDecode( (LZ4_streamDecode_t *)restore->lz4, NULL, 0 ) ) ) {
258 0 : FD_LOG_WARNING(( "LZ4_setStreamDecode failed" ));
259 0 : restore->frame_style = -1; /* failed */
260 0 : return FD_CHECKPT_ERR_COMP;
261 0 : }
262 204525 : restore->sbuf_cursor = 0UL;
263 204525 : break;
264 204525 : }
265 0 : # endif
266 :
267 6 : default: {
268 6 : FD_LOG_WARNING(( "unsupported frame_style" ));
269 6 : restore->frame_style = -1; /* failed */
270 6 : return FD_CHECKPT_ERR_UNSUP;
271 204525 : }
272 :
273 407343 : }
274 :
275 407337 : restore->frame_style = frame_style;
276 407337 : return FD_CHECKPT_SUCCESS;
277 407343 : }
278 :
279 : int
280 407331 : fd_restore_frame_close( fd_restore_t * restore ) {
281 :
282 407331 : if( FD_UNLIKELY( !restore ) ) {
283 6 : FD_LOG_WARNING(( "NULL restore" ));
284 6 : return FD_CHECKPT_ERR_INVAL;
285 6 : }
286 :
287 407325 : if( FD_UNLIKELY( !fd_restore_private_in_frame( restore ) ) ) {
288 6 : FD_LOG_WARNING(( "not in a frame" ));
289 6 : restore->frame_style = -1; /* failed */
290 6 : return FD_CHECKPT_ERR_INVAL;
291 6 : }
292 :
293 407319 : restore->frame_style = 0;
294 407319 : return FD_CHECKPT_SUCCESS;
295 407325 : }
296 :
297 : int
298 : fd_restore_buf( fd_restore_t * restore,
299 : void * buf,
300 3553308 : ulong sz ) {
301 :
302 3553308 : if( FD_UNLIKELY( !restore ) ) {
303 6 : FD_LOG_WARNING(( "NULL restore" ));
304 6 : return FD_CHECKPT_ERR_INVAL;
305 6 : }
306 :
307 3553302 : if( FD_UNLIKELY( !fd_restore_private_in_frame( restore ) ) ) {
308 18 : FD_LOG_WARNING(( "not in a frame" ));
309 18 : restore->frame_style = -1; /* failed */
310 18 : return FD_CHECKPT_ERR_INVAL;
311 18 : }
312 :
313 3553284 : if( FD_UNLIKELY( !sz ) ) return FD_CHECKPT_SUCCESS; /* nothing to do */
314 :
315 3158934 : if( FD_UNLIKELY( !buf ) ) {
316 0 : FD_LOG_WARNING(( "NULL buf with non-zero sz" ));
317 0 : restore->frame_style = -1; /* failed */
318 0 : return FD_CHECKPT_ERR_INVAL;
319 0 : }
320 :
321 3158934 : switch( restore->frame_style ) {
322 :
323 1570521 : case FD_CHECKPT_FRAME_STYLE_RAW: {
324 :
325 1570521 : if( fd_restore_private_is_mmio( restore ) ) { /* mmio mode, app dependent branch prob */
326 :
327 784098 : ulong mmio_sz = restore->mmio.sz;
328 784098 : ulong mmio_off = restore->mmio.off;
329 :
330 784098 : if( FD_UNLIKELY( sz > (mmio_sz-mmio_off) ) ) {
331 0 : FD_LOG_WARNING(( "sz overflow" ));
332 0 : restore->frame_style = -1; /* failed */
333 0 : return FD_CHECKPT_ERR_IO;
334 0 : }
335 :
336 784098 : memcpy( buf, restore->mmio.mem + mmio_off, sz );
337 :
338 784098 : restore->mmio.off = mmio_off + sz; /* at most mmio.sz */
339 :
340 786423 : } else { /* streaming mode */
341 :
342 786423 : int err = fd_io_buffered_read( restore->fd, buf, sz, restore->rbuf.mem, restore->rbuf.sz,
343 786423 : &restore->rbuf.lo, &restore->rbuf.ready );
344 :
345 786423 : if( FD_UNLIKELY( err ) ) {
346 0 : FD_LOG_WARNING(( "fd_io_buffered_read failed (%i-%s)", err, fd_io_strerror( err ) ));
347 0 : restore->frame_style = -1; /* failed */
348 0 : return FD_CHECKPT_ERR_IO;
349 0 : }
350 :
351 786423 : }
352 :
353 1570521 : break;
354 1570521 : }
355 :
356 1570521 : # if FD_HAS_LZ4
357 1588413 : case FD_CHECKPT_FRAME_STYLE_LZ4: {
358 :
359 1588413 : LZ4_streamDecode_t * lz4 = (LZ4_streamDecode_t *)restore->lz4;
360 :
361 1588413 : if( fd_restore_private_is_mmio( restore ) ) { /* mmio mode */
362 :
363 790677 : uchar const * mmio = restore->mmio.mem;
364 790677 : ulong mmio_sz = restore->mmio.sz;
365 790677 : ulong off = restore->mmio.off;
366 :
367 790677 : uchar * chunk = (uchar *)buf;
368 836253 : do {
369 836253 : ulong chunk_usz = fd_ulong_min( sz, FD_CHECKPT_PRIVATE_CHUNK_USZ_MAX );
370 :
371 836253 : ulong chunk_csz = fd_restore_private_lz4( lz4, chunk, chunk_usz, mmio + off, mmio_sz - off,
372 836253 : restore->sbuf, FD_RESTORE_PRIVATE_SBUF_SZ, FD_RESTORE_PRIVATE_SBUF_THRESH,
373 836253 : &restore->sbuf_cursor ); /* logs details */
374 836253 : if( FD_UNLIKELY( !chunk_csz ) ) {
375 0 : restore->frame_style = -1; /* failed */
376 0 : return FD_CHECKPT_ERR_COMP;
377 0 : }
378 :
379 836253 : off += chunk_csz; /* at most mmio_sz */
380 :
381 836253 : chunk += chunk_usz;
382 836253 : sz -= chunk_usz;
383 836253 : } while( sz );
384 :
385 790677 : restore->mmio.off = off;
386 :
387 797736 : } else { /* streaming mode */
388 :
389 797736 : int fd = restore->fd;
390 797736 : uchar * rbuf = restore->rbuf.mem;
391 797736 : ulong rbuf_sz = restore->rbuf.sz;
392 797736 : ulong rbuf_lo = restore->rbuf.lo;
393 797736 : ulong rbuf_ready = restore->rbuf.ready;
394 :
395 797736 : uchar * chunk = (uchar *)buf;
396 843180 : do {
397 843180 : ulong chunk_usz = fd_ulong_min( sz, FD_CHECKPT_PRIVATE_CHUNK_USZ_MAX );
398 :
399 : /* Pre-buffer the header and the first body byte to figure out
400 : how large the compressed chunk actually is.
401 :
402 : Note: This can buffer bytes past the end of the checkpoint in
403 : the uncommon case of there being data past the end of the
404 : checkpoint (e.g. is a stream like stdin without an EOF or the
405 : checkpoint is embedded in a larger file). We could have
406 : fd_io_read below use min_sz-rbuf_ready for the min and max sz
407 : arguments to not overread (but then there isn't much point to
408 : using buffered reads). We could also make an unbuffered
409 : streaming a restore option (but it probably much slower if
410 : there are lots of tiny buffers). Regardless, overreading in
411 : such scenarios is an unavoidable possibility if the incoming
412 : file is corrupt anyway and the caller will usually be able to
413 : seek such streams. So we currently just allow it to get the
414 : benefits of buffering. */
415 :
416 843180 : # define BUFFER(min_ready) \
417 1686360 : if( FD_UNLIKELY( rbuf_ready<min_ready ) ) { /* If not enough bytes buffered */ \
418 : \
419 : /* Move the unprocessed bytes to the beginning of the buffer */ \
420 8511 : \
421 8511 : if( FD_LIKELY( (rbuf_lo>0UL) & (rbuf_ready>0UL) ) ) memmove( rbuf, rbuf+rbuf_lo, rbuf_ready ); \
422 8511 : \
423 : /* Read at least enough bytes to make progress and at most */ \
424 : /* enough bytes to fill the rbuf. If we hit EOF or another */ \
425 : /* error, the restore failed. */ \
426 8511 : \
427 8511 : ulong rsz; \
428 8511 : int err = fd_io_read( fd, rbuf+rbuf_ready, min_ready-rbuf_ready, rbuf_sz-rbuf_ready, &rsz ); \
429 8511 : if( FD_UNLIKELY( err ) ) { \
430 0 : FD_LOG_WARNING(( "fd_io_read failed (%i-%s)", err, fd_io_strerror( err ) )); \
431 0 : restore->frame_style = -1; /* failed */ \
432 0 : return FD_CHECKPT_ERR_IO; \
433 0 : } \
434 8511 : \
435 8511 : rbuf_ready += rsz; /* in [min_ready,rbuf_sz] */ \
436 8511 : rbuf_lo = 0UL; \
437 8511 : }
438 :
439 843180 : BUFFER( 4UL )
440 :
441 843180 : ulong chunk_csz = 3UL + ( ((ulong)rbuf[ rbuf_lo ] )
442 843180 : | ((ulong)rbuf[ rbuf_lo+1UL ] << 8)
443 843180 : | ((ulong)rbuf[ rbuf_lo+2UL ] << 16) );
444 :
445 843180 : if( FD_UNLIKELY( !((4UL<=chunk_csz) & (chunk_csz<=FD_CHECKPT_PRIVATE_CSZ_MAX( chunk_usz ))) ) ) {
446 0 : FD_LOG_WARNING(( "corrupt header" ));
447 0 : restore->frame_style = -1; /* failed */
448 0 : return FD_CHECKPT_ERR_COMP;
449 0 : }
450 :
451 : /* Buffer the compressed chunk. If the fd doesn't have
452 : chunk_csz bytes available (e.g. we hit EOF unexpectedly or
453 : other I/O error), this will fail the restore. Note that we
454 : haven't advanced rbuf_lo yet so we invoke buffer with the
455 : entire chunk_csz. Also note that at this point:
456 :
457 : rbuf_sz >= RBUF_MIN >= CSZ_MAX( USZ_MAX ) >= CSZ_MAX( chunk_usz ) >= chunk_csz
458 :
459 : such that we always can buffer chunk_csz bytes into rbuf. */
460 :
461 843180 : BUFFER( chunk_csz );
462 :
463 : /* Decompress the compressed chunk in rbuf */
464 :
465 843180 : ulong res = fd_restore_private_lz4( lz4, chunk, chunk_usz, rbuf + rbuf_lo, rbuf_ready,
466 843180 : restore->sbuf, FD_RESTORE_PRIVATE_SBUF_SZ, FD_RESTORE_PRIVATE_SBUF_THRESH,
467 843180 : &restore->sbuf_cursor ); /* logs details */
468 843180 : if( FD_UNLIKELY( !res ) ) {
469 0 : restore->frame_style = -1; /* failed */
470 0 : return FD_CHECKPT_ERR_COMP;
471 0 : }
472 :
473 843180 : if( FD_UNLIKELY( res!=chunk_csz ) ) {
474 0 : FD_LOG_WARNING(( "corrupt body" ));
475 0 : restore->frame_style = -1; /* failed */
476 0 : return FD_CHECKPT_ERR_COMP;
477 0 : }
478 :
479 843180 : # undef BUFFER
480 :
481 843180 : rbuf_lo += chunk_csz;
482 843180 : rbuf_ready -= chunk_csz;
483 :
484 843180 : chunk += chunk_usz;
485 843180 : sz -= chunk_usz;
486 843180 : } while( sz );
487 :
488 797736 : restore->rbuf.lo = rbuf_lo;
489 797736 : restore->rbuf.ready = rbuf_ready;
490 :
491 797736 : }
492 :
493 1588413 : break;
494 1588413 : }
495 1588413 : # endif
496 :
497 1588413 : default: { /* never get here */
498 0 : FD_LOG_WARNING(( "unsupported frame style" ));
499 0 : restore->frame_style = -1; /* failed */
500 0 : return FD_CHECKPT_ERR_UNSUP;
501 1588413 : }
502 :
503 3158934 : }
504 :
505 3158934 : return FD_CHECKPT_SUCCESS;
506 3158934 : }
|