#ifndef HEADER_fd_src_ballet_zstd_fd_zstd_h
#define HEADER_fd_src_ballet_zstd_fd_zstd_h

/* fd_zstd provides APIs for Zstandard compressed streams, such as .zst
   files.  Currently uses libzstd in static mode under the hood.

   ### Format

   The Zstandard compression format is documented here:
   https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md

   Zstandard streams are the concatenation of one or more frames.
   fd_zstd only handles frames containing compressed data.

   Frames are not interdependent which usually allows for stateless
   processing between frames.  Dictionaries and prefixes are exceptions
   which may add dependencies to external data.  fd_zstd currently does
   not support those.

   ### Memory management

   fd_zstd promises to not do any dynamic heap allocations nor syscalls.

   Each fd_zstd_{d,c}stream_t object is backed by a contiguous memory
   region which is allocated and managed by the caller.  There are few
   restrictions on the nature of this memory region (may be backed by
   a workspace, scratch, bss, or even the stack).  However, fd_zstd
   objects may not be relocated or shared across address spaces.

   The footprint of fd_zstd_dstream_t (decompression) depends on the
   "window size" (relates to compression level and independent of data
   size).  Each Zstandard frame requires a particular window size to
   decompress.  If this parameter is unknown, it can be recovered via
   fd_zstd_peek (see fd_zstd_peek_t::window_sz).  fd_zstd gracefully
   fails when attempting to decompress a frame that exceeds the max
   window size that the fd_zstd_dstream_t was created with.

   The footprint of fd_zstd_cstream_t (compression) depends on the
   compression level.

   ### Possible improvements

   libzstd's single-shot decompression mode requires much less scratch
   data (only 128 KiB instead of 128 MiB).  However, it requires the
   input and output buffers to fit the entire compressed/decompressed
   frame.  The Solana protocol does not properly bound max decompressed
   frame size, so using streaming mode is safer for now. */

#if FD_HAS_ZSTD

#include "../fd_ballet_base.h"

/* FD_ZSTD_MAX_HDR_SZ is the amount of bytes required to fit any
   possible frame header.  (Including both the magic number and the
   frame header itself) */

#define FD_ZSTD_MAX_HDR_SZ (18UL)

/* FD_ZSTD_CSTREAM_ALIGN: presumably the alignment of the memory region
   backing a fd_zstd_cstream_t.  NOTE(review): no cstream API is
   declared in this header (see the TODO near the end) -- confirm
   against the user of this constant. */

#define FD_ZSTD_CSTREAM_ALIGN (64UL)

/* Decompress API *****************************************************/

/* fd_zstd_dstream_t provides streaming decompression for Zstandard
   frames.  Handles one frame at a time. */

struct fd_zstd_dstream;
typedef struct fd_zstd_dstream fd_zstd_dstream_t;

/* fd_zstd_peek_t holds the frame header fields decoded by
   fd_zstd_peek. */

struct fd_zstd_peek {
  ulong window_sz;          /* window size of the frame (see "Memory management" above) */
  ulong frame_content_sz;   /* ULONG_MAX if unknown */
  int   frame_is_skippable; /* NOTE(review): presumably non-zero for a skippable frame -- confirm */
};
typedef struct fd_zstd_peek fd_zstd_peek_t;

FD_PROTOTYPES_BEGIN

/* fd_zstd_peek peeks a frame header.  buf points to a fragment
   containing the first FD_ZSTD_MAX_HDR_SZ bytes of the frame (or less
   if EOF reached).  bufsz is the size of that fragment.  peek is
   populated with the decoded data.  Caller should zero-initialize peek
   for forward compatibility.  Returns peek on success.  On failure,
   returns NULL.  Reasons for failure include insufficient bufsz or
   decode error. */

fd_zstd_peek_t *
fd_zstd_peek( fd_zstd_peek_t * peek,
              void const *     buf,
              ulong            bufsz );

/* fd_zstd_dstream_{align,footprint} return the parameters of the
   memory region backing a fd_zstd_dstream_t.  max_window_sz is the
   largest window size that this object is able to handle. */

FD_FN_CONST ulong
fd_zstd_dstream_align( void );

FD_FN_CONST ulong
fd_zstd_dstream_footprint( ulong max_window_sz );

/* fd_zstd_dstream_new creates a new dstream object backed by the memory
   region at mem.  mem matches align/footprint requirements for the
   given max_window_sz.  Returns a handle to the newly created dstream
   object on success (not just a simple cast of mem).  The dstream
   expects a new frame on return.  On failure, returns NULL. */

fd_zstd_dstream_t *
fd_zstd_dstream_new( void * mem,
                     ulong  max_window_sz );

/* fd_zstd_dstream_delete destroys the dstream object and releases its
   memory region back to the caller.  Returns pointer to memory region
   on success (same as provided in call to new).  Acts as a no-op if
   dstream==NULL. */

void *
fd_zstd_dstream_delete( fd_zstd_dstream_t * dstream );

/* fd_zstd_dstream_reset resets the state of a dstream object, such that
   it expects the start of a frame. */

void
fd_zstd_dstream_reset( fd_zstd_dstream_t * dstream );

/* fd_zstd_dstream_read decompresses a fragment of stream data.

   *in_p is assumed to point to the next byte of compressed data.
   in_end points to one byte past the compressed data fragment.  *out_p
   is assumed to point to the next free byte in the destination buffer.
   out_end points to one byte past the destination buffer.

   On return, newly decompressed data is written to the destination
   buffer and *out_p is updated to point to the next free byte, and
   *in_p is updated to point to the next byte not yet decompressed.
   If *out_p==out_end, the destination buffer was entirely filled.  The
   caller should retry with a new buffer in case not everything was
   flushed.  If *in_p==in_end, the compressed data fragment was fully
   consumed, and the caller should move on to the next fragment.

   Returns fd_io compatible error code.  Returns 0 if decompressor has
   made progress and is expecting more data.  Returns -1 (eof) if the
   current frame was fully decompressed, in which case the caller may
   move on to the next frame (reset not required).  Note that -1 may be
   returned even if *in_p<in_end because the fragment could span
   multiple frames.  Returns EPROTO on error.  The caller should reset
   the dstream in this case.  If opt_errcode!=NULL and an error
   occurred, *opt_errcode is set accordingly. */

int
fd_zstd_dstream_read( fd_zstd_dstream_t *     dstream,
                      uchar const ** restrict in_p,
                      uchar const *           in_end,
                      uchar ** restrict       out_p,
                      uchar *                 out_end,
                      ulong *                 opt_errcode );

/* TODO: Migrate compression logic from fd_snapshot_create to fd_zstd.h */

FD_PROTOTYPES_END

#endif /* FD_HAS_ZSTD */

#endif /* HEADER_fd_src_ballet_zstd_fd_zstd_h */