Line data Source code
1 : #ifndef HEADER_fd_src_vinyl_io_fd_vinyl_io_h
2 : #define HEADER_fd_src_vinyl_io_fd_vinyl_io_h
3 :
4 : /* A fd_vinyl_io_t reads from / appends to a bstream stored in some
5 : physical layer (typically slow and non-volatile). Supports massive
6 : numbers of async concurrent reads and appends and the ability to
7 : recover from unexpected interrupts (Ctrl-C, power failures, etc). To
8 : accommodate the myriad of different styles of physical layers and
9 : interfaces, the API is run time plugin friendly. Summary of
10 : operations:
11 :
12 : read_imm: blocking read a contiguous range of blocks in the
13 : bstream's past. Mostly used for iterating over a bstream's past.
14 :
15 : read: start reading a contiguous range of blocks in the bstream's
16 : past. The caller promises the range to read is contiguous in the
17 : underlying physical storage.
18 :
19 : poll: finish an outstanding read. Outstanding reads can complete
20 : in an arbitrary order. All reads must be finished by poll but note
21 : that it is possible to detect a read is complete out-of-band too
22 : (for speculative processing).
23 :
24 : append: start appending a set of blocks to the end of the bstream's
25 : present (moving blocks from the bstream's future to the bstream's
26 : present). The blocks will be contiguous in the underlying storage.
27 : The blocks must be suitably aligned and with a lifetime until the
28 : next commit.
29 :
30 : commit: finish all outstanding appends, moving all blocks in the
31 : bstream's present to the bstream's past. This will empty the io's
32 : append scratch pad. The underlying implementation is free to
33 : process outstanding appends in any order (and free to interleave
34 : them arbitrarily with outstanding reads).
35 :
36 : hint: indicates the next sz worth of blocks appended to the bstream
37 : must be contiguous in the physical storage.
38 :
39 : alloc: allocate memory from the io's append scratch pad. These
40 : allocations will have a suitable alignment for append and a
41 : lifetime until the next commit. This may trigger a commit of
42 : outstanding appends if there isn't enough scratch pad free.
43 :
44 : copy: append a contiguous range of blocks from the bstream's past
45 : to the end of the bstream's present. May commit outstanding
46 : appends.
47 :
48 : forget: forget all blocks before a given sequence number, moving
49 : blocks from the bstream's past to the bstream's antiquity. The
50 : caller can only forget up to the bstream's present.
51 :
52 : rewind: move blocks from the bstream's past (and potentially
53 : antiquity) to the bstream's future. The bstream must have an empty
54 : present (i.e. no appends in progress) and no reads in progress.
55 : This allows, for example, on recovery, a multi-block pair that was
56 : incompletely written to be cleaned up.
57 :
58 : sync: update the range for the bstream past where recovery will
59 : resume. This moves all blocks in the bstream's antiquity to the end of
60 : the bstream's future. */
61 :
62 : /* FIXME: consider a query to get how many reads are outstanding? (with
63 : this, rewind and forget could be completely generic). */
64 :
65 : #include "../bstream/fd_vinyl_bstream.h"
66 :
67 : /* FD_VINYL_IO_TYPE_* identifies which IO implementation is in use. */
68 :
69 9 : #define FD_VINYL_IO_TYPE_MM (0) /* memory mapped */
70 9 : #define FD_VINYL_IO_TYPE_BD (1) /* synchronous blocking */
71 : #define FD_VINYL_IO_TYPE_WD (2) /* async O_DIRECT write (specialized) */
72 0 : #define FD_VINYL_IO_TYPE_UR (3) /* async io_uring */
73 :
74 : /* FD_VINYL_IO_FLAG_* are flags used by the vinyl IO APIs that take a flags argument (poll, commit, alloc and sync in this header). */
75 :
76 1497918 : #define FD_VINYL_IO_FLAG_BLOCKING (1) /* Okay to block the caller */
77 :
78 : /* A fd_vinyl_io_rd_t describes a read request to the underlying I/O
79 : implementation to read [seq,seq+sz) (cyclic) from the bstream's past
80 : into dst. seq, dst and sz should be FD_VINYL_BSTREAM_BLOCK_SZ
81 : aligned. Any failure encountered while reading should FD_LOG_CRIT
82 : (just like reading an invalid memory address will seg fault).
83 : Underlying I/O implementations can add other information to this
84 : structure as necessary. ctx is an arbitrary user defined value. */
85 :
86 : #define FD_VINYL_IO_READ_SZ (64UL) /* presumably the byte size of a fd_vinyl_io_rd_t (see the padding member below) -- TODO confirm */
87 :
88 : struct fd_vinyl_io_rd {
89 : ulong ctx; /* arbitrary user defined value */
90 : ulong seq; /* bstream sequence number of the first byte to read */
91 : void * dst; /* where to store the bytes read */
92 : ulong sz; /* number of bytes to read */
93 : uchar _[ FD_VINYL_IO_READ_SZ - 32UL ]; /* room for implementation specific info; the 32UL presumably accounts for the four 8 byte members above -- TODO confirm */
94 : };
95 :
96 : typedef struct fd_vinyl_io_rd fd_vinyl_io_rd_t;
97 :
98 : /* fd_vinyl_io_t is an opaque handle of a fd_vinyl_io instance. Some
99 : details are exposed to facilitate inlining in high performance
100 : contexts. The fd_vinyl_io_func_*_t typedefs give the signatures of the implementation specific functions; each takes the io as its first argument. */
101 :
102 : struct fd_vinyl_io_private;
103 : typedef struct fd_vinyl_io_private fd_vinyl_io_t;
104 :
105 : typedef void (*fd_vinyl_io_func_read_imm_t)( fd_vinyl_io_t * io, ulong seq, void * dst, ulong sz );
106 : typedef void (*fd_vinyl_io_func_read_t )( fd_vinyl_io_t * io, fd_vinyl_io_rd_t * rd );
107 : typedef int (*fd_vinyl_io_func_poll_t )( fd_vinyl_io_t * io, fd_vinyl_io_rd_t ** _rd, int flags );
108 : typedef ulong (*fd_vinyl_io_func_append_t )( fd_vinyl_io_t * io, void const * src, ulong sz );
109 : typedef int (*fd_vinyl_io_func_commit_t )( fd_vinyl_io_t * io, int flags );
110 : typedef ulong (*fd_vinyl_io_func_hint_t )( fd_vinyl_io_t * io, ulong sz );
111 : typedef void * (*fd_vinyl_io_func_alloc_t )( fd_vinyl_io_t * io, ulong sz, int flags );
112 : typedef ulong (*fd_vinyl_io_func_copy_t )( fd_vinyl_io_t * io, ulong seq, ulong sz );
113 : typedef void (*fd_vinyl_io_func_forget_t )( fd_vinyl_io_t * io, ulong seq );
114 : typedef void (*fd_vinyl_io_func_rewind_t )( fd_vinyl_io_t * io, ulong seq );
115 : typedef int (*fd_vinyl_io_func_sync_t )( fd_vinyl_io_t * io, int flags );
116 : typedef void * (*fd_vinyl_io_func_fini_t )( fd_vinyl_io_t * io );
117 :
118 : struct fd_vinyl_io_impl { /* table of implementation specific functions; the inline fd_vinyl_io_* wrappers in this header call through io->impl */
119 : fd_vinyl_io_func_read_imm_t read_imm;
120 : fd_vinyl_io_func_read_t read;
121 : fd_vinyl_io_func_poll_t poll;
122 : fd_vinyl_io_func_append_t append;
123 : fd_vinyl_io_func_commit_t commit;
124 : fd_vinyl_io_func_hint_t hint;
125 : fd_vinyl_io_func_alloc_t alloc;
126 : fd_vinyl_io_func_copy_t copy;
127 : fd_vinyl_io_func_forget_t forget;
128 : fd_vinyl_io_func_rewind_t rewind;
129 : fd_vinyl_io_func_sync_t sync;
130 : fd_vinyl_io_func_fini_t fini;
131 : };
132 :
133 : typedef struct fd_vinyl_io_impl fd_vinyl_io_impl_t;
134 :
135 : struct fd_vinyl_io_private {
136 : int type; /* presumably a FD_VINYL_IO_TYPE_* value -- TODO confirm against the implementations */
137 : ulong seed; /* presumably the bstream's data integrity hashing seed (io_seed at init) -- TODO confirm */
138 : ulong seq_ancient; /* FD_VINYL_BSTREAM_BLOCK_SZ multiple */
139 : ulong seq_past; /* " */
140 : ulong seq_present; /* " */
141 : ulong seq_future; /* " */
142 : ulong spad_max; /* " */
143 : ulong spad_used; /* " */
144 : fd_vinyl_io_impl_t * impl; /* implementation specific funcs */
145 : /* io implementation specific details follow */
146 : };
147 :
148 : FD_PROTOTYPES_BEGIN
149 :
150 : /* fd_vinyl_io_* return the current value of the eponymous io field
151 : (spad_free and dev_used are derived values: spad_max-spad_used and seq_future-seq_ancient respectively). Assumes io is valid. For all but type and seed, the return value is
152 : a FD_VINYL_BSTREAM_BLOCK_SZ multiple. Note that we don't have a
153 : generic notion of dev_max or dev_free as such is not a well defined
154 : concept. Individual IO implementations can provide them as
155 : appropriate though. */
156 :
157 12 : FD_FN_PURE static inline int fd_vinyl_io_type( fd_vinyl_io_t const * io ) { return io->type; }
158 :
159 12 : FD_FN_PURE static inline ulong fd_vinyl_io_seed( fd_vinyl_io_t const * io ) { return io->seed; }
160 :
161 12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_ancient( fd_vinyl_io_t const * io ) { return io->seq_ancient; }
162 12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_past ( fd_vinyl_io_t const * io ) { return io->seq_past; }
163 12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_present( fd_vinyl_io_t const * io ) { return io->seq_present; }
164 12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_future ( fd_vinyl_io_t const * io ) { return io->seq_future; }
165 :
166 732 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_max ( fd_vinyl_io_t const * io ) { return io->spad_max; }
167 732 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_used( fd_vinyl_io_t const * io ) { return io->spad_used; }
168 732 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_free( fd_vinyl_io_t const * io ) { return io->spad_max - io->spad_used; }
169 :
170 0 : FD_FN_PURE static inline ulong fd_vinyl_io_dev_used( fd_vinyl_io_t const * io ) { return io->seq_future - io->seq_ancient; }
171 :
172 : /* fd_vinyl_io_read_imm does an immediate (blocking) read of
173 : [seq,seq+sz) (cyclic) from io's bstream's past into dst. Assumes
174 : there are no reads currently posted on io. Retains no interest in
175 : dst. seq, dst and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned.
176 : This is used mostly for sequential iterating over a bstream's past
177 : (i.e. serial recovery and discovering partitions for parallel
178 : recovery). */
179 :
180 : static inline void
181 : fd_vinyl_io_read_imm( fd_vinyl_io_t * io,
182 : ulong seq,
183 : void * dst,
184 3000948 : ulong sz ) {
185 3000948 : io->impl->read_imm( io, seq, dst, sz );
186 3000948 : }
187 :
188 : /* fd_vinyl_io_read starts executing the read command rd. That is,
189 : start reading bstream bytes [seq,seq+sz) (cyclic) into dst. seq, dst
190 : and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned. Further,
191 : [seq,seq+sz) should be in the bstream's past and the region to read
192 : should be stored contiguously in the underlying storage.
193 :
194 : On entry, the caller should have ownership of rd and rd->dst. The io
195 : has ownership of these on return and a read interest in bstream bytes
196 : [seq,seq+sz) (cyclic). The ownership of these will be returned to
197 : the caller and the read interest will end when poll returns the
198 : request. */
199 :
200 : static inline void
201 : fd_vinyl_io_read( fd_vinyl_io_t * io,
202 3747414 : fd_vinyl_io_rd_t * rd ) {
203 3747414 : io->impl->read( io, rd );
204 3747414 : }
205 :
206 : /* fd_vinyl_io_poll checks if any outstanding reads are complete. Reads
207 : can complete in any order by the I/O layer. flags is a bit-or of
208 : FD_VINYL_IO_FLAGs. BLOCKING indicates the call is allowed to block
209 : the caller (the io layer promises the call cannot fail from the
210 : caller's point of view). Returns FD_VINYL_SUCCESS if a read completed
211 : (*_rd will point to the read command that ended, with the ownership and
212 : read interest as described above), FD_VINYL_ERR_EMPTY if there are
213 : no commands pending (*_rd will be NULL) and FD_VINYL_ERR_AGAIN if
214 : none of the posted commands are ready (*_rd will be NULL). AGAIN is
215 : only possible for a non-blocking call. */
216 :
217 : static inline int
218 : fd_vinyl_io_poll( fd_vinyl_io_t * io,
219 : fd_vinyl_io_rd_t ** _rd,
220 7494828 : int flags ) {
221 7494828 : return io->impl->poll( io, _rd, flags );
222 7494828 : }
223 :
224 : /* fd_vinyl_io_append starts appending sz bytes at src to the bstream.
225 : src and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned. Returns
226 : bstream sequence number seq_append where the data is being appended.
227 : io will have a read interest in src until the next commit. This
228 : moves blocks from the bstream's future to the bstream's present. On
229 : commit, the region [seq_future_before,seq_append) (cyclic) will be
230 : filled with zero padding if the I/O implementation requires it to
231 : keep the append contiguous in the physical store (this region will be
232 : empty if covered by a previous hint or if this is an append of a
233 : single block) and the region [seq_append,seq_future_after) (cyclic)
234 : will be filled with the appended info.
235 :
236 : fd_vinyl_io_commit moves all blocks in the bstream's present to the
237 : bstream's past (i.e. sets seq_present to seq_future). flags is a
238 : bit-or of FD_VINYL_IO_FLAGs. If BLOCKING is set, this is allowed to
239 : block the caller. Returns FD_VINYL_SUCCESS (0) on success and
240 : FD_VINYL_ERR_AGAIN (negative) if commit could not be completed
241 : immediately (only possible for a non-blocking call). commit empties
242 : the io append scratch pad on success.
243 :
244 : fd_vinyl_io_hint indicates the next sz bytes to append must be
245 : contiguous in the bstream. This can move blocks from the bstream's
246 : future to the bstream's present. Returns (the potentially updated)
247 : seq_future. On commit, the region
248 : [seq_future_before,seq_future_after) (cyclic) will be filled with
249 : zero padding (this region will be empty if covered by a previous
250 : hint) and the region [seq_future_after,seq_future_after+sz) (cyclic)
251 : will be contiguous in the physical storage. This is useful for grouping
252 : sets of blocks from different memory regions on the host that must be
253 : written contiguously from a protocol point of view (e.g. a move
254 : control block and the pair that follows it).
255 :
256 : fd_vinyl_io_alloc returns a pointer to sz bytes of
257 : FD_VINYL_BSTREAM_BLOCK_SZ aligned memory suitably allocated from io's
258 : append scratch pad. flags is a bit-or of FD_VINYL_IO_FLAG_*. BLOCKING
259 : indicates the call is allowed to block the caller. If a non-blocking
260 : call, will return NULL if there is no suitable memory at this time.
261 : Will never return NULL for a blocking call. The lifetime of the
262 : returned pointer is the lesser of the next append, next commit, the
263 : next alloc or the io. sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned
264 : and at most io's spad_max. This may do a commit to free up scratch
265 : pad memory if necessary (moving blocks from the present to the past).
266 :
267 : fd_vinyl_io_trim trims sz bytes from the end of the most recent
268 : fd_vinyl_io_alloc. sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned
269 : and at most the size of the most recent alloc.
270 :
271 : fd_vinyl_io_copy starts appending a copy of the sz bytes at seq in
272 : the bstream's past to the bstream. seq and sz should be
273 : FD_VINYL_BSTREAM_BLOCK_SZ aligned. [seq,seq+sz) (cyclic) should be
274 : in the bstream's past. io will have a read interest in this region
275 : until the next commit. This will do a _blocking_ commit to free up
276 : scratch pad memory if necessary (moving blocks from the present to
277 : the past). FIXME: consider non-blocking copy support? (copy would
278 : need a flags args).
279 :
280 : None of these can fail from the caller's perspective (they will all
281 : FD_LOG_CRIT if anything goes wrong ... much like accessing invalid
282 : memory will seg fault). */
283 :
284 : static inline ulong
285 : fd_vinyl_io_append( fd_vinyl_io_t * io,
286 : void const * src,
287 749736 : ulong sz ) {
288 749736 : return io->impl->append( io, src, sz );
289 749736 : }
290 :
291 : static inline int
292 : fd_vinyl_io_commit( fd_vinyl_io_t * io,
293 749064 : int flags ) {
294 749064 : return io->impl->commit( io, flags );
295 749064 : }
296 :
297 : static inline ulong
298 : fd_vinyl_io_hint( fd_vinyl_io_t * io,
299 752724 : ulong sz ) {
300 752724 : return io->impl->hint( io, sz );
301 752724 : }
302 :
303 : static inline void *
304 : fd_vinyl_io_alloc( fd_vinyl_io_t * io,
305 : ulong sz,
306 726 : int flags ) {
307 726 : return io->impl->alloc( io, sz, flags );
308 726 : }
309 :
310 : static inline void
311 : fd_vinyl_io_trim( fd_vinyl_io_t * io,
312 0 : ulong sz ) {
313 0 : io->spad_used -= sz; /* handled generically (no impl call) and unchecked: caller must keep sz at most the size of the most recent alloc */
314 0 : }
315 :
316 : static inline ulong
317 : fd_vinyl_io_copy( fd_vinyl_io_t * io,
318 : ulong seq,
319 752454 : ulong sz ) {
320 752454 : return io->impl->copy( io, seq, sz );
321 752454 : }
322 :
323 : /* fd_vinyl_io_forget moves [seq_past,seq) (cyclic) from the bstream's
324 : past to the bstream's antiquity, setting seq_past to seq. As such,
325 : seq should be in [seq_past,seq_present] (cyclic) and
326 : FD_VINYL_BSTREAM_BLOCK_SZ aligned. There should be no reads, copies
327 : or appends in progress. Cannot fail from the caller's perspective
328 : (will FD_LOG_CRIT if anything goes wrong).
329 :
330 : IMPORTANT SAFETY TIP! Though the bstream has been updated from the
331 : caller's point of view, the bstream needs to be sync'd for recovery to
332 : start from the new seq_past. */
333 :
334 : static inline void
335 : fd_vinyl_io_forget( fd_vinyl_io_t * io,
336 266526 : ulong seq ) {
337 266526 : io->impl->forget( io, seq );
338 266526 : }
339 :
340 : /* fd_vinyl_io_rewind moves blocks [seq,seq_present) (cyclic) from the
341 : bstream's past to the bstream's future (updating seq_ancient and
342 : seq_past as necessary). There should be no reads, copies or appends
343 : in progress. seq should be at most seq_present (cyclic) and
344 : FD_VINYL_BSTREAM_BLOCK_SZ aligned. Cannot fail from the caller's
345 : perspective (will FD_LOG_CRIT if anything goes wrong).
346 :
347 : IMPORTANT SAFETY TIP! Though the bstream has been updated from the
348 : caller's point of view, the bstream needs to be sync'd for recovery
349 : to account for the rewind (and this is probably more critical than
350 : forget because appends will start modifying the bstream blocks that
351 : recovery would be expecting to be in the pre-rewind state). */
352 :
353 : static inline void
354 : fd_vinyl_io_rewind( fd_vinyl_io_t * io,
355 264216 : ulong seq ) {
356 264216 : io->impl->rewind( io, seq );
357 264216 : }
358 :
359 : /* fd_vinyl_io_sync moves [seq_ancient,seq_past) (cyclic) from the
360 : bstream's antiquity to the end of the bstream's future, setting
361 : seq_ancient to seq_past. It promises the caller the bstream's past
362 : is fully written and that the bstream's past region is what recovery
363 : will use to recover the bstream's key-val state at seq_present.
364 : flags is a bit-or of FD_VINYL_IO_FLAGs. BLOCKING indicates the call
365 : is allowed to block the caller. Returns FD_VINYL_SUCCESS (0) on
366 : success and FD_VINYL_ERR_AGAIN (negative) if the call would block
367 : the caller (only possible for a non-blocking call). */
368 : /* FIXME: consider allowing new user info to be passed? */
369 :
370 : static inline int
371 : fd_vinyl_io_sync( fd_vinyl_io_t * io,
372 748878 : int flags ) {
373 748878 : return io->impl->sync( io, flags );
374 748878 : }
375 :
376 : /* fd_vinyl_io_fini tears down io, returning the memory region used to
377 : hold the I/O implementation state. Implicitly completes any
378 : in-progress reads and cancels any in-progress appends (and thus can
379 : block the caller). Only the prototype is given here (this is not an inline wrapper).
380 :
381 : IMPORTANT SAFETY TIP! This does _not_ sync the bstream first (e.g.
382 : if an application is tearing down due to an anomalous condition, it
383 : may not want to sync on fini so that it can recover from a known good
384 : point). */
385 :
386 : void *
387 : fd_vinyl_io_fini( fd_vinyl_io_t * io );
388 :
389 : /* Helpers ************************************************************/
390 :
391 : /* fd_vinyl_io_spad_est() returns an estimate of the smallest scratch pad
392 : size required by most applications. Specifically, this returns:
393 :
394 : 2 pair_sz( LZ4_COMPRESSBOUND( VAL_MAX ) )
395 :
396 : so that it is possible to load an object footprint into the scratch
397 : pad and then have a worst case scratch memory for compression to
398 : re-encode the object. */
399 :
400 : FD_FN_CONST ulong fd_vinyl_io_spad_est( void );
401 :
402 : /* fd_vinyl_io_append_* are helper functions that start appending the
403 : given info, appropriately formatted and hashed, to io's bstream.
404 : There are no excess requirements for alignment. They do no input
405 : argument checking. On return, io retains no interest in the given
406 : info (that is, they use io's scratch memory and thus can trigger an
407 : io commit to move blocks from the bstream's present to the bstream's
408 : past if there isn't enough scratch pad free). They return the
409 : bstream sequence number where the data is being appended. They
410 : cannot fail from the caller's perspective (they will FD_LOG_CRIT if
411 : anything goes awry). */
412 :
413 : ulong
414 : fd_vinyl_io_append_pair_raw( fd_vinyl_io_t * io,
415 : fd_vinyl_key_t const * key, /* pair key */
416 : fd_vinyl_info_t const * info, /* pair info */
417 : void const * val ); /* contains info->val_sz bytes, in [0,FD_VINYL_VAL_MAX] */
418 :
419 : ulong
420 : fd_vinyl_io_append_dead( fd_vinyl_io_t * io,
421 : fd_vinyl_bstream_phdr_t const * phdr, /* pair header of erased pair */
422 : void const * info, /* contains info_sz bytes, info_sz treated as 0 if NULL */
423 : ulong info_sz ); /* in [0,FD_VINYL_BSTREAM_DEAD_INFO_MAX] */
424 :
425 : ulong
426 : fd_vinyl_io_append_move( fd_vinyl_io_t * io,
427 : fd_vinyl_bstream_phdr_t const * src, /* pair header of src pair */
428 : fd_vinyl_key_t const * dst, /* src pair getting renamed to dst or is replacing dst */
429 : void const * info, /* contains info_sz bytes, info_sz treated as 0 if NULL */
430 : ulong info_sz ); /* in [0,FD_VINYL_BSTREAM_MOVE_INFO_MAX] */
431 :
432 : ulong
433 : fd_vinyl_io_append_part( fd_vinyl_io_t * io,
434 : ulong seq_prev, /* should be a part before seq or seq */
435 : ulong dead_cnt, /* number of dead blocks in the partition */
436 : ulong move_cnt, /* number of move blocks in the partition */
437 : void const * info, /* contains info_sz bytes, info_sz treated as 0 if NULL */
438 : ulong info_sz ); /* in [0,FD_VINYL_BSTREAM_PART_INFO_MAX] */
439 :
440 : /* fd_vinyl_io_append_pair_inplace appends the style RAW pair at phdr
441 : to the bstream. This will preferentially append the pair in the
442 : given style. Returns the location where the pair was appended. On
443 : return, *_style holds the actual style used and *_val_esz contains
444 : the pair encoded value byte size.
445 :
446 : Note that if the requested style is RAW or if the pair could not be
447 : usefully encoded in the requested style (e.g. the compressed size
448 : ended up larger than the uncompressed size), this will append from
449 : phdr in-place zero copy. When appending a pair in-place, this will
450 : clear the zero padding region and insert the appropriate data
451 : integrity footers at the end of the pair. In other cases, this will
452 : append the encoded pair from the io append scratch memory and the
453 : pair will be untouched.
454 :
455 : As such, the caller should assume the io has a read interest on the
456 : pair's header region and value region and a write interest on the
457 : pair zero padding region and footer region until the next append or
458 : commit and the pair's zero padding and footer regions may be
459 : clobbered by this call. */
460 :
461 : ulong
462 : fd_vinyl_io_append_pair_inplace( fd_vinyl_io_t * io,
463 : int style,
464 : fd_vinyl_bstream_phdr_t * phdr,
465 : int * _style,
466 : ulong * _val_esz );
467 :
468 : /* fd_vinyl_io_bd *****************************************************/
469 :
470 : /* fd_vinyl_io_bd_{align,footprint} specify the alignment and footprint
471 : needed for a bstream stored on a block device / large file with a
472 : spad_max append scratch pad. align will be a reasonable power-of-2
473 : and footprint will be a multiple of align. Returns 0 for an invalid
474 : spad_max.
475 :
476 : fd_vinyl_io_bd_init starts using a file as a bstream store. lmem
477 : points to a local memory region with suitable alignment and footprint
478 : to hold the bstream's state. spad_max gives the size of the append
479 : scratch pad (should be a FD_VINYL_BSTREAM_BLOCK_SZ multiple). dev_fd
480 : is a file descriptor for the block device / large file. The file
481 : should already exist and be sized to the appropriate capacity.
482 :
483 : FIXME: allow user to specify a subrange of dev_fd to use for the
484 : store?
485 :
486 : If reset is non-zero, ignores any existing file contents and will
487 : start a new bstream. The bstream metadata user info will be set to
488 : the info_sz bytes at info and the bstream will use io_seed for its
489 : data integrity hashing seed.
490 :
491 : Otherwise, this will attempt to resume at the point the bstream was
492 : last synchronized. info, info_sz and io_seed will be ignored.
493 :
494 : IMPORTANT SAFETY TIP! The io_seed is not the same thing as the meta
495 : seed. The io_seed is a property of the bstream (with a lifetime of
496 : the bstream and is shared among all users of the bstream). The meta
497 : seed is a property of the meta (and ideally uniquely and randomly set
498 : per vinyl tile run).
499 :
500 : Returns a handle to the bstream on success (has ownership of lmem and
501 : dev_fd, ownership returned on fini) and NULL on failure (logs
502 : details, no ownership changed). Retains no interest in info. */
503 :
504 : ulong fd_vinyl_io_bd_align ( void );
505 : ulong fd_vinyl_io_bd_footprint( ulong spad_max );
506 :
507 : fd_vinyl_io_t *
508 : fd_vinyl_io_bd_init( void * lmem,
509 : ulong spad_max,
510 : int dev_fd,
511 : int reset,
512 : void const * info,
513 : ulong info_sz,
514 : ulong io_seed );
515 :
516 : /* fd_vinyl_io_mm *****************************************************/
517 :
518 : /* fd_vinyl_io_mm_* is the same as fd_vinyl_io_bd_* but uses the dev_sz byte
519 : sized memory region dev as the "block device". The result is
520 : bit-level identical to fd_vinyl_io_bd (and vice versa). This is
521 : primarily for testing purposes but, as dev could also be a memory
522 : mapped file / block device, this could be useful in general
523 : (especially for concurrent read access, e.g. parallel recovery).
524 : Note that "sync" only guarantees appends to the dev memory region
525 : happened. If the memory region is backed by a file, when the actual
526 : blocks are written to the physical storage is controlled by the
527 : kernel / driver / physical device (it is up to the caller of sync to
528 : do any additional context specific control here). */
529 :
530 : ulong fd_vinyl_io_mm_align ( void );
531 : ulong fd_vinyl_io_mm_footprint( ulong spad_max );
532 :
533 : fd_vinyl_io_t *
534 : fd_vinyl_io_mm_init( void * lmem,
535 : ulong spad_max,
536 : void * dev,
537 : ulong dev_sz,
538 : int reset,
539 : void const * info,
540 : ulong info_sz,
541 : ulong io_seed );
542 :
543 : /* fd_vinyl_{mmio,mmio_sz} return {a pointer in the caller's address
544 : space to the raw bstream storage,the raw bstream storage byte size}.
545 : These are a _subset_ of the dev / dev_sz region passed to mm_init and
546 : these will be FD_VINYL_BSTREAM_BLOCK_SZ aligned. If a byte seq is in
547 : the store, it will be at mmio[ seq % mmio_sz ]. Note that mmio_sz is
548 : not necessarily a power of two. Note also that the bstream's past is
549 : guaranteed to be in the store. The lifetime of the returned region
550 : is the lifetime of the io. Returns NULL and 0 if io does not support
551 : memory mapped io. These exist to support thread parallel recovery. */
552 :
553 : void * fd_vinyl_mmio ( fd_vinyl_io_t * io );
554 : ulong fd_vinyl_mmio_sz( fd_vinyl_io_t * io );
555 :
556 : FD_PROTOTYPES_END
557 :
558 : #endif /* HEADER_fd_src_vinyl_io_fd_vinyl_io_h */
|