Line data Source code
1 : #ifndef HEADER_fd_src_vinyl_io_fd_vinyl_io_h
2 : #define HEADER_fd_src_vinyl_io_fd_vinyl_io_h
3 :
4 : /* A fd_vinyl_io_t reads from / appends to a bstream stored in some
5 : physical layer (typically slow and non-volatile). Supports massive
6 : numbers of async concurrent reads and appends and the ability to
7 : recover from unexpected interrupts (Ctrl-C, power failures, etc). To
8 : accommodate the myriad of different styles of physical layers and
9 : interfaces, the API is run time plugin friendly. Summary of
10 : operations:
11 :
12 : read_imm: blocking read a contiguous range of blocks in the
13 : bstream's past. Mostly used for iterating over a bstream's past.
14 :
15 : read: start reading a contiguous range of blocks in the bstream's
16 : past. The caller promises the range to read is contiguous in the
17 : underlying physical storage.
18 :
19 : poll: finish an outstanding read. Outstanding reads can complete
20 : in an arbitrary order. All reads must be finished by poll but note
21 : that it is possible to detect a read is complete out-of-band too
22 : (for speculative processing).
23 :
24 : append: start appending a set of blocks to the end of the bstream's
25 : present (moving blocks from the bstream's future to the bstream's
26 : present). The blocks will be contiguous in the underlying storage.
27 : The blocks must be suitably aligned and with a lifetime until the
28 : next commit.
29 :
30 : commit: finish all outstanding appends, moving all blocks in the
31 : bstream's present to the bstream's past. This will empty the io's
32 : append scratch pad. The underlying implementation is free to
33 : process outstanding appends in any order (and free to interleave
34 : them arbitrarily with outstanding reads).
35 :
36 : hint: indicates the next sz worth of blocks appended to the bstream
37 : must be contiguous in the physical storage.
38 :
39 : alloc: allocate memory from the io's append scratch pad. These
40 : allocations will have a suitable alignment for append and a
41 : lifetime until the next commit. This may trigger a commit of
42 : outstanding appends if there isn't enough scratch pad free.
43 :
44 : copy: append a contiguous range of blocks from the bstream's past
45 : to the end of the bstream's present. May commit outstanding
46 : appends.
47 :
48 : forget: forget all blocks before a given sequence number, moving
49 : blocks from the bstream's past to the bstream's antiquity. The
50 : caller can only forget up to the bstream's present.
51 :
52 : rewind: move blocks from the bstream's past (and potentially
53 : antiquity) to the bstream's future. The bstream must have an empty
54 : present (i.e. no appends in progress) and no reads in progress.
55 : This allows, for example, on recovery, a multi-block pair that was
56 : incompletely written to be cleaned up.
57 :
58 : sync: update the range for the bstream past where recovery will
59 : resume. This moves all blocks in the bstream's antiquity to the end of
60 : the bstream's future. */
61 :
62 : /* FIXME: consider a query to get how many reads are outstanding? (with
63 : this, rewind and forget could be completely generic). */
64 :
65 : #include "../bstream/fd_vinyl_bstream.h"
66 :
67 : /* FD_VINYL_IO_TYPE_* identifies which IO implementation is in use. */
68 :
69 9 : #define FD_VINYL_IO_TYPE_MM (0) /* memory mapped */
70 9 : #define FD_VINYL_IO_TYPE_BD (1) /* synchronous blocking */
71 : #define FD_VINYL_IO_TYPE_WD (2) /* async O_DIRECT write (specialized) */
72 0 : #define FD_VINYL_IO_TYPE_UR (3) /* async io_uring */
73 :
74 : /* FD_VINYL_IO_FLAG_* are flags used by various vinyl IO APIs */
75 :
76 1497300 : #define FD_VINYL_IO_FLAG_BLOCKING (1) /* Okay to block the caller */
77 :
78 : /* A fd_vinyl_io_rd_t describes a read request to the underlying I/O
79 : implementation to read [seq,seq+sz) (cyclic) from the bstream's past
80 : into dst. seq, dst and sz should be FD_VINYL_BSTREAM_BLOCK_SZ
81 : aligned. Any failure encountered while reading should FD_LOG_CRIT
82 : (just like reading an invalid memory address will seg fault).
83 : Underlying I/O implementations can add other information to this
84 : structure as necessary. ctx is an arbitrary user defined value. */
85 :
86 : #define FD_VINYL_IO_READ_SZ (64UL)
87 :
88 : struct fd_vinyl_io_rd {
89 : ulong ctx; /* arbitrary user defined value */
90 : ulong seq; /* bstream sequence number where the read starts, should be FD_VINYL_BSTREAM_BLOCK_SZ aligned */
91 : void * dst; /* where to store the bytes read, should be FD_VINYL_BSTREAM_BLOCK_SZ aligned */
92 : ulong sz; /* number of bytes to read, should be FD_VINYL_BSTREAM_BLOCK_SZ aligned */
93 : uchar _[ FD_VINYL_IO_READ_SZ - 32UL ]; /* reserved space; presumably pads the struct to FD_VINYL_IO_READ_SZ bytes for implementation specific info (32UL assumes the four 8 byte fields above) -- TODO confirm */
94 : };
95 :
96 : typedef struct fd_vinyl_io_rd fd_vinyl_io_rd_t;
97 :
98 : /* fd_vinyl_io_t is an opaque handle of a fd_vinyl_io instance. Some
99 : details are exposed to facilitate inlining in high performance
100 : contexts. */
101 :
102 : struct fd_vinyl_io_private;
103 : typedef struct fd_vinyl_io_private fd_vinyl_io_t;
104 :
105 : typedef void (*fd_vinyl_io_func_read_imm_t)( fd_vinyl_io_t * io, ulong seq, void * dst, ulong sz ); /* signature of impl->read_imm */
106 : typedef void (*fd_vinyl_io_func_read_t )( fd_vinyl_io_t * io, fd_vinyl_io_rd_t * rd ); /* signature of impl->read */
107 : typedef int (*fd_vinyl_io_func_poll_t )( fd_vinyl_io_t * io, fd_vinyl_io_rd_t ** _rd, int flags ); /* signature of impl->poll */
108 : typedef ulong (*fd_vinyl_io_func_append_t )( fd_vinyl_io_t * io, void const * src, ulong sz ); /* signature of impl->append */
109 : typedef int (*fd_vinyl_io_func_commit_t )( fd_vinyl_io_t * io, int flags ); /* signature of impl->commit */
110 : typedef ulong (*fd_vinyl_io_func_hint_t )( fd_vinyl_io_t * io, ulong sz ); /* signature of impl->hint */
111 : typedef void * (*fd_vinyl_io_func_alloc_t )( fd_vinyl_io_t * io, ulong sz, int flags ); /* signature of impl->alloc */
112 : typedef ulong (*fd_vinyl_io_func_copy_t )( fd_vinyl_io_t * io, ulong seq, ulong sz ); /* signature of impl->copy */
113 : typedef void (*fd_vinyl_io_func_forget_t )( fd_vinyl_io_t * io, ulong seq ); /* signature of impl->forget */
114 : typedef void (*fd_vinyl_io_func_rewind_t )( fd_vinyl_io_t * io, ulong seq ); /* signature of impl->rewind */
115 : typedef int (*fd_vinyl_io_func_sync_t )( fd_vinyl_io_t * io, int flags ); /* signature of impl->sync */
116 : typedef void * (*fd_vinyl_io_func_fini_t )( fd_vinyl_io_t * io ); /* signature of impl->fini */
117 :
118 : struct fd_vinyl_io_impl {
119 : fd_vinyl_io_func_read_imm_t read_imm; /* invoked by fd_vinyl_io_read_imm */
120 : fd_vinyl_io_func_read_t read; /* invoked by fd_vinyl_io_read */
121 : fd_vinyl_io_func_poll_t poll; /* invoked by fd_vinyl_io_poll */
122 : fd_vinyl_io_func_append_t append; /* invoked by fd_vinyl_io_append */
123 : fd_vinyl_io_func_commit_t commit; /* invoked by fd_vinyl_io_commit */
124 : fd_vinyl_io_func_hint_t hint; /* invoked by fd_vinyl_io_hint */
125 : fd_vinyl_io_func_alloc_t alloc; /* invoked by fd_vinyl_io_alloc */
126 : fd_vinyl_io_func_copy_t copy; /* invoked by fd_vinyl_io_copy */
127 : fd_vinyl_io_func_forget_t forget; /* invoked by fd_vinyl_io_forget */
128 : fd_vinyl_io_func_rewind_t rewind; /* invoked by fd_vinyl_io_rewind */
129 : fd_vinyl_io_func_sync_t sync; /* invoked by fd_vinyl_io_sync */
130 : fd_vinyl_io_func_fini_t fini; /* teardown hook; presumably invoked by fd_vinyl_io_fini (definition not visible here) */
131 : };
132 :
133 : typedef struct fd_vinyl_io_impl fd_vinyl_io_impl_t;
134 :
135 : struct fd_vinyl_io_private {
136 : int type; /* returned by fd_vinyl_io_type, presumably a FD_VINYL_IO_TYPE_* */
137 : ulong seed; /* returned by fd_vinyl_io_seed, presumably the bstream data integrity hashing seed */
138 : ulong seq_ancient; /* start of the bstream's antiquity, FD_VINYL_BSTREAM_BLOCK_SZ multiple */
139 : ulong seq_past; /* start of the bstream's past, FD_VINYL_BSTREAM_BLOCK_SZ multiple */
140 : ulong seq_present; /* start of the bstream's present, FD_VINYL_BSTREAM_BLOCK_SZ multiple */
141 : ulong seq_future; /* start of the bstream's future, FD_VINYL_BSTREAM_BLOCK_SZ multiple */
142 : ulong spad_max; /* append scratch pad size, FD_VINYL_BSTREAM_BLOCK_SZ multiple */
143 : ulong spad_used; /* append scratch pad bytes in use, FD_VINYL_BSTREAM_BLOCK_SZ multiple */
144 : fd_vinyl_io_impl_t * impl; /* implementation specific funcs */
145 :
146 : ulong cache_read_cnt; /* Cache read request count */
147 : ulong cache_read_tot_sz; /* Cache bytes read total */
148 : ulong cache_write_cnt; /* Cache write request count */
149 : ulong cache_write_tot_sz; /* Cache bytes written total */
150 : ulong file_read_cnt; /* File read request count */
151 : ulong file_read_tot_sz; /* File bytes read total */
152 : ulong file_write_cnt; /* File write request count */
153 : ulong file_write_tot_sz; /* File bytes written total */
154 :
155 : /* io implementation specific details follow */
156 : };
157 :
158 : FD_PROTOTYPES_BEGIN
159 :
160 : /* fd_vinyl_io_* return the current value of the eponymous io field.
161 : Assumes io is valid. For all but type and seed, the return value is
162 : a FD_VINYL_BSTREAM_BLOCK_SZ multiple. Note that we don't have a
163 : generic notion of dev_max or dev_free as such is not a well defined
164 : concept. Individual IO implementations can provide them as
165 : appropriate though. */
166 :
167 12 : FD_FN_PURE static inline int fd_vinyl_io_type( fd_vinyl_io_t const * io ) { return io->type; }
168 :
169 12 : FD_FN_PURE static inline ulong fd_vinyl_io_seed( fd_vinyl_io_t const * io ) { return io->seed; }
170 :
171 12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_ancient( fd_vinyl_io_t const * io ) { return io->seq_ancient; }
172 12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_past ( fd_vinyl_io_t const * io ) { return io->seq_past; }
173 12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_present( fd_vinyl_io_t const * io ) { return io->seq_present; }
174 12000012 : FD_FN_PURE static inline ulong fd_vinyl_io_seq_future ( fd_vinyl_io_t const * io ) { return io->seq_future; }
175 :
176 2970 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_max ( fd_vinyl_io_t const * io ) { return io->spad_max; }
177 2970 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_used( fd_vinyl_io_t const * io ) { return io->spad_used; }
178 2970 : FD_FN_PURE static inline ulong fd_vinyl_io_spad_free( fd_vinyl_io_t const * io ) { return io->spad_max - io->spad_used; }
179 :
180 0 : FD_FN_PURE static inline ulong fd_vinyl_io_dev_used( fd_vinyl_io_t const * io ) { return io->seq_future - io->seq_ancient; }
181 :
182 : /* fd_vinyl_io_read_imm does an immediate (blocking) read of
183 : [seq,seq+sz) (cyclic) from io's bstream's past into dst. Assumes
184 : there are no reads currently posted on io. Retains no interest in
185 : dst. seq and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned. This
186 : is used mostly for sequential iterating over a bstream's past (i.e.
187 : serial recovery and discovering partitions for parallel recovery). */
188 :
189 : static inline void
190 : fd_vinyl_io_read_imm( fd_vinyl_io_t * io,
191 : ulong seq,
192 : void * dst,
193 2998734 : ulong sz ) {
194 2998734 : io->impl->read_imm( io, seq, dst, sz );
195 2998734 : }
196 :
197 : /* fd_vinyl_io_read starts executing the read command rd. That is,
198 : start reading bstream bytes [seq,seq+sz) (cyclic) into dst. seq and
199 : sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned. Further,
200 : [seq,seq+sz) should be in the bstream's past and the region to read
201 : should be stored contiguously in the underlying storage.
202 :
203 : On entry, the caller should have ownership of rd and rd->dst. On
204 : return, the io has ownership of these and a read interest in bstream
205 : bytes [seq,seq+sz) (cyclic). The ownership of these will be returned to
206 : the caller and the read interest will end when poll returns the
207 : request. */
208 :
209 : static inline void
210 : fd_vinyl_io_read( fd_vinyl_io_t * io,
211 3748140 : fd_vinyl_io_rd_t * rd ) {
212 3748140 : io->impl->read( io, rd );
213 3748140 : }
214 :
215 : /* fd_vinyl_io_poll checks if any outstanding reads are complete. Reads
216 : can complete in any order by the I/O layer. flags is a bit-or of
217 : FD_VINYL_IO_FLAGs. BLOCKING indicates the call is allowed to block
218 : the caller (the io layer promises the call cannot fail from the
219 : caller's point of view). Returns FD_VINYL_SUCCESS if a read completed
220 : (*_rd will point to the read command that ended, with the ownership and
221 : read interest as described above), FD_VINYL_ERR_EMPTY if there are
222 : no commands pending (*_rd will be NULL) and FD_VINYL_ERR_AGAIN if
223 : none of the posted commands are ready (*_rd will be NULL). AGAIN is
224 : only possible for a non-blocking call. */
225 :
226 : static inline int
227 : fd_vinyl_io_poll( fd_vinyl_io_t * io,
228 : fd_vinyl_io_rd_t ** _rd,
229 7496280 : int flags ) {
230 7496280 : return io->impl->poll( io, _rd, flags );
231 7496280 : }
232 :
233 : /* fd_vinyl_io_append starts appending sz bytes at src to the bstream.
234 : src and sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned. Returns
235 : bstream sequence number seq_append where the data is being appended.
236 : io will have a read interest in src until the next commit. This
237 : moves blocks from the bstream's future to the bstream's present. On
238 : commit, the region [seq_future_before,seq_append) (cyclic) will be
239 : filled with zero padding if the I/O implementation requires it to
240 : keep the append contiguous in the physical store (this region will be
241 : empty if covered by a previous hint or if this is an append of a
242 : single block) and the region [seq_append,seq_future_after) (cyclic)
243 : will be filled with the appended info.
244 :
245 : fd_vinyl_io_commit moves all blocks in the bstream's present to the
246 : bstream's past (i.e. sets seq_present to seq_future). flags is a
247 : bit-or of FD_VINYL_IO_FLAGs. If BLOCKING is set, this is allowed to
248 : block the caller. Returns FD_VINYL_SUCCESS (0) on success and
249 : FD_VINYL_ERR_AGAIN (negative) if commit could not be completed
250 : immediately (only possible for a non-blocking call). commit empties
251 : the io append scratch pad on success.
252 :
253 : fd_vinyl_io_hint indicates the next sz bytes to append must be
254 : contiguous in the bstream. This can move blocks from the bstream's
255 : future to the bstream's present. Returns (the potentially updated)
256 : seq_future. On commit, the region
257 : [seq_future_before,seq_future_after) (cyclic) will be filled with
258 : zero padding (this region will be empty if covered by a previous
259 : hint) and the region [seq_future_after,seq_future_after+sz) (cyclic)
260 : will be contiguous in the physical storage. This is useful for grouping
261 : sets of blocks from different memory regions on the host that must be
262 : written contiguously from a protocol point of view (e.g. a move
263 : control block and the pair that follows it).
264 :
265 : fd_vinyl_io_alloc returns a pointer to sz bytes of
266 : FD_VINYL_BSTREAM_BLOCK_SZ aligned memory allocated from io's
267 : append scratch pad. flags is a bit-or of FD_VINYL_IO_FLAG_*. BLOCKING
268 : indicates the call is allowed to block the caller. If a non-blocking
269 : call, will return NULL if there is no suitable memory at this time.
270 : Will never return NULL for a blocking call. The lifetime of the
271 : returned pointer is the lesser of the next append, next commit, the
272 : next alloc or the io. sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned
273 : and at most io's spad_max. This may do a commit to free up scratch
274 : pad memory if necessary (moving blocks from the present to the past).
275 :
276 : fd_vinyl_io_trim trims sz bytes from the end of the most recent
277 : fd_vinyl_io_alloc. sz should be FD_VINYL_BSTREAM_BLOCK_SZ aligned
278 : and at most the size of the most recent alloc.
279 :
280 : fd_vinyl_io_copy starts appending a copy of the sz bytes at seq in
281 : the bstream's past to the bstream. seq and sz should be
282 : FD_VINYL_BSTREAM_BLOCK_SZ aligned. [seq,seq+sz) (cyclic) should be
283 : in the bstream's past. io will have a read interest in this region
284 : until the next commit. This will do a _blocking_ commit to free up
285 : scratch pad memory if necessary (moving blocks from the present to
286 : the past). FIXME: consider non-blocking copy support? (copy would
287 : need a flags args).
288 :
289 : None of these can fail from the caller's perspective (they will all
290 : FD_LOG_CRIT if anything goes wrong ... much like accessing invalid
291 : memory will seg fault). */
292 :
293 : static inline ulong
294 : fd_vinyl_io_append( fd_vinyl_io_t * io,
295 : void const * src,
296 750210 : ulong sz ) {
297 750210 : return io->impl->append( io, src, sz );
298 750210 : }
299 :
300 : static inline int
301 : fd_vinyl_io_commit( fd_vinyl_io_t * io,
302 749004 : int flags ) {
303 749004 : return io->impl->commit( io, flags );
304 749004 : }
305 :
306 : static inline ulong
307 : fd_vinyl_io_hint( fd_vinyl_io_t * io,
308 753444 : ulong sz ) {
309 753444 : return io->impl->hint( io, sz );
310 753444 : }
311 :
312 : static inline void *
313 : fd_vinyl_io_alloc( fd_vinyl_io_t * io,
314 : ulong sz,
315 2964 : int flags ) {
316 2964 : return io->impl->alloc( io, sz, flags );
317 2964 : }
318 :
319 : static inline void
320 : fd_vinyl_io_trim( fd_vinyl_io_t * io,
321 0 : ulong sz ) {
322 0 : io->spad_used -= sz;
323 0 : }
324 :
325 : static inline ulong
326 : fd_vinyl_io_copy( fd_vinyl_io_t * io,
327 : ulong seq,
328 752298 : ulong sz ) {
329 752298 : return io->impl->copy( io, seq, sz );
330 752298 : }
331 :
332 : /* fd_vinyl_io_forget moves [seq_past,seq) (cyclic) from the bstream's
333 : past to the bstream's antiquity, setting seq_past to seq. As such,
334 : seq should be in [seq_past,seq_present] (cyclic) and
335 : FD_VINYL_BSTREAM_BLOCK_SZ aligned. There should be no reads, copies
336 : or appends in progress. Cannot fail from the caller's perspective
337 : (will FD_LOG_CRIT if anything goes wrong).
338 :
339 : IMPORTANT SAFETY TIP! Though the bstream has been updated from the
340 : caller's point of view, the bstream needs to be sync'd for recovery to
341 : start from the new seq_past. */
342 :
343 : static inline void
344 : fd_vinyl_io_forget( fd_vinyl_io_t * io,
345 263526 : ulong seq ) {
346 263526 : io->impl->forget( io, seq );
347 263526 : }
348 :
349 : /* fd_vinyl_io_rewind moves blocks [seq,seq_present) (cyclic) from the
350 : bstream's past to the bstream's future (updating seq_ancient and
351 : seq_past as necessary). There should be no reads, copies or appends
352 : in progress. seq should be at most seq_present (cyclic) and
353 : FD_VINYL_BSTREAM_BLOCK_SZ aligned. Cannot fail from the caller's
354 : perspective (will FD_LOG_CRIT if anything goes wrong).
355 :
356 : IMPORTANT SAFETY TIP! Though the bstream has been updated from the
357 : caller's point of view, the bstream needs to be sync'd for recovery
358 : to account for the rewind (and this is probably more critical than
359 : forget because appends will start modifying the bstream blocks that
360 : recovery would be expecting to be in the pre-rewind state). */
361 :
362 : static inline void
363 : fd_vinyl_io_rewind( fd_vinyl_io_t * io,
364 213468 : ulong seq ) {
365 213468 : io->impl->rewind( io, seq );
366 213468 : }
367 :
368 : /* fd_vinyl_io_sync moves [seq_ancient,seq_past) (cyclic) from the
369 : bstream's antiquity to the end of the bstream's future, setting
370 : seq_ancient to seq_past. It promises the caller the bstream's past
371 : is fully written and that the bstream's past region is what recovery
372 : will use to recover the bstream's key-val state at seq_present.
373 : flags is a bit-or of FD_VINYL_IO_FLAGs. BLOCKING indicates the call
374 : is allowed to block the caller. Returns FD_VINYL_SUCCESS (0) on
375 : success and a FD_VINYL_ERR_AGAIN (negative) if the call would block
376 : the caller (only possible for a non-blocking call). */
377 : /* FIXME: consider allowing new user info to be passed? */
378 :
379 : static inline int
380 : fd_vinyl_io_sync( fd_vinyl_io_t * io,
381 748320 : int flags ) {
382 748320 : return io->impl->sync( io, flags );
383 748320 : }
384 :
385 : /* fd_vinyl_io_fini tears down io, returning the memory region used to
386 : hold the I/O implementation state. Implicitly completes any
387 : in-progress reads and cancels any in-progress appends (and thus can
388 : block the caller).
389 :
390 : IMPORTANT SAFETY TIP! This does _not_ sync the bstream first (e.g.
391 : if an application is tearing down due to an anomalous condition, it
392 : may not want to sync on fini so that it can recover from a known good
393 : point). */
394 :
395 : void *
396 : fd_vinyl_io_fini( fd_vinyl_io_t * io );
397 :
398 : /* Helpers ************************************************************/
399 :
400 : /* fd_vinyl_io_spad_est() returns an estimate of the smallest scratch pad
401 : size required by most applications. Specifically, this returns:
402 :
403 : 2 pair_sz( LZ4_COMPRESSBOUND( VAL_MAX ) )
404 :
405 : so that it is possible to load an object footprint into the scratch
406 : pad and then have a worst case scratch memory for compression to
407 : re-encode the object. */
408 :
409 : FD_FN_CONST ulong fd_vinyl_io_spad_est( void );
410 :
411 : /* fd_vinyl_io_append_* are helper functions that start appending the
412 : given info, appropriately formatted and hashed, to io's bstream.
413 : There are no excess requirements for alignment. They do no input
414 : argument checking. On return, io retains no interest in the given
415 : info (that is, they use io's scratch memory and thus can trigger an
416 : io commit to move blocks from the bstream's present to the bstream's
417 : past if there isn't enough scratch pad free). They return the
418 : bstream sequence number where the data is being appended. They
419 : cannot fail from the caller's perspective (they will FD_LOG_CRIT if
420 : anything goes awry). */
421 :
422 : ulong
423 : fd_vinyl_io_append_pair_raw( fd_vinyl_io_t * io,
424 : fd_vinyl_key_t const * key, /* pair key */
425 : fd_vinyl_info_t const * info, /* pair info */
426 : void const * val ); /* contains info->val_sz bytes, in [0,FD_VINYL_VAL_MAX] */
427 :
428 : ulong
429 : fd_vinyl_io_append_dead( fd_vinyl_io_t * io,
430 : fd_vinyl_bstream_phdr_t const * phdr, /* pair header of erased pair */
431 : void const * info, /* contains info_sz bytes, info_sz treated as 0 if NULL */
432 : ulong info_sz ); /* in [0,FD_VINYL_BSTREAM_DEAD_INFO_MAX] */
433 :
434 : ulong
435 : fd_vinyl_io_append_move( fd_vinyl_io_t * io,
436 : fd_vinyl_bstream_phdr_t const * src, /* pair header of src pair */
437 : fd_vinyl_key_t const * dst, /* src pair getting renamed to dst or is replacing dst */
438 : void const * info, /* contains info_sz bytes, info_sz treated as 0 if NULL */
439 : ulong info_sz ); /* in [0,FD_VINYL_BSTREAM_MOVE_INFO_MAX] */
440 :
441 : ulong
442 : fd_vinyl_io_append_part( fd_vinyl_io_t * io,
443 : ulong seq_prev, /* should be a part before seq or seq */
444 : ulong dead_cnt, /* number of dead blocks in the partition */
445 : ulong move_cnt, /* number of move blocks in the partition */
446 : void const * info, /* contains info_sz bytes, info_sz treated as 0 if NULL */
447 : ulong info_sz ); /* in [0,FD_VINYL_BSTREAM_PART_INFO_MAX] */
448 :
449 : /* fd_vinyl_io_append_pair_inplace appends the style RAW pair at phdr
450 : to the bstream. This will preferentially append the pair in the
451 : given style. Returns the location where the pair was appended. On
452 : return, *_style holds the actual style used and *_val_esz contains
453 : the pair encoded value byte size.
454 :
455 : Note that if the requested style is RAW or if the pair could not be
456 : usefully encoded in the requested style (e.g. the compressed size
457 : ended up larger than the uncompressed size), this will append from
458 : phdr in-place zero copy. When appending a pair in-place, this will
459 : clear the zero padding region and insert the appropriate data
460 : integrity footers at the end of the pair. In other cases, this will
461 : append from the io append scratch memory the encoded pair and the
462 : pair will be untouched.
463 :
464 : As such, the caller should assume the io has a read interest on the
465 : pair's header region and value region and a write interest on the
466 : pair zero padding region and footer region until the next append or
467 : commit and the pair's zero padding and footer regions may be
468 : clobbered by this call. */
469 :
470 : ulong
471 : fd_vinyl_io_append_pair_inplace( fd_vinyl_io_t * io,
472 : int style,
473 : fd_vinyl_bstream_phdr_t * phdr,
474 : int * _style,
475 : ulong * _val_esz );
476 :
477 : /* fd_vinyl_io_bd *****************************************************/
478 :
479 : /* fd_vinyl_io_bd_{align,footprint} specify the alignment and footprint
480 : needed for a bstream stored on a block device / large file with a
481 : spad_max append scratch pad. align will be a reasonable power-of-2
482 : and footprint will be a multiple of align. Returns 0 for an invalid
483 : spad_max.
484 :
485 : fd_vinyl_io_bd_init starts using a file as a bstream store. lmem
486 : points to a local memory region with suitable alignment and footprint
487 : to hold the bstream's state. spad_max gives the size of the append
488 : scratch pad (should be a FD_VINYL_BSTREAM_BLOCK_SZ multiple). dev_fd
489 : is a file descriptor for the block device / large file. The file
490 : should already exist and be sized to the appropriate capacity.
491 :
492 : FIXME: allow user to specify a subrange of dev_fd to use for the
493 : store?
494 :
495 : If reset is non-zero, ignores any existing file contents and will
496 : start a new bstream. The bstream metadata user info will be set to
497 : the info_sz bytes at info and the bstream will use io_seed for its
498 : data integrity hashing seed.
499 :
500 : Otherwise, this will attempt to resume at the point the bstream was
501 : last synchronized. info, info_sz and io_seed will be ignored.
502 :
503 : IMPORTANT SAFETY TIP! The io_seed is not the same thing as the meta
504 : seed. The io_seed is a property of the bstream (with a lifetime of
505 : the bstream and is shared among all users of the bstream). The meta
506 : seed is a property of the meta (and ideally uniquely and randomly set
507 : per vinyl tile run).
508 :
509 : Returns a handle to the bstream on success (has ownership of lmem and
510 : dev_fd, ownership returned on fini) and NULL on failure (logs
511 : details, no ownership changed). Retains no interest in info. */
512 :
513 : ulong fd_vinyl_io_bd_align ( void );
514 : ulong fd_vinyl_io_bd_footprint( ulong spad_max );
515 :
516 : fd_vinyl_io_t *
517 : fd_vinyl_io_bd_init( void * lmem,
518 : ulong spad_max,
519 : int dev_fd,
520 : int reset,
521 : void const * info,
522 : ulong info_sz,
523 : ulong io_seed );
524 :
525 : /* fd_vinyl_io_mm *****************************************************/
526 :
527 : /* fd_vinyl_io_mm_* is the same as fd_vinyl_io_bd_* but uses dev_sz byte
528 : sized memory region dev as the "block device". The result is
529 : bit-level identical to fd_vinyl_io_bd (and vice versa). This is
530 : primarily for testing purposes but, as dev could also be a memory
531 : mapped file / block device, this could be useful in general
532 : (especially for concurrent read access, e.g. parallel recovery).
533 : Note that "sync" only guarantees appends to the dev memory region
534 : happened. If the memory region is backed by a file, when the actual
535 : blocks are written to the physical storage is controlled by the
536 : kernel / driver / physical device (it is up to the caller of sync to
537 : do any additional context specific control here). */
538 :
539 : ulong fd_vinyl_io_mm_align ( void );
540 : ulong fd_vinyl_io_mm_footprint( ulong spad_max );
541 :
542 : fd_vinyl_io_t *
543 : fd_vinyl_io_mm_init( void * lmem,
544 : ulong spad_max,
545 : void * dev,
546 : ulong dev_sz,
547 : int reset,
548 : void const * info,
549 : ulong info_sz,
550 : ulong io_seed );
551 :
552 : /* fd_vinyl_{mmio,mmio_sz} return {a pointer in the caller's address
553 : space to the raw bstream storage,the raw bstream storage byte size}.
554 : These are a _subset_ of the dev / dev_sz region passed to mm_init and
555 : these will be FD_VINYL_BSTREAM_BLOCK_SZ aligned. If a byte seq is in
556 : the store, it will be at mmio[ seq % mmio_sz ]. Note that mmio_sz is
557 : not necessarily a power of two. Note also that the bstream's past is
558 : guaranteed to be in the store. The lifetime of the returned region
559 : is the lifetime of the io. Returns NULL and 0 if io does not support
560 : memory mapped io. These exist to support thread parallel recovery. */
561 :
562 : void * fd_vinyl_mmio ( fd_vinyl_io_t * io );
563 : ulong fd_vinyl_mmio_sz( fd_vinyl_io_t * io );
564 :
565 : FD_PROTOTYPES_END
566 :
567 : #endif /* HEADER_fd_src_vinyl_io_fd_vinyl_io_h */
|