Line data Source code
1 : /* fd_vinyl_io_wd.h is a vinyl_io driver that does async O_DIRECT writes 2 : via the snapwr tile. Implements a fast way to create a bstream on 3 : Linux (DMA under the hood). 4 : 5 : Internally manages a pool of DMA/LBA friendly blocks (i.e. 4 KiB 6 : aligned, O(16 MiB) size). Blocks have either state IDLE, APPEND 7 : (currently being written to), or IOWAIT (waiting for snapwr 8 : completion). */ 9 : 10 : #include "../../../vinyl/io/fd_vinyl_io.h" 11 : 12 : /* wd_buf describes an O_DIRECT append buf */ 13 : 14 : struct wd_buf; 15 : typedef struct wd_buf wd_buf_t; 16 : 17 : struct wd_buf { 18 : uchar * buf; /* pointer into dcache */ 19 : uint state; /* WD_BUF_* */ 20 : wd_buf_t * next; /* next ele in linked list */ 21 : ulong io_seq; /* mcache request sequence number */ 22 : ulong bstream_seq; /* APPEND=bstream seq of first block */ 23 : /* IOWAIT=bstream seq after buffer is fully written */ 24 : }; 25 : 26 : /* WD_BUF_* give append buf states */ 27 : 28 0 : #define WD_BUF_IDLE 1U 29 0 : #define WD_BUF_APPEND 2U 30 0 : #define WD_BUF_IOWAIT 3U 31 : 32 : /* fd_vinyl_io_wd implements the fd_vinyl_io_t interface */ 33 : 34 : struct fd_vinyl_io_wd { 35 : fd_vinyl_io_t base[1]; 36 : ulong dev_base; 37 : ulong dev_sz; /* Block store byte size (BLOCK_SZ multiple) */ 38 : 39 : /* Buffer linked lists by state */ 40 : wd_buf_t * buf_idle; /* free stack */ 41 : wd_buf_t * buf_append; /* current wip block */ 42 : wd_buf_t * buf_iowait_head; /* least recently enqueued (seq increasing) */ 43 : wd_buf_t * buf_iowait_tail; /* most recently enqueued */ 44 : 45 : /* Work queue (snapwr) */ 46 : fd_frag_meta_t * wr_mcache; /* metadata ring */ 47 : ulong wr_seq; /* next metadata seq no */ 48 : ulong wr_seqack; /* next expected ACK seq */ 49 : ulong wr_depth; /* metadata ring depth */ 50 : uchar * wr_base; /* base pointer for data cache */ 51 : uchar * wr_chunk0; /* [wr_chunk0,wr_chunk1) is the data cache data region */ 52 : uchar * wr_chunk1; 53 : ulong const * wr_fseq; /* completion notifications */ 54 : ulong wr_mtu; /* max block byte size */ 55 : }; 56 : 57 : typedef struct fd_vinyl_io_wd fd_vinyl_io_wd_t; 58 : 59 : 60 : /* fd_vinyl_io_wd_{align,footprint} specify the alignment and footprint 61 : needed for a bstream O_DIRECT writer with block_depth max blocks 62 : inflight. align will be a reasonable power-of-2 and footprint will 63 : be a multiple of align. Returns 0 for an invalid block_depth. */ 64 : 65 : ulong 66 : fd_vinyl_io_wd_align( void ); 67 : 68 : ulong 69 : fd_vinyl_io_wd_footprint( ulong block_depth ); 70 : 71 : /* fd_vinyl_io_wd_init creates a bstream fast append backend. lmem 72 : points to a local memory region with suitable alignment and footprint 73 : to hold bstream's state. io_seed is the bstream's data integrity 74 : hashing seed. 75 : 76 : block_queue is an mcache (request queue) used to submit write 77 : requests to a snapwr. fd_mcache_depth(block_queue)==block_depth. 78 : block_dcache is a dcache (data cache) sized to block_depth*block_mtu 79 : data_sz. block_mtu is a multiple of FD_VINYL_BSTREAM_BLOCK_SZ and 80 : determines the largest O_DIRECT write operation (typically between 2 81 : to 64 MiB). block_fseq points to the snapwr tile's fseq (used to 82 : report write completions). 83 : 84 : Returns a handle to the bstream on success (has ownership of lmem and 85 : dev_fd, ownership returned on fini) and NULL on failure (logs 86 : details, no ownership changed). */ 87 : 88 : fd_vinyl_io_t * 89 : fd_vinyl_io_wd_init( void * lmem, 90 : ulong dev_sz, 91 : ulong io_seed, 92 : fd_frag_meta_t * block_mcache, 93 : uchar * block_dcache, 94 : ulong const * block_fseq, 95 : ulong block_mtu ); 96 : 97 : /* API restrictions: 98 : 99 : - Any method is unsupported (crash application if called) unless 100 : otherwise specified 101 : - Supported methods: append, commit, alloc, fini 102 : - In-place append not supported. All appends must use a buffer 103 : sourced from alloc as the input buffer. 104 : - append, commit, alloc require FD_VINYL_IO_FLAG_BLOCKING to be unset */ 105 : 106 : extern fd_vinyl_io_impl_t fd_vinyl_io_wd_impl; 107 : 108 : /* fd_vinyl_io_wd_alloc implements fd_vinyl_io_alloc. */ 109 : 110 : void * 111 : fd_vinyl_io_wd_alloc( fd_vinyl_io_t * io, 112 : ulong sz, 113 : int flags ); 114 : 115 : /* fd_vinyl_io_wd_busy returns 1 if there is at least one buffer in use 116 : (either APPEND or IOWAIT state). Returns 0 if all buffers are IDLE. */ 117 : 118 : int 119 : fd_vinyl_io_wd_busy( fd_vinyl_io_t * io ); 120 : 121 : /* fd_vinyl_io_wd_ctrl sends a control message to the snapwr tile. 122 : Blocks until the message is acknowledged. */ 123 : 124 : void 125 : fd_vinyl_io_wd_ctrl( fd_vinyl_io_t * io, 126 : ulong ctl, 127 : ulong sig ); 128 : 129 : /* fd_viny_io_wd_alloc_fast is an optimistic version of vinyl_io->alloc. 130 : If it fails (returns NULL), the caller should fall back to calling 131 : fd_vinyl_io_alloc normally. */ 132 : 133 : static inline void * 134 : fd_vinyl_io_wd_alloc_fast( fd_vinyl_io_t * io, 135 0 : ulong sz ) { 136 0 : fd_vinyl_io_wd_t * wd = (fd_vinyl_io_wd_t *)io; /* Note: io must be non-NULL to have even been called */ 137 : 138 0 : wd_buf_t * buf = wd->buf_append; 139 0 : if( FD_UNLIKELY( !buf ) ) return NULL; 140 : 141 0 : ulong buf_used = wd->base->seq_future - buf->bstream_seq; 142 0 : ulong buf_free = wd->wr_mtu - buf_used; 143 0 : if( FD_UNLIKELY( sz>buf_free ) ) return NULL; 144 : 145 0 : return buf->buf + buf_used; 146 0 : }