Line data Source code
1 : /* fd_vinyl_io_wd.h is a vinyl_io driver that does async O_DIRECT writes 2 : via the snapwr tile. Implements a fast way to create a bstream on 3 : Linux (DMA under the hood). 4 : 5 : Internally manages a pool of DMA/LBA friendly blocks (i.e. 4 KiB 6 : aligned, O(16 MiB) size). Blocks have either state IDLE, APPEND 7 : (currently being written to), or IOWAIT (waiting for snapwr 8 : completion). */ 9 : 10 : #include "../../../vinyl/io/fd_vinyl_io.h" 11 : 12 : /* wd_buf describes an O_DIRECT append buf */ 13 : 14 : struct wd_buf; 15 : typedef struct wd_buf wd_buf_t; 16 : 17 : struct wd_buf { 18 : uchar * buf; /* pointer into dcache */ 19 : uint state; /* WD_BUF_* */ 20 : wd_buf_t * next; /* next ele in linked list */ 21 : ulong io_seq; /* mcache request sequence number */ 22 : ulong bstream_seq; /* APPEND=bstream seq of first block */ 23 : /* IOWAIT=bstream seq after buffer is fully written */ 24 : }; 25 : 26 : /* WD_BUF_* give append buf states */ 27 : 28 0 : #define WD_BUF_IDLE 1U 29 0 : #define WD_BUF_APPEND 2U 30 0 : #define WD_BUF_IOWAIT 3U 31 : 32 : #define WD_WR_FSEQ_CNT_MAX (32UL) 33 : 34 : /* fd_vinyl_io_wd implements the fd_vinyl_io_t interface */ 35 : 36 : struct fd_vinyl_io_wd { 37 : fd_vinyl_io_t base[1]; 38 : ulong dev_base; 39 : ulong dev_sz; /* Block store byte size (BLOCK_SZ multiple) */ 40 : 41 : /* Buffer linked lists by state */ 42 : wd_buf_t * buf_idle; /* free stack */ 43 : wd_buf_t * buf_append; /* current wip block */ 44 : wd_buf_t * buf_iowait_head; /* least recently enqueued (seq increasing) */ 45 : wd_buf_t * buf_iowait_tail; /* most recently enqueued */ 46 : 47 : /* Work queue (snapwr) */ 48 : fd_frag_meta_t * wr_mcache; /* metadata ring */ 49 : ulong wr_seq; /* next metadata seq no */ 50 : ulong wr_seqack; /* next expected ACK seq */ 51 : ulong wr_depth; /* metadata ring depth */ 52 : uchar * wr_base; /* base pointer for data cache */ 53 : uchar * wr_chunk0; /* [wr_chunk0,wr_chunk1) is the data cache data region */ 54 : uchar * wr_chunk1; 55 : ulong const * wr_fseq[WD_WR_FSEQ_CNT_MAX]; /* completion notifications */ 56 : ulong wr_fseq_cnt;/* completion notifications count */ 57 : ulong wr_mtu; /* max block byte size */ 58 : }; 59 : 60 : typedef struct fd_vinyl_io_wd fd_vinyl_io_wd_t; 61 : 62 : 63 : /* fd_vinyl_io_wd_{align,footprint} specify the alignment and footprint 64 : needed for a bstream O_DIRECT writer with block_depth max blocks 65 : inflight. align will be a reasonable power-of-2 and footprint will 66 : be a multiple of align. Returns 0 for an invalid block_depth. */ 67 : 68 : ulong 69 : fd_vinyl_io_wd_align( void ); 70 : 71 : ulong 72 : fd_vinyl_io_wd_footprint( ulong block_depth ); 73 : 74 : /* fd_vinyl_io_wd_init creates a bstream fast append backend. lmem 75 : points to a local memory region with suitable alignment and footprint 76 : to hold bstream's state. io_seed is the bstream's data integrity 77 : hashing seed. 78 : 79 : block_queue is an mcache (request queue) used to submit write 80 : requests to a snapwr. fd_mcache_depth(block_queue)==block_depth. 81 : block_dcache is a dcache (data cache) sized to block_depth*block_mtu 82 : data_sz. block_mtu is a multiple of FD_VINYL_BSTREAM_BLOCK_SZ and 83 : determines the largest O_DIRECT write operation (typically between 2 84 : to 64 MiB). block_fseq points to the snapwr tile's fseq(s) (used 85 : to report write completions). 86 : 87 : Returns a handle to the bstream on success (has ownership of lmem and 88 : dev_fd, ownership returned on fini) and NULL on failure (logs 89 : details, no ownership changed). */ 90 : 91 : fd_vinyl_io_t * 92 : fd_vinyl_io_wd_init( void * lmem, 93 : ulong dev_sz, 94 : ulong io_seed, 95 : fd_frag_meta_t * block_mcache, 96 : uchar * block_dcache, 97 : ulong const ** block_fseq, 98 : ulong block_fseq_cnt, 99 : ulong block_mtu ); 100 : 101 : /* API restrictions: 102 : 103 : - Any method is unsupported (crash application if called) unless 104 : otherwise specified 105 : - Supported methods: append, commit, alloc, fini 106 : - In-place append not supported. All appends must use a buffer 107 : sourced from alloc as the input buffer. 108 : - append, commit, alloc require FD_VINYL_IO_FLAG_BLOCKING to be unset */ 109 : 110 : extern fd_vinyl_io_impl_t fd_vinyl_io_wd_impl; 111 : 112 : /* fd_vinyl_io_wd_alloc implements fd_vinyl_io_alloc. */ 113 : 114 : void * 115 : fd_vinyl_io_wd_alloc( fd_vinyl_io_t * io, 116 : ulong sz, 117 : int flags ); 118 : 119 : /* fd_vinyl_io_wd_busy returns 1 if there is at least one buffer in use 120 : (either APPEND or IOWAIT state). Returns 0 if all buffers are IDLE. */ 121 : 122 : int 123 : fd_vinyl_io_wd_busy( fd_vinyl_io_t * io ); 124 : 125 : /* fd_vinyl_io_wd_ctrl sends a control message to the snapwr tile. 126 : Blocks until the message is acknowledged. */ 127 : 128 : void 129 : fd_vinyl_io_wd_ctrl( fd_vinyl_io_t * io, 130 : ulong ctl, 131 : ulong sig ); 132 : 133 : /* fd_viny_io_wd_alloc_fast is an optimistic version of vinyl_io->alloc. 134 : If it fails (returns NULL), the caller should fall back to calling 135 : fd_vinyl_io_alloc normally. */ 136 : 137 : static inline void * 138 : fd_vinyl_io_wd_alloc_fast( fd_vinyl_io_t * io, 139 0 : ulong sz ) { 140 0 : fd_vinyl_io_wd_t * wd = (fd_vinyl_io_wd_t *)io; /* Note: io must be non-NULL to have even been called */ 141 0 : 142 0 : wd_buf_t * buf = wd->buf_append; 143 0 : if( FD_UNLIKELY( !buf ) ) return NULL; 144 0 : 145 0 : ulong buf_used = wd->base->seq_future - buf->bstream_seq; 146 0 : ulong buf_free = wd->wr_mtu - buf_used; 147 0 : if( FD_UNLIKELY( sz>buf_free ) ) return NULL; 148 0 : 149 0 : return buf->buf + buf_used; 150 0 : }