Line data Source code
1 : #ifndef HEADER_fd_src_vinyl_meta_fd_vinyl_meta_h
2 : #define HEADER_fd_src_vinyl_meta_fd_vinyl_meta_h
3 :
4 : /* The metadata for all current pairs and pairs in the process of being
5 : created is cached in DRAM and can be queried by key lockfree
6 : concurrent in fast O(1) operations via a fd_vinyl_meta_t. The
7 : "current pairs" are the set of pairs that would be obtained by
8 : recovering the key-val state from the bstream's blocks
9 : [seq_past,seq_present).
10 :
11 : This is implemented as a fd_map_slot_para with a some specialized
12 : implementation to optimize various API in normal operation. That is,
13 : in normal operation, a meta cache has a single writer (the vinyl
14 : tile) and multiple concurrent readers (various clients needing to
15 : test if a pair is present, where it is located, how big the val is
16 : and other application specific pair info like timestamps, balances,
17 : expirations, etc).
18 :
19 : During recovery, a meta cache can have multiple concurrent readers
20 : and writers (the individual threads replaying their segments of the
21 : bstream's past).
22 :
23 : Given it is based on fd_map_slot_para, a fd_vinyl_meta_t can be
24 : shared between threads in different processes. Likewise, it can be
25 : used persistent. But, as it can be exactly reconstructed during
26 : recovery from the bstream's past and there are no provisions for
27 : "syncing" the meta cache with the bstream in the face of unexpected
28 : processs interruptions, persistence should only be used for post
29 : mortem debugging. */
30 :
31 : #include "../bstream/fd_vinyl_bstream.h"
32 :
33 : /* Instantiate a library header API for a fd_vinyl_meta_t as a
34 : fd_map_slot_para of mapping of keys to fd_vinyl_meta_ele_t. Note
35 : that, when an element is not locked:
36 :
37 : phdr.ctl will be 0 if a meta element is free. All other element
38 : fields should be ignored if the element is free.
39 :
40 : phdr.ctl will be ULONG_MAX if the mapped version of pair is not in
41 : the bstream's past yet (i.e. the pair is being created). memo,
42 : phdr.key, and line_idx are valid. phdr.info (including the val_sz)
43 : and seq should be ignored.
44 :
45 : Otherwise, phdr _exactly_ mirrors the most recent version of phdr
46 : in the bstream's past. All element fields are valid.
47 :
48 : IMPORTANT SAFETY TIP! meta_seed and bstream_seed are _not_ the same
49 : thing. meta_seed ideally is unique and random for each run but the
50 : bstream_seed should be persistent across all users of the underlying
51 : bstream. */
52 :
53 : struct fd_vinyl_meta_ele {
54 :
55 : ulong memo; /* ==fd_vinyl_key_memo( seed, key ) */
56 : fd_vinyl_bstream_phdr_t phdr;
57 :
58 : /* The below fields are used only by the vinyl tile. Concurrent
59 : readers should ignore them. */
60 :
61 : ulong seq; /* If pair exists at bstream seq_present, bstream seq of the most recent version.
62 : This will be in the bstream's past and there will be no bstream objects concerning phdr.key
63 : at a later seq. */
64 : ulong line_idx; /* If a pair decoded val is cached, vinyl cache line assigned to pair, ULONG_MAX if not. */
65 :
66 : };
67 :
68 : typedef struct fd_vinyl_meta_ele fd_vinyl_meta_ele_t;
69 :
70 : /* Note: When remove needs to move an element to preserve its probe
71 : sequence, if the corresponding pair is in cache, it also needs to
72 : update the vinyl line structure to reflect the new location of the
73 : element. We could do this by stashing the vinyl line / line_cnt into
74 : the meta ctx and incorporating the needed update in MAP_ELE_MOVE
75 : below (its what ctx is for).
76 :
77 : But we don't actually use the remove API currently as we have a
78 : specialized version for single producer use below. And that version
79 : does handle the line updates. So we omit here. The upshot is that:
80 :
81 : IMPORTANT SAFETY TIP: fd_vinyl_meta_remove currently does not update
82 : the line-to-element mapping. So it is not safe to use if there are
83 : items in data cache. (It is currently used in parallel recovery but
84 : then at a time when the data cache is flushed. So it is safe then.) */
85 :
86 : struct fd_vinyl_line;
87 : typedef struct fd_vinyl_line fd_vinyl_line_t;
88 :
89 : #define MAP_NAME fd_vinyl_meta
90 : #define MAP_ELE_T fd_vinyl_meta_ele_t
91 : #define MAP_KEY_T fd_vinyl_key_t
92 : #define MAP_KEY phdr.key
93 7539102 : #define MAP_KEY_EQ(k0,k1) fd_vinyl_key_eq( (k0), (k1) )
94 15037275 : #define MAP_KEY_HASH(key,seed) fd_vinyl_key_memo( (seed), (key) )
95 : #define MAP_MEMOIZE 1
96 : #define MAP_MEMO memo
97 : #define MAP_KEY_EQ_IS_SLOW 1
98 26589687 : #define MAP_ELE_IS_FREE(ctx,ele) (!(ele)->phdr.ctl)
99 1875204 : #define MAP_ELE_FREE(ctx,ele) do { (ele)->phdr.ctl = 0UL; } while(0)
100 171672 : #define MAP_ELE_MOVE(ctx,dst,src) do { fd_vinyl_meta_ele_t * _src = (src); *(dst) = *_src; _src->phdr.ctl = 0UL; } while(0)
101 : #define MAP_IMPL_STYLE 1
102 : #include "../../util/tmpl/fd_map_slot_para.c"
103 :
104 : FD_PROTOTYPES_BEGIN
105 :
106 : /* fd_vinyl_meta_ele_in_use returns 1 if the given ele is in use and 0
107 : otherwise. Assumes ele is valid. If the element is in use,
108 : ele->phdr.key and ele->memo are valid.
109 :
110 : fd_vinyl_meta_ele_in_bstream returns 1 if the given ele is present at
111 : bstream seq_present and 0 otherwise. Assumes ele valid and in use.
112 : If ele is in bstream, ele->phdr exactly mirrors the pair header for
113 : the version of the pair present at seq_present and ele->seq gives the
114 : location in the bstream's past of this version of the pair.
115 :
116 : fd_vinyl_meta_ele_in_cache returns 1 if space has been allocated in
117 : the vinyl's data cache for the pair. Assumes ele is valid and in
118 : use. If ele is in cache, ele->line_idx gives vinyl cache line
119 : assigned to the pair value. */
120 :
121 48670653 : FD_FN_PURE static inline int fd_vinyl_meta_ele_in_use ( fd_vinyl_meta_ele_t const * ele ) { return !!ele->phdr.ctl; }
122 0 : FD_FN_PURE static inline int fd_vinyl_meta_ele_in_bstream( fd_vinyl_meta_ele_t const * ele ) { return ele->phdr.ctl!=ULONG_MAX; }
123 0 : FD_FN_PURE static inline int fd_vinyl_meta_ele_in_cache ( fd_vinyl_meta_ele_t const * ele ) { return ele->line_idx!=ULONG_MAX; }
124 :
125 : /* fd_vinyl_meta_prepare_fast prepares to modify meta element ele_idx
126 : under the assumption the caller is the only active writer and there
127 : are no meta prepares in progress.
128 :
129 : fd_vinyl_meta_publish_fast completes a prepare of meta element
130 : ele_idx, making the modifications visible to concurrent readers.
131 :
132 : fd_vinyl_meta_cancel_fast completes a prepare of meta element
133 : ele_idx, indicating to concurrent readers that no modifications were
134 : made. The caller promises that shared fields of the meta element was
135 : not modified at any point in time during the prepare.
136 :
137 : l is a pointer to the meta lock array, s is the meta lock shift and e
138 : is the meta element index. Does no input argument checking. These
139 : functions cannot fail. */
140 :
141 : static inline void
142 : fd_vinyl_meta_lock_update_fast( ulong * lock,
143 23541318 : long dir ) {
144 23541318 : ulong version = (*lock) + (ulong)dir;
145 23541318 : FD_COMPILER_MFENCE();
146 23541318 : *lock = version;
147 23541318 : FD_COMPILER_MFENCE();
148 23541318 : }
149 :
150 7500717 : #define fd_vinyl_meta_prepare_fast(l,s,e) fd_vinyl_meta_lock_update_fast( (l) + ((e)>>(s)), 1L )
151 3748266 : #define fd_vinyl_meta_publish_fast(l,s,e) fd_vinyl_meta_lock_update_fast( (l) + ((e)>>(s)), 1L )
152 3752451 : #define fd_vinyl_meta_cancel_fast( l,s,e) fd_vinyl_meta_lock_update_fast( (l) + ((e)>>(s)), -1L )
153 :
154 : /* fd_vinyl_meta_query_fast queries a meta for key under the assumption
155 : the caller is the only active meta writer and there are no meta
156 : prepares in progress. Does no input arg checking. On success,
157 : returns FD_VINYL_SUCCESS (0) (key was found, on return *_ele_idx
158 : gives key's current meta element index). On failure, returns a
159 : FD_VINYL_ERR code (negative). Reasons for failure include KEY (key
160 : was not found, on return *_ele_idx gives the meta element index
161 : suitable for storing pair metadata if the meta is not already at
162 : capacity). Will FD_LOG_CRIT if anything wonky is detected.
163 :
164 : IMPORTANT SAFETY TIP! In general, meta element ele_idx should not be
165 : modified by the writer unless it is protected by a prepare.
166 : Inserting without doing a prepare is fine so long as phdr.ctl becomes
167 : visible last. */
168 :
169 : FD_FN_PURE int
170 : fd_vinyl_meta_query_fast( fd_vinyl_meta_ele_t const * ele0, /* indexed [0,ele_max) */
171 : ulong ele_max, /* power of 2 */
172 : fd_vinyl_key_t const * key, /* key to query */
173 : ulong memo, /* == fd_vinyl_key_memo( seed, key ) */
174 : ulong * _ele_idx ); /* will be in [0,ele_max) on return */
175 :
176 : /* fd_vinyl_meta_remove_fast removes the key<>metadata mapping at meta
177 : element ele_idx from the meta under the asumption the caller is the
178 : only active writer to the meta and there are no meta prepares in
179 : progress. This cannot fail from the caller's perspective
180 : (FD_LOG_CRIT if meta or line is corruption detected during removal). */
181 :
182 : void
183 : fd_vinyl_meta_remove_fast( fd_vinyl_meta_ele_t * ele0, /* Assumes at least one unoccupied element in the meta */
184 : ulong ele_max, /* Assumes power of 2 (of at least 2 given 1 occupied and 1 hole) */
185 : ulong * lock, /* lock_max == ele_max >> lock_shift */
186 : int lock_shift, /* ==log2 ele_max / lock_cnt where lock_cnt is a power 2 <= ele_max */
187 : fd_vinyl_line_t * line, /* Indexed [0,line_cnt) */
188 : ulong line_cnt, /* In [3,FD_VINYL_LINE_MAX] */
189 : ulong ele_idx ); /* Assumes map element ele_idx is occupied */
190 :
191 : /* fd_vinyl_meta_query is a simplified query for key for concurrent
192 : readers. It handles out try / test and filters out keys that are in
193 : the process of being created. Assumes meta is a current local join,
194 : key and _info are valid for the duration of the call and _info is
195 : a fd_vinyl_info_t compatible memory region.
196 :
197 : Returns FD_VINYL_SUCCESS (0) if pair key existed in the bstream's
198 : seq_present when the bstream was observed during the call. On
199 : return, *_info will be populated with the pair key's info at that
200 : seq_present (including the val byte size).
201 :
202 : Returns FD_VINYL_ERR_KEY (negative) if pair key did not exist in the
203 : bstream's seq_present when the bstream was observed. On return,
204 : *_info will be clobbered. Retains no interest in key or _info.
205 :
206 : IMPORTANT SAFETY TIP! This can block the caller until the query
207 : resolves. Concurrent readers wanting guaranteed non-blocking
208 : behavior in the face of a vinyl tile dying in the middle of updating
209 : conflicting ele meta, wanting to provide hints / extract the key memo
210 : for reuse / etc, should use more general lockfree query API. */
211 :
212 : static inline int
213 : fd_vinyl_meta_query( fd_vinyl_meta_t * meta,
214 : fd_vinyl_key_t const * key,
215 0 : void * _info ) {
216 0 : ulong ctl;
217 :
218 0 : for(;;) {
219 :
220 0 : fd_vinyl_meta_query_t query[1];
221 0 : int err = fd_vinyl_meta_query_try( meta, key, NULL, query, FD_MAP_FLAG_BLOCKING );
222 0 : if( FD_UNLIKELY( err ) ) return err;
223 0 : fd_vinyl_meta_ele_t const * ele = fd_vinyl_meta_query_ele_const( query );
224 :
225 0 : ctl = ele->phdr.ctl;
226 0 : *(fd_vinyl_info_t *)_info = ele->phdr.info;
227 :
228 0 : if( FD_LIKELY( !fd_vinyl_meta_query_test( query ) ) ) break;
229 :
230 0 : FD_SPIN_PAUSE();
231 :
232 0 : }
233 :
234 0 : return fd_int_if( ctl!=ULONG_MAX, FD_MAP_SUCCESS, FD_MAP_ERR_KEY );
235 0 : }
236 :
237 : FD_PROTOTYPES_END
238 :
239 : #endif /* HEADER_fd_src_vinyl_meta_fd_vinyl_meta_h */
|