Line data Source code
1 : /* Generate prototypes, inlines and/or implementations for concurrent
2 : persistent shared maps based on chaining. A map can store a
3 : practically unbounded number of elements. If sized on creation for
4 : the maximum number of mapped elements, typical map operations are a
5 : fast O(1) time and map element overhead is a small O(1) space.
6 : Further, large numbers of composite map operations can be done
7 : concurrently with very low risk of conflicts.
8 :
9 : In the current implementation, each map chain has a version number.
10 : Operations that require changing a chain's connectivity (e.g.
11 : inserting or removing an element from a chain) or modifying an
12 : element managed by that chain, the chain's version number is
13 : increased by one (atomic fetch-and-or based) such that other
14 : potential users of keys managed by that chain detect and react
15 : appropriately to a potentially concurrent conflicting operation in
16 : progress. When an operation completes, the chain version number is
17 : increased by one again to notify other users the operation is no
18 : longer in progress and that the set of keys managed by that chain
19 : and/or values associated with those keys has potentially changed
20 : since the previous version. For example, lockfree queries can
21 : interoperate with this via a zero-copy
22 : try-speculatively-process-then-test pattern similar to that used in
23 : fd_tango for high throughput message processing.
24 :
25 : As such, there can be an arbitrary number of concurrent readers
26 : processing map keys. These readers will not interfere with each
27 : other and will not block any concurrent insert / remove / modify
28 : operations. Insert / remove / modify operations can potentially
29 : block each other. Since there are typically O(1) keys per chain, the
30 : probability of concurrent insert / remove / modify operations
31 : involving different keys blocking each other is small. Further, this
32 : controllable a priori by provisioning the number of chains
33 : appropriately. Concurrent operations on the same key are serialized
34 : (as they necessarily would be any implementation). Since the
35 : operations are HPC implementations, collisions are resolved as fast
36 : as is practical. The upshot is that the map supports massive
37 : concurrency while preserving concurrent operation serializability.
38 :
39 : Version numbers are stored with chain head pointers such that the
40 : cache traffic required for managing chain versioning is covered by
41 : the same cache traffic required for managing chains in a
42 : non-concurrent implementation (e.g. fd_map_chain). Operations do
43 : many internal integrity checking / bounds checking for use in high
44 : reliability applications.
45 :
46 : Lastly, fine grained versioning allows for concurrent execution of
47 : complex operations involving multiple keys simultaneously. This
48 : allows using the map as a concurrent transactional memory and for
49 : serialization of all map elements at a consistent point in time while
50 : minimizing impact on ongoing concurrent operations (e.g. snapshotting
51 : all the elements in the map).
52 :
53 : The main drawback of chain versioning is the extra memory footprint
54 : required for chain metadata storage. The current implementation
55 : supports indexing compression and uses atomic bit field techniques to
56 : minimize this overhead.
57 :
58 : Concurrent operation requires FD_HAS_ATOMIC. This will still work on
59 : platforms without FD_HAS_ATOMIC but concurrent operations will not be
60 : safe.
61 :
62 : In short, if you need a concurrent map, this is a lot better than
63 : protecting a non-concurrent implementation with a global lock. And,
64 : if you don't, it will be comparably performant to a non-concurrent
65 : implementation.
66 :
67 : This generator is designed for ultra tight coupling with pools,
68 : treaps, heaps, lists, other maps, etc. Likewise, a map can be
69 : persisted beyond the lifetime of the creating process, be used
70 : inter-process, relocated in memory, be naively
71 : serialized/deserialized, be moved between hosts, use index
72 : compression for cache and memory bandwidth efficiency, etc.
73 : Concurrency and flexibility are prioritized.
74 :
75 : Typical usage:
76 :
77 : struct myele {
78 : ulong key; // Technically "MAP_KEY_T MAP_KEY" (default is ulong key), managed by mymap when the element is in the mymap
79 : ulong next; // Technically "MAP_IDX_T MAP_NEXT" (default is ulong next), managed by mymap when the element is in the mymap
80 :
81 : ... key and next can be located arbitrarily in the element and
82 : ... can be reused for other purposes when the element is not in a
83 : ... mymap. The mapping of a key to an element in the element
84 : ... store is arbitrary. An element should not be moved /
85 : ... released from the element store while in the mymap.
86 :
87 : };
88 :
89 : typedef struct myele myele_t;
90 :
91 : #define MAP_NAME mymap
92 : #define MAP_ELE_T myele_t
93 : #include "tmpl/fd_map_chain_para.c"
94 :
95 : will declare the following APIs as a header only style library in the
96 : compilation unit:
97 :
98 : // A mymap_t is a stack declaration friendly quasi-opaque local
99 : // object used to hold the state of a local join to a mymap.
100 : // Similarly, a mymap_query_t and a mymap_iter_t hold the local
101 : // state of an ongoing local query and local iteration
102 : // respectively. E.g. it is fine to do mymap_t join[1];" to
103 : // allocate a mymap_t but the contents should not be used directly.
104 :
105 : typedef struct mymap_private mymap_t;
106 : typedef struct mymap_query_private mymap_query_t;
107 : typedef struct mymap_iter_private mymap_iter_t;
108 :
109 : // mymap_ele_max_max returns the maximum element store capacity
110 : // compatible with a mymap.
111 :
112 : ulong mymap_ele_max_max( void );
113 :
114 : // mymap_chain_max returns the maximum number of chains supported
115 : // by a mymap. Will be an integer power-of-two.
116 :
117 : ulong mymap_chain_max( void );
118 :
119 : // mymap_chain_cnt_est returns a reasonable number of chains to use
120 : // for a map that is expected to hold up to ele_max_est elements.
121 : // ele_max_est will be clamped to be in [1,mymap_ele_max_max()] and
122 : // the return value will be an integer power-of-two in
123 : // [1,mymap_chain_max()].
124 :
125 : ulong mymap_chain_cnt_est( ulong ele_max_est );
126 :
127 : // mymap_{align,footprint} returns the alignment and footprint
128 : // needed for a memory region to be used as a mymap. align will be
129 : // an integer power-of-two and footprint will be a multiple of
130 : // align. footprint returns 0 if chain_cnt is not an integer
131 : // power-of-two in [1,mymap_chain_max()].
132 : //
133 : // mymap_new formats a memory region with the required alignment
134 : // and footprint into a mymap. shmem points in the caller's
135 : // address space to the memory region to use. Returns shmem on
136 : // success (mymap has ownership of the memory region) and NULL on
137 : // failure (no changes, logs details). The caller is not joined on
138 : // return. The mymap will be empty with all map chains at version
139 : // 0 (unlocked).
140 : //
141 : // mymap_join joins the caller to an existing mymap. ljoin points
142 : // to a mymap_t compatible memory region in the caller's address
143 : // space, shmap points in the caller's address space to the memory
144 : // region containing the mymap, shele points in the caller's
145 : // address space to mymap's element store and ele_max gives the
146 : // element store's capacity in [0,ele_max_max]. Returns a handle
147 : // to the caller's local join on success (join has ownership of the
148 : // ljoin region) and NULL on failure (no changes, logs details).
149 : //
150 : // mymap_leave leaves a mymap join. join points to a current local
151 : // join. Returns the memory region used for the join on success
152 : // (caller has ownership on return and the caller is no longer
153 : // joined) and NULL on failure (no changes, logs details). Use the
154 : // join accessors before leaving to get shmap, shele and ele_max
155 : // used by the join if needed.
156 : //
157 : // mymap_delete unformats a memory region used as a mymap. Assumes
158 : // shmap points in the caller's address space to a memory region
159 : // containing the mymap and that there are no joins. Returns shmem
160 : // on success (caller has ownership of the memory region, any
161 : // remaining elements still in the mymap are released to the caller
162 : // implicitly) and NULL on failure (no changes, logs details).
163 :
164 : ulong mymap_align ( void );
165 : ulong mymap_footprint( ulong chain_cnt );
166 : void * mymap_new ( void * shmem, ulong chain_cnt, ulong seed );
167 : mymap_t * mymap_join ( void * ljoin, void * shmap, void * shele, ulong ele_max );
168 : void * mymap_leave ( mymap_t * join );
169 : void * mymap_delete ( void * shmap );
170 :
171 : // mymap_{chain_cnt,seed} return the mymap configuration. Assumes
172 : // join is a current local join. The values will be valid for the
173 : // mymap lifetime.
174 :
175 : ulong mymap_chain_cnt( mymap_t const * join );
176 : ulong mymap_seed ( mymap_t const * join );
177 :
178 : // mymap_{shmap,shele,ele_max} return join details. Assumes join
179 : // is a current local join. The values will be valid for the join
180 : // lifetime. mymap_{shmap_const,shele_const} are const correct
181 : // versions.
182 :
183 : void const * mymap_shmap_const( mymap_t const * join );
184 : void const * mymap_shele_const( mymap_t const * join );
185 : ulong mymap_ele_max ( mymap_t const * join );
186 :
187 : void * mymap_shmap( mymap_t * join );
188 : void * mymap_shele( mymap_t * join );
189 :
190 : // mymap_key_{eq,hash} expose the provided MAP_KEY_{EQ,HASH} macros
191 : // as inlines with strict semantics. They assume that the provided
192 : // pointers are in the caller's address space to keys that will not
193 : // be changed during the call. They retain no interest in any keys
194 : // on return.
195 : //
196 : // mymap_key_eq returns 1 if *k0 and *k1 are equal and 0 otherwise.
197 : //
198 : // mymap_key_hash returns the hash of *key using the hash function
199 : // seed. Should ideally be a random mapping from a MAP_KEY_T to a
200 : // ulong but this depends on what the user actually used for
201 : // MAP_KEY_HASH. The seed used by a particular mymap instance can
202 : // be obtained above.
203 :
204 : int mymap_key_eq ( ulong * k0, ulong * k1 );
205 : ulong mymap_key_hash( ulong * key, ulong seed );
206 :
207 : // mymap_backoff does FD_SPIN_PAUSE a random number of times. The
208 : // number of pauses is an approximately uniform IID random number
209 : // in [0,scale/2^16] where scale is in [0,2^32). Specifically, the
210 : // number of pauses is:
211 : //
212 : // floor( scale r / 2^48 )
213 : //
214 : // where r is a non-deterministic 32-bit uniform IID random number.
215 : // Under the hood, r is generated by hashing the user provided seed
216 : // and the least significant 32-bits of the CPU tickcounter.
217 : // Ideally, seed is a 32-bit globally unique identifier for the
218 : // logical thread of execution but this is up to the application to
219 : // specify and rarely matters in practice. This is a useful
220 : // building block for random exponential backoffs.
221 :
222 : void mymap_backoff( ulong scale, ulong seed );
223 :
224 : // mymap_query_memo returns the key_hash of the query associated
225 : // with the query's key to allow users to minimize potentially
226 : // expensive key hash computations in various operations.
227 : //
228 : // mymap_query_ele returns a pointer in the caller's address space
229 : // to the element store element associated with the query or a
230 : // sentinel value. The sentinel value is application dependent and
231 : // thus arbitrary (e.g. not necessarily in the element store,
232 : // including NULL, a local temporary used as a bit bucket, etc).
233 : // Assumes query is valid. The lifetime of the returned pointer
234 : // depends on the query. mymap_query_ele_const is a const correct
235 : // version.
236 :
237 : ulong mymap_query_memo ( mymap_query_t const * query );
238 : myele_t const * mymap_query_ele_const( mymap_query_t const * query );
239 : myele_t * mymap_query_ele ( mymap_query_t * query );
240 :
241 : // mymap_hint hints that the caller plans to do an operation
242 : // involving key soon. Assumes join is a current local join, key
243 : // points to a valid key in the caller's address space for the
244 : // duration of the call and query points to a local scratch to hold
245 : // info about the hint. Retains no interest in key. On return,
246 : // the query memo will be initialized.
247 : //
248 : // flags is a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_USE_HINT
249 : // is set, this will assume that query's memo is already
250 : // initialized for key (e.g. mostly useful for hashless
251 : // prefetching). If FD_MAP_FLAG_PREFETCH_META /
252 : // FD_MAP_FLAG_PREFETCH_DATA is set, this will issue a prefetch for
253 : // key's mymap metadata (i.e. lock version) / the element at the
254 : // start of key's chain if any (i.e. typically the key of interest
255 : // in a well managed map). FD_MAP_FLAG_PREFETCH combines both for
256 : // convenience. This can be used to overlap key access latency
257 : // with unrelated operations. All other flags are ignored.
258 :
259 : void
260 : mymap_hint( MAP_(t) const * join
261 : MAP_KEY_T const * key
262 : MAP_(query_t) * query,
263 : int flags );
264 :
265 : // mymap_insert inserts into a mymap a mapping from a key to an
266 : // element store element. ele points in the caller's address space
267 : // to the element and ele->key is initialized to the key. flags is
268 : // a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set /
269 : // clear in flags, this is allowed / not allowed to block the
270 : // caller. Assumes join is a current local join, element is not in
271 : // the mymap and the key is not currently mapped to anything in the
272 : // mymap. This is a non-blocking fast O(1) and supports highly
273 : // concurrent operation.
274 : //
275 : // Returns FD_MAP_SUCCESS (0) on success and an FD_MAP_ERR
276 : // (negative) on failure. On success, ele was inserted into the
277 : // mymap at some point during the call (mymap took ownership of the
278 : // element at that time but the application is free to manage all
279 : // fields of the element except ele->key, ele->next and, if the
280 : // mymap is memoized, ele->hash ... insert will initialize the hash
281 : // field to mymap_key_hash( key, seed ) and this will be stable
282 : // while ele is in mymap). On failure, the mymap was not modified
283 : // by the call, no changes of ownership occurred in the call and
284 : // returns:
285 : //
286 : // - FD_MAP_ERR_INVAL: ele is not a pointer to an element store
287 : // element.
288 : //
289 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
290 : // progress at some point during the call. Try again later (e.g.
291 : // after a random exponential backoff). Specifically, this
292 : // operation requires locking the map chain associated with key.
293 : // Since there are typically O(1) keys per chain, the probability
294 : // of getting AGAIN due to a false conflict is negligible even
295 : // under highly concurrent loads. Since insert / remove are fast
296 : // O(1) operations, any remaining conflicts, real or imagined,
297 : // are typically very short lived. Never returned for a blocking
298 : // call.
299 : //
300 : // IMPORTANT SAFETY TIP! Do not use inside a modify try/test,
301 : // query try/test, txn try/test or iter lock/unlock.
302 :
303 : int mymap_insert( mymap_t * join, myele_t * ele, int flags );
304 :
305 : // mymap_remove removes the mapping (if any) for key from the
306 : // mymap. On return, query will contain information about the
307 : // removed mapping and memo will be initialized for key. sentinel
308 : // gives the query element pointer value (arbitrary) to pass
309 : // through when this did not remove a mapping for any reason.
310 : // flags is a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING
311 : // is set / clear in flags, this is allowed / not allowed to block
312 : // the caller. If FD_MAP_FLAG_USE_HINT is set, this assumes
313 : // query's memo is already initialized for key. Assumes join is a
314 : // current local join and key is valid for the duration of the
315 : // call. Retains no interest in key, sentinel or query. This is a
316 : // non-blocking fast O(1) and supports highly concurrent operation.
317 : //
318 : // Returns FD_MAP_SUCCESS (0) on success and an FD_MAP_ERR
319 : // (negative) on failure. On success, key's mapping was removed at
320 : // some point during the call. mymap_query_ele( query ) will point
321 : // in the caller's address space to the element store element where
322 : // key mapped just before it was removed (element ownership
323 : // transferred to the caller at that time). On failure, no changes
324 : // were made by this call, mymap_query_ele( query ) will be
325 : // sentinel and:
326 : //
327 : // - FD_MAP_ERR_KEY: Key was not found in the mymap at some point
328 : // during the call.
329 : //
330 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
331 : // progress at some point during the call. Same considerations
332 : // as insert above. Never returned for a blocking call.
333 : //
334 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
335 : // point during the call.
336 : //
337 : // IMPORTANT SAFETY TIP! Do not use inside a modify try/test,
338 : // query try/test, txn try/test or iter lock/unlock.
339 :
340 : int
341 : mymap_remove( mymap_t * join,
342 : ulong const * key,
343 : myele_t const * sentinel,
344 : mymap_query_t * query,
345 : int flags );
346 :
347 : // mymap_modify_try tries to start modification of the mymap
348 : // element corresponding to key. On return, query will hold
349 : // information about the try and memo will be initialized for key.
350 : // sentinel gives the query element pointer value (arbitrary) to
351 : // pass through when it is not safe to try. flags is a bit-or of
352 : // FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set / clear, this
353 : // call is allowed / not allowed to block the caller. If
354 : // FD_MAP_FLAG_USE_HINT is set, this assumes query's memo is
355 : // already initialized for key. If FD_MAP_FLAG_ADAPTIVE is set /
356 : // clear, this call should / should not adapt the mymap to
357 : // accelerate future operations on this key. Adaptation for a key
358 : // can potentially slow future operations for other keys. The
359 : // marginal benefit of adaptation for a key grows linearly with the
360 : // number of keys managed by the key's chain. Assumes join is a
361 : // current local join and key is valid for the duration of the
362 : // call. Retains no interest in key, sentinel or query. This is a
363 : // non-blocking fast O(1) and supports highly concurrent operation.
364 : //
365 : // Returns FD_MAP_SUCCESS (0) on success and an FD_MAP_ERR
366 : // (negative) on failure. On success, mymap_query_ele( query )
367 : // will point in the caller's address space to the element to
368 : // modify and the chain that manages key will be locked. The mymap
369 : // retains ownership of this element and management of the key and
370 : // next fields. The caller is free to modify any other fields. On
371 : // failure, the mymap was not modified by this call,
372 : // mymap_query_ele( query ) will be sentinel and returns:
373 : //
374 : // - FD_MAP_ERR_KEY: Key was not found in the mymap in some point
375 : // during the call.
376 : //
377 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
378 : // progress at some point during the try. Same considerations as
379 : // insert above. Never returned for a blocking call.
380 : //
381 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
382 : // point during the call.
383 : //
384 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a query
385 : // try/test, txn try/test or iter_lock/unlock on the same thread.
386 :
387 : int
388 : mymap_modify_try( mymap_t * join,
389 : ulong const * key,
390 : myele_t * sentinel,
391 : mymap_query_t * query,
392 : int flags );
393 :
394 : // mymap_modify_test finishes an in-progress modification. Assumes
395 : // query is valid and the caller is in a modify try. Returns
396 : // FD_MAP_SUCCESS (0). On return, the caller will no longer be in
397 : // a modify try. Guaranteed to succeed.
398 : //
399 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a query
400 : // try/test, txn try/test or iter_lock/unlock on the same thread.
401 :
402 : int mymap_modify_test( mymap_query_t * query );
403 :
404 : // mymap_query_try tries to speculatively query a mymap for key.
405 : // flags is a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_USE_HINT
406 : // is set, this assumes query's memo is already initialized for
407 : // key. On return, query will hold information about the try and
408 : // memo will be initialized for key. sentinel gives the query
409 : // element pointer value (arbitrary) to pass through when it is not
410 : // safe to try the query. Assumes join is a current local join and
411 : // key is valid for the duration of the call. Does not modify the
412 : // mymap and retains no interest in key, sentinel or query. This
413 : // is a non-blocking fast O(1) and supports highly concurrent
414 : // operation.
415 : //
416 : // Returns FD_MAP_SUCCESS (0) on success and an FD_MAP_ERR
417 : // (negative) on failure. On success, key mapped to an element in
418 : // the element store at some point during the call.
419 : // mymap_query_ele( query ) will point in the caller's address
420 : // space to the element store element where key mapped at that
421 : // time. The mymap retains ownership of this element but the
422 : // caller can zero copy speculatively process the element. On
423 : // failure, mymap_query_ele( query ) will be sentinel and returns:
424 : //
425 : // - FD_MAP_ERR_KEY: Key was not found in the mymap in some point
426 : // during the call.
427 : //
428 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
429 : // progress at some point during the call. Try again later (e.g.
430 : // after a random exponential backoff). Unlike insert and
431 : // remove, this call does _not_ require a lock on the chain
432 : // associated with key. As such, AGAIN can only be caused by
433 : // concurrent operations that require a lock on the chain that
434 : // manages key (with similar considerations as insert and remove)
435 : // and this will never interfere with any other concurrent
436 : // operation. Among the many implications, a query will never
437 : // delay a concurrent query and AGAIN will never be returned if
438 : // only concurrent queries are in progress.
439 : //
440 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
441 : // point during the call.
442 : //
443 : // IMPORTANT SAFETY TIP! THE CALLER SHOULD BE PREPARED TO HANDLE
444 : // ARBITRARY AND/OR INCONSISTENT VALUES FOR ELEMENT FIELDS DURING
445 : // SPECULATIVE PROCESSING. CALLERS SHOULD NOT COMMIT ANY RESULTS
446 : // OF SPECULATIVE PROCESSING UNTIL IT TESTS THE QUERY WAS
447 : // SUCCESSFUL.
448 : //
449 : // The simplest form of speculative processing is to copy the
450 : // element from the element store into a local temporary, test that
451 : // the speculation was valid, and then process the local temporary
452 : // copy at its leisure. Zero copy, more selective copying and/or
453 : // writing speculative results into local tempoaries are more
454 : // advanced examples of speculative processing.
455 : //
456 : // Use mymap_modify to do a blocking (non-speculative) and/or
457 : // adaptive query (just don't actually modify the element).
458 : //
459 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a modify
460 : // try/test, txn try/test or iter_lock/unlock on the same thread.
461 :
462 : int
463 : mymap_query_try( mymap_t const * join,
464 : ulong const * key,
465 : myele_t const * sentinel,
466 : mymap_query_t * query,
467 : int flags );
468 :
469 : // mymap_query_test tests if an in-progress query is still valid.
470 : // Assumes query is valid, we are still in a query try and chain
471 : // version numbers have not wrapped since we started the try.
472 : // Returns FD_MAP_SUCCESS (0) if the query is still valid and
473 : // FD_MAP_ERR_AGAIN (negative) if a potentially conflicting
474 : // operation was in progress at some point during the try.
475 : //
476 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a modify
477 : // try/test, txn try/test or iter_lock/unlock on the same thread.
478 :
479 : int mymap_query_test( mymap_query_t const * query );
480 :
481 : // mymap_txn_key_max_max() returns the theoretical maximum number
482 : // of keys that can be in a transaction. (Practically unbounded.)
483 :
484 : ulong mymap_txn_key_max_max( void );
485 :
486 : // mymap_txn_{align,footprint} return the alignment and footprint
487 : // required for a mymap_txn_t that can support at least key_max
488 : // keys. align will be an integer power of two. footprint will be
489 : // a multiple of align. Returns 0 if key_max > key_max_max.
490 : //
491 : // mymap_txn_init formats a memory region with the required
492 : // alignment and footprint as a txn_t that can support at least
493 : // key_max keys. ltxn points in the caller's address space to the
494 : // memory region to use. Assumes join is a current local join.
495 : // On success, returns ltxn (txn will have ownership of the memory
496 : // region, txn will be valid with empty speculative and locked key
497 : // sets). The lifetime of the join should be at least the lifetime
498 : // of the txn. On failure (obviously bad inputs), returns NULL (no
499 : // changes).
500 : //
501 : // mymap_txn_fini unformats a memory region as a txn_t and returns
502 : // a pointer to the underlying memory. On success, returns ltxn.
503 : // The caller has ownership of the memory region on return. On
504 : // failure (e.g. NULL input), returns NULL (no changes).
505 :
506 : ulong mymap_txn_align ( void );
507 : ulong mymap_txn_footprint( ulong key_max );
508 : mymap_txn_t * mymap_txn_init ( void * ltxn, mymap_t * join, ulong key_max );
509 : void * mymap_txn_fini ( mymap_txn_t * txn );
510 :
511 : // mymap_txn_add indicates that key may be used in a txn. Assumes
512 : // txn is valid and not in a try and key is valid for duration of
513 : // the call. Retains no interest in key. A zero value for lock
514 : // indicates the key will be operated on speculatively. A non-zero
515 : // value indicates the key will potentially be inserted / removed /
516 : // modified by the transaction. It is okay to have a mixture of
517 : // speculative and locked keys in a transaction. Further, it is
518 : // okay to add the same key multiple times (though not particularly
519 : // efficient), including as a mixture of speculative and locked (if
520 : // _any_ adds of same key are locked, it will be treated as a
521 : // locked key for the txn overall). Returns FD_MAP_SUCCESS (zero)
522 : // on success (txn is valid and not in a try) and FD_MAP_ERR_INVAL
523 : // (negative) on failure (txn is valid and not in a try but key was
524 : // not added). INVAL is only possible when more than key_max adds
525 : // have been done since init.
526 :
527 : int mymap_txn_add( mymap_txn_t * txn, mymap_key_t const * key, int lock );
528 :
529 : // mymap_txn_try returns FD_MAP_SUCCESS (zero) if it is safe to try
530 : // the transaction and FD_MAP_ERR_AGAIN (negative) if the user
531 : // should try again later (e.g. after a random exponential
532 : // backoff). flags is a bit-of of FD_MAP_FLAG flags. If
533 : // FD_MAP_FLAG_BLOCKING is set / clear, this call is allowed / not
534 : // allowed to block the caller. The upper 30-bits of flags can be
535 : // used to provide a seed for random backoffs (but this is up to
536 : // the application and rarely matters in practice). Assumes txn is
537 : // valid and not in a try. On success, txn will be valid and in a
538 : // try. On a failure, txn will be valid and not in a try.
539 : //
540 : // IMPORTANT SAFETY TIP! Do not interleave or nest with modify
541 : // try/test, query try/test or iter_lock/unlock on the same thread.
542 : //
543 : // To better under the restrictions on nesting and interleaving,
544 : // mymap_{insert,remove,query_try,modify_try,query_try} will fail
545 : // with FD_MAP_ERR_AGAIN for any key managed by a chain locked by
546 : // the txn but can succeed for keys on managed by other chains.
547 : // This behavior is unpredictable as it depends on the keys in the
548 : // txn, keys not in the transaction, map seed, map chain count and
549 : // user provided key hash function. Interleaving a query_test,
550 : // modify_test, iter_unlock can be similarly unpredictable. Worse,
551 : // an interleaved modify_test or iter_unlock can muck up the chain
552 : // locks and used by the txn try. Similarly for other cases.
553 : //
554 : // IMPORTANT SAFETY TIP! If some txn keys were speculative, the
555 : // caller should not rely on any reads from the corresponding
556 : // element until the transaction tests successfully. Similar
557 : // considerations as mymap_query_try.
558 :
559 : int mymap_txn_try( mymap_txn_t * txn, int flags );
560 :
561 : // mymap_txn_hint behaves _identially_ to mymap_hint but is allowed
562 : // to assume we are in a txn try and key was added to the txn as
563 : // either speculative or locked.
564 : //
565 : // mymap_txn_{insert,remove} behave _identically_ to
566 : // mymap_{insert,remove} from the caller's point of view but
567 : // assumes we are in a txn try and key was added to the txn as
568 : // locked. These will never return FD_MAP_ERR_AGAIN.
569 : //
570 : // Similarly, mymap_txn_query behaves _identically_ to
571 : // mymap_query_try from the caller's point of view but assumes we
572 : // are in a txn try and key was added to txn as either speculative
573 : // or locked. Will never return FD_MAP_ERR_AGAIN.
574 : //
575 : // Likewise, mymap_txn_modify behaves _identically_ to
576 : // mymap_modify_try from the caller's point of view but assumes we
577 : // are in a txn try and key was added to txn as locked. It will
578 : // never return FD_MAP_ERR_AGAIN.
579 : //
580 : // There is no mymap_query_test or mymap_modify_test because these
581 : // are part of the overall txn test.
582 : //
583 : // IMPORTANT SAFETY TIP!
584 : //
585 : // These never should be used outside a txn try.
586 : //
587 : // IMPORTANT SAFETY TIP!
588 : //
589 : // For a speculative txn key, mymap_query can return FD_MAP_ERR_KEY
590 : // and/or FD_MAP_ERR_CORRUPT if there are locked concurrent
591 : // operations on the chain managing key (e.g a concurrent remove of
592 : // a key that happens to be on the same chain). When such
593 : // operations are possible, these errors can be canaries that the
594 : // transaction has already failed (testing the txn is still
595 : // necessary to it "official"). CORRUPT in this situation is most
596 : // likely an "unofficial" failure than memory corruption.
597 : // Similarly, while mymap_txn_query is guaranteed give a pointer to
598 : // an element store element on success, there is no guarantee it
599 : // will be to the correct element, a well formed element (or even
600 : // to the same location if used multiple times in the try). When
601 : // such concurrent operations are not possible (e.g. single
602 : // threaded operation), SUCCESS, KEY, CORRUPT and the element
603 : // pointer returned have their usual interpretations.
604 : //
605 : // TL;DR speculative txn keys are best used for common stable
606 : // constant-ish read-only data to minimize concurrent complex
607 : // transactions using these common keys from unnecessarily blocking
608 : // each other.
609 : //
610 : // TL;DR resolve all speculative txn keys to elements at
611 : // transaction start exactly once for sanity.
612 : //
613 : // TL;DR avoid using speculative txn keys at all unless very well
614 : // versed in lockfree programming idioms and gotchas.
615 :
616 : int mymap_txn_hint ( mymap_t const * join, ulong const * key, mymap_query_t * query, int flags );
617 : int mymap_txn_insert( mymap_t * join, myele_t * ele );
618 : int mymap_txn_remove( mymap_t * join, ulong const * key, myele_t const * sentinel, mymap_query_t * query, int flags );
619 : int mymap_txn_modify( mymap_t * join, ulong const * key, myele_t * sentinel, mymap_query_t * query, int flags );
620 : int mymap_txn_query ( mymap_t const * join, ulong const * key, myele_t const * sentinel, mymap_query_t * query, int flags );
621 :
622 : // mymap_txn_test returns FD_MAP_SUCCESS (zero) if the txn try
623 : // succeeded and FD_MAP_ERR_AGAIN (negative) if it failed (e.g. the
624 : // test detected a potentially conflicting concurrent operation
625 : // during the try). On success, any results from processing of
626 : // keys marked as speculative can be trusted. On failure, the
627 : // mymap was not changed by the try. Regardless of return value,
628 : // txn will _not_ be in a try on return _but_ will still be valid.
629 : // As such, if a transaction fails, it can be retried (e.g. after a
630 : // random exponential backoff) without needing to recreate it (e.g.
631 : // no need to fini then init/add again). Assumes txn is in a try
632 : // and, for any txn speculative keys, no wrapping of txn version
633 : // numbers has occurred since the try started.
634 : //
635 : // IMPORTANT SAFETY TIP! This is guaranteed to succeed if no keys
636 : // were added to the transaction as speculative.
637 : //
638 : // IMPORTANT SAFETY TIP! Do not interleave or nest with modify
639 : // try/test, query try/test or iter_lock/unlock on the same thread.
640 :
641 : int mymap_txn_test( mymap_txn_t * txn );
642 :
643 : // mymap_iter_lock locks zero or more map chains. Assumes join is
644 : // a current local join. On input, lock_seq[i] for i in
645 : // [0,lock_cnt) gives the set of chains to lock. flags is a bit-or
646 : // of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set / not set,
647 : // this call is allowed / not allowed to block the caller. Assumes
648 : // join, lock_seq and lock_cnt are valid and the caller does not
649 : // already have any of these locks. The upper 30-bits of flags
650 : // can be used to provide a seed for random backoffs (but this is
651 : // up to the application and rarely necessary). In particular,
652 : // lock_seq should contain unique values in [0,chain_cnt), which
653 : // also implies lock_cnt is at most chain_cnt. Retains no interest
654 : // in lock_seq. Returns FD_MAP_SUCCESS (zero) on success and
655 : // FD_MAP_ERR_AGAIN (negative) on failure. On return:
656 : //
657 : // FD_MAP_SUCCESS: lock_seq will be a permutation of the input
658 : // giving the actual order (from oldest to newest) in which the
659 : // locks were acquired. This can be used, for example, to unlock
660 : // in the same order and can be used by the caller to optimize
661 : // the order for iterating over keys to reduce the amount of
662 : // contention with other concurrent operations. If there were no
663 : // potentially conflicting concurrent operations during the call,
664 : // lock_seq will be in the input order.
665 : //
666 : // FD_MAP_ERR_AGAIN: a potentially conflicting operation was in
667 : // progress at some point during the call. lock_seq might have
668 : // been changed (but will still be a permutation of the input).
669 : // The mymap itself wasn't changed by the call.
670 : //
671 : // FD_MAP_ERR_INVAL: join, lock_seq and/or lock_cnt were
672 : // obviously invalid. Same considerations as FD_MAP_ERR_AGAIN.
673 : //
674 : // Guaranteed to succeed if blocking (but will not return to the
675 : // caller until all the requested chains are locked).
676 : //
677 : // IMPORTANT SAFETY TIP! Do not use interleave or nest with modify
678 : // try/test, query try/test or txn try/test on the same thread.
679 :
680 : int
681 : mymap_iter_lock( mymap_t * join,
682 : ulong * lock_seq,
683 : ulong lock_cnt,
684 : int flags );
685 :
686 : // mymap_iter_unlock unlocks chains lock_seq[i] for i in
687 : // [0,lock_cnt) in that order. Assumes join is a current local
688 : // join, lock_seq and lock_cnt are valid (same requirements as
689 : // mymap_iter_lock) and the caller has a lock on those chains.
690 : // Retains no interest in lock_seq. Guaranteed to succeed.
691 : //
692 : // IMPORTANT SAFETY TIP! Do not use interleave or nest with modify
693 : // try/test, query try/test or txn try/test on the same thread.
694 :
695 : void
696 : mymap_iter_unlock( mymap_t * join,
697 : ulong const * lock_seq,
698 : ulong lock_cnt );
699 :
700 : // mymap_iter_chain_idx returns the index of the map chain that
701 : // manages key. Useful for iterating over groups of related keys
702 : // when the map hash function is designed to group all related keys
703 : // onto the same chain.
704 :
705 : ulong
706 : mymap_iter_chain_idx( mymap_t const * join,
707 : ulong const * key );
708 :
709 : // mymap_{iter,iter_done,iter_next,iter_ele,iter_ele_const} iterate
710 : // over a single map chain. Assumes join is a current local join,
711 : // chain_idx is in [0,mymap_chain_cnt(join)) and the caller lock on
712 : // chain idx or the chain is otherwise known to be idle.
713 : //
714 : // These are building blocks for concurrent parallel iteration. As
715 : // the locking and ordering requirements for such an iterator are
716 : // very application specific, no default global iterators are
717 : // provided (i.e. a generic global iterator will need to be so
718 : // conservative on locking than typical application requirements,
719 : // it is practically more mischievious than useful). E.g. a mymap
720 : // snapshot might lock all chains to get the state of the entire
721 : // mymap at a consistent point in time. For each chain (in the
722 : // order given by the lock acquisition), the snapshot would
723 : // serialize all keys on that chain and then unlock it
724 : // incrementally.
725 :
726 : mymap_iter_t mymap_iter ( mymap_t const * join, ulong chain_idx );
727 : mymap_iter_t mymap_iter_done ( mymap_iter_t iter );
728 : mymap_iter_t mymap_iter_next ( mymap_iter_t iter );
729 : myele_t const * mymap_iter_ele_const( mymap_iter_t iter );
730 : myele_t * mymap_iter_ele ( mymap_iter_t iter );
731 :
732 : // mymap_reset removes all elements from the mymap. Caller has
733 : // ownership of all items removed on return. Assumes that join is
734 : // a current local join and the caller has a lock on all map chains
735 : // or the map is otherwise known to be idle.
736 :
737 : void mymap_reset( mymap_t * join );
738 :
739 : // mymap_verify returns FD_MAP_SUCCESS (0) if the join, underlying
740 : // map and underlying element store give a valid mapping of unique
741 : // keys to unique elements in the element store. Assumes that
742 : // caller has a lock on all map chains or the map is otherwise
743 : // known to be idle. Returns FD_MAP_ERR_INVAL (negative) otherwise
744 : // (no changes by this call, logs details).
745 :
746 : int mymap_verify( mymap_t const * join );
747 :
748 : // mymap_strerror converts an FD_MAP_SUCCESS / FD_MAP_ERR code into
749 : // a human readable cstr. The lifetime of the returned pointer is
750 : // infinite. The returned pointer is always to a non-NULL cstr.
751 :
752 : char const * mymap_strerror( int err );
753 :
754 : Do this as often as desired in a compilation unit to get different
755 : types of concurrent maps. Options exist for generating library
756 : header prototypes and/or library implementations for concurrent maps
757 : usable across multiple compilation units. Additional options exist
758 : to use index compression, different hashing functions, key comparison
759 : functions, etc as detailed below.
760 :
761 : To better understand the insert/remove/{modify,query}_{try,test}
762 : APIs:
763 :
764 : ... basic insert
765 :
766 : myele_t * ele = ... acquire an unused element from the element store
767 :
768 : ... populate ele appropriately, including
769 :
770 : ele->key = ... key associated with this element
771 :
772 : int err = mymap_insert( join, ele, FD_MAP_FLAG_BLOCKING );
773 :
774 : if( FD_UNLIKELY( err ) ) { // Not possible in this example
775 :
776 : ... If err is FD_MAP_ERR_INVAL, ele did not point at an element
777 : ... store element.
778 : ...
779 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
780 : ... conflicting operation in progress on the mymap during the
781 : ... call. We can try again later (e.g. after a random backoff or
782 : ... doing other non-conflicting work).
783 :
784 : } else {
785 :
786 : ... At this point, a mapping from key to the element store
787 : ... element pointed to by ele in our address space was added
788 : ... during the call. ele->key will be stable while in the mymap.
789 : ... Neither ele->key nor ele->next should be modified by the
790 : ... application while in the mymap. The application is free to
791 : ... manage all other fields of the element as desired.
792 :
793 : }
794 :
795 : ... basic remove
796 :
797 : ulong key = ... key to remove
798 :
799 : mymap_query_t query[1];
800 : int err = mymap_remove( join, &key, NULL, query, FD_MAP_FLAG_BLOCKING );
801 : mymap_ele_t * ele = mymap_query_ele( query );
802 :
803 : if( FD_UNLIKELY( err ) ) {
804 :
805 : ... At this point, ele==sentinel==NULL.
806 : ...
807 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
808 : ... point during the remove.
809 : ...
810 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
811 : ... conflicting operation in progress during the remove. We can
812 : ... try again later (e.g. after a random backoff or doing other
813 : ... non-conflicting work). (Not possible in this example.)
814 : ...
815 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
816 : ... at some point during the call. (Usually abortive.)
817 :
818 : } else {
819 :
820 : ... At this point, ele points into the element store (non-NULL),
821 : ... ele->key matches key, key mapped to that element before the
822 : ... remove, and we have ownership of that element.
823 :
824 : ... release ele to the element store
825 :
826 : }
827 :
828 : ... basic modify
829 :
830 : ulong key = ... key to modify
831 :
832 : mymap_query_t query[1];
833 : int err = mymap_modify_try( join, &key, NULL, query, FD_MAP_FLAG_BLOCKING );
834 : mymap_ele_t * ele = mymap_query_ele( query );
835 :
836 : if( FD_UNLIKELY( err ) ) {
837 :
838 : ... At this point, ele==sentinel==NULL.
839 : ...
840 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
841 : ... point during the try.
842 : ...
843 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
844 : ... conflicting operation in progress during the try. We can try
845 : ... again later (e.g. after a random backoff or doing other
846 : ... non-conflicting work). (Not possible in this example.)
847 : ...
848 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
849 : ... at some point during the call. (Usually abortive.)
850 :
851 : } else {
852 :
853 : ... At this point, ele points in our address space to an element
854 : ... store element, ele->key matches key and we are in a modify try
855 : ... such that it is safe to modify fields ele not managed by the
856 : ... mymap.
857 :
858 : ... Modify application managed fields of ele here.
859 :
860 : ... IMPORTANT SAFETY TIP! IF THE USER WANTS TO SUPPORT ROLLING
861 : ... BACK A MODIFICATION AT THIS POINT, THEY CAN DO SO BY SAVING
862 : ... THE ORIGINAL VALUE OF ELE BEFORE MODIFYING ANY FIELDS AND
863 : ... THEN RESTORING IT HERE.
864 :
865 : ... Finish the modification (guaranteed to succeed)
866 :
867 : mymap_modify_test( query );
868 :
869 : ... At this point, the modification is done and we are no
870 : ... longer in a try.
871 :
872 : }
873 :
874 : ... basic speculative query
875 :
876 : ulong key = ... key to query
877 :
878 : mymap_query_t query[1];
879 : int err = mymap_query_try( join, &key, NULL, query, 0 );
880 : mymap_ele_t const * ele = mymap_query_ele_const( query );
881 :
882 : if( FD_UNLIKELY( err ) ) {
883 :
884 : ... At this point, ele==sentinel==NULL.
885 : ...
886 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
887 : ... point during the try.
888 : ...
889 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
890 : ... conflicting operation in progress during the try and we can
891 : ... try again later (e.g. after a random backoff or doing other
892 : ... non-conflicting work).
893 : ...
894 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
895 : ... during the call. (Usually abortive.)
896 :
897 : } else {
898 :
899 : ... At this point, ele points in our address space to an element
900 : ... in the element store (non-NULL) and ele->key matched key at
901 : ... some point during the try.
902 :
903 : ... Speculatively process ele here.
904 : ...
905 : ... DO NOT TRUST ANY RESULTS OF THIS SPECULATIVE PROCESSING YET.
906 : ... THERE IS NO GUARANTEE YET THAT A CONCURRENT USER HASN'T
907 : ... CHANGED THE MYMAP IN A WAY THAT COULD YIELD ARBITRARY AND
908 : ... INCONSISTENT RESULTS.
909 : ...
910 : ... The simplest and most common form of speculative processing
911 : ... is to copy the needed portions of ele into a local stack
912 : ... temp.
913 : ...
914 : ... Note: concurrent operations could include removing key from
915 : ... the mymap (and maybe multiple cycles of inserting and
916 : ... removing it and then at potentially different element store
917 : ... locations). That's not an issue practically as the ele
918 : ... pointer here will be to an element compatible memory region
919 : ... that will continue to exist regardless and we shouldn't be
920 : ... trusting any query reads yet (the query test will detect if
921 : ... if these can be trusted).
922 : ...
923 : ... Rant: If ele is more complex than plain-old-data, so long ele
924 : ... is using allocators like fd_alloc and fd_wksp for dynamically
925 : ... allocated fields (e.g. not using the giant steaming pile of
926 : ... page based memory virtual memory, operating system, language
927 : ... and standard library fail that is heap based allocation ala
928 : ... malloc/free), concurrent removes are _still_ fine for the
929 : ... exact same reason. That is, the memory that actually backed
930 : ... dynamically allocated fields will still continue to exist
931 : ... post remove ... you know ... just like reality (turns out,
932 : ... surprise, "free" doesn't actually uninstall any DIMMs and
933 : ... malloc/free are the worst possible abstraction for resource
934 : ... management).
935 : ...
936 : ... The concurrent remove case actually demonstrates why fd_alloc
937 : ... / fd_wksp / fd_shmem / etc exist in the first place. Beyond
938 : ... being faster, simpler, more concurrent and more reliable
939 : ... (especially in cases like this), they are more flexible (e.g.
940 : ... sharing and persisting the data structure asynchronously
941 : ... across multiple processes in different address spaces) and
942 : ... more secure (e.g. can easily bounds check memory accesses
943 : ... and then use the memory subsystem to sandbox different
944 : ... components from touching memory they shouldn't, actually
945 : ... using a modern virtual memory subsystem for something useful
946 : ... for a change instead of bending over backwards to provide
947 : ... exactly the wrong abstraction of the real world). Common
948 : ... hardware and software practices have turned computers into an
949 : ... unreliable and insecure Tower of Babel. Had virtual memory
950 : ... been better abstracted and better implemented all levels of
951 : ... the stack, life would be much easier (and things like fast
952 : ... persistent memories might have seen a lot more commercial
953 : ... success). In the meantime, dispelling the magical thinking
954 : ... encouraged by the conventional abstractions, the key lessons
955 : ... are:
956 : ...
957 : ... * Friends don't let friends malloc.
958 : ... * Lockfree is not a synonym for garbage collection.
959 : ... * Real world computers aren't infinite tape Turing machines.
960 : ... * Real world memory doesn't magically disappear.
961 :
962 : ... At this point, we are done with speculative processing (or we
963 : ... don't want to do any more speculative processing if the try
964 : ... has already failed).
965 :
966 : err = mymap_query_test( query );
967 : if( FD_UNLIKELY( err ) ) {
968 :
969 : ... At this point, err will be FD_MAP_ERR_AGAIN and a
970 : ... potentially conflicting operation in the try was detected
971 : ... by the test.
972 :
973 : ... Application specific handling here (e.g. try again after a
974 : ... random backoff or doing other non-conflicting work).
975 :
976 : } else {
977 :
978 : ... At this point, the results of the speculation thus far can
979 : ... be trusted and can be considered to have been computed at
980 : ... some point in time between try and test.
981 :
982 : }
983 : }
984 :
985 : To better understand the txn API:
986 :
987 : ... allocate a txn
988 :
989 : ulong align = mymap_txn_align();
990 : ulong footprint = mymap_txn_footprint( key_max );
991 : void * ltxn = ... allocate align/footprint local scratch memory
992 : mymap_txn_t * txn = mymap_txn_init( ltxn, join, key_max );
993 :
994 : ... add at most key_max keys to the transaction as locked
995 :
996 : for( ... all keys involved in the transaction ... ) mymap_txn_add( txn, key, 1 ); // guaranteed to succeed for this example
997 :
998 : ... try to do the transaction
999 :
1000 : int err = mymap_txn_try( txn, FD_MAP_FLAG_BLOCKING );
1001 :
1002 : if( FD_UNLIKELY( err ) ) { // Not possible in this example
1003 :
1004 : ... At this point, err is FD_MAP_ERR_AGAIN and there was a
1005 : ... potentially conflicting operation in progress during the try.
1006 : ... We can try again later (e.g. after a random backoff or doing
1007 : ... other non-conflicting work). We are no longer in a try but
1008 : ... we could reuse the txn as-is to retry.
1009 :
1010 : } else {
1011 :
1012 : ... At this point, it is safe to try the transaction.
1013 :
1014 : ... Do the transaction here. Since all keys are locked in this
1015 : ... example, we don't need to worry about any changing behind our
1016 : ... back (i.e. the try is guaranteed to succeed).
1017 :
1018 : ... Like modify, if we wants to rollback the transaction at this
1019 : ... point, we should save the state of all locked keys involved
1020 : ... to local temporaries before we do the transaction and then
1021 : ... restore the state here.
1022 :
1023 : ... Finish the try (guaranteed to succeed for this example)
1024 :
1025 : mymap_txn_test( txn );
1026 :
1027 : ... At this point, we are no longer in a txn try but the txn is
1028 : ... valid such that we could reuse the txn as-is for another
1029 : ... transaction involving the same keys.
1030 :
1031 : mymap_txn_fini( txn );
1032 :
1033 : ... At this point, txn is no longer valid and we have ownership of
1034 : ... the ltxn memory region
1035 :
1036 : ... free ltxn
1037 :
1038 : }
1039 :
1040 : To better understand the iter API:
1041 :
1042 : ... basic mymap element snapshot (i.e. iterate over all elements in
1043 : ... the mymap at a globally consistent point in time while
1044 : ... minimizing contention with other concurrent operations)
1045 :
1046 : ulong lock_cnt = mymap_chain_cnt( join );
1047 :
1048 : ulong * lock_seq = ... allocate lock_cnt ulong scratch ...
1049 :
1050 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) lock_seq[ lock_idx ] = lock_idx;
1051 :
1052 : mymap_iter_lock( join, lock_seq, lock_cnt, FD_MAP_FLAG_BLOCKING );
1053 :
1054 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) {
1055 : ulong chain_idx = lock_seq[ lock_idx ]; // process chains in the order they were locked
1056 :
1057 : for( mymap_iter_t iter = mymap_iter( join, chain_idx ); !mymap_iter_done( iter ); iter = mymap_iter_next( iter ) ) {
1058 : myele_t const * ele = mymap_iter_ele_const( iter );
1059 :
1060 : ... append ele to snapshot here (ele will be appended in
1061 : ... no particular order for this example). Note that, as
1062 : ... the caller has a lock on the chain that manages ele,
1063 : ... the caller is free to modify the fields of ele it
1064 : ... manages.
1065 :
1066 : }
1067 :
1068 : mymap_iter_unlock( join, lock_seq + lock_idx, 1UL ); // unlock incrementally
1069 : }
1070 :
1071 : ... free lock_seq here
1072 : */
1073 :
1074 : #include "fd_map.h"
1075 :
1076 : /* FIXME: consider adding a parallel verify that operates on a
1077 : locked/idle subset of the chains. */
1078 :
1079 : /* MAP_NAME gives the API prefix to use for map */
1080 :
1081 : #ifndef MAP_NAME
1082 : #error "Define MAP_NAME"
1083 : #endif
1084 :
1085 : /* MAP_ELE_T is the map element type */
1086 :
1087 : #ifndef MAP_ELE_T
1088 : #error "Define MAP_ELE_T"
1089 : #endif
1090 :
1091 : /* MAP_KEY_T is the map key type */
1092 :
1093 : #ifndef MAP_KEY_T
1094 : #define MAP_KEY_T ulong
1095 : #endif
1096 :
1097 : /* MAP_KEY is the MAP_ELE_T key field */
1098 :
1099 : #ifndef MAP_KEY
1100 0 : #define MAP_KEY key
1101 : #endif
1102 :
1103 : /* MAP_IDX_T is the map next index type. Should be a primitive unsigned
1104 : integer type large enough to represent the largest capacity element
1105 : store of interest. (E.g. if ushort, the maximum element store
1106 : capacity compatible with the map will be 65535 elements.) */
1107 :
1108 : #ifndef MAP_IDX_T
1109 387426576 : #define MAP_IDX_T ulong
1110 : #endif
1111 :
1112 : /* MAP_NEXT is the MAP_ELE_T next field */
1113 :
1114 : #ifndef MAP_NEXT
1115 0 : #define MAP_NEXT next
1116 : #endif
1117 :
1118 : /* MAP_KEY_EQ returns 0/1 if *k0 is the same/different as *k1 */
1119 :
1120 : #ifndef MAP_KEY_EQ
1121 22516527 : #define MAP_KEY_EQ(k0,k1) ((*(k0))==(*(k1)))
1122 : #endif
1123 :
1124 : /* MAP_KEY_HASH returns a random mapping of *key into ulong. The
1125 : mapping is parameterized by the 64-bit ulong seed. */
1126 :
1127 : #ifndef MAP_KEY_HASH
1128 : #define MAP_KEY_HASH(key,seed) fd_ulong_hash( (*(key)) ^ (seed) )
1129 : #endif
1130 :
1131 : /* If MAP_MEMOIZE is defined to non-zero, elements have a field that
1132 : can be used while in the map to hold the MAP_KEY_HASH for an
1133 : element's key. This is useful for accelerating user code that might
1134 : need a hash and various map operations. */
1135 :
1136 : #ifndef MAP_MEMOIZE
1137 : #define MAP_MEMOIZE 0
1138 : #endif
1139 :
1140 : /* If MAP_MEMOIZE is non-zero, MAP_MEMO is the memo element field.
1141 : Should be a ulong. Like MAP_KEY and MAP_NEXT, when an element is in
1142 : the map, this value is managed by the map and will contain the
1143 : MAP_KEY_HASH of the element's key and the map's seed. */
1144 :
1145 : #ifndef MAP_MEMO
1146 : #define MAP_MEMO memo
1147 : #endif
1148 :
1149 : /* If MAP_MEMOIZE is defined to non-zero, a non-zero MAP_KEY_EQ_IS_SLOW
1150 : indicates the MAP_MEMO field should be used to accelerate MAP_KEY_EQ
1151 : operations. This is useful when MAP_KEY_EQ is non-trivial (e.g.
1152 : variable length string compare, large buffer compares, etc). */
1153 :
1154 : #ifndef MAP_KEY_EQ_IS_SLOW
1155 : #define MAP_KEY_EQ_IS_SLOW 0
1156 : #endif
1157 :
1158 : /* MAP_CNT_WIDTH gives the number of bits in a ulong to reserve for
1159 : encoding the count in a versioned count. Element store capacity
1160 : should be representable in this width. Default is 43 bits (e.g.
1161 : enough to support a ~1 PiB element store of 128 byte elements). The
1162 : versioning width will be 64-MAP_CNT_WIDTH. Since the least
1163 : significant bit of the version is used to indicate locked, versioning
1164 : width should be at least 2 and ideally as large as possible. With
1165 : the 43 default, a chain's version number will not be reused until
1166 : 2^20 individual operations on a chain have been done. Version
1167 : numbers only impact speculative operations. If not using speculative
1168 : operations, version width can be reduced to the minimum. */
1169 :
1170 : #ifndef MAP_CNT_WIDTH
1171 1946155721 : #define MAP_CNT_WIDTH (43)
1172 : #endif
1173 :
1174 : /* MAP_ALIGN gives the alignment required for the map shared memory.
1175 : Default is 128 for double cache line alignment. Should be at least
1176 : ulong alignment. */
1177 :
1178 : #ifndef MAP_ALIGN
1179 : #define MAP_ALIGN (128UL)
1180 : #endif
1181 :
1182 : /* MAP_MAGIC is the shared memory magic number to aid in persistent
1183 : and/or interprocess usage. */
1184 :
1185 : #ifndef MAP_MAGIC
1186 3 : #define MAP_MAGIC (0xf17eda2c37a7a900UL) /* firedancer map_para version 0 */
1187 : #endif
1188 :
1189 : /* MAP_IMPL_STYLE controls what to generate:
1190 : 0 - header only library
1191 : 1 - library header declaration
1192 : 2 - library implementation */
1193 :
1194 : #ifndef MAP_IMPL_STYLE
1195 : #define MAP_IMPL_STYLE 0
1196 : #endif
1197 :
1198 : /* Implementation *****************************************************/
1199 :
1200 1649539058 : #define MAP_VER_WIDTH (64-MAP_CNT_WIDTH)
1201 :
1202 : #if MAP_IMPL_STYLE==0 /* local use only */
1203 : #define MAP_STATIC FD_FN_UNUSED static
1204 : #else /* library header and/or implementation */
1205 : #define MAP_STATIC
1206 : #endif
1207 :
1208 53039213858 : #define MAP_(n) FD_EXPAND_THEN_CONCAT3(MAP_NAME,_,n)
1209 :
1210 : #if MAP_IMPL_STYLE!=2 /* need header */
1211 :
1212 : #include "../bits/fd_bits.h"
1213 :
1214 : /* Note: we don't overalign chain metadata to reduce on map metadata
1215 : footprint requirements. Though this can cause cache false sharing
1216 : for concurrent operations on different keys that are managed
1217 : different chains that share a cache line, this risk can be controlled
1218 : by overprovisioning chain_cnt. That is, for a fixed map metadata
1219 : footprint, this false sharing seems preferable to using fewer chains
1220 : as that would lead to an equivalent increase in the amount of locking
1221 : necessary to avoid potential conflicts for keys managed by the same
1222 : chain (i.e. the former makes good use of the padding that would be
1223 : otherwise wasted if overaligning this). */
1224 :
1225 : struct MAP_(shmem_private_chain) {
1226 : ulong ver_cnt; /* versioned count, cnt is in [0,ele_max] in lsb, ver in msb, odd: chain locked, even: chain unlocked */
1227 : MAP_IDX_T head_cidx; /* compressed index of the first element on the chain */
1228 : };
1229 :
1230 : typedef struct MAP_(shmem_private_chain) MAP_(shmem_private_chain_t);
1231 :
1232 : struct __attribute__((aligned(MAP_ALIGN))) MAP_(shmem_private) {
1233 :
1234 : /* FIXME: consider having a memo of the chain in which an element is
1235 : stored and/or using doubly linked list chains (maybe with the xor
1236 : trick)? We could do faster variants of remove and maybe amortize
1237 : some hash calcs. */
1238 :
1239 : ulong magic; /* == MAP_MAGIC */
1240 : ulong seed; /* Hash seed, arbitrary */
1241 : ulong chain_cnt; /* Number of chains, positive integer power-of-two */
1242 :
1243 : /* Padding to MAP_ALIGN alignment here */
1244 :
1245 : /* MAP_(shmem_private_chain_t) chain[ chain_cnt ] here */
1246 : };
1247 :
1248 : typedef struct MAP_(shmem_private) MAP_(shmem_t);
1249 :
1250 : struct MAP_(private) {
1251 : MAP_(shmem_t) * map; /* Location of the map in the local address space */
1252 : MAP_ELE_T * ele; /* Location of the element store in the local address space */
1253 : ulong ele_max; /* Capacity of the element store, in [0,ele_max_max] */
1254 : };
1255 :
1256 : typedef struct MAP_(private) MAP_(t);
1257 :
1258 : struct MAP_(query_private) {
1259 : ulong memo; /* Query key memo */
1260 : MAP_ELE_T * ele; /* Points to the operation element in the local address space (or a sentinel) */
1261 : MAP_(shmem_private_chain_t) * chain; /* Points to the chain that manages element in the local address space */
1262 : ulong ver_cnt; /* Versioned count of the chain at operation try */
1263 : };
1264 :
1265 : typedef struct MAP_(query_private) MAP_(query_t);
1266 :
1267 : struct MAP_(txn_private_info) {
1268 : MAP_(shmem_private_chain_t) * chain; /* Points to the chain that manages one or more txn keys (set by txn_add) */
1269 : ulong ver_cnt; /* Versioned count of the chain at the transaction start (set by txn_try) */
1270 : };
1271 :
1272 : typedef struct MAP_(txn_private_info) MAP_(txn_private_info_t);
1273 :
1274 : struct MAP_(txn_private) {
1275 : MAP_(shmem_t) * map; /* Map used by this transaction */
1276 : ulong info_max; /* Number of chains possible for this transaction */
1277 : ulong lock_cnt; /* Number of chains in the locked set, in [0,info_max] */
1278 : ulong spec_cnt; /* Number of chains in the speculative set, in [0,info_max], lock_cnt + spec_cnt <= info_max */
1279 :
1280 : /* MAP_(txn_private_info_t) info[ info_max ] here (obligatory sigh
1281 : about lagging C++ support for 0 sized structure array footers).
1282 :
1283 : The locked set is at indices [0,lock_cnt), lock_cnt infos.
1284 : The free set is at indices [lock_cnt,info_max-spec_cnt), free_cnt = info_max-spec_cnt-lock_cnt infos.
1285 : The speculative set is at indices [info_max-spec_cnt,info_max), spec_cnt infos.
1286 :
1287 : A chain will appear at most once in a set. A chain will not appear
1288 : in both sets.
1289 :
1290 : Note that it would be trivial to make this shared memory persistent
1291 : though not obvious if that would be useful. (A precomputed
1292 : template for a common transaction done by multiple threads is a
1293 : possibility but the versions would still need to be local.) */
1294 :
1295 : };
1296 :
1297 : typedef struct MAP_(txn_private) MAP_(txn_t);
1298 :
1299 : struct MAP_(iter_private) {
1300 : MAP_ELE_T const * ele; /* Pointer to the element store in the caller's address space */
1301 : ulong ele_idx; /* Current iteration element store index (or the null index) */
1302 : };
1303 :
1304 : typedef struct MAP_(iter_private) MAP_(iter_t);
1305 :
1306 : FD_PROTOTYPES_BEGIN
1307 :
1308 : /* map_private_vcnt pack ver and cnt into a versioned cnt. ver is
1309 : masked to fit into MAP_VER_WIDTH bits. cnt is assumed in
1310 : [0,ele_max_max].
1311 :
1312 : map_private_vcnt_{ver,cnt} extract the {version,index} from a
1313 : versioned index. Return will fit into {MAP_VER_WIDTH,MAP_CNT_WIDTH}
1314 : bits. */
1315 :
1316 58609560 : FD_FN_CONST static inline ulong MAP_(private_vcnt)( ulong ver, ulong cnt ) { return (ver<<MAP_CNT_WIDTH) | cnt; }
1317 :
1318 57443083 : FD_FN_CONST static inline ulong MAP_(private_vcnt_ver)( ulong ver_cnt ) { return ver_cnt >> MAP_CNT_WIDTH; }
1319 823269358 : FD_FN_CONST static inline ulong MAP_(private_vcnt_cnt)( ulong ver_cnt ) { return (ver_cnt << MAP_VER_WIDTH) >> MAP_VER_WIDTH; }
1320 :
1321 : /* map_shmem_private_chain returns the location in the caller's address
1322 : space of the map chain metadata associated with hash. The chain
1323 : associated with hash 0 is the first chain. Assumes map is valid.
1324 : map_shmem_private_chain_const is a const correct version. */
1325 :
1326 : FD_FN_PURE static inline MAP_(shmem_private_chain_t) *
1327 : MAP_(shmem_private_chain)( MAP_(shmem_t) * map,
1328 221552516 : ulong hash ) {
1329 221552516 : return (MAP_(shmem_private_chain_t) *)(map+1) + (hash & (map->chain_cnt-1UL));
1330 221552516 : }
1331 :
1332 : FD_FN_PURE static inline MAP_(shmem_private_chain_t) const *
1333 : MAP_(shmem_private_chain_const)( MAP_(shmem_t) const * map,
1334 9641812538 : ulong hash ) {
1335 9641812538 : return (MAP_(shmem_private_chain_t) const *)(map+1) + (hash & (map->chain_cnt-1UL));
1336 9641812538 : }
1337 :
1338 : /* map_txn_private_info returns the location in the caller's address
1339 : space of the txn info. Assumes txn is valid. */
1340 :
1341 : FD_FN_CONST static inline MAP_(txn_private_info_t) *
1342 12169101 : MAP_(txn_private_info)( MAP_(txn_t) * txn ) {
1343 12169101 : return (MAP_(txn_private_info_t) *)(txn+1);
1344 12169101 : }
1345 :
1346 : /* map_private_{cidx,idx} compress / decompress 64-bit in-register
1347 : indices to/from their in-memory representations. */
1348 :
1349 35047035 : FD_FN_CONST static inline MAP_IDX_T MAP_(private_cidx)( ulong idx ) { return (MAP_IDX_T)idx; }
1350 8470428015 : FD_FN_CONST static inline ulong MAP_(private_idx) ( MAP_IDX_T cidx ) { return (ulong) cidx; }
1351 :
1352 : /* map_private_idx_null returns the element storage index that
1353 : represents NULL. */
1354 :
1355 4825236 : FD_FN_CONST static inline ulong MAP_(private_idx_null)( void ) { return (ulong)(MAP_IDX_T)(~0UL); }
1356 :
1357 : /* map_private_idx_is_null returns 1 if idx is the NULL map index and 0
1358 : otherwise. */
1359 :
1360 7550003914 : FD_FN_CONST static inline int MAP_(private_idx_is_null)( ulong idx ) { return idx==(ulong)(MAP_IDX_T)(~0UL); }
1361 :
1362 : /* map_private_fetch_and_or does a ulong FD_ATOMIC_FETCH_AND_OR when the
1363 : target has FD_HAS_ATOMIC and emulates it when not. When emulated,
1364 : the map will not be safe to use concurrently but will still work. */
1365 :
1366 : static inline ulong
1367 : MAP_(private_fetch_and_or)( ulong volatile * p,
1368 90282010 : ulong b ) {
1369 90282010 : ulong x;
1370 90282010 : FD_COMPILER_MFENCE();
1371 90282010 : # if FD_HAS_ATOMIC
1372 90282010 : x = FD_ATOMIC_FETCH_AND_OR( p, b );
1373 : # else
1374 : x = *p;
1375 : *p = x | b;
1376 : # endif
1377 90282010 : FD_COMPILER_MFENCE();
1378 90282010 : return x;
1379 90282010 : }
1380 :
1381 3000342 : FD_FN_CONST static inline ulong MAP_(ele_max_max)( void ) { return (ulong)(MAP_IDX_T)(ULONG_MAX >> MAP_VER_WIDTH); }
1382 :
1383 : FD_FN_CONST static inline ulong
1384 3000513 : MAP_(chain_max)( void ) {
1385 3000513 : return fd_ulong_pow2_dn( (ULONG_MAX - sizeof(MAP_(shmem_t)) - alignof(MAP_(shmem_t)) + 1UL) /
1386 3000513 : sizeof(MAP_(shmem_private_chain_t)) );
1387 3000513 : }
1388 :
1389 : FD_FN_CONST static inline ulong
1390 3000219 : MAP_(chain_cnt_est)( ulong ele_max_est ) {
1391 :
1392 : /* Clamp to be in [1,ele_max_max] (as ele_max_est 0 is degenerate and
1393 : as the map is guaranteed to hold at most ele_max_max keys). */
1394 :
1395 3000219 : ele_max_est = fd_ulong_min( fd_ulong_max( ele_max_est, 1UL ), MAP_(ele_max_max)() );
1396 :
1397 : /* Compute the number of chains as the power of 2 that makes the
1398 : average chain length between ~1 and ~2 when ele_max_est are stored
1399 : in the map and then clamp to the chain max. */
1400 :
1401 3000219 : ulong chain_min = (ele_max_est>>1) + (ele_max_est&1UL); /* chain_min = ceil(ele_max_est/2), in [1,2^63], computed w/o overflow */
1402 3000219 : ulong chain_cnt = fd_ulong_pow2_up( chain_min ); /* Power of 2 in [1,2^63] */
1403 :
1404 3000219 : return fd_ulong_min( chain_cnt, MAP_(chain_max)() );
1405 3000219 : }
1406 :
1407 582 : FD_FN_CONST static inline ulong MAP_(align)( void ) { return alignof(MAP_(shmem_t)); }
1408 :
1409 : FD_FN_CONST static inline ulong
1410 264 : MAP_(footprint)( ulong chain_cnt ) {
1411 264 : if( !(fd_ulong_is_pow2( chain_cnt ) & (chain_cnt<=MAP_(chain_max)())) ) return 0UL;
1412 : /* Note: assumes shmem_t and shmem_private_chain_t have compatible alignments */
1413 252 : return fd_ulong_align_up( sizeof(MAP_(shmem_t)) + chain_cnt*sizeof(MAP_(shmem_private_chain_t)),
1414 252 : alignof(MAP_(shmem_t)) ); /* no overflow */
1415 264 : }
1416 :
1417 30 : FD_FN_PURE static inline ulong MAP_(seed) ( MAP_(t) const * join ) { return join->map->seed; }
1418 178826387 : FD_FN_PURE static inline ulong MAP_(chain_cnt)( MAP_(t) const * join ) { return join->map->chain_cnt; }
1419 :
1420 3 : FD_FN_PURE static inline void const * MAP_(shmap_const)( MAP_(t) const * join ) { return join->map; }
1421 3 : FD_FN_PURE static inline void const * MAP_(shele_const)( MAP_(t) const * join ) { return join->ele; }
1422 3 : FD_FN_PURE static inline ulong MAP_(ele_max) ( MAP_(t) const * join ) { return join->ele_max; }
1423 :
1424 3 : FD_FN_PURE static inline void * MAP_(shmap)( MAP_(t) * join ) { return join->map; }
1425 3 : FD_FN_PURE static inline void * MAP_(shele)( MAP_(t) * join ) { return join->ele; }
1426 :
1427 : FD_FN_PURE static inline int
1428 : MAP_(key_eq)( MAP_KEY_T const * k0,
1429 1152239329 : MAP_KEY_T const * k1 ) {
1430 1152239329 : return !!(MAP_KEY_EQ( (k0), (k1) ));
1431 1152239329 : }
1432 :
1433 : FD_FN_PURE static inline ulong
1434 : MAP_(key_hash)( MAP_KEY_T const * key,
1435 963985394 : ulong seed ) {
1436 963985394 : return (MAP_KEY_HASH( (key), (seed) ));
1437 963985394 : }
1438 :
1439 : static inline void
1440 : MAP_(backoff)( ulong scale,
1441 0 : ulong seed ) {
1442 0 : ulong r = (ulong)(uint)fd_ulong_hash( seed ^ (((ulong)fd_tickcount())<<32) );
1443 0 : for( ulong rem=(scale*r)>>48; rem; rem-- ) FD_SPIN_PAUSE();
1444 0 : }
1445 :
1446 25042929 : FD_FN_PURE static inline ulong MAP_(query_memo )( MAP_(query_t) const * query ) { return query->memo; }
1447 348178677 : FD_FN_PURE static inline MAP_ELE_T const * MAP_(query_ele_const)( MAP_(query_t) const * query ) { return query->ele; }
1448 236093496 : FD_FN_PURE static inline MAP_ELE_T * MAP_(query_ele )( MAP_(query_t) * query ) { return query->ele; }
1449 :
1450 : static inline int
1451 25613667 : MAP_(modify_test)( MAP_(query_t) * query ) {
1452 25613667 : MAP_(shmem_private_chain_t) * chain = query->chain;
1453 25613667 : ulong ver_cnt = query->ver_cnt;
1454 25613667 : FD_COMPILER_MFENCE();
1455 25613667 : chain->ver_cnt = ver_cnt + (2UL<<MAP_CNT_WIDTH);
1456 25613667 : FD_COMPILER_MFENCE();
1457 25613667 : return FD_MAP_SUCCESS;
1458 25613667 : }
1459 :
1460 : static inline int
1461 587842504 : MAP_(query_test)( MAP_(query_t) const * query ) {
1462 587842504 : MAP_(shmem_private_chain_t) const * chain = query->chain;
1463 587842504 : ulong ver_cnt = query->ver_cnt;
1464 587842504 : FD_COMPILER_MFENCE();
1465 587842504 : ulong _ver_cnt = chain->ver_cnt;
1466 587842504 : FD_COMPILER_MFENCE();
1467 587842504 : return fd_int_if( ver_cnt==_ver_cnt, FD_MAP_SUCCESS, FD_MAP_ERR_AGAIN );
1468 587842504 : }
1469 :
1470 : FD_FN_CONST static inline ulong
1471 27485811 : MAP_(txn_key_max_max)( void ) {
1472 27485811 : return (ULONG_MAX - sizeof(MAP_(txn_t)) - alignof(MAP_(txn_t)) + 1UL) / sizeof( MAP_(txn_private_info_t) );
1473 27485811 : }
1474 :
1475 3745386 : FD_FN_CONST static inline ulong MAP_(txn_align)( void ) { return alignof(MAP_(txn_t)); }
1476 :
1477 : FD_FN_CONST static inline ulong
1478 23740425 : MAP_(txn_footprint)( ulong key_max ) {
1479 23740425 : if( key_max > MAP_(txn_key_max_max)() ) return 0UL;
1480 23740422 : return sizeof(MAP_(txn_t)) + key_max*sizeof(MAP_(txn_private_info_t)); /* no overflow */
1481 23740425 : }
1482 :
1483 : static inline MAP_(txn_t) *
1484 : MAP_(txn_init)( void * mem,
1485 : MAP_(t) * join,
1486 3745383 : ulong key_max ) {
1487 3745383 : MAP_(txn_t) * txn = (MAP_(txn_t) *)mem;
1488 3745383 : if( FD_UNLIKELY( (!mem ) |
1489 3745383 : (!fd_ulong_is_aligned( (ulong)mem, MAP_(txn_align)() )) |
1490 3745383 : (!join ) |
1491 3745383 : (key_max > MAP_(txn_key_max_max)() ) ) ) return NULL;
1492 3745371 : txn->map = join->map;
1493 3745371 : txn->info_max = key_max; /* Worst case number of chains impacted by this transaction */
1494 3745371 : txn->lock_cnt = 0UL;
1495 3745371 : txn->spec_cnt = 0UL;
1496 3745371 : return txn;
1497 3745383 : }
1498 :
1499 3745374 : FD_FN_CONST static inline void * MAP_(txn_fini)( MAP_(txn_t) * txn ) { return (void *)txn; }
1500 :
1501 : FD_FN_PURE static inline ulong
1502 : MAP_(iter_chain_idx)( MAP_(t) const * join,
1503 0 : MAP_KEY_T const * key ) {
1504 0 : MAP_(shmem_t) const * map = join->map;
1505 0 : return MAP_(key_hash)( key, map->seed ) & (map->chain_cnt-1UL);
1506 0 : }
1507 :
1508 : FD_FN_PURE static inline MAP_(iter_t)
1509 : MAP_(iter)( MAP_(t) const * join,
1510 8924647613 : ulong chain_idx ) {
1511 : /* FIXME: consider iter = {NULL,NULL} if chain_idx >= join->map->chain_cnt? */
1512 8924647613 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( join->map, 0UL ) + chain_idx;
1513 8924647613 : MAP_(iter_t) iter;
1514 8924647613 : iter.ele = join->ele;
1515 8924647613 : iter.ele_idx = MAP_(private_idx)( chain->head_cidx );
1516 8924647613 : return iter;
1517 8924647613 : }
1518 :
1519 7091770327 : FD_FN_CONST static inline int MAP_(iter_done)( MAP_(iter_t) iter ) { return MAP_(private_idx_is_null)( iter.ele_idx ); }
1520 :
1521 : FD_FN_PURE static inline MAP_(iter_t)
1522 3753877285 : MAP_(iter_next)( MAP_(iter_t) iter ) {
1523 3753877285 : MAP_ELE_T const * ele = iter.ele + iter.ele_idx;
1524 3753877285 : iter.ele_idx = MAP_(private_idx)( ele->MAP_NEXT );
1525 3753877285 : return iter;
1526 3753877285 : }
1527 :
1528 : FD_FN_CONST static inline MAP_ELE_T *
1529 3640291746 : MAP_(iter_ele)( MAP_(iter_t) iter ) {
1530 3640291746 : return (MAP_ELE_T *)(iter.ele + iter.ele_idx);
1531 3640291746 : }
1532 :
1533 : FD_FN_CONST static inline MAP_ELE_T const *
1534 223838737 : MAP_(iter_ele_const)( MAP_(iter_t) iter ) {
1535 223838737 : return iter.ele + iter.ele_idx;
1536 223838737 : }
1537 :
1538 : MAP_STATIC void * MAP_(new) ( void * shmem, ulong chain_cnt, ulong seed );
1539 : MAP_STATIC MAP_(t) * MAP_(join) ( void * ljoin, void * shmap, void * shele, ulong ele_max );
1540 : MAP_STATIC void * MAP_(leave) ( MAP_(t) * join );
1541 : MAP_STATIC void * MAP_(delete)( void * shmap );
1542 :
1543 : MAP_STATIC void
1544 : MAP_(hint)( MAP_(t) const * join,
1545 : MAP_KEY_T const * key,
1546 : MAP_(query_t) * query,
1547 : int flags );
1548 :
1549 : MAP_STATIC int MAP_(insert)( MAP_(t) * join, MAP_ELE_T * ele, int flags );
1550 :
1551 : MAP_STATIC int
1552 : MAP_(remove)( MAP_(t) * join,
1553 : MAP_KEY_T const * key,
1554 : MAP_ELE_T const * sentinel,
1555 : MAP_(query_t) * query,
1556 : int flags );
1557 :
1558 : MAP_STATIC int
1559 : MAP_(modify_try)( MAP_(t) * join,
1560 : MAP_KEY_T const * key,
1561 : MAP_ELE_T * sentinel,
1562 : MAP_(query_t) * query,
1563 : int flags );
1564 :
1565 : MAP_STATIC int
1566 : MAP_(query_try)( MAP_(t) const * join,
1567 : MAP_KEY_T const * key,
1568 : MAP_ELE_T const * sentinel,
1569 : MAP_(query_t) * query,
1570 : int flags );
1571 :
1572 : MAP_STATIC int MAP_(txn_add)( MAP_(txn_t) * txn, MAP_KEY_T const * key, int lock );
1573 :
1574 : MAP_STATIC int MAP_(txn_try)( MAP_(txn_t) * txn, int flags );
1575 :
1576 : static inline void
1577 : MAP_(txn_hint)( MAP_(t) const * join,
1578 : MAP_KEY_T const * key,
1579 : MAP_(query_t) * query,
1580 3276729 : int flags ) {
1581 3276729 : MAP_(hint)( join, key, query, flags );
1582 3276729 : }
1583 :
1584 : MAP_STATIC int
1585 : MAP_(txn_insert)( MAP_(t) * join,
1586 : MAP_ELE_T * ele );
1587 :
1588 : MAP_STATIC int
1589 : MAP_(txn_remove)( MAP_(t) * join,
1590 : MAP_KEY_T const * key,
1591 : MAP_ELE_T const * sentinel,
1592 : MAP_(query_t) * query,
1593 : int flags );
1594 :
1595 : MAP_STATIC int
1596 : MAP_(txn_modify)( MAP_(t) * join,
1597 : MAP_KEY_T const * key,
1598 : MAP_ELE_T * sentinel,
1599 : MAP_(query_t) * query,
1600 : int flags );
1601 :
1602 : static inline int
1603 : MAP_(txn_query)( MAP_(t) const * join,
1604 : MAP_KEY_T const * key,
1605 : MAP_ELE_T const * sentinel,
1606 : MAP_(query_t) * query,
1607 1638945 : int flags ) {
1608 1638945 : return MAP_(txn_modify)( (MAP_(t) *)join, key, (MAP_ELE_T *)sentinel, query, flags & (~FD_MAP_FLAG_ADAPTIVE) );
1609 1638945 : }
1610 :
1611 : MAP_STATIC int MAP_(txn_test)( MAP_(txn_t) * txn );
1612 :
1613 : MAP_STATIC int
1614 : MAP_(iter_lock)( MAP_(t) * join,
1615 : ulong * lock_seq,
1616 : ulong lock_cnt,
1617 : int flags );
1618 :
1619 : MAP_STATIC void
1620 : MAP_(iter_unlock)( MAP_(t) * join,
1621 : ulong const * lock_seq,
1622 : ulong lock_cnt );
1623 :
1624 : MAP_STATIC void MAP_(reset)( MAP_(t) * join );
1625 :
1626 : MAP_STATIC int MAP_(verify)( MAP_(t) const * join );
1627 :
1628 : MAP_STATIC FD_FN_CONST char const * MAP_(strerror)( int err );
1629 :
1630 : FD_PROTOTYPES_END
1631 :
1632 : #endif
1633 :
1634 : #if MAP_IMPL_STYLE!=1 /* need implementations (assumes header already included) */
1635 :
1636 : #include "../log/fd_log.h" /* Used by constructors and verify (FIXME: Consider making a compile time option) */
1637 :
1638 : /* MAP_CRIT_{BEGIN,BLOCKED,END} handle virtually all atomic boilerplate
1639 : for operations that require modifying a map chain's structure or
1640 : elements managed by that chain. Usage:
1641 :
1642 : MAP_CRIT( chain, blocking ) {
1643 :
1644 : ... At this point, we have a lock on the chain and the "ulong"
1645 : ... ver_cnt contains the chain's versioned count just before we
1646 : ... took the lock. The "int" retain_lock is zero.
1647 : ...
1648 : ... Do locked operations on the map chain here
1649 : ...
1650 : ... On exiting this block, if retain_lock is non-zero, we resume
1651 : ... execution immediately after MAP_CRIT_END. This is used for
1652 : ... "try" style operations where a "test" operation is done to
1653 : ... unlock the chain after the caller does their try/test work.
1654 : ... Otherwise, we will update the version number, unlock the
1655 : ... chain and then resume execution after MAP_CRIT_END.
1656 : ...
1657 : ... Because compiler memory fences are done just before entering
1658 : ... and after exiting this block, there is typically no need to
1659 : ... use any atomics / volatile / fencing here. That is, we can
1660 : ... just write "normal" code on platforms where writes to memory
1661 : ... become visible to other threads in the order in which they
1662 : ... were issued in the machine code (e.g. x86) as the version
1663 : ... update and unlock writes are after the changes done here
1664 : ... and others will not proceed until they see the new version
1665 : ... and unlock. YMMV for non-x86 platforms (probably need
1666 : ... additional hardware store fences in these macros).
1667 : ...
1668 : ... It is safe to use "break" and/or "continue" within this
1669 : ... block. The overall MAP_CRIT will exit with the appropriate
1670 : ... compiler fencing, version update and unlocking and then
1671 : ... execution will resume immediately after MAP_CRIT_END.
1672 : ...
1673 : ... IMPORTANT SAFETY TIP! DO NOT RETURN FROM THIS BLOCK.
1674 : ...
1675 : ... IMPORTANT SAFETY TIP! OPERATIONS THAT CHANGE THE CHAIN
1676 : ... ELEMENT COUNT SHOULD UPDATE VER_CNT's COUNT WHILE HOLDING
1677 : ... THE VERSION CONSTANT.
1678 :
1679 : } MAP_CRIT_BLOCKED {
1680 :
1681 : ... At this point, somebody else had a lock on the chain when we
1682 : ... tried to take the lock.
1683 : ...
1684 : ... Handle blocked here.
1685 : ...
1686 : ... On exiting this block, if blocking was zero in MAP_CRIT, we
1687 : ... will resume execution immediately after MAP_CRIT_END. If
1688 : ... blocking was non-zero, we will resume execution immediately
1689 : ... before MAP_CRIT (e.g. we will retry again after a short spin
1690 : ... pause). Similar considerations to the above for compiler
1691 : ... memory fences, "break" and "continue". As we do not have the
1692 : ... lock here, retain_lock is neither relevant nor available.
1693 : ...
1694 : ... IMPORTANT SAFETY TIP! DO NOT RETURN FROM THIS BLOCK.
1695 :
1696 : } MAP_CRIT_END; */
1697 :
1698 90282010 : #define MAP_CRIT(c,b) do { \
1699 90282010 : ulong volatile * _vc = (ulong volatile *)&(c)->ver_cnt; \
1700 90282010 : int _b = (b); \
1701 90282010 : int retain_lock = 0; \
1702 90282010 : for(;;) { \
1703 90282010 : ulong ver_cnt = *_vc; \
1704 90282010 : /* use a test-and-test-and-set style to reduce atomic contention */ \
1705 90282010 : if( FD_LIKELY( !(ver_cnt & (1UL<<MAP_CNT_WIDTH)) ) ) { /* opt for low contention */ \
1706 90282010 : ver_cnt = MAP_(private_fetch_and_or)( _vc, 1UL<<MAP_CNT_WIDTH ); \
1707 90282010 : if( FD_LIKELY( !(ver_cnt & (1UL<<MAP_CNT_WIDTH)) ) ) { /* opt for low contention */ \
1708 90282010 : FD_COMPILER_MFENCE(); \
1709 90282010 : do
1710 :
1711 : #define MAP_CRIT_BLOCKED \
1712 90282010 : while(0); \
1713 90282010 : FD_COMPILER_MFENCE(); \
1714 90282010 : if( !retain_lock ) *_vc = ver_cnt+(2UL<<MAP_CNT_WIDTH); /* likely compile time */ \
1715 90282010 : FD_COMPILER_MFENCE(); \
1716 54824452 : break; \
1717 90282010 : } \
1718 90282010 : } \
1719 90282010 : FD_COMPILER_MFENCE(); \
1720 0 : do
1721 :
1722 : #define MAP_CRIT_END \
1723 0 : while(0); \
1724 0 : FD_COMPILER_MFENCE(); \
1725 0 : if( !_b ) break; /* likely compile time */ \
1726 0 : FD_SPIN_PAUSE(); \
1727 0 : } \
1728 90282010 : } while(0)
1729 :
1730 : MAP_STATIC void *
1731 : MAP_(new)( void * shmem,
1732 : ulong chain_cnt,
1733 81 : ulong seed ) {
1734 :
1735 81 : if( FD_UNLIKELY( !shmem ) ) {
1736 3 : FD_LOG_WARNING(( "NULL shmem" ));
1737 3 : return NULL;
1738 3 : }
1739 :
1740 78 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, MAP_(align)() ) ) ) {
1741 3 : FD_LOG_WARNING(( "misaligned shmem" ));
1742 3 : return NULL;
1743 3 : }
1744 :
1745 75 : ulong footprint = MAP_(footprint)( chain_cnt );
1746 75 : if( FD_UNLIKELY( !footprint ) ) {
1747 6 : FD_LOG_WARNING(( "bad footprint" ));
1748 6 : return NULL;
1749 6 : }
1750 :
1751 : /* seed is arbitrary */
1752 :
1753 : /* Init the metadata */
1754 :
1755 69 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmem;
1756 :
1757 69 : map->seed = seed;
1758 69 : map->chain_cnt = chain_cnt;
1759 :
1760 : /* Set all the chains to version 0 and empty */
1761 :
1762 69 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, 0UL );
1763 4823769 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
1764 4823700 : chain[ chain_idx ].ver_cnt = MAP_(private_vcnt)( 0UL, 0UL );
1765 4823700 : chain[ chain_idx ].head_cidx = MAP_(private_cidx)( MAP_(private_idx_null)() );
1766 4823700 : }
1767 :
1768 69 : FD_COMPILER_MFENCE();
1769 69 : map->magic = MAP_MAGIC;
1770 69 : FD_COMPILER_MFENCE();
1771 :
1772 69 : return shmem;
1773 75 : }
1774 :
1775 : MAP_STATIC MAP_(t) *
1776 : MAP_(join)( void * ljoin,
1777 : void * shmap,
1778 : void * shele,
1779 114 : ulong ele_max ) {
1780 114 : MAP_(t) * join = (MAP_(t) *)ljoin;
1781 114 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmap;
1782 114 : MAP_ELE_T * ele = (MAP_ELE_T *)shele;
1783 :
1784 114 : if( FD_UNLIKELY( !join ) ) {
1785 3 : FD_LOG_WARNING(( "NULL ljoin" ));
1786 3 : return NULL;
1787 3 : }
1788 :
1789 111 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)join, alignof(MAP_(t)) ) ) ) {
1790 3 : FD_LOG_WARNING(( "misaligned ljoin" ));
1791 3 : return NULL;
1792 3 : }
1793 :
1794 108 : if( FD_UNLIKELY( !map ) ) {
1795 3 : FD_LOG_WARNING(( "NULL shmap" ));
1796 3 : return NULL;
1797 3 : }
1798 :
1799 105 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) ) ) {
1800 3 : FD_LOG_WARNING(( "misaligned shmap" ));
1801 3 : return NULL;
1802 3 : }
1803 :
1804 102 : if( FD_UNLIKELY( map->magic!=MAP_MAGIC ) ) {
1805 3 : FD_LOG_WARNING(( "bad magic" ));
1806 3 : return NULL;
1807 3 : }
1808 :
1809 99 : if( FD_UNLIKELY( (!ele) & (!!ele_max) ) ) {
1810 3 : FD_LOG_WARNING(( "NULL shele" ));
1811 3 : return NULL;
1812 3 : }
1813 :
1814 96 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)ele, alignof(MAP_ELE_T) ) ) ) {
1815 3 : FD_LOG_WARNING(( "misaligned shele" ));
1816 3 : return NULL;
1817 3 : }
1818 :
1819 93 : if( FD_UNLIKELY( ele_max > MAP_(ele_max_max)() ) ) {
1820 0 : FD_LOG_WARNING(( "ele_max greater than ele_max_max" ));
1821 0 : return NULL;
1822 0 : }
1823 :
1824 93 : join->map = map;
1825 93 : join->ele = ele;
1826 93 : join->ele_max = ele_max;
1827 :
1828 93 : return join;
1829 93 : }
1830 :
1831 : MAP_STATIC void *
1832 30 : MAP_(leave)( MAP_(t) * join ) {
1833 :
1834 30 : if( FD_UNLIKELY( !join ) ) {
1835 3 : FD_LOG_WARNING(( "NULL join" ));
1836 3 : return NULL;
1837 3 : }
1838 :
1839 27 : return (void *)join;
1840 30 : }
1841 :
1842 : MAP_STATIC void *
1843 12 : MAP_(delete)( void * shmap ) {
1844 12 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmap;
1845 :
1846 12 : if( FD_UNLIKELY( !map ) ) {
1847 3 : FD_LOG_WARNING(( "NULL shmap" ));
1848 3 : return NULL;
1849 3 : }
1850 :
1851 9 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) ) ) {
1852 3 : FD_LOG_WARNING(( "misaligned shmap" ));
1853 3 : return NULL;
1854 3 : }
1855 :
1856 6 : if( FD_UNLIKELY( map->magic!=MAP_MAGIC ) ) {
1857 3 : FD_LOG_WARNING(( "bad magic" ));
1858 3 : return NULL;
1859 3 : }
1860 :
1861 3 : FD_COMPILER_MFENCE();
1862 3 : map->magic = 0UL;
1863 3 : FD_COMPILER_MFENCE();
1864 :
1865 3 : return (void *)map;
1866 6 : }
1867 :
1868 : MAP_STATIC int
1869 : MAP_(insert)( MAP_(t) * join,
1870 : MAP_ELE_T * ele,
1871 32458548 : int flags ) {
1872 :
1873 : /* Determine the element index (fastest if ele are power-of-two) and
1874 : the chain that should hold ele */
1875 :
1876 32458548 : ulong ele_idx = (ulong)(ele - join->ele);
1877 32458548 : if( FD_UNLIKELY( ele_idx>=join->ele_max ) ) return FD_MAP_ERR_INVAL;
1878 :
1879 26829651 : MAP_(shmem_t) * map = join->map;
1880 :
1881 26829651 : ulong memo = MAP_(key_hash)( &ele->MAP_KEY, map->seed );
1882 26829651 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
1883 :
1884 : /* Insert element at the head of chain. If chain is already locked,
1885 : signal to try again later. */
1886 :
1887 26829651 : int err;
1888 :
1889 53659302 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1890 26829651 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
1891 26829651 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1892 :
1893 26829651 : ele->MAP_NEXT = chain->head_cidx;
1894 : # if MAP_MEMOIZE
1895 25774011 : ele->MAP_MEMO = memo;
1896 : # endif
1897 26829651 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
1898 26829651 : ver_cnt = MAP_(private_vcnt)( version, ele_cnt+1UL ); /* version updated on exit */
1899 26829651 : err = FD_MAP_SUCCESS;
1900 :
1901 26829651 : } MAP_CRIT_BLOCKED {
1902 :
1903 0 : err = FD_MAP_ERR_AGAIN;
1904 :
1905 0 : } MAP_CRIT_END;
1906 :
1907 26829651 : return err;
1908 32458548 : }
1909 :
1910 : MAP_STATIC void
1911 : MAP_(hint)( MAP_(t) const * join,
1912 : MAP_KEY_T const * key,
1913 : MAP_(query_t) * query,
1914 8893983 : int flags ) {
1915 8893983 : MAP_(shmem_t) * map = join->map;
1916 8893983 : MAP_ELE_T * ele0 = join->ele;
1917 8893983 : ulong ele_max = join->ele_max;
1918 :
1919 8893983 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
1920 8893983 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
1921 :
1922 8893983 : if( FD_LIKELY( flags & FD_MAP_FLAG_PREFETCH_META ) ) FD_VOLATILE_CONST( chain->ver_cnt );
1923 8893983 : if( FD_LIKELY( flags & FD_MAP_FLAG_PREFETCH_DATA ) ) {
1924 4448334 : ulong ele_idx = MAP_(private_idx)( chain->head_cidx );
1925 4448334 : if( FD_LIKELY( ele_idx < ele_max ) ) FD_VOLATILE_CONST( ele0[ ele_idx ] );
1926 4448334 : }
1927 :
1928 8893983 : query->memo = memo;
1929 8893983 : }
1930 :
1931 : MAP_STATIC int
1932 : MAP_(remove)( MAP_(t) * join,
1933 : MAP_KEY_T const * key,
1934 : MAP_ELE_T const * sentinel,
1935 : MAP_(query_t) * query,
1936 27338005 : int flags ) {
1937 :
1938 : /* Determine the chain that should hold key */
1939 :
1940 27338005 : MAP_(shmem_t) * map = join->map;
1941 27338005 : MAP_ELE_T * ele = join->ele;
1942 27338005 : ulong ele_max = join->ele_max;
1943 :
1944 27338005 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
1945 27338005 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
1946 :
1947 : /* Find the key on the chain. If found, remove it. If not found,
1948 : corrupt or blocked, fail the operation. */
1949 :
1950 27338005 : query->memo = memo;
1951 27338005 : query->ele = (MAP_ELE_T *)sentinel;
1952 27338005 : query->chain = chain;
1953 :
1954 27338005 : int err;
1955 :
1956 54676010 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1957 27338005 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
1958 27338005 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1959 :
1960 27338005 : query->ver_cnt = ver_cnt;
1961 :
1962 27338005 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
1963 0 : err = FD_MAP_ERR_CORRUPT;
1964 0 : goto done;
1965 0 : }
1966 :
1967 27338005 : MAP_IDX_T * cur = &chain->head_cidx;
1968 43183179 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
1969 39532367 : ulong ele_idx = MAP_(private_idx)( *cur );
1970 39532367 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
1971 0 : err = FD_MAP_ERR_CORRUPT;
1972 0 : goto done;
1973 0 : }
1974 :
1975 39532367 : if(
1976 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
1977 3973413 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
1978 3973413 : # endif
1979 37439612 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
1980 :
1981 23687193 : *cur = ele[ ele_idx ].MAP_NEXT;
1982 23687193 : ver_cnt = MAP_(private_vcnt)( version, ele_cnt-1UL ); /* version updated on exit */
1983 23687193 : query->ele = &ele[ ele_idx ];
1984 23687193 : err = FD_MAP_SUCCESS;
1985 23687193 : goto done;
1986 23687193 : }
1987 :
1988 15845174 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
1989 15845174 : }
1990 :
1991 : /* Key was not found */
1992 :
1993 3650812 : ulong ele_idx = MAP_(private_idx)( *cur );
1994 3650812 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not corrupt */
1995 0 : err = FD_MAP_ERR_CORRUPT;
1996 0 : goto done;
1997 0 : }
1998 :
1999 3650812 : err = FD_MAP_ERR_KEY;
2000 :
2001 27338005 : done: /* silly language restriction */;
2002 :
2003 27338005 : } MAP_CRIT_BLOCKED {
2004 :
2005 0 : query->ver_cnt = ver_cnt;
2006 0 : err = FD_MAP_ERR_AGAIN;
2007 :
2008 0 : } MAP_CRIT_END;
2009 :
2010 27338005 : return err;
2011 27338005 : }
2012 :
2013 : MAP_STATIC int
2014 : MAP_(modify_try)( MAP_(t) * join,
2015 : MAP_KEY_T const * key,
2016 : MAP_ELE_T * sentinel,
2017 : MAP_(query_t) * query,
2018 27486447 : int flags ) {
2019 :
2020 : /* Determine which chain might hold key */
2021 :
2022 27486447 : MAP_(shmem_t) * map = join->map;
2023 27486447 : MAP_ELE_T * ele = join->ele;
2024 27486447 : ulong ele_max = join->ele_max;
2025 :
2026 27486447 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
2027 27486447 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2028 :
2029 : /* Search for the key on chain. If found, retain the chain lock
2030 : and return the found element. If not found, corrupt or blocked,
2031 : fail. */
2032 :
2033 27486447 : query->memo = memo;
2034 27486447 : query->ele = (MAP_ELE_T *)sentinel;
2035 27486447 : query->chain = chain;
2036 :
2037 27486447 : int err;
2038 :
2039 54972894 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
2040 :
2041 27486447 : query->ver_cnt = ver_cnt;
2042 :
2043 27486447 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2044 27486447 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
2045 0 : err = FD_MAP_ERR_CORRUPT;
2046 0 : goto done;
2047 0 : }
2048 :
2049 27486447 : MAP_IDX_T * cur = &chain->head_cidx;
2050 29579673 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
2051 27706893 : ulong ele_idx = MAP_(private_idx)( *cur );
2052 27706893 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
2053 0 : err = FD_MAP_ERR_CORRUPT;
2054 0 : goto done;
2055 0 : }
2056 :
2057 27706893 : if(
2058 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2059 3966474 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
2060 3966474 : # endif
2061 25613667 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2062 25613667 : if( flags & FD_MAP_FLAG_ADAPTIVE ) {
2063 937638 : *cur = ele[ ele_idx ].MAP_NEXT;
2064 937638 : ele[ ele_idx ].MAP_NEXT = chain->head_cidx;
2065 937638 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2066 937638 : }
2067 25613667 : query->ele = &ele[ ele_idx ];
2068 25613667 : err = FD_MAP_SUCCESS;
2069 25613667 : retain_lock = 1;
2070 25613667 : goto done;
2071 25613667 : }
2072 :
2073 2093226 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
2074 2093226 : }
2075 :
2076 1872780 : ulong ele_idx = MAP_(private_idx)( *cur );
2077 1872780 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not corrupt */
2078 0 : err = FD_MAP_ERR_CORRUPT;
2079 0 : goto done;
2080 0 : }
2081 :
2082 1872780 : err = FD_MAP_ERR_KEY;
2083 :
2084 27486447 : done: /* silly language restriction */;
2085 :
2086 27486447 : } MAP_CRIT_BLOCKED {
2087 :
2088 0 : query->ver_cnt = ver_cnt;
2089 0 : err = FD_MAP_ERR_AGAIN;
2090 :
2091 0 : } MAP_CRIT_END;
2092 :
2093 27486447 : return err;
2094 27486447 : }
2095 :
2096 : MAP_STATIC int
2097 : MAP_(query_try)( MAP_(t) const * join,
2098 : MAP_KEY_T const * key,
2099 : MAP_ELE_T const * sentinel,
2100 : MAP_(query_t) * query,
2101 733490247 : int flags ) {
2102 :
2103 : /* Determine which chain might hold key */
2104 :
2105 733490247 : MAP_(shmem_t) const * map = join->map;
2106 733490247 : MAP_ELE_T const * ele = join->ele;
2107 733490247 : ulong ele_max = join->ele_max;
2108 :
2109 733490247 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
2110 733490247 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( map, memo );
2111 :
2112 : /* Determine the version of the chain we are querying. Then
2113 : speculatively read and validate the number of elements on the chain
2114 : at that version. If the chain is locked, tell the user to try
2115 : again later. If the number of elements in the chain is invalid,
2116 : tell user the map is corrupt. */
2117 :
2118 733490247 : ulong volatile const * _vc = &chain->ver_cnt;
2119 :
2120 733490247 : FD_COMPILER_MFENCE();
2121 733490247 : ulong then = *_vc;
2122 733490247 : FD_COMPILER_MFENCE();
2123 :
2124 733490247 : ulong ele_cnt = MAP_(private_vcnt_cnt)( then );
2125 :
2126 733490247 : FD_COMPILER_MFENCE();
2127 733490247 : ulong now = *_vc;
2128 733490247 : FD_COMPILER_MFENCE();
2129 :
2130 733490247 : query->memo = memo;
2131 733490247 : query->ele = (MAP_ELE_T *) sentinel;
2132 733490247 : query->chain = (MAP_(shmem_private_chain_t) *)chain;
2133 733490247 : query->ver_cnt = then;
2134 :
2135 733490247 : if( FD_UNLIKELY( (now!=then) | (!!(then & (1UL<<MAP_CNT_WIDTH))) ) ) return FD_MAP_ERR_AGAIN;
2136 733490247 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) return FD_MAP_ERR_CORRUPT;
2137 :
2138 : /* Search the chain for key. Since we know the numer of elements on
2139 : the chain, we can bound this search to avoid corruption causing out
2140 : of bound reads, infinite loops and such. */
2141 :
2142 733490247 : MAP_IDX_T const * cur = &chain->head_cidx;
2143 1281417011 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2144 :
2145 : /* Speculatively read the index of the chain, speculate if a valid
2146 : index and, if so, speculate if the chain element matches the
2147 : query. Note that this assumes element keys have a lifetime of at
2148 : least that of the element. A sufficient (but not a necessary,
2149 : see rant) condition for this is that key is a plain-old-data
2150 : fields in the element. */
2151 :
2152 1073688572 : FD_COMPILER_MFENCE();
2153 1073688572 : ulong ele_idx = MAP_(private_idx)( *cur );
2154 1073688572 : FD_COMPILER_MFENCE();
2155 :
2156 1073688572 : int corrupt = (ele_idx>=ele_max);
2157 1073688572 : int found = ( FD_LIKELY( !corrupt ) &&
2158 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2159 3966459 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
2160 : # endif
2161 1073688572 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) ? 1 : 0;
2162 :
2163 : /* Validate the speculation. If validation fails (e.g. the chain
2164 : was modified behind our back), tell the user to try again later.
2165 : If the element index was not valid, tell the user the map has
2166 : been corrupted. If key was found at element, tell the user they
2167 : can speculate element ele_idx contains key. */
2168 :
2169 1073688572 : FD_COMPILER_MFENCE();
2170 1073688572 : now = *_vc;
2171 1073688572 : FD_COMPILER_MFENCE();
2172 :
2173 1073688572 : if( FD_UNLIKELY( now!=then ) ) return FD_MAP_ERR_AGAIN;
2174 1073688572 : if( FD_UNLIKELY( corrupt ) ) return FD_MAP_ERR_CORRUPT;
2175 :
2176 1073688572 : if( FD_LIKELY( found ) ) { /* Optimize for found */
2177 525761808 : query->ele = (MAP_ELE_T *)&ele[ ele_idx ];
2178 525761808 : return FD_MAP_SUCCESS;
2179 525761808 : }
2180 :
2181 : /* The chain element didn't hold the key ... move to next element */
2182 :
2183 547926764 : cur = &ele[ ele_idx ].MAP_NEXT;
2184 547926764 : }
2185 :
2186 : /* At this point, the chain didn't hold the key. We could probably
2187 : return immediately but we speculative read the tail pointer,
2188 : validate it as an additional integrity check. If these checks
2189 : pass, we are confident the whole chain looked valid and did not
2190 : hold key between now and then. */
2191 :
2192 207728439 : ulong ele_idx = MAP_(private_idx)( *cur );
2193 :
2194 207728439 : FD_COMPILER_MFENCE();
2195 207728439 : now = *_vc;
2196 207728439 : FD_COMPILER_MFENCE();
2197 :
2198 207728439 : if( FD_UNLIKELY( now!=then ) ) return FD_MAP_ERR_AGAIN;
2199 207728439 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) return FD_MAP_ERR_CORRUPT;
2200 :
2201 207728439 : return FD_MAP_ERR_KEY;
2202 207728439 : }
2203 :
2204 : /* Note: txn_add is currently optimized for reasonably small number
2205 : of keys per transaction. For a huge number of transaction keys (e.g.
2206 : an iterator over all keys for all keys), probably should use the
2207 : iterator API. For a moderate number of transaction keys, probably
2208 : should consider data structures where set insert/remove/test are
2209 : sublinear time. Similarly, if MAP_KEY_HASH is costly, might be
2210 : useful to stash the key hashes in the transaction, memoize it in the
2211 : elements, etc. */
2212 :
2213 : MAP_STATIC int
2214 : MAP_(txn_add)( MAP_(txn_t) * txn,
2215 : MAP_KEY_T const * key,
2216 8422833 : int lock ) {
2217 :
2218 : /* Unpack txn fields */
2219 :
2220 8422833 : MAP_(shmem_t) * map = txn->map;
2221 8422833 : ulong info_max = txn->info_max;
2222 8422833 : ulong lock_cnt = txn->lock_cnt;
2223 8422833 : ulong spec_cnt = txn->spec_cnt;
2224 :
2225 8422833 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2226 8422833 : MAP_(txn_private_info_t) * spec_info = lock_info + (info_max - spec_cnt);
2227 :
2228 : /* Determine which chain manages this key */
2229 :
2230 8422833 : ulong memo = MAP_(key_hash)( key, map->seed );
2231 8422833 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2232 :
2233 : /* If this chain already needs to be locked for this transaction,
2234 : nothing to do. */
2235 :
2236 18086184 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ )
2237 9722355 : if( FD_UNLIKELY( chain==lock_info[ lock_idx ].chain ) ) return FD_MAP_SUCCESS;
2238 :
2239 8363829 : if( FD_UNLIKELY( !lock ) ) { /* optimize for locked key, possible compile time */
2240 :
2241 : /* At this point, key is used speculatively by the transaction and
2242 : its managing chain isn't in the locked set. If this chain is
2243 : already in the speculative set, nothing to do. */
2244 :
2245 3343212 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ )
2246 795423 : if( FD_UNLIKELY( chain==spec_info[ spec_idx ].chain ) ) return FD_MAP_SUCCESS;
2247 :
2248 : /* Add the chain to the speculative set. If we don't have any room,
2249 : fail. */
2250 :
2251 2547789 : ulong free_cnt = info_max - lock_cnt - spec_cnt;
2252 2547789 : if( FD_UNLIKELY( !free_cnt ) ) return FD_MAP_ERR_INVAL; /* Impossible if less than key_max keys added */
2253 1611543 : spec_info[-1].chain = chain;
2254 1611543 : txn->spec_cnt = spec_cnt + 1UL;
2255 :
2256 5808663 : } else {
2257 :
2258 : /* At this point, key is used locked by the transaction and its
2259 : managing chain isn't in the locked set. If this chain is
2260 : currently in the speculative set, move it to the locked
2261 : set. */
2262 :
2263 8180973 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ )
2264 2388156 : if( FD_UNLIKELY( chain==spec_info[ spec_idx ].chain ) ) {
2265 15846 : spec_info[ spec_idx ].chain = spec_info[ 0 ].chain; /* Fill the hole at spec_idx, making a hole at 0 */
2266 15846 : lock_info[ lock_cnt ].chain = chain; /* Either uses unused entry or fills hole at 0 */
2267 15846 : txn->spec_cnt = spec_cnt - 1UL;
2268 15846 : txn->lock_cnt = lock_cnt + 1UL;
2269 15846 : return FD_MAP_SUCCESS;
2270 15846 : }
2271 :
2272 : /* Add the chain to the locked set. If we don't have any room,
2273 : fail. */
2274 :
2275 5792817 : ulong free_cnt = info_max - lock_cnt - spec_cnt;
2276 5792817 : if( FD_UNLIKELY( !free_cnt ) ) return FD_MAP_ERR_INVAL; /* Impossible if less than key_max keys added */
2277 4856826 : lock_info[lock_cnt].chain = chain;
2278 4856826 : txn->lock_cnt = lock_cnt + 1UL;
2279 :
2280 4856826 : }
2281 :
2282 6468369 : return FD_MAP_SUCCESS;
2283 8363829 : }
2284 :
2285 : MAP_STATIC int
2286 : MAP_(txn_try)( MAP_(txn_t) * txn,
2287 1873134 : int flags ) {
2288 1873134 : int non_blocking = !(flags & FD_MAP_FLAG_BLOCKING);
2289 :
2290 : /* Unpack txn fields */
2291 :
2292 1873134 : ulong info_max = txn->info_max;
2293 1873134 : ulong lock_cnt = txn->lock_cnt;
2294 1873134 : ulong spec_cnt = txn->spec_cnt;
2295 :
2296 1873134 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2297 1873134 : MAP_(txn_private_info_t) * spec_info = lock_info + info_max - spec_cnt;
2298 :
2299 1873134 : ulong backoff_exp = (1UL<<32); /* See iter_lock for details */
2300 1873134 : ulong backoff_seed = ((ulong)(uint)flags)>>2;
2301 :
2302 1873134 : int err;
2303 :
2304 1873134 : for(;; ) {
2305 :
2306 1873134 : err = FD_MAP_SUCCESS;
2307 :
2308 1873134 : FD_COMPILER_MFENCE();
2309 :
2310 : /* Get the chain versions for all keys in the speculative set.
2311 : If any are locked, set AGAIN if any are locked. */
2312 :
2313 3468831 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ ) {
2314 1595697 : ulong ver_cnt = spec_info[ spec_idx ].chain->ver_cnt;
2315 1595697 : if( FD_UNLIKELY( ver_cnt & (1UL<<MAP_CNT_WIDTH) ) ) { /* Already locked */
2316 0 : err = FD_MAP_ERR_AGAIN;
2317 0 : break;
2318 0 : }
2319 1595697 : spec_info[ spec_idx ].ver_cnt = ver_cnt;
2320 1595697 : }
2321 :
2322 1873134 : if( FD_LIKELY( !err ) ) {
2323 :
2324 : /* At this point, all the chains we are speculating on were
2325 : unlocked and we have recorded their versions. Try to lock
2326 : all the chains for the locked key. */
2327 : /* FIXME: consider reordering like iter_lock? */
2328 :
2329 6745806 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) {
2330 :
2331 9745344 : MAP_CRIT( lock_info[ lock_idx ].chain, 0 ) { /* non-blocking */
2332 :
2333 : /* Got the lock ... save the version and retain the lock for
2334 : test. */
2335 :
2336 4872672 : lock_info[ lock_idx ].ver_cnt = ver_cnt;
2337 4872672 : retain_lock = 1;
2338 :
2339 4872672 : } MAP_CRIT_BLOCKED {
2340 :
2341 : /* We hit contention for this lock. Unlock the chains that
2342 : we already locked to prevent possible deadlock (see
2343 : iter_lock) */
2344 :
2345 0 : for( ulong unlock_idx=0UL; unlock_idx<lock_idx; unlock_idx++ )
2346 0 : lock_info[ unlock_idx ].chain->ver_cnt = lock_info[ unlock_idx ].ver_cnt + (2UL<<MAP_CNT_WIDTH);
2347 :
2348 0 : err = FD_MAP_ERR_AGAIN;
2349 :
2350 0 : } MAP_CRIT_END;
2351 :
2352 4872672 : if( FD_UNLIKELY( err ) ) break;
2353 :
2354 4872672 : }
2355 :
2356 1873134 : }
2357 :
2358 1873134 : FD_COMPILER_MFENCE();
2359 :
2360 1873134 : if( FD_LIKELY( (!err) | non_blocking ) ) break;
2361 :
2362 : /* At this point, we hit contention and are blocking (need to try
2363 : again). Do a random backoff (see iter_lock for details). */
2364 :
2365 0 : ulong scale = fd_ulong_min( (fd_ulong_min( lock_cnt+spec_cnt, (1UL<<16)-1UL )*backoff_exp) >> 16, (1UL<<32)-1UL );
2366 0 : backoff_exp = fd_ulong_min( backoff_exp + (backoff_exp>>2) + (backoff_exp>>4), (1UL<<48)-1UL );
2367 0 : MAP_(backoff)( scale, backoff_seed );
2368 0 : }
2369 :
2370 : /* At this point, if we don't have an error, we have the chain
2371 : versions for txn keys used speculatively and they were unlocked and
2372 : we have locks on the chains for txn keys used locked. Otherwise,
2373 : this is a non-blocking call and we return AGAIN. */
2374 :
2375 1873134 : return err;
2376 1873134 : }
2377 :
2378 : MAP_STATIC int
2379 1873134 : MAP_(txn_test)( MAP_(txn_t) * txn ) {
2380 :
2381 : /* Unpack txn fields */
2382 :
2383 1873134 : ulong info_max = txn->info_max;
2384 1873134 : ulong lock_cnt = txn->lock_cnt;
2385 1873134 : ulong spec_cnt = txn->spec_cnt;
2386 :
2387 1873134 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2388 1873134 : MAP_(txn_private_info_t) * spec_info = lock_info + info_max - spec_cnt;
2389 :
2390 : /* Unlock all chains locked for this transaction. Then test if any
2391 : keys used speculatively could have changed in locking / trying /
2392 : unlocking. If so, tell user to retry later. */
2393 :
2394 1873134 : int err = FD_MAP_SUCCESS;
2395 :
2396 1873134 : FD_COMPILER_MFENCE();
2397 :
2398 6745806 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) lock_info[ lock_idx ].chain->ver_cnt += (1UL<<MAP_CNT_WIDTH);
2399 :
2400 3468831 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ ) {
2401 1595697 : MAP_(shmem_private_chain_t) const * chain = spec_info[ spec_idx ].chain;
2402 1595697 : ulong ver_cnt = spec_info[ spec_idx ].ver_cnt;
2403 1595697 : if( FD_UNLIKELY( chain->ver_cnt!=ver_cnt ) ) {
2404 0 : err = FD_MAP_ERR_AGAIN;
2405 0 : break;
2406 0 : }
2407 1595697 : }
2408 :
2409 1873134 : FD_COMPILER_MFENCE();
2410 :
2411 1873134 : return err;
2412 1873134 : }
2413 :
2414 : MAP_STATIC int
2415 : MAP_(txn_insert)( MAP_(t) * join,
2416 6543726 : MAP_ELE_T * ele ) {
2417 :
2418 : /* Determine the element index (fastest if ele are power-of-two) and
2419 : the chain that should hold ele */
2420 :
2421 6543726 : MAP_(shmem_t) * map = join->map;
2422 6543726 : ulong ele_max = join->ele_max;
2423 :
2424 6543726 : ulong ele_idx = (ulong)(ele - join->ele);
2425 6543726 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_INVAL;
2426 :
2427 1638105 : ulong memo = MAP_(key_hash)( &ele->MAP_KEY, map->seed );
2428 1638105 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2429 :
2430 : /* Insert ele_idx at head of chain. */
2431 :
2432 1638105 : ulong ver_cnt = chain->ver_cnt;
2433 1638105 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2434 1638105 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2435 :
2436 1638105 : ele->MAP_NEXT = chain->head_cidx;
2437 : # if MAP_MEMOIZE
2438 1638105 : ele->MAP_MEMO = memo;
2439 : # endif
2440 1638105 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2441 1638105 : chain->ver_cnt = MAP_(private_vcnt)( version, ele_cnt+1UL );
2442 :
2443 1638105 : return FD_MAP_SUCCESS;
2444 6543726 : }
2445 :
2446 : MAP_STATIC int
2447 : MAP_(txn_remove)( MAP_(t) * join,
2448 : MAP_KEY_T const * key,
2449 : MAP_ELE_T const * sentinel,
2450 : MAP_(query_t) * query,
2451 1635786 : int flags ) {
2452 :
2453 : /* Determine the chain that should hold key */
2454 :
2455 1635786 : MAP_(shmem_t) * map = join->map;
2456 1635786 : MAP_ELE_T * ele = join->ele;
2457 1635786 : ulong ele_max = join->ele_max;
2458 :
2459 1635786 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
2460 1635786 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2461 :
2462 : /* Find the key on the chain and remove it */
2463 :
2464 1635786 : ulong ver_cnt = chain->ver_cnt;
2465 1635786 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2466 1635786 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2467 :
2468 1635786 : query->memo = memo;
2469 1635786 : query->ele = (MAP_ELE_T *)sentinel;
2470 1635786 : query->chain = chain;
2471 1635786 : query->ver_cnt = ver_cnt;
2472 :
2473 1635786 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2474 :
2475 1635786 : MAP_IDX_T * cur = &chain->head_cidx;
2476 2244906 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
2477 2238495 : ulong ele_idx = MAP_(private_idx)( *cur );
2478 2238495 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2479 :
2480 2238495 : if(
2481 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2482 2238495 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
2483 2238495 : # endif
2484 2238495 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2485 1629375 : *cur = ele[ ele_idx ].MAP_NEXT;
2486 1629375 : chain->ver_cnt = MAP_(private_vcnt)( version, ele_cnt-1UL );
2487 1629375 : query->ele = &ele[ ele_idx ];
2488 1629375 : return FD_MAP_SUCCESS;
2489 1629375 : }
2490 :
2491 609120 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
2492 609120 : }
2493 :
2494 6411 : ulong ele_idx = MAP_(private_idx)( *cur );
2495 6411 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not found */
2496 6411 : return FD_MAP_ERR_KEY;
2497 6411 : }
2498 :
2499 : MAP_STATIC int
2500 : MAP_(txn_modify)( MAP_(t) * join,
2501 : MAP_KEY_T const * key,
2502 : MAP_ELE_T * sentinel,
2503 : MAP_(query_t) * query,
2504 3276705 : int flags ) {
2505 :
2506 : /* Determine which chain might hold key */
2507 :
2508 3276705 : MAP_(shmem_t) * map = join->map;
2509 3276705 : MAP_ELE_T * ele = join->ele;
2510 3276705 : ulong ele_max = join->ele_max;
2511 :
2512 3276705 : ulong memo = (flags & FD_MAP_FLAG_USE_HINT) ? query->memo : MAP_(key_hash)( key, map->seed );
2513 3276705 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, memo );
2514 :
2515 : /* Search the chain for key */
2516 :
2517 3276705 : ulong ver_cnt = chain->ver_cnt;
2518 3276705 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2519 :
2520 3276705 : query->memo = memo;
2521 3276705 : query->ele = sentinel;
2522 3276705 : query->chain = chain;
2523 3276705 : query->ver_cnt = ver_cnt;
2524 :
2525 3276705 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2526 :
2527 3276705 : MAP_IDX_T * cur = &chain->head_cidx;
2528 5204931 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2529 5189034 : ulong ele_idx = MAP_(private_idx)( *cur );
2530 :
2531 5189034 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2532 :
2533 5189034 : if(
2534 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2535 4487718 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==memo ) &&
2536 4487718 : # endif
2537 4487718 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2538 3260808 : if( flags & FD_MAP_FLAG_ADAPTIVE ) {
2539 816405 : *cur = ele[ ele_idx ].MAP_NEXT;
2540 816405 : ele[ ele_idx ].MAP_NEXT = chain->head_cidx;
2541 816405 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2542 816405 : }
2543 3260808 : query->ele = &ele[ ele_idx ];
2544 3260808 : return FD_MAP_SUCCESS;
2545 3260808 : }
2546 :
2547 1928226 : cur = &ele[ ele_idx ].MAP_NEXT;
2548 1928226 : }
2549 :
2550 15897 : ulong ele_idx = MAP_(private_idx)( *cur );
2551 15897 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2552 :
2553 15897 : return FD_MAP_ERR_KEY;
2554 15897 : }
2555 :
2556 : MAP_STATIC int
2557 : MAP_(iter_lock)( MAP_(t) * join,
2558 : ulong * lock_seq,
2559 : ulong lock_cnt,
2560 1877061 : int flags ) {
2561 1877061 : if( FD_UNLIKELY( !lock_cnt ) ) return FD_MAP_SUCCESS; /* nothing to do */
2562 1877052 : if( FD_UNLIKELY( (!join) | (!lock_seq) ) ) return FD_MAP_ERR_INVAL;
2563 :
2564 1877046 : int non_blocking = !(flags & FD_MAP_FLAG_BLOCKING);
2565 :
2566 1877046 : MAP_(shmem_t) * map = join->map;
2567 :
2568 1877046 : ulong chain_cnt = map->chain_cnt;
2569 1877046 : if( FD_UNLIKELY( lock_cnt>chain_cnt ) ) return FD_MAP_ERR_INVAL;
2570 :
2571 1877043 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( join->map, 0UL );
2572 :
2573 1877043 : int err;
2574 :
2575 1877043 : ulong backoff = 1UL<<32; /* in [1,2^16)*2^32 */
2576 1877043 : ulong backoff_seed = ((ulong)(uint)flags)>>2; /* 0 usually fine */
2577 1877043 : ulong lock_idx = 0UL;
2578 1877043 : ulong locked_cnt = 0UL;
2579 3755235 : for(;;) {
2580 :
2581 3755235 : err = FD_MAP_SUCCESS;
2582 :
2583 : /* At this point, we've acquired locks [0,locked_cnt), we need to
2584 : acquire locks [locked_cnt,lock_cnt), [locked_cnt,lock_cnt) is non
2585 : empty and i is in [locked_cnt,lock_cnt). Try to acquire lock
2586 : lock_idx this iteration. */
2587 :
2588 3755235 : ulong chain_idx = lock_seq[ lock_idx ];
2589 :
2590 3755235 : if( FD_UNLIKELY( chain_idx>=chain_cnt ) ) { /* optimize for valid lock_seq */
2591 0 : for( ulong unlock_idx=0UL; unlock_idx<locked_cnt; unlock_idx++ )
2592 0 : chain[ lock_seq[ unlock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2593 0 : locked_cnt = 0UL;
2594 0 : err = FD_MAP_ERR_AGAIN;
2595 0 : break;
2596 0 : }
2597 :
2598 7510470 : MAP_CRIT( chain + chain_idx, 0 ) {
2599 :
2600 : /* At this point, we got the lock. Swap lock at locked_cnt and
2601 : lock_idx and increment locked_cnt to move lock_idx to the
2602 : locked set as the most recently acquired lock. Since we
2603 : increment lock_idx below, when locked_cnt<lock_idx (i.e. we had
2604 : contention for lock locked_cnt recently), this will move the
2605 : next attempt to lock locked_cnt as far as possible from now of
2606 : the remaining locks to acquire. When locked_cnt==lock_idx,
2607 : this is a branchless no-op (and the increment of lock_idx below
2608 : will guarantee lock_idx will be at least locked_cnt next
2609 : iteration, preserving the invariant that lock_idx is in
2610 : [locked_cnt,lock_cnt) on the next iteration if there is one. */
2611 :
2612 3755235 : ulong chain_idx_tmp = lock_seq[ locked_cnt ];
2613 3755235 : lock_seq[ lock_idx ] = chain_idx_tmp;
2614 3755235 : lock_seq[ locked_cnt ] = chain_idx;
2615 3755235 : locked_cnt++;
2616 :
2617 3755235 : retain_lock = 1;
2618 :
2619 3755235 : } MAP_CRIT_BLOCKED {
2620 :
2621 : /* We failed to get lock lock_idx. To avoid deadlock with the
2622 : thread that has this lock and is trying to get a lock we
2623 : already have, we unlock the chains we've already locked (note
2624 : that we need to unlock here in non-blocking operation too).
2625 : Quick experiments in extreme contention scenarios found more
2626 : incremental approaches in blocking operation could take an
2627 : excessively long time to resolve so we bulk unlock. */
2628 :
2629 0 : for( ulong unlock_idx=0UL; unlock_idx<locked_cnt; unlock_idx++ )
2630 0 : chain[ lock_seq[ unlock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2631 0 : locked_cnt = 0UL;
2632 :
2633 0 : err = FD_MAP_ERR_AGAIN;
2634 :
2635 0 : } MAP_CRIT_END;
2636 :
2637 3755235 : if( FD_UNLIKELY( (locked_cnt==lock_cnt ) | /* all locks acquired */
2638 3755235 : ((!!err) & non_blocking) ) ) break; /* or hit contention and are non-blocking */
2639 :
2640 : /* Move to the next lock. Everytime we wrap around, we hit
2641 : contention since the last wrap / iter start. We do a random
2642 : exponential backoff with saturation on wrapping to minimize
2643 : contention with other threads hitting these locks. Normalizing
2644 : out fixed point scalings baked into the below, we spin pause a
2645 : uniform IID random number of times in [0,unlocked_cnt*backoff]
2646 : where backoff is 1 on the first wrap and increases by ~30% each
2647 : time to a maximum of 2^16 (i.e. hundreds microseconds per
2648 : remaining lock for typical CPU speeds and spin pause delays at
2649 : maximum backoff). */
2650 :
2651 1878192 : lock_idx++;
2652 1878192 : if( FD_UNLIKELY( lock_idx==lock_cnt ) ) { /* optimize for lots of locks */
2653 0 : lock_idx = locked_cnt;
2654 0 : ulong scale = fd_ulong_min( (fd_ulong_min( lock_cnt-locked_cnt, (1UL<<16)-1UL )*backoff) >> 16, (1UL<<32)-1UL );
2655 0 : backoff = fd_ulong_min( backoff + (backoff>>2) + (backoff>>4), (1UL<<48)-1UL );
2656 0 : MAP_(backoff)( scale, backoff_seed );
2657 0 : }
2658 1878192 : }
2659 :
2660 1877043 : return err;
2661 1877046 : }
2662 :
2663 : MAP_STATIC void
2664 : MAP_(iter_unlock)( MAP_(t) * join,
2665 : ulong const * lock_seq,
2666 3755235 : ulong lock_cnt ) {
2667 3755235 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( join->map, 0UL );
2668 :
2669 3755235 : FD_COMPILER_MFENCE();
2670 7510470 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ )
2671 3755235 : chain[ lock_seq[ lock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2672 3755235 : FD_COMPILER_MFENCE();
2673 3755235 : }
2674 :
2675 : MAP_STATIC void
2676 3 : MAP_(reset)( MAP_(t) * join ) {
2677 3 : MAP_(shmem_t) * map = join->map;
2678 :
2679 3 : ulong chain_cnt = map->chain_cnt;
2680 3 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, 0UL );
2681 :
2682 1539 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
2683 1536 : ulong ver_cnt = chain[ chain_idx ].ver_cnt;
2684 1536 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2685 1536 : chain[ chain_idx ].ver_cnt = MAP_(private_vcnt)( version+2UL, 0UL );
2686 1536 : chain[ chain_idx ].head_cidx = MAP_(private_cidx)( MAP_(private_idx_null)() );
2687 1536 : }
2688 3 : }
2689 :
2690 : MAP_STATIC int
2691 27 : MAP_(verify)( MAP_(t) const * join ) {
2692 :
2693 3149094 : # define MAP_TEST(c) do { \
2694 3149094 : if( FD_UNLIKELY( !(c) ) ) { FD_LOG_WARNING(( "FAIL: %s", #c )); return FD_MAP_ERR_INVAL; } \
2695 3149094 : } while(0)
2696 :
2697 : /* Validate join */
2698 :
2699 27 : MAP_TEST( join );
2700 27 : MAP_TEST( fd_ulong_is_aligned( (ulong)join, alignof(MAP_(t)) ) );
2701 :
2702 27 : MAP_(shmem_t) const * map = join->map;
2703 27 : MAP_ELE_T const * ele = join->ele;
2704 27 : ulong ele_max = join->ele_max;
2705 :
2706 27 : MAP_TEST( map );
2707 27 : MAP_TEST( fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) );
2708 :
2709 27 : MAP_TEST( (!!ele) | (!ele_max) );
2710 27 : MAP_TEST( fd_ulong_is_aligned( (ulong)ele, alignof(MAP_ELE_T) ) );
2711 :
2712 27 : MAP_TEST( ele_max<=MAP_(ele_max_max)() );
2713 :
2714 : /* Validate map metadata */
2715 :
2716 27 : ulong magic = map->magic;
2717 27 : ulong seed = map->seed;
2718 27 : ulong chain_cnt = map->chain_cnt;
2719 :
2720 27 : MAP_TEST( magic==MAP_MAGIC );
2721 : /* seed is arbitrary */
2722 27 : MAP_TEST( fd_ulong_is_pow2( chain_cnt ) );
2723 27 : MAP_TEST( chain_cnt<=MAP_(chain_max)() );
2724 :
2725 27 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( map, 0UL );
2726 :
2727 : /* Validate the map chains */
2728 :
2729 27 : ulong unmapped_ele_cnt = ele_max;
2730 1574439 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
2731 :
2732 : /* Validate the chain length */
2733 :
2734 1574412 : ulong ver_cnt = chain[ chain_idx ].ver_cnt;
2735 :
2736 1574412 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2737 1574412 : MAP_TEST( ele_cnt<=unmapped_ele_cnt );
2738 1574412 : unmapped_ele_cnt -= ele_cnt;
2739 :
2740 : /* Validate chain linkage, element membership and element uniqueness */
2741 :
2742 1574412 : ulong head_idx = MAP_(private_idx)( chain[ chain_idx ].head_cidx );
2743 1574412 : ulong cur_idx = head_idx;
2744 1574412 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2745 0 : MAP_TEST( cur_idx<ele_max ); /* In element store */
2746 :
2747 0 : MAP_KEY_T const * key = &ele[ cur_idx ].MAP_KEY;
2748 :
2749 0 : ulong memo = MAP_(key_hash)( key, seed );
2750 0 : ulong ele_chain_idx = memo & (chain_cnt-1UL);
2751 0 : MAP_TEST( ele_chain_idx==chain_idx ); /* On correct chain */
2752 : # if MAP_MEMOIZE
2753 0 : MAP_TEST( ele[ cur_idx ].MAP_MEMO==memo );
2754 0 : # endif
2755 :
2756 : /* Note that we've already validated linkage from head_idx to
2757 : cur_idx so pointer chasing here is safe. */
2758 :
2759 0 : ulong prv_idx = head_idx;
2760 0 : while( prv_idx!=cur_idx ) {
2761 0 : MAP_TEST( !MAP_(key_eq)( &ele[ prv_idx ].MAP_KEY, key ) ); /* Unique */
2762 0 : prv_idx = MAP_(private_idx)( ele[ prv_idx ].MAP_NEXT );
2763 0 : }
2764 :
2765 0 : cur_idx = MAP_(private_idx)( ele[ cur_idx ].MAP_NEXT );
2766 0 : }
2767 :
2768 1574412 : MAP_TEST( MAP_(private_idx_is_null)( cur_idx ) );
2769 1574412 : }
2770 :
2771 : /* At this point, we know the sum of the chain lengths do not exceed
2772 : the size of the element store, each chain is of their stated
2773 : length, each chain element is in element store, and that every
2774 : element on a chain belongs on that chain (which precludes the
2775 : possibility of two chains merging into one) and that every element
2776 : on a chain is unique (which implies unique among all chains since
2777 : elements with each key maps to a single chain).
2778 :
2779 : That is, join is a current local join to a valid shared mapping of
2780 : unique keys to unique elements in the element store.
2781 :
2782 : We don't know anything about unmapped elements in the element store
2783 : and cannot do any verification of them (here be dragons). But
2784 : that's kinda the point ... what's in the unmapped elements depends
2785 : on how the application is managing those. */
2786 :
2787 27 : # undef MAP_TEST
2788 :
2789 27 : return FD_MAP_SUCCESS;
2790 27 : }
2791 :
2792 : MAP_STATIC char const *
2793 18 : MAP_(strerror)( int err ) {
2794 18 : switch( err ) {
2795 3 : case FD_MAP_SUCCESS: return "success";
2796 3 : case FD_MAP_ERR_INVAL: return "bad input";
2797 3 : case FD_MAP_ERR_AGAIN: return "try again";
2798 3 : case FD_MAP_ERR_CORRUPT: return "corruption detected";
2799 3 : case FD_MAP_ERR_KEY: return "key not found";
2800 3 : default: break;
2801 18 : }
2802 3 : return "unknown";
2803 18 : }
2804 :
2805 : #undef MAP_CRIT_END
2806 : #undef MAP_CRIT_BLOCKED
2807 : #undef MAP_CRIT
2808 :
2809 : #endif
2810 :
2811 : #undef MAP_
2812 : #undef MAP_STATIC
2813 : #undef MAP_VER_WIDTH
2814 :
2815 : #undef MAP_IMPL_STYLE
2816 : #undef MAP_MAGIC
2817 : #undef MAP_ALIGN
2818 : #undef MAP_CNT_WIDTH
2819 : #undef MAP_KEY_EQ_IS_SLOW
2820 : #undef MAP_MEMO
2821 : #undef MAP_MEMOIZE
2822 : #undef MAP_KEY_HASH
2823 : #undef MAP_KEY_EQ
2824 : #undef MAP_NEXT
2825 : #undef MAP_IDX_T
2826 : #undef MAP_KEY
2827 : #undef MAP_KEY_T
2828 : #undef MAP_ELE_T
2829 : #undef MAP_NAME
|