Line data Source code
1 : /* Generate prototypes, inlines and/or implementations for concurrent
2 : persistent shared maps based on chaining. A map can store a
3 : practically unbounded number of elements. If sized on creation for
4 : the maximum number of mapped elements, typical map operations are a
5 : fast O(1) time and map element overhead is a small O(1) space.
6 : Further, large numbers of composite map operations can be done
7 : concurrently with very low risk of conflicts.
8 :
9 : In the current implementation, each map chain has a version number.
10 : Operations that require changing a chain's connectivity (e.g.
11 : inserting or removing an element from a chain) or modifying an
12 : element managed by that chain, the chain's version number is
13 : increased by one (atomic fetch-and-or based) such that other
14 : potential users of keys managed by that chain detect and react
15 : appropriately to a potentially concurrent conflicting operation is in
16 : progress. When an operation completes, the chain version number is
17 : increased by one again to notify other users the operation is no
18 : longer in progress and that the set of keys managed by that chain
19 : and/or values associated with those keys has potentially changed
20 : since the previous version. For example, lockfree queries can
21 : interoperate with this via a zero-copy
22 : try-speculatively-process-then-test pattern similar to that used in
23 : fd_tango for high throughput message processing.
24 :
25 : As such, there can be an arbitrary number of concurrent readers
26 : processing map keys. These readers will not interfere with each
27 : other and will not block any concurrent insert / remove / modify
28 : operations. Insert / remove / modify operations can potentially
29 : block each other. Since there are typically O(1) keys per chain, the
30 : probability of concurrent insert / remove / modify operations
31 : involving different keys blocking each other is small. Further, this
32 : controllable a priori by provisioning the number of chains
33 : appropriately. Concurrent operations on the same key are serialized
34 : (as they necessarily would be any implementation). Since the
35 : operations are HPC implementations, collisions are resolved as fast
36 : as is practical. The upshot is that the map supports massive
37 : concurrency while preserving concurrent operation serializability.
38 :
39 : Version numbers are stored with chain head pointers such that the
40 : cache traffic required for managing chain versioning is covered by
41 : the same cache traffic required for managing chains in a
42 : non-concurrent implementation (e.g. fd_map_chain). Operations do
43 : many internal integrity checking / bounds checking for use in high
44 : reliability applications.
45 :
46 : Lastly, fine grained versioning allows for concurrent execution of
47 : complex operations involving multiple keys simultaneously. This
48 : allows using the map as a concurrent transactional memory and for
49 : serialization of all map elements at a consistent point in time while
50 : minimizing impact on ongoing concurrent operations (e.g. snapshotting
51 : all the elements in the map).
52 :
53 : The main drawback of chain versioning is the extra memory footprint
54 : required for chain metadata storage. The current implementation
55 : supports indexing compression and uses atomic bit field techniques to
56 : minimize this overhead.
57 :
58 : Concurrent operation requires FD_HAS_ATOMIC. This will still work on
59 : platforms without FD_HAS_ATOMIC but concurrent operations will not be
60 : safe.
61 :
62 : In short, if you need a concurrent map, this is a lot better than
63 : protecting a non-concurrent implementation with a global lock. And,
64 : if you don't, it will be comparably performant to a non-concurrent
65 : implementation.
66 :
67 : This generator is designed for ultra tight coupling with pools,
68 : treaps, heaps, lists, other maps, etc. Likewise, a map can be
69 : persisted beyond the lifetime of the creating process, be used
70 : inter-process, relocated in memory, be naively
71 : serialized/deserialized, be moved between hosts, use index
72 : compression for cache and memory bandwidth efficiency, etc.
73 : Concurrency and flexibility are prioritized.
74 :
75 : Typical usage:
76 :
77 : struct myele {
78 : ulong key; // Technically "MAP_KEY_T MAP_KEY" (default is ulong key), managed by mymap when the element is in the mymap
79 : ulong next; // Technically "MAP_IDX_T MAP_NEXT" (default is ulong next), managed by mymap when the element is in the mymap
80 :
81 : ... key and next can be located arbitrarily in the element and
82 : ... can be reused for other purposes when the element is not in a
83 : ... mymap. The mapping of a key to an element in the element
84 : ... store is arbitrary. An element should not be moved /
85 : ... released from the element store while in the mymap.
86 :
87 : };
88 :
89 : typedef struct myele myele_t;
90 :
91 : #define MAP_NAME mymap
92 : #define MAP_ELE_T myele_t
93 : #include "tmpl/fd_map_para.c"
94 :
95 : will declare the following APIs as a header only style library in the
96 : compilation unit:
97 :
98 : // A mymap_t is a stack declaration friendly quasi-opaque local
99 : // object used to hold the state of a local join to a mymap.
100 : // Similarly, a mymap_query_t and a mymap_iter_t hold the local
101 : // state of an ongoing local query and local iteration
102 : // respectively. E.g. it is fine to do mymap_t join[1];" to
103 : // allocate a mymap_t but the contents should not be used directly.
104 :
105 : typedef struct mymap_private mymap_t;
106 : typedef struct mymap_query_private mymap_query_t;
107 : typedef struct mymap_iter_private mymap_iter_t;
108 :
109 : // mymap_ele_max_max returns the maximum element store capacity
110 : // compatible with a mymap.
111 :
112 : ulong mymap_ele_max_max( void );
113 :
114 : // mymap_chain_max returns the maximum number of chains supported
115 : // by a mymap. Will be an integer power-of-two.
116 :
117 : ulong mymap_chain_max( void );
118 :
119 : // mymap_chain_cnt_est returns a reasonable number of chains to use
120 : // for a map that is expected to hold up to ele_max_est elements.
121 : // ele_max_est will be clamped to be in [1,mymap_ele_max_max()] and
122 : // the return value will be a integer power-of-two in
123 : // [1,mymap_chain_max()].
124 :
125 : ulong mymap_chain_cnt_est( ulong ele_max_est );
126 :
127 : // mymap_{align,footprint} returns the alignment and footprint
128 : // needed for a memory region to be used as a mymap. align will be
129 : // an integer power-of-two and footprint will be a multiple of
130 : // align. footprint returns 0 if chain_cnt is not an integer
131 : // power-of-two in [1,mymap_chain_max()].
132 : //
133 : // mymap_new formats a memory region with the required alignment
134 : // and footprint into a mymap. shmem points in the caller's
135 : // address space to the memory region to use. Returns shmem on
136 : // success (mymap has ownership of the memory region) and NULL on
137 : // failure (no changes, logs details). The caller is not joined on
138 : // return. The mymap will be empty with all map chains at version
139 : // 0 (unlocked).
140 : //
141 : // mymap_join joins the caller to an existing mymap. ljoin points
142 : // to a mymap_t compatible memory region in the caller's address
143 : // space, shmap points in the caller's address space to the memory
144 : // region containing the mymap, shele points in the caller's
145 : // address space to mymap's element store and ele_max gives the
146 : // element store's capacity. Returns a handle to the caller's
147 : // local join on success (join has ownership of the ljoin region)
148 : // and NULL on failure (no changes, logs details).
149 : //
150 : // mymap_leave leaves a mymap join. join points to a current local
151 : // join. Returns the memory region used for the join on success
152 : // (caller has ownership on return and the caller is no longer
153 : // joined) and NULL on failure (no changes, logs details). Use the
154 : // join accessors before leaving to get shmap, shele and ele_max
155 : // used by the join if needed.
156 : //
157 : // mymap_delete unformats a memory region used as a mymap. Assumes
158 : // shmap points in the caller's address space to a memory region
159 : // containing the mymap and that there are no joins. Returns shmem
160 : // on success (caller has ownership of the memory region, any
161 : // remaining elements still in the mymap are released to the caller
162 : // implicitly) and NULL on failure (no changes, logs details).
163 :
164 : ulong mymap_align ( void );
165 : ulong mymap_footprint( ulong chain_cnt );
166 : void * mymap_new ( void * shmem, ulong chain_cnt, ulong seed );
167 : mymap_t * mymap_join ( void * ljoin, void * shmap, void * shele, ulong ele_max );
168 : void * mymap_leave ( mymap_t * join );
169 : void * mymap_delete ( void * shmap );
170 :
171 : // mymap_{chain_cnt,seed} return the mymap configuration. Assumes
172 : // join is a current local join. The values will be valid for the
173 : // mymap lifetime.
174 :
175 : ulong mymap_chain_cnt( mymap_t const * join );
176 : ulong mymap_seed ( mymap_t const * join );
177 :
178 : // mymap_{shmap,shele,ele_max} return join details. Assumes join
179 : // is a current local join. The values will be valid for the join
180 : // lifetime. mymap_{shmap_const,shele_const} are const correct
181 : // versions.
182 :
183 : void const * mymap_shmap_const( mymap_t const * join );
184 : void const * mymap_shele_const( mymap_t const * join );
185 : ulong mymap_ele_max ( mymap_t const * join );
186 :
187 : void * mymap_shmap( mymap_t * join );
188 : void * mymap_shele( mymap_t * join );
189 :
190 : // mymap_key_{eq,hash} expose the provided MAP_KEY_{EQ,HASH} macros
191 : // as inlines with strict semantics. They assume that the provided
192 : // pointers are in the caller's address space to keys that will not
193 : // be changed during the call. They retain no interest in any keys
194 : // on return.
195 : //
196 : // mymap_key_eq returns 1 if *k0 and *k1 are equal and 0 otherwise.
197 : //
198 : // mymap_key_hash returns the hash of *key using the hash function
199 : // seed. Should ideally be a random mapping from a MAP_KEY_T to a
200 : // ulong but this depends on what the user actually used for
201 : // MAP_KEY_HASH. The seed used by a particular mymap innstance can
202 : // be obtained above.
203 :
204 : int mymap_key_eq ( ulong * k0, ulong * k1 );
205 : ulong mymap_key_hash( ulong * key, ulong seed );
206 :
207 : // mymap_backoff does FD_SPIN_PAUSE a random number of times. The
208 : // number of pauses is an approximately uniform IID random number
209 : // in [0,scale/2^16] where scale is in [0,2^32). Specifically, the
210 : // number of pauses is:
211 : //
212 : // floor( scale r / 2^48 )
213 : //
214 : // where r is a non-deterministic 32-bit uniform IID random number.
215 : // Under the hood, r is generated by hashing the user provided seed
216 : // and the least significant 32-bits of the CPU tickcounter.
217 : // Ideally, seed is a 32-bit globally unique identifer for the
218 : // logical thread of execution but this is up to the application to
219 : // specify and rarely matters in practice. This is a useful
220 : // building block for random exponential backoffs.
221 :
222 : void mymap_backoff( ulong scale, ulong seed );
223 :
224 : // mymap_query_ele returns a pointer in the caller's address space
225 : // to the element store element associated with the query or a
226 : // sentinel value. The sentinel value is application dependent and
227 : // thus arbitrary (e.g. not necessarily in the element store,
228 : // including NULL, a local temporary used as a bit bucket, etc).
229 : // Assumes query is valid. The lifetime of the returned pointer
230 : // depends on the query. mymap_query_ele_const is a const correct
231 : // version.
232 :
233 : myele_t const * mymap_query_ele_const( mymap_query_t const * query );
234 : myele_t * mymap_query_ele ( mymap_query_t * query );
235 :
236 : // mymap_insert inserts into a mymap a mapping from a key to an
237 : // element store element. ele points in the caller's address space
238 : // to the element and ele->key is initialized to the key. flags is
239 : // a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set /
240 : // clear in flags, this is allowed / not allowed to block the
241 : // caller. Assumes join is a current local join, element is not in
242 : // the mymap and the key is not currently mapped to anything in the
243 : // mymap. This is a non-blocking fast O(1) and supports highly
244 : // concurrent operation.
245 : //
246 : // Returns FD_MAP_SUCCESS (0) on success and a FD_MAP_ERR
247 : // (negative) on failure. On success, ele was inserted into the
248 : // mymap at some point during the call (mymap took ownership of the
249 : // element at that time but the application is free to manage all
250 : // fields of the element except ele->key, ele->next and, if the
251 : // mymap is memoized, ele->hash ... insert will initialize the hash
252 : // field to mymap_key_hash( key, seed ) and this will be stable
253 : // while ele is in mymap). On failure, the mymap was not modified
254 : // by the call, no changes of ownership occurred in the call and
255 : // returns:
256 : //
257 : // - FD_MAP_ERR_INVAL: ele is not a pointer to an element store
258 : // element.
259 : //
260 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
261 : // progress at some point during the call. Try again later (e.g.
262 : // after a random exponential backoff). Specifically, this
263 : // operation requires locking the map chain associated with key.
264 : // Since there are typically O(1) keys per chain, the probability
265 : // of getting AGAIN due to a false conflict is negligible even
266 : // under highly concurrent loads. Since insert / remove are fast
267 : // O(1) operations, any remaining conflicts, real or imagined,
268 : // are typically very short lived. Never returned for a blocking
269 : // call.
270 : //
271 : // IMPORTANT SAFETY TIP! Do not use inside a modify try/test,
272 : // query try/test, txn try/test or iter lock/unlock.
273 :
274 : int mymap_insert( mymap_t * join, myele_t * ele, int flags );
275 :
276 : // mymap_remove removes the mapping (if any) for key from the
277 : // mymap. On return, query will contain information about the
278 : // removed mapping. sentinel gives the query element pointer value
279 : // (arbitrary) to pass through when this did not remove a mapping
280 : // for any reason. flags is a bit-or of FD_MAP_FLAG flags. If
281 : // FD_MAP_FLAG_BLOCKING is set / clear in flags, this is allowed /
282 : // not allowed to block the caller. Assumes join is a current
283 : // local join and key is valid for the duration of the call.
284 : // Retains no interest in key, sentinel or query. This is a
285 : // non-blocking fast O(1) and supports highly concurrent operation.
286 : //
287 : // Returns FD_MAP_SUCCESS (0) on success and a FD_MAP_ERR
288 : // (negative) on failure. On success, key's mapping was removed at
289 : // some point during the call. mymap_query_ele( query ) will point
290 : // in the caller's address space to the element store element where
291 : // key mapped just before it was removed (element ownership
292 : // transferred to the caller at that time). On failure, no changes
293 : // were made by this call, mymap_query_ele( query ) will be
294 : // sentinel and:
295 : //
296 : // - FD_MAP_ERR_KEY: Key was not found in the mymap at some point
297 : // during the call.
298 : //
299 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
300 : // progress at some point during the call. Same considerations
301 : // as insert above. Never returned for a blocking call.
302 : //
303 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
304 : // point during the call.
305 : //
306 : // IMPORTANT SAFETY TIP! Do not use inside a modify try/test,
307 : // query try/test, txn try/test or iter lock/unlock.
308 :
309 : int
310 : mymap_remove( mymap_t * join,
311 : ulong const * key,
312 : myele_t const * sentinel,
313 : mymap_query_t * query,
314 : int flags );
315 :
316 : // mymap_modify_try tries to start modification of the mymap
317 : // element corresponding to key. On return, query will hold
318 : // information about the try. sentinel gives the query element
319 : // pointer value (arbitrary) to pass through when it is not safe to
320 : // try. flags is a bit-or of FD_MAP_FLAG flags. If
321 : // FD_MAP_FLAG_BLOCKING is set / clear, this call is allowed / not
322 : // allowed to block the caller. If FD_MAP_FLAG_ADAPTIVE is set /
323 : // clear, this call should / should not adapt the mymap to
324 : // accelerate future operations on this key. Adaptation for a key
325 : // can potentially slow future operations for other keys. The
326 : // marginal benefit of adaptation for a key grows linearly with the
327 : // number of keys managed by the key's chain. Assumes join is a
328 : // current local join and key is valid for the duration of the
329 : // call. Retains no interest in key, sentinel or query. This is a
330 : // non-blocking fast O(1) and supports highly concurrent operation.
331 : //
332 : // Returns FD_MAP_SUCCESS (0) on success and a FD_MAP_ERR
333 : // (negative) on failure. On success, mymap_query_ele( query )
334 : // will point in the caller's address space to the element to
335 : // modify and the chain that manages key will be locked. The mymap
336 : // retains ownership of this element and management of the key and
337 : // next fields. The caller is free to modify any other fields. On
338 : // failure, the mymap was not modified by this call,
339 : // mymap_query_ele( query ) will be sentinel and returns:
340 : //
341 : // - FD_MAP_ERR_KEY: Key was not found in the mymap in some point
342 : // during the call.
343 : //
344 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
345 : // progress at some point during the try. Same considerations as
346 : // insert above. Never returned for a blocking call.
347 : //
348 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
349 : // point during the call.
350 : //
351 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a query
352 : // try/test, txn try/test or iter_lock/unlock on the same thread.
353 :
354 : int
355 : mymap_modify_try( mymap_t * join,
356 : ulong const * key,
357 : myele_t * sentinel,
358 : mymap_query_t * query,
359 : int flags );
360 :
361 : // mymap_modify_test finishes an in-progress modification. Assumes
362 : // query is valid and the caller is in a modify try. Returns
363 : // FD_MAP_SUCCESS (0). On return, the caller will no longer be in
364 : // a modify try. Guaranteed to succeed.
365 : //
366 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a query
367 : // try/test, txn try/test or iter_lock/unlock on the same thread.
368 :
369 : int mymap_modify_test( mymap_query_t * query );
370 :
371 : // mymap_query_try tries to speculatively query a mymap for key.
372 : // On return, query will hold information about the try. sentinel
373 : // gives the query element pointer value (arbitrary) to pass
374 : // through when it is not safe to try the query. Assumes join is a
375 : // current local join and key is valid for the duration of the
376 : // call. Does not modify the mymap and retains no interest in key,
377 : // sentinel or query. This is a non-blocking fast O(1) and
378 : // supports highly concurrent operation.
379 : //
380 : // Returns FD_MAP_SUCCESS (0) on success and a FD_MAP_ERR
381 : // (negative) on failure. On success, key mapped to an element in
382 : // the element store at some point during the call.
383 : // mymap_query_ele( query ) will point in the caller's address
384 : // space to the element store element where key mapped at that
385 : // time. The mymap retains ownership of this element but the
386 : // caller can zero copy speculatively process the element. On
387 : // failure, mymap_query_ele( query ) will be sentinel and returns:
388 : //
389 : // - FD_MAP_ERR_KEY: Key was not found in the mymap in some point
390 : // during the call.
391 : //
392 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
393 : // progress at some point during the call. Try again later (e.g.
394 : // after a random exponential backoff). Unlike insert and
395 : // remove, this call does _not_ require a lock on the chain
396 : // associated with key. As such, AGAIN can only be caused by
397 : // concurrent operations that require a lock on the chain that
398 : // manages key (with similar considerations as insert and remove)
399 : // and this will never interfere with any other concurrent
400 : // operation. Among the many implications, a query will never
401 : // delay a concurrent query and AGAIN will never be returned if
402 : // only concurrent queries are in progress.
403 : //
404 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
405 : // point during the call.
406 : //
407 : // IMPORTANT SAFETY TIP! THE CALLER SHOULD BE PREPARED TO HANDLE
408 : // ARBITRARY AND/OR INCONSISTENT VALUES FOR ELEMENT FIELDS DURING
409 : // SPECULATIVE PROCESSING. CALLERS SHOULD NOT COMMIT ANY RESULTS
410 : // OF SPECULATIVE PROCESSING UNTIL IT TESTS THE QUERY WAS
411 : // SUCCESSFUL.
412 : //
413 : // The simplest form of speculative processing is to copy the
414 : // element from the element store into a local temporary, test that
415 : // the speculation was valid, and then process the local temporary
416 : // copy at its leisure. Zero copy, more selective copying and/or
417 : // writing speculative results into local tempoaries are more
418 : // advanced examples of speculative processing.
419 : //
420 : // Use mymap_modify to do a blocking (non-speculative) and/or
421 : // adaptive query (just don't actually modify the element).
422 : //
423 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a modify
424 : // try/test, txn try/test or iter_lock/unlock on the same thread.
425 :
426 : int
427 : mymap_query_try( mymap_t const * join,
428 : ulong const * key,
429 : myele_t const * sentinel,
430 : mymap_query_t * query );
431 :
432 : // mymap_query_test tests if an in-progress query is still valid.
433 : // Assumes query is valid, we are still in a query try and chain
434 : // version numbers have not wrapped since we started the try.
435 : // Returns FD_MAP_SUCCESS (0) if the query is still valid and
436 : // FD_MAP_ERR_AGAIN (negative) if a potentially conflicting
437 : // operation was in progress at some point during the try.
438 : //
439 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a modify
440 : // try/test, txn try/test or iter_lock/unlock on the same thread.
441 :
442 : int mymap_query_test( mymap_query_t const * query );
443 :
444 : // mymap_txn_key_max_max() returns the theoretical maximum number
445 : // of keys that can be in a transaction. (Practically unbounded.)
446 :
447 : ulong mymap_txn_key_max_max( void );
448 :
449 : // mymap_txn_{align,footprint} return the alignment and footprint
450 : // required for a mymap_txn_t that can support at least key_max
451 : // keys. align will be an integer power of two. footprint will be
452 : // a multiple of align. Returns 0 if key_max > key_max_max.
453 : //
454 : // mymap_txn_init formats a memory region with the required
455 : // alignment and footprint as a txn_t that can support at least
456 : // key_max keys. ltxn points in the caller's address space to the
457 : // memory region to use. Assumes join is a current local join.
458 : // On success, returns ltxn (txn will have ownership of the memory
459 : // region, txn will be valid with empty speculative and locked key
460 : // sets). The lifetime of the join should be at least the lifetime
461 : // of the txn. On failure (obviously bad inputs), returns NULL (no
462 : // changes).
463 : //
464 : // mymap_txn_fini unformats a memory region as a txn_t and returns
465 : // a pointer to the underlying memory. On success, returns ltxn.
466 : // The caller has ownership of the memory region on return. On
467 : // failure (e.g. NULL input), returns NULL (no changes).
468 :
469 : ulong mymap_txn_align ( void );
470 : ulong mymap_txn_footprint( ulong key_max );
471 : mymap_txn_t * mymap_txn_init ( void * ltxn, mymap_t * join, ulong key_max );
472 : void * mymap_txn_fini ( mymap_txn_t * txn );
473 :
474 : // mymap_txn_add indicates that key may be used in a txn. Assumes
475 : // txn is valid and not in a try and key is valid for duration of
476 : // the call. Retains no interest in key. A zero value for lock
477 : // indicates the key will be operated on speculatively. A non-zero
478 : // value indicates the key will potentially be inserted / removed /
479 : // modified by the transaction. It is okay to have a mixture of
480 : // speculative and locked keys in a transaction. Further, it is
481 : // okay to add the same key multiple times (though not particularly
482 : // efficient), including as a mixture of speculative and locked (if
483 : // _any_ adds of same key are locked, it will be treated as a
484 : // locked key for the txn overall). Returns FD_MAP_SUCCESS (zero)
485 : // on success (txn is valid and not in a try) and FD_MAP_ERR_INVAL
486 : // (negative) on failure (txn is valid and not in a try but key was
487 : // not added). INVAL is only possible when more than key_max adds
488 : // have been done since init.
489 :
490 : int mymap_txn_add( mymap_txn_t * txn, mymap_key_t const * key, int lock );
491 :
492 : // mymap_txn_try returns FD_MAP_SUCCESS (zero) if it is safe to try
493 : // the transaction and FD_MAP_ERR_AGAIN (negative) if the user
494 : // should try again later (e.g. after a random exponential
495 : // backoff). flags is a bit-of of FD_MAP_FLAG flags. If
496 : // FD_MAP_FLAG_BLOCKING is set / clear, this call is allowed / not
497 : // allowed to block the caller. The upper 30-bits of flags can be
498 : // used to provide a seed for random backoffs (but this is up to
499 : // the application and rarely matters in practice). Assumes txn is
500 : // valid and not in a try. On success, txn will be valid and in a
501 : // try. On an failure, txn will be valid and not in a try.
502 : //
503 : // IMPORTANT SAFETY TIP! Do not interleave or nest with modify
504 : // try/test, query try/test or iter_lock/unlock on the same thread.
505 : //
506 : // To better under the restrictions on nesting and interleaving,
507 : // mymap_{insert,remove,query_try,modify_try,query_try} will fail
508 : // with FD_MAP_ERR_AGAIN for any key managed by a chain locked by
509 : // the txn but can succeed for keys on managed by other chains.
510 : // This behavior is unpredictable as it depends on the keys in the
511 : // txn, keys not in the transaction, map seed, map chain count and
512 : // user provided key hash function. Interleaving a query_test,
513 : // modify_test, iter_unlock can be similarly unpredictable. Worse,
514 : // an interleaved modify_test or iter_unlock can muck up the chain
515 : // locks and used by the txn try. Similarly for other cases.
516 : //
517 : // IMPORTANT SAFETY TIP! If some txn keys were speculative, the
518 : // caller should not rely on any reads from the corresponding
519 : // element until the transaction tests successfully. Similar
520 : // considerations as mymap_query_try.
521 :
522 : int mymap_txn_try( mymap_txn_t * txn, int flags );
523 :
524 : // mymap_txn_{insert,remove} behave _identically_ to
525 : // mymap_{insert,remove} from the caller's point of view but
526 : // assumes we are in a txn try and key was added to the txn as
527 : // locked. These will never return FD_MAP_ERR_AGAIN.
528 : //
529 : // Similarly, mymap_txn_query behaves _identically_ to
530 : // mymap_query_try from the caller's point of view but assumes we
531 : // are in a txn try and key was added to txn as either speculative
532 : // or locked. Will never return FD_MAP_ERR_AGAIN.
533 : //
534 : // Likewise, mymap_txn_modify behaves _identically_ to
535 : // mymap_modify_try from the caller's point of view but assumes we
536 : // are in a txn try and key was added to txn as locked. It will
537 : // never return FD_MAP_ERR_AGAIN.
538 : //
539 : // There is no mymap_query_test or mymap_modify_test because these
540 : // are part of the overall txn test.
541 : //
542 : // IMPORTANT SAFETY TIP!
543 : //
544 : // These never should be used outside a txn try.
545 : //
546 : // IMPORTANT SAFETY TIP!
547 : //
548 : // For a speculative txn key, mymap_query can return FD_MAP_ERR_KEY
549 : // and/or FD_MAP_ERR_CORRUPT if there are locked concurrent
550 : // operations on the chain managing key (e.g a concurrent remove of
551 : // a key that happens to be on the same chain). When such
552 : // operations are possible, these errors can be canaries that the
553 : // transaction has already failed (testing the txn is still
554 : // necessary to it "official"). CORRUPT in this situation is most
555 : // likely an "unofficial" failure than memory corruption.
556 : // Similarly, while mymap_txn_query is guaranteed give a pointer to
557 : // an element store element on success, there is no guarantee it
558 : // will be to the correct element, a well formed element (or even
559 : // to the same location if used multiple times in the try). When
560 : // such concurrent operations are not possible (e.g. single
561 : // threaded operation), SUCCESS, KEY, CORRUPT and the element
562 : // pointer returned have their usual interpretations.
563 : //
564 : // TL;DR speculative txn keys are best used for commmon stable
565 : // constant-ish read-only data to minimize concurrent complex
566 : // transactions using these common keys from unnecessarily blocking
567 : // each other.
568 : //
569 : // TL;DR resolve all speculative txn keys to elements at
570 : // transaction start exactly once for sanity.
571 : //
572 : // TL;DR avoid using speculative txn keys at all unless very well
573 : // versed in lockfree programming idioms and gotchas.
574 :
575 : int mymap_txn_insert( mymap_t * join, myele_t * ele );
576 : int mymap_txn_remove( mymap_t * join, ulong const * key, myele_t const * sentinel, mymap_query_t * query );
577 : int mymap_txn_modify( mymap_t * join, ulong const * key, myele_t * sentinel, mymap_query_t * query, int flags );
578 : int mymap_txn_query ( mymap_t const * join, ulong const * key, myele_t const * sentinel, mymap_query_t * query );
579 :
580 : // mymap_txn_test returns FD_MAP_SUCCESS (zero) if the txn try
581 : // succeeded and FD_MAP_AGAIN (negative) if it failed (e.g. the
582 : // test detected a potentially conflicting concurrent operation
583 : // during the try). On success, any results from processing of
584 : // keys marked as speculative can be trusted. On failure, the
585 : // mymap was not changed by the try. Regardless of return value,
586 : // txn will _not_ be in a try on return _but_ will still be valid.
587 : // As such, if a transaction fails, it can be retried (e.g. after a
588 : // random exponential backoff) without needing to recreate it (e.g.
589 : // no need to fini then init/add again). Assumes txn is in a try
590 : // and, for any txn speculative keys, no wrapping of txn version
591 : // numbers has occurred since the try started..
592 : //
593 : // IMPORTANT SAFETY TIP! This is guaranteed to succeed if no keys
594 : // were added to the transaction as speculative.
595 : //
596 : // IMPORTANT SAFETY TIP! Do not interleave or nest with modify
597 : // try/test, query try/test or iter_lock/unlock on the same thread.
598 :
599 : int mymap_txn_test( mymap_txn_t * txn );
600 :
601 : // mymap_iter_lock locks zero or more map chains. Assumes join is
602 : // a current local join. On input, lock_seq[i] for i in
603 : // [0,lock_cnt) gives the set of chains to lock. flags is a bit-or
604 : // of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set / not set,
605 : // this call is allowed / not allowed to block the caller. Assumes
606 : // join, lock_seq and lock_cnt are valid and the caller does not
607 : // already have any of these locks. The upper 30-bits of flags
608 : // can be used to provide a seed for random backoffs (but this is
609 : // up to the application and rarely necessary). In particular,
610 : // lock_seq should contain unique values in [0,chain_cnt), which
611 : // also implies lock_cnt is at most chain_cnt. Retains no interest
612 : // in lock_seq. Returns FD_MAP_SUCCESS (zero) on success and
613 : // FD_MAP_ERR_AGAIN (negative) on failure. On return:
614 : //
615 : // FD_MAP_SUCCESS: lock_seq will be a permutation of the input
616 : // giving the actual order (from oldest to newest) in which the
617 : // locks were acquired. This can be used, for example, to unlock
618 : // in the same order and can be used by the caller to optimize
619 : // the order for iterating over keys to reduce the amount of
620 : // contention with other concurrent operations. If there were no
621 : // potentially conflicting concurrent operations during the call,
622 : // lock_seq will be in the input order.
623 : //
624 : // FD_MAP_ERR_AGAIN: a potentially conflicting operation was in
625 : // progress at some point during the call. lock_seq might have
626 : // been changed (but will still be a permutation of the input).
627 : // The mymap itself wasn't changed by the call.
628 : //
629 : // FD_MAP_ERR_INVAL: join, lock_seq and/or lock_cnt were
630 : // obviously invalid. Same considerations as FD_MAP_ERR_AGAIN.
631 : //
632 : // Guaranteed to succeed if blocking (but will not return to the
633 : // caller until all the requested chains are locked).
634 : //
635 : // IMPORTANT SAFETY TIP! Do not use interleave or nest with modify
636 : // try/test, query try/test or txn try/test on the same thread.
637 :
638 : int
639 : mymap_iter_lock( mymap_t * join,
640 : ulong * lock_seq,
641 : ulong lock_cnt,
642 : int flags );
643 :
644 : // mymap_iter_unlock unlocks chains lock_seq[i] for i in
645 : // [0,lock_cnt) in that order. Assumes join is a current local
646 : // join, lock_seq and lock_cnt are valid (same requirements as
647 : // mymap_iter_lock) and the caller has a lock on those chains.
648 : // Retains no interest in lock_seq. Guaranteed to succeed.
649 : //
650 : // IMPORTANT SAFETY TIP! Do not use interleave or nest with modify
651 : // try/test, query try/test or txn try/test on the same thread.
652 :
653 : void
654 : mymap_iter_unlock( mymap_t * join,
655 : ulong const * lock_seq,
656 : ulong lock_cnt );
657 :
658 : // mymap_iter_chain_idx returns the index of the map chain that
659 : // manages key. Useful for iterating over groups of related keys
660 : // when the map hash function is designed to group all related keys
661 : // onto the same chain.
662 :
663 : ulong
664 : mymap_iter_chain_idx( mymap_t const * join,
665 : ulong const * key );
666 :
667 : // mymap_{iter,iter_done,iter_next,iter_ele,iter_ele_const} iterate
668 : // over a single map chain. Assumes join is a current local join,
669 : // chain_idx is in [0,mymap_chain_cnt(join)) and the caller lock on
670 : // chain idx or the chain is otherwise known to be idle.
671 : //
672 : // These are building blocks for concurrent parallel iteration. As
673 : // the locking and ordering requirements for such an iterator are
674 : // very application specific, no default global iterators are
675 : // provided (i.e. a generic global iterator will need to be so
676 : // conservative on locking than typical application requirements,
677 : // it is practically more mischievious than useful). E.g. a mymap
678 : // snapshot might lock all chains to get the state of the entire
679 : // mymap at a consistent point in time. For each chain (in the
680 : // order given by the lock acquisition), the snapshot would
681 : // serialize all keys on that chain and then unlock it
682 : // incrementally.
683 :
684 : mymap_iter_t mymap_iter ( mymap_t const * join, ulong chain_idx );
685 : mymap_iter_t mymap_iter_done ( mymap_iter_t iter );
686 : mymap_iter_t mymap_iter_next ( mymap_iter_t iter );
687 : myele_t const * mymap_iter_ele_const( mymap_iter_t iter );
688 : myele_t * mymap_iter_ele ( mymap_iter_t iter );
689 :
690 : // mymap_reset removes all elements from the mymap. Caller has
691 : // ownership of all items removed on return. Assumes that join is
692 : // a current local join and the caller has a lock on all map chains
693 : // or the map is otherwise known to be idle.
694 :
695 : void mymap_reset( mymap_t * join );
696 :
697 : // mymap_verify returns FD_MAP_SUCCESS (0) if the join, underlying
698 : // map and underlying element store give a valid mapping of unique
699 : // keys to unique elements in the element store. Assumes that
700 : // caller has a lock on all map chains or the map is otherwise
701 : // known to be idle. Returns FD_MAP_ERR_INVAL (negative) otherwise
702 : // (no changes by this call, logs details).
703 :
704 : int mymap_verify( mymap_t const * join );
705 :
706 : // mymap_strerror converts an FD_MAP_SUCCESS / FD_MAP_ERR code into
707 : // a human readable cstr. The lifetime of the returned pointer is
708 : // infinite. The returned pointer is always to a non-NULL cstr.
709 :
710 : char const * mymap_strerror( int err );
711 :
712 : Do this as often as desired in a compilation unit to get different
713 : types of concurrent maps. Options exist for generating library
714 : header prototypes and/or library implementations for concurrent maps
715 : usable across multiple compilation units. Additional options exist
716 : to use index compression, different hashing functions, key comparison
717 : functions, etc as detailed below.
718 :
719 : To better understand the insert/remove/{modify,query}_{try,test}
720 : APIs:
721 :
722 : ... basic insert
723 :
724 : myele_t * ele = ... acquire an unused element from the element store
725 :
726 : ... populate ele appropriately, including
727 :
728 : ele->key = ... key associated with this element
729 :
730 : int err = mymap_insert( join, err, FD_MAP_FLAG_BLOCKING );
731 :
732 : if( FD_UNLIKELY( err ) ) { // Not possible in this example
733 :
734 : ... If err is FD_MAP_ERR_INVAL, ele did not point at an element
735 : ... store element.
736 : ...
737 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
738 : ... conflicting operation in progress on the mymap during the
739 : ... call. We can try again later (e.g. after a random backoff or
740 : ... doing other non-conflicting work).
741 :
742 : } else {
743 :
744 : ... At this point, a mapping from key to the element store
745 : ... element pointed to by ele in our address space was added
746 : ... during the call. ele->key will be stable while in the mymap.
747 : ... Neither ele->key nor ele->next should be modified by the
748 : ... application while in the mymap. The application is free to
749 : ... manage all other fields of the element as desired.
750 :
751 : }
752 :
753 : ... basic remove
754 :
755 : ulong key = ... key to remove
756 :
757 : mymap_query_t query[1];
758 : int err = mymap_remove( join, &key, NULL, query, FD_MAP_FLAG_BLOCKING );
759 : mymap_ele_t * ele = mymap_query_ele( query );
760 :
761 : if( FD_UNLIKELY( err ) ) {
762 :
763 : ... At this point, ele==sentinel==NULL.
764 : ...
765 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
766 : ... point during the remove.
767 : ...
768 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
769 : ... conflicting operation in progress during the remove. We can
770 : ... try again later (e.g. after a random backoff or doing other
771 : ... non-conflicting work). (Not possible in this example.)
772 : ...
773 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
774 : ... at some point during the call. (Usually abortive.)
775 :
776 : } else {
777 :
778 : ... At this point, ele points into the element store (non-NULL),
779 : ... ele->key matches key, key mapped to that element before the
780 : ... remove, and we have ownership of that element.
781 :
782 : ... release ele to the element store
783 :
784 : }
785 :
786 : ... basic modify
787 :
788 : ulong key = ... key to modify
789 :
790 : mymap_query_t query[1];
791 : int err = mymap_modify_try( join, &key, NULL, query, FD_MAP_FLAG_BLOCKING );
792 : mymap_ele_t * ele = mymap_query_ele( query );
793 :
794 : if( FD_UNLIKELY( err ) ) {
795 :
796 : ... At this point, ele==sentinel==NULL.
797 : ...
798 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
799 : ... point during the try.
800 : ...
801 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
802 : ... conflicting operation in progress during the try. We can try
803 : ... again later (e.g. after a random backoff or doing other
804 : ... non-conflicting work). (Not possible in this example.)
805 : ...
806 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
807 : ... at some point during the call. (Usually abortive.)
808 :
809 : } else {
810 :
811 : ... At this point, ele points in our address space to an element
812 : ... store element, ele->key matches key and we are in a modify try
813 : ... such that it is safe to modify fields ele not managed by the
814 : ... mymap.
815 :
816 : ... Modify application managed fields of ele here.
817 :
818 : ... IMPORTANT SAFETY TIP! IF THE USER WANTS TO SUPPORT ROLLING
819 : ... BACK A MODIFICATION AT THIS POINT, THEY CAN DO SO BY SAVING
820 : ... THE ORIGINAL VALUE OF ELE BEFORE MODIFYING ANY FIELDS AND
821 : ... THEN RESTORING IT HERE.
822 :
823 : ... Finish the modification (guaranteed to succeed)
824 :
825 : mymap_modify_test( query );
826 :
827 : ... At this point, the modification is done and we are no
828 : ... longer in a try.
829 :
830 : }
831 :
832 : ... basic speculative query
833 :
834 : ulong key = ... key to query
835 :
836 : mymap_query_t query[1];
837 : int err = mymap_query_try( join, &key, NULL, query );
838 : mymap_ele_t const * ele = mymap_query_ele_const( query );
839 :
840 : if( FD_UNLIKELY( err ) ) {
841 :
842 : ... At this point, ele==sentinel==NULL.
843 : ...
844 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
845 : ... point during the try.
846 : ...
847 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
848 : ... conflicting operation in progress during the try and we can
849 : ... try again later (e.g. after a random backoff or doing other
850 : ... non-conflicting work).
851 : ...
852 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
853 : ... during the call. (Usually abortive.)
854 :
855 : } else {
856 :
857 : ... At this point, ele points in our address space to an element
858 : ... in the element store (non-NULL) and ele->key matched key at
859 : ... some point during the try.
860 :
861 : ... Speculatively process ele here.
862 : ...
863 : ... DO NOT TRUST ANY RESULTS OF THIS SPECULATIVE PROCESSING YET.
864 : ... THERE IS NO GUARANTEE YET THAT A CONCURRENT USER HASN'T
865 : ... CHANGED THE MYMAP IN A WAY THAT COULD YIELD ARBITRARY AND
866 : ... INCONSISTENT RESULTS.
867 : ...
868 : ... The simplest and most common form of speculative processing
869 : ... is to copy the needed portions of ele into a local stack
870 : ... temp.
871 : ...
872 : ... Note: concurrent operations could include removing key from
873 : ... the mymap (and maybe multiple cycles of inserting and
874 : ... removing it and then at potentially different element store
875 : ... locations). That's not an issue practically as the ele
876 : ... pointer here will be to an element compatible memory region
877 : ... that will continue to exist regardless and we shouldn't be
878 : ... trusting any query reads yet (the query test will detect if
879 : ... if these can be trusted).
880 : ...
881 : ... Rant: If ele is more complex than plain-old-data, so long ele
882 : ... is using allocators like fd_alloc and fd_wksp for dynamically
883 : ... allocated fields (e.g. not using the giant steaming pile of
884 : ... page based memory virtual memory, operating system, language
885 : ... and standard library fail that is heap based allocation ala
886 : ... malloc/free), concurrent removes are _still_ fine for the
887 : ... exact same reason. That is, the memory that actually backed
888 : ... dynamically allocated fields will still continue to exist
889 : ... post remove ... you know ... just like reality (turns out,
890 : ... surprise, "free" doesn't actually uninstall any DIMMs and
891 : ... malloc/free are the worst possible abstraction for resource
892 : ... management).
893 : ...
894 : ... The concurrent remove case actually demonstrates why fd_alloc
895 : ... / fd_wksp / fd_shmem / etc exist in the first place. Beyond
896 : ... being faster, simpler, more concurrent and more reliable
897 : ... (especially in cases like this), they are more flexible (e.g.
898 : ... sharing and persisting the data structure asynchronously
899 : ... across multiple processes in different address spaces) and
900 : ... more secure (e.g. can easily bounds check memory accesses
901 : ... and then use the memory subsystem to sandbox different
902 : ... components from touching memory they shouldn't, actually
903 : ... using a modern virtual memory subsystem for something useful
904 : ... for a change instead of bending over backwards to provide
905 : ... exactly the wrong abstraction of the real world). Common
906 : ... hardware and software practices have turned computers into an
907 : ... unreliable and insecure Tower of Babel. Had virtual memory
908 : ... been better abstracted and better implemented all levels of
909 : ... the stack, life would be much easier (and things like fast
910 : ... persistent memories might have seen a lot more commerical
911 : ... success). In the meantime, dispelling the magical thinking
912 : ... encourged by the conventional abstractions, the key lessons
913 : ... are:
914 : ...
915 : ... * Friends don't let friends malloc.
916 : ... * Lockfree is not a synonym for garbage collection.
917 : ... * Real world computers aren't infinite tape Turing machines.
918 : ... * Real world memory doesn't magically disappear.
919 :
920 : ... At this point, we are done with speculative processing (or we
921 : ... don't want to do any more speculative processing if the try
922 : ... has already failed).
923 :
924 : err = mymap_query_test( query );
925 : if( FD_UNLKELY( err ) ) {
926 :
927 : ... At this point, err will be FD_MAP_ERR_AGAIN and a
928 : ... potentially conflicting operation in the try was detected
929 : ... by the test.
930 :
931 : ... Application specific handling here (e.g. try again after a
932 : ... random backoff or doing other non-conflicting work).
933 :
934 : } else {
935 :
936 : ... At this point, the results of the speculation thus far can
937 : ... be trusted and can be considered to have been computed at
938 : ... some point in time between try and test.
939 :
940 : }
941 : }
942 :
943 : To better understand the txn API:
944 :
945 : ... allocate a txn
946 :
947 : ulong align = mymap_txn_align();
948 : ulong footprint = mymap_txn_footprint( key_max );
949 : void * ltxn = ... allocate align/footprint local scratch memory
950 : mymap_txn_t * txn = mymap_txn_init( ltxn, join, key_max );
951 :
952 : ... add at most key_max keys to the transaction as locked
953 :
954 : for( ... all keys involved in the transaction ... ) mymap_txn_add( txn, key, 1 ); // guaranteed to succeed for this example
955 :
956 : ... try to do the transaction
957 :
958 : int err = mymap_txn_try( txn, FD_MAP_FLAG_BLOCKING );
959 :
960 : if( FD_UNLIKELY( err ) ) { // Not possible in this example
961 :
962 : ... At this point, err is FD_MAP_ERR_AGAIN and there was a
963 : ... potentially conflicting operation in progress during the try.
964 : ... We can should try again later (e.g. after a random backoff or
965 : ... doing other non-conflicting work). We are no longer in a try
966 : ... but we could reuse the txn as-is to retry.
967 :
968 : } else {
969 :
970 : ... At this point, it is safe to try the transaction.
971 :
972 : ... Do the transaction here. Since all keys are locked in this
973 : ... example, we don't need to worry about any changing behind our
974 : ... back (i.e. the try is guaranteed to succeed).
975 :
976 : ... Like modify, if we wants to rollback the transaction at this
977 : ... point, we should save the state of all locked keys involved
978 : ... to local temporaries before we do the transaction and then
979 : ... restore the state here.
980 :
981 : ... Finish the try (guaranteed to succeed for this example)
982 :
983 : mymap_txn_test( txn );
984 :
985 : ... At this point, we are no longer in a txn try but the txn is
986 : ... valid such that we could reuse the txn as-is for another
987 : ... transaction involving the same keys.
988 :
989 : mymap_txn_fini( txn );
990 :
991 : ... At this point, txn is no longer valid and we have ownership of
992 : ... the ltxn memory region
993 :
994 : ... free ltxn
995 :
996 : }
997 :
998 : To better understand the iter API:
999 :
1000 : ... basic mymap element snapshot (i.e. iterate over all elements in
1001 : ... the mymap at a globally consistent point in time while
1002 : ... minimizing contension with other concurrent operations)
1003 :
1004 : ulong lock_cnt = mymap_chain_cnt( join );
1005 :
1006 : ulong * lock_seq = ... allocate lock_cnt ulong scratch ...
1007 :
1008 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) lock_seq[ lock_idx ] = lock_idx;
1009 :
1010 : mymap_iter_lock( join, lock_seq, lock_cnt, FD_MAP_FLAG_BLOCKING );
1011 :
1012 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) {
1013 : ulong chain_idx = lock_seq[ lock_idx ]; // process chains in the order they were locked
1014 :
1015 : for( mymap_iter_t iter = mymap_iter( join, chain_idx ); !mymap_iter_done( iter ); iter = mymap_iter_next( iter ) ) {
1016 : myele_t const * ele = mymap_iter_ele_const( iter );
1017 :
1018 : ... append ele to snapshot here (ele will be appended in
1019 : ... no particular order for this example). Note that, as
1020 : ... the caller has a lock on the chain that manages ele,
1021 : ... the caller is free to modify the fields of ele it
1022 : ... manages.
1023 :
1024 : }
1025 :
1026 : mymap_iter_unlock( join, lock_seq + lock_idx, 1UL ); // unlock incrementally
1027 : }
1028 :
1029 : ... free lock_seq here
1030 : */
1031 :
1032 : /* FIXME: consider adding a parallel verify that operates on a
1033 : locked/idle subset of the chains. */
1034 :
1035 : /* MAP_NAME gives the API prefix to use for map */
1036 :
1037 : #ifndef MAP_NAME
1038 : #error "Define MAP_NAME"
1039 : #endif
1040 :
1041 : /* MAP_ELE_T is the map element type */
1042 :
1043 : #ifndef MAP_ELE_T
1044 : #error "Define MAP_ELE_T"
1045 : #endif
1046 :
1047 : /* MAP_KEY_T is the map key type */
1048 :
1049 : #ifndef MAP_KEY_T
1050 : #define MAP_KEY_T ulong
1051 : #endif
1052 :
1053 : /* MAP_KEY is the MAP_ELE_T key field */
1054 :
1055 : #ifndef MAP_KEY
1056 0 : #define MAP_KEY key
1057 : #endif
1058 :
1059 : /* MAP_IDX_T is the map next index type. Should be a primitive unsigned
1060 : integer type large enough to represent the largest capacity element
1061 : store of interest. (E.g. if ushort, the maximum element store
1062 : capacity compatible with the map will be 65535 elements.) */
1063 :
1064 : #ifndef MAP_IDX_T
1065 0 : #define MAP_IDX_T ulong
1066 : #endif
1067 :
1068 : /* MAP_NEXT is the MAP_ELE_T next field */
1069 :
1070 : #ifndef MAP_NEXT
1071 0 : #define MAP_NEXT next
1072 : #endif
1073 :
1074 : /* MAP_KEY_EQ returns 0/1 if *k0 is the same/different as *k1 */
1075 :
1076 : #ifndef MAP_KEY_EQ
1077 22516869 : #define MAP_KEY_EQ(k0,k1) ((*(k0))==(*(k1)))
1078 : #endif
1079 :
1080 : /* MAP_KEY_HASH returns a random mapping of *key into ulong. The
1081 : mapping is parameterized by the 64-bit ulong seed. */
1082 :
1083 : #ifndef MAP_KEY_HASH
1084 : #define MAP_KEY_HASH(key,seed) fd_ulong_hash( (*(key)) ^ (seed) )
1085 : #endif
1086 :
1087 : /* If MAP_MEMOIZE is defined to non-zero, elements have a field that
1088 : can be used while in the map to hold the MAP_KEY_HASH for an
1089 : element's key. This is useful for accelerating user code that might
1090 : need a hash and various map operations. */
1091 :
1092 : #ifndef MAP_MEMOIZE
1093 : #define MAP_MEMOIZE 0
1094 : #endif
1095 :
1096 : /* If MAP_MEMOIZE is non-zero, MAP_MEMO is the memo element field.
1097 : Should be a ulong. Like MAP_KEY and MAP_NEXT, when an element is in
1098 : the map, this value is managed by the map and will contain the
1099 : MAP_KEY_HASH of the element's key and the map's seed. */
1100 :
1101 : #ifndef MAP_MEMO
1102 : #define MAP_MEMO memo
1103 : #endif
1104 :
1105 : /* If MAP_MEMOIZE is defined to non-zero, a non-zero MAP_KEY_EQ_IS_SLOW
1106 : indicates the MAP_MEMO field should be used to accelerate MAP_KEY_EQ
1107 : operations. This is useful when MAP_KEY_EQ is non-trivial (e.g.
1108 : variable length string compare, large buffer compares, etc). */
1109 :
1110 : #ifndef MAP_KEY_EQ_IS_SLOW
1111 : #define MAP_KEY_EQ_IS_SLOW 0
1112 : #endif
1113 :
1114 : /* MAP_CNT_WIDTH gives the number of bits in a ulong to reserve for
1115 : encoding the count in a versioned count. Element store capacity
1116 : should be representable in this width. Default is 43 bits (e.g.
1117 : enough to support a ~1 PiB element store of 128 byte elements). The
1118 : versioning width will be 64-MAP_CNT_WIDTH. Since the least
1119 : significant bit of the version is used to indicate locked, versioning
1120 : width should be at least 2 and ideally as large as possible. With
1121 : the 43 default, a chain's version number will not be reused until
1122 : 2^20 individual operations on a chain have been done. Version
1123 : numbers only impact speculative operations. If not using speculative
1124 : operations, version width can be reduced to the minimum. */
1125 :
1126 : #ifndef MAP_CNT_WIDTH
1127 91250517 : #define MAP_CNT_WIDTH (43)
1128 : #endif
1129 :
1130 : /* MAP_ALIGN gives the alignment required for the map shared memory.
1131 : Default is 128 for double cache line alignment. Should be at least
1132 : ulong alignment. */
1133 :
1134 : #ifndef MAP_ALIGN
1135 : #define MAP_ALIGN (128UL)
1136 : #endif
1137 :
1138 : /* MAP_MAGIC is the shared memory magic number to aid in persistent
1139 : and/or interprocess usage. */
1140 :
1141 : #ifndef MAP_MAGIC
1142 3 : #define MAP_MAGIC (0xf17eda2c37c3a900UL) /* firedancer cmap version 0 */
1143 : #endif
1144 :
1145 : /* MAP_IMPL_STYLE controls what to generate:
1146 : 0 - header only library
1147 : 1 - library header declaration
1148 : 2 - library implementation */
1149 :
1150 : #ifndef MAP_IMPL_STYLE
1151 : #define MAP_IMPL_STYLE 0
1152 : #endif
1153 :
1154 : /* Commom map error codes (FIXME: probably should get around to making
1155 : unified error codes, error strings and/or flags across util at least
1156 : so we don't have to do this in the generator itself) */
1157 :
1158 31813848 : #define FD_MAP_SUCCESS (0)
1159 12422859 : #define FD_MAP_ERR_INVAL (-1)
1160 7488210 : #define FD_MAP_ERR_AGAIN (-2)
1161 3 : #define FD_MAP_ERR_CORRUPT (-3)
1162 : //#define FD_MAP_ERR_EMPTY (-4)
1163 : //#define FD_MAP_ERR_FULL (-5)
1164 5635110 : #define FD_MAP_ERR_KEY (-6)
1165 :
1166 3749418 : #define FD_MAP_FLAG_BLOCKING (1)
1167 5136612 : #define FD_MAP_FLAG_ADAPTIVE (2)
1168 :
1169 : /* Implementation *****************************************************/
1170 :
1171 39328212 : #define MAP_VER_WIDTH (64-MAP_CNT_WIDTH)
1172 :
1173 : #if MAP_IMPL_STYLE==0 /* local use only */
1174 : #define MAP_STATIC FD_FN_UNUSED static
1175 : #else /* library header and/or implementation */
1176 : #define MAP_STATIC
1177 : #endif
1178 :
1179 296464467 : #define MAP_(n) FD_EXPAND_THEN_CONCAT3(MAP_NAME,_,n)
1180 :
1181 : #if MAP_IMPL_STYLE!=2 /* need header */
1182 :
1183 : #include "../bits/fd_bits.h"
1184 :
1185 : /* Note: we don't overalign chain metadata to reduce on map metadata
1186 : footprint requirements. Though this can cause cache false sharing
1187 : for concurrent operations on different keys that are managed
1188 : different chains that share a cache line, this risk can be controlled
1189 : by overprovisioning chain_cnt. That is, for a fixed map metadata
1190 : footprint, this false sharing seems preferable to using fewer chains
1191 : as that would lead to an equivalent increase in the amount of locking
1192 : necessary to avoid potential conflicts for keys managed by the same
1193 : chain (i.e. the former makes good use of the padding that would be
1194 : otherwise wasted if overaligning this). */
1195 :
1196 : struct MAP_(shmem_private_chain) {
1197 : ulong ver_cnt; /* versioned count, cnt is in [0,ele_max] in lsb, ver in msb, odd: chain locked, even: chain unlocked */
1198 : MAP_IDX_T head_cidx; /* compressed index of the first element on the chain */
1199 : };
1200 :
1201 : typedef struct MAP_(shmem_private_chain) MAP_(shmem_private_chain_t);
1202 :
1203 : struct __attribute__((aligned(MAP_ALIGN))) MAP_(shmem_private) {
1204 :
1205 : /* FIXME: consider having a memo of the chain in which an element is
1206 : stored and/or using doubly linked list chains (maybe with the xor
1207 : trick)? We could do faster variants of remove and maybe amortize
1208 : some hash calcs. */
1209 :
1210 : ulong magic; /* == MAP_MAGIC */
1211 : ulong seed; /* Hash seed, arbitrary */
1212 : ulong chain_cnt; /* Number of chains, positive integer power-of-two */
1213 :
1214 : /* Padding to MAP_ALIGN alignment here */
1215 :
1216 : /* MAP_(shmem_private_chain_t) chain[ chain_cnt ] here */
1217 : };
1218 :
1219 : typedef struct MAP_(shmem_private) MAP_(shmem_t);
1220 :
1221 : struct MAP_(private) {
1222 : MAP_(shmem_t) * map; /* Location of the map in the local address space */
1223 : MAP_ELE_T * ele; /* Location of the element store in the local address space */
1224 : ulong ele_max; /* Capacity of the element store, in [0,ele_max_max] */
1225 : };
1226 :
1227 : typedef struct MAP_(private) MAP_(t);
1228 :
1229 : struct MAP_(query_private) {
1230 : MAP_ELE_T * ele; /* Points to the operation element in the local address space (or a sentinel) */
1231 : MAP_(shmem_private_chain_t) * chain; /* Points to the chain that manages element in the local address space */
1232 : ulong ver_cnt; /* Versioned count of the chain at operation try */
1233 : };
1234 :
1235 : typedef struct MAP_(query_private) MAP_(query_t);
1236 :
1237 : struct MAP_(txn_private_info) {
1238 : MAP_(shmem_private_chain_t) * chain; /* Points to the chain that manages one or more txn keys (set by txn_add) */
1239 : ulong ver_cnt; /* Versioned count of the chain at the transaction start (set by txn_try) */
1240 : };
1241 :
1242 : typedef struct MAP_(txn_private_info) MAP_(txn_private_info_t);
1243 :
1244 : struct MAP_(txn_private) {
1245 : MAP_(shmem_t) * map; /* Map used by this transaction */
1246 : ulong info_max; /* Number of chains possible for this transaction */
1247 : ulong lock_cnt; /* Number of chains in the locked set, in [0,info_max] */
1248 : ulong spec_cnt; /* Number of chains in the speculative set, in [0,info_max], lock_cnt + spec_cnt <= info_max */
1249 :
1250 : /* MAP_(txn_private_info_t) info[ info_max ] here (obligatory sigh
1251 : about lagging C++ support for 0 sized structure array footers).
1252 :
1253 : The locked set is at indices [0,lock_cnt), lock_cnt infos.
1254 : The free set is at indices [lock_cnt,info_max-spec_cnt), free_cnt = info_max-spec_cnt-lock_cnt infos.
1255 : The speculative set is at indices [info_max-spec_cnt,info_max), spec_cnt infos.
1256 :
1257 : A chain will appear at most once in a set. A chain will not appear
1258 : in both sets.
1259 :
1260 : Note that it would be trivial to make this shared memory persistent
1261 : though not obvious if that would be useful. (A precomputed
1262 : template for a common transaction done by multiple threads is a
1263 : possibility but the versions would still need to be local.) */
1264 :
1265 : };
1266 :
1267 : typedef struct MAP_(txn_private) MAP_(txn_t);
1268 :
1269 : struct MAP_(iter_private) {
1270 : MAP_ELE_T const * ele; /* Pointer to the element store in the caller's address space */
1271 : ulong ele_idx; /* Current iteration element store index (or the null index) */
1272 : };
1273 :
1274 : typedef struct MAP_(iter_private) MAP_(iter_t);
1275 :
1276 : FD_PROTOTYPES_BEGIN
1277 :
1278 : /* map_private_vcnt pack ver and cnt into a versioned cnt. ver is
1279 : masked to fit into MAP_VER_WIDTH bits. cnt is assumed in
1280 : [0,ele_max_max].
1281 :
1282 : map_private_vcnt_{ver,cnt} extract the {version,index} from a
1283 : versioned index. Return will fit into {MAP_VER_WIDTH,MAP_CNT_WIDTH}
1284 : bits. */
1285 :
1286 7024260 : FD_FN_CONST static inline ulong MAP_(private_vcnt)( ulong ver, ulong cnt ) { return (ver<<MAP_CNT_WIDTH) | cnt; }
1287 :
1288 8901657 : FD_FN_CONST static inline ulong MAP_(private_vcnt_ver)( ulong ver_cnt ) { return ver_cnt >> MAP_CNT_WIDTH; }
1289 19664106 : FD_FN_CONST static inline ulong MAP_(private_vcnt_cnt)( ulong ver_cnt ) { return (ver_cnt << MAP_VER_WIDTH) >> MAP_VER_WIDTH; }
1290 :
1291 : /* map_shmem_private_chain returns the location in the caller's address
1292 : space of the map chain metadata associated with hash. The chain
1293 : associated with hash 0 is the first chain. Assumes map is valid.
1294 : map_shmem_private_chain_const is a const correct version. */
1295 :
1296 : FD_FN_PURE static inline MAP_(shmem_private_chain_t) *
1297 : MAP_(shmem_private_chain)( MAP_(shmem_t) * map,
1298 29974383 : ulong hash ) {
1299 29974383 : return (MAP_(shmem_private_chain_t) *)(map+1) + (hash & (map->chain_cnt-1UL));
1300 29974383 : }
1301 :
1302 : FD_FN_PURE static inline MAP_(shmem_private_chain_t) const *
1303 : MAP_(shmem_private_chain_const)( MAP_(shmem_t) const * map,
1304 7495170 : ulong hash ) {
1305 7495170 : return (MAP_(shmem_private_chain_t) const *)(map+1) + (hash & (map->chain_cnt-1UL));
1306 7495170 : }
1307 :
1308 : /* map_txn_private_info returns the location in the caller's address
1309 : space of the txn info. Assumes txn is valid. */
1310 :
1311 : FD_FN_CONST static inline MAP_(txn_private_info_t) *
1312 12166254 : MAP_(txn_private_info)( MAP_(txn_t) * txn ) {
1313 12166254 : return (MAP_(txn_private_info_t) *)(txn+1);
1314 12166254 : }
1315 :
1316 : /* map_private_{cidx,idx} compress / decompress 64-bit in-register
1317 : indices to/from their in-memory representations. */
1318 :
1319 5265582 : FD_FN_CONST static inline MAP_IDX_T MAP_(private_cidx)( ulong idx ) { return (MAP_IDX_T)idx; }
1320 35081052 : FD_FN_CONST static inline ulong MAP_(private_idx) ( MAP_IDX_T cidx ) { return (ulong) cidx; }
1321 :
1322 : /* map_private_idx_null returns the element storage index that
1323 : represents NULL. */
1324 :
1325 0 : FD_FN_CONST static inline ulong MAP_(private_idx_null)( void ) { return (ulong)(MAP_IDX_T)(~0UL); }
1326 :
1327 : /* map_private_idx_is_null returns 1 if idx is the NULL map index and 0
1328 : otherwise. */
1329 :
1330 13277472 : FD_FN_CONST static inline int MAP_(private_idx_is_null)( ulong idx ) { return idx==(ulong)(MAP_IDX_T)(~0UL); }
1331 :
1332 : /* map_private_fetch_and_or does a ulong FD_ATOMIC_FETCH_AND_OR when the
1333 : target has FD_HAS_ATOMIC and emulates it when not. When emulated,
1334 : the map will not be safe to use concurrently but will still work. */
1335 :
1336 : static inline ulong
1337 : MAP_(private_fetch_and_or)( ulong volatile * p,
1338 17998194 : ulong b ) {
1339 17998194 : ulong x;
1340 17998194 : FD_COMPILER_MFENCE();
1341 17998194 : # if FD_HAS_ATOMIC
1342 17998194 : x = FD_ATOMIC_FETCH_AND_OR( p, b );
1343 : # else
1344 : x = *p;
1345 : *p = x | b;
1346 : # endif
1347 17998194 : FD_COMPILER_MFENCE();
1348 17998194 : return x;
1349 17998194 : }
1350 :
1351 0 : FD_FN_CONST static inline ulong MAP_(ele_max_max)( void ) { return (ulong)(MAP_IDX_T)(ULONG_MAX >> MAP_VER_WIDTH); }
1352 :
1353 : FD_FN_CONST static inline ulong
1354 0 : MAP_(chain_max)( void ) {
1355 0 : return fd_ulong_pow2_dn( (ULONG_MAX - sizeof(MAP_(shmem_t)) - alignof(MAP_(shmem_t)) + 1UL) /
1356 0 : sizeof(MAP_(shmem_private_chain_t)) );
1357 0 : }
1358 :
1359 : FD_FN_CONST static inline ulong
1360 3000015 : MAP_(chain_cnt_est)( ulong ele_max_est ) {
1361 :
1362 : /* Clamp to be in [1,ele_max_max] (as ele_max_est 0 is degenerate and
1363 : as the map is guaranteed to hold at most ele_max_max keys). */
1364 :
1365 3000015 : ele_max_est = fd_ulong_min( fd_ulong_max( ele_max_est, 1UL ), MAP_(ele_max_max)() );
1366 :
1367 : /* Compute the number of chains as the power of 2 that makes the
1368 : average chain length between ~1 and ~2 when ele_max_est are stored
1369 : in the map and then clamp to the chain max. */
1370 :
1371 3000015 : ulong chain_min = (ele_max_est>>1) + (ele_max_est&1UL); /* chain_min = ceil(ele_max_est/2), in [1,2^63], computed w/o overflow */
1372 3000015 : ulong chain_cnt = fd_ulong_pow2_up( chain_min ); /* Power of 2 in [1,2^63] */
1373 :
1374 3000015 : return fd_ulong_min( chain_cnt, MAP_(chain_max)() );
1375 3000015 : }
1376 :
1377 0 : FD_FN_CONST static inline ulong MAP_(align)( void ) { return alignof(MAP_(shmem_t)); }
1378 :
1379 : FD_FN_CONST static inline ulong
1380 18 : MAP_(footprint)( ulong chain_cnt ) {
1381 18 : if( !(fd_ulong_is_pow2( chain_cnt ) & (chain_cnt<=MAP_(chain_max)())) ) return 0UL;
1382 : /* Note: assumes shmem_t and shmem_private_chain_t have compatible alignments */
1383 6 : return fd_ulong_align_up( sizeof(MAP_(shmem_t)) + chain_cnt*sizeof(MAP_(shmem_private_chain_t)),
1384 6 : alignof(MAP_(shmem_t)) ); /* no overflow */
1385 18 : }
1386 :
1387 3 : FD_FN_PURE static inline ulong MAP_(seed) ( MAP_(t) const * join ) { return join->map->seed; }
1388 6 : FD_FN_PURE static inline ulong MAP_(chain_cnt)( MAP_(t) const * join ) { return join->map->chain_cnt; }
1389 :
1390 3 : FD_FN_PURE static inline void const * MAP_(shmap_const)( MAP_(t) const * join ) { return join->map; }
1391 3 : FD_FN_PURE static inline void const * MAP_(shele_const)( MAP_(t) const * join ) { return join->ele; }
1392 3 : FD_FN_PURE static inline ulong MAP_(ele_max) ( MAP_(t) const * join ) { return join->ele_max; }
1393 :
1394 3 : FD_FN_PURE static inline void * MAP_(shmap)( MAP_(t) * join ) { return join->map; }
1395 3 : FD_FN_PURE static inline void * MAP_(shele)( MAP_(t) * join ) { return join->ele; }
1396 :
1397 : FD_FN_PURE static inline int
1398 : MAP_(key_eq)( MAP_KEY_T const * k0,
1399 22516869 : MAP_KEY_T const * k1 ) {
1400 22516869 : return !!(MAP_KEY_EQ( (k0), (k1) ));
1401 22516869 : }
1402 :
1403 : FD_FN_PURE static inline ulong
1404 : MAP_(key_hash)( MAP_KEY_T const * key,
1405 31086894 : ulong seed ) {
1406 31086894 : return (MAP_KEY_HASH( (key), (seed) ));
1407 31086894 : }
1408 :
1409 : static inline void
1410 : MAP_(backoff)( ulong scale,
1411 0 : ulong seed ) {
1412 0 : ulong r = (ulong)(uint)fd_ulong_hash( seed ^ (((ulong)fd_tickcount())<<32) );
1413 0 : for( ulong rem=(scale*r)>>48; rem; rem-- ) FD_SPIN_PAUSE();
1414 0 : }
1415 :
1416 5382963 : FD_FN_PURE static inline MAP_ELE_T const * MAP_(query_ele_const)( MAP_(query_t) const * query ) { return query->ele; }
1417 10769013 : FD_FN_PURE static inline MAP_ELE_T * MAP_(query_ele )( MAP_(query_t) * query ) { return query->ele; }
1418 :
1419 : static inline int
1420 1871076 : MAP_(modify_test)( MAP_(query_t) * query ) {
1421 1871076 : MAP_(shmem_private_chain_t) * chain = query->chain;
1422 1871076 : ulong ver_cnt = query->ver_cnt;
1423 1871076 : FD_COMPILER_MFENCE();
1424 1871076 : chain->ver_cnt = ver_cnt + (2UL<<MAP_CNT_WIDTH);
1425 1871076 : FD_COMPILER_MFENCE();
1426 1871076 : return FD_MAP_SUCCESS;
1427 1871076 : }
1428 :
1429 : static inline int
1430 1869663 : MAP_(query_test)( MAP_(query_t) const * query ) {
1431 1869663 : MAP_(shmem_private_chain_t) const * chain = query->chain;
1432 1869663 : ulong ver_cnt = query->ver_cnt;
1433 1869663 : FD_COMPILER_MFENCE();
1434 1869663 : ulong _ver_cnt = chain->ver_cnt;
1435 1869663 : FD_COMPILER_MFENCE();
1436 1869663 : return fd_int_if( ver_cnt==_ver_cnt, FD_MAP_SUCCESS, FD_MAP_ERR_AGAIN );
1437 1869663 : }
1438 :
1439 : FD_FN_CONST static inline ulong
1440 0 : MAP_(txn_key_max_max)( void ) {
1441 0 : return (ULONG_MAX - sizeof(MAP_(txn_t)) - alignof(MAP_(txn_t)) + 1UL) / sizeof( MAP_(txn_private_info_t) );
1442 0 : }
1443 :
1444 0 : FD_FN_CONST static inline ulong MAP_(txn_align)( void ) { return alignof(MAP_(txn_t)); }
1445 :
1446 : FD_FN_CONST static inline ulong
1447 6 : MAP_(txn_footprint)( ulong key_max ) {
1448 6 : if( key_max > MAP_(txn_key_max_max)() ) return 0UL;
1449 3 : return sizeof(MAP_(txn_t)) + key_max*sizeof(MAP_(txn_private_info_t)); /* no overflow */
1450 6 : }
1451 :
1452 : static inline MAP_(txn_t) *
1453 : MAP_(txn_init)( void * mem,
1454 : MAP_(t) * join,
1455 3743700 : ulong key_max ) {
1456 3743700 : MAP_(txn_t) * txn = (MAP_(txn_t) *)mem;
1457 3743700 : if( FD_UNLIKELY( (!mem ) |
1458 3743700 : (!fd_ulong_is_aligned( (ulong)mem, MAP_(txn_align)() )) |
1459 3743700 : (!join ) |
1460 3743700 : (key_max > MAP_(txn_key_max_max)() ) ) ) return NULL;
1461 3743688 : txn->map = join->map;
1462 3743688 : txn->info_max = key_max; /* Worst case number of chains impacted by this transaction */
1463 3743688 : txn->lock_cnt = 0UL;
1464 3743688 : txn->spec_cnt = 0UL;
1465 3743688 : return txn;
1466 3743700 : }
1467 :
1468 3743691 : FD_FN_CONST static inline void * MAP_(txn_fini)( MAP_(txn_t) * txn ) { return (void *)txn; }
1469 :
1470 : FD_FN_PURE static inline ulong
1471 : MAP_(iter_chain_idx)( MAP_(t) const * join,
1472 0 : MAP_KEY_T const * key ) {
1473 0 : MAP_(shmem_t) const * map = join->map;
1474 0 : return MAP_(key_hash)( key, map->seed ) & (map->chain_cnt-1UL);
1475 0 : }
1476 :
1477 : FD_FN_PURE static inline MAP_(iter_t)
1478 : MAP_(iter)( MAP_(t) const * join,
1479 3752100 : ulong chain_idx ) {
1480 : /* FIXME: consider iter = {NULL,NULL} if chain_idx >= join->map->chain_cnt? */
1481 3752100 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( join->map, 0UL ) + chain_idx;
1482 3752100 : MAP_(iter_t) iter;
1483 3752100 : iter.ele = join->ele;
1484 3752100 : iter.ele_idx = MAP_(private_idx)( chain->head_cidx );
1485 3752100 : return iter;
1486 3752100 : }
1487 :
1488 7640829 : FD_FN_CONST static inline int MAP_(iter_done)( MAP_(iter_t) iter ) { return MAP_(private_idx_is_null)( iter.ele_idx ); }
1489 :
1490 : FD_FN_PURE static inline MAP_(iter_t)
1491 3888729 : MAP_(iter_next)( MAP_(iter_t) iter ) {
1492 3888729 : MAP_ELE_T const * ele = iter.ele + iter.ele_idx;
1493 3888729 : iter.ele_idx = MAP_(private_idx)( ele->MAP_NEXT );
1494 3888729 : return iter;
1495 3888729 : }
1496 :
1497 : FD_FN_CONST static inline MAP_ELE_T *
1498 0 : MAP_(iter_ele)( MAP_(iter_t) iter ) {
1499 0 : return (MAP_ELE_T *)(iter.ele + iter.ele_idx);
1500 0 : }
1501 :
1502 : FD_FN_CONST static inline MAP_ELE_T const *
1503 3888729 : MAP_(iter_ele_const)( MAP_(iter_t) iter ) {
1504 3888729 : return iter.ele + iter.ele_idx;
1505 3888729 : }
1506 :
1507 : MAP_STATIC void * MAP_(new) ( void * shmem, ulong chain_cnt, ulong seed );
1508 : MAP_STATIC MAP_(t) * MAP_(join) ( void * ljoin, void * shmap, void * shele, ulong ele_max );
1509 : MAP_STATIC void * MAP_(leave) ( MAP_(t) * join );
1510 : MAP_STATIC void * MAP_(delete)( void * shmap );
1511 :
1512 : MAP_STATIC int MAP_(insert)( MAP_(t) * join, MAP_ELE_T * ele, int flags );
1513 :
1514 : MAP_STATIC int
1515 : MAP_(remove)( MAP_(t) * join,
1516 : MAP_KEY_T const * key,
1517 : MAP_ELE_T const * sentinel,
1518 : MAP_(query_t) * query,
1519 : int flags );
1520 :
1521 : MAP_STATIC int
1522 : MAP_(modify_try)( MAP_(t) * join,
1523 : MAP_KEY_T const * key,
1524 : MAP_ELE_T * sentinel,
1525 : MAP_(query_t) * query,
1526 : int flags );
1527 :
1528 : MAP_STATIC int
1529 : MAP_(query_try)( MAP_(t) const * join,
1530 : MAP_KEY_T const * key,
1531 : MAP_ELE_T const * sentinel,
1532 : MAP_(query_t) * query );
1533 :
1534 : MAP_STATIC int MAP_(txn_add)( MAP_(txn_t) * txn, MAP_KEY_T const * key, int lock );
1535 :
1536 : MAP_STATIC int MAP_(txn_try)( MAP_(txn_t) * txn, int flags );
1537 :
1538 : MAP_STATIC int
1539 : MAP_(txn_modify)( MAP_(t) * join,
1540 : MAP_KEY_T const * key,
1541 : MAP_ELE_T * sentinel,
1542 : MAP_(query_t) * query,
1543 : int flags );
1544 :
1545 : static inline int
1546 : MAP_(txn_query)( MAP_(t) const * join,
1547 : MAP_KEY_T const * key,
1548 : MAP_ELE_T const * sentinel,
1549 1639896 : MAP_(query_t) * query ) {
1550 1639896 : return MAP_(txn_modify)( (MAP_(t) *)join, key, (MAP_ELE_T *)sentinel, query, 0 );
1551 1639896 : }
1552 :
1553 : MAP_STATIC int MAP_(txn_test)( MAP_(txn_t) * txn );
1554 :
1555 : MAP_STATIC int
1556 : MAP_(iter_lock)( MAP_(t) * join,
1557 : ulong * lock_seq,
1558 : ulong lock_cnt,
1559 : int flags );
1560 :
1561 : MAP_STATIC void
1562 : MAP_(iter_unlock)( MAP_(t) * join,
1563 : ulong const * lock_seq,
1564 : ulong lock_cnt );
1565 :
1566 : MAP_STATIC void MAP_(reset)( MAP_(t) * join );
1567 :
1568 : MAP_STATIC int MAP_(verify)( MAP_(t) const * join );
1569 :
1570 : MAP_STATIC FD_FN_CONST char const * MAP_(strerror)( int err );
1571 :
1572 : FD_PROTOTYPES_END
1573 :
1574 : #endif
1575 :
1576 : #if MAP_IMPL_STYLE!=1 /* need implementations (assumes header already included) */
1577 :
1578 : #include "../log/fd_log.h" /* Used by constructors and verify (FIXME: Consider making a compile time option) */
1579 :
1580 : /* MAP_CRIT_{BEGIN,BLOCKED,END} handle virtually all atomic boilerplate
1581 : for operations that require modifying a map chain's structure or
1582 : elements managed by that chain. Usage:
1583 :
1584 : MAP_CRIT( chain, blocking ) {
1585 :
1586 : ... At this point, we have a lock on the chain and the "ulong"
1587 : ... ver_cnt contains the chain's versioned count just before we
1588 : ... took the lock. The "int" retain_lock is zero.
1589 : ...
1590 : ... Do locked operations on the map chain here
1591 : ...
1592 : ... On exiting this block, if retain_lock is non-zero, we resume
1593 : ... execution immediately after MAP_CRIT_END. This is used for
1594 : ... "try" style operations where a "test" operation is done to
1595 : ... unlock the chain after the caller does their try/test work.
1596 : ... Otherwise, we will update the version number, unlock the
1597 : ... chain and then resume execution after MAP_CRIT_END.
1598 : ...
1599 : ... Because compiler memory fences are done just before entering
1600 : ... and after exiting this block, there is typically no need to
1601 : ... use any atomics / volatile / fencing here. That is, we can
1602 : ... just write "normal" code on platforms where writes to memory
1603 : ... become visible to other threads in the order in which they
1604 : ... were issued in the machine code (e.g. x86) as the version
1605 : ... update and unlock writes are after the changes done here
1606 : ... and others will not proceed until they see the new version
1607 : ... and unlock. YMMV for non-x86 platforms (probably need
1608 : ... additional hardware store fences in these macros).
1609 : ...
1610 : ... It is safe to use "break" and/or "continue" within this
1611 : ... block. The overall MAP_CRIT will exit with the appropriate
1612 : ... compiler fencing, version update and unlocking and then
1613 : ... execution will resume immediately after MAP_CRIT_END.
1614 : ...
1615 : ... IMPORTANT SAFETY TIP! DO NOT RETURN FROM THIS BLOCK.
1616 : ...
1617 : ... IMPORTANT SAFETY TIP! OPERATIONS THAT CHANGE THE CHAIN
1618 : ... ELEMENT COUNT SHOULD UPDATE VER_CNT's COUNT WHILE HOLDING
1619 : ... THE VERSION CONSTANT.
1620 :
1621 : } MAP_CRIT_BLOCKED {
1622 :
1623 : ... At this point, somebody else had a lock on the chain when we
1624 : ... tried to take the lock.
1625 : ...
1626 : ... Handle blocked here.
1627 : ...
1628 : ... On exiting this block, if blocking was zero in MAP_CRIT, we
1629 : ... will resume execution immediately after MAP_CRIT_END. If
1630 : ... blocking was non-zero, we will resume execution immediately
1631 : ... before MAP_CRIT (e.g. we will retry again after a short spin
1632 : ... pause). Similar considerations to the above for compiler
1633 : ... memory fences, "break" and "continue". As we do not have the
1634 : ... lock here, retain_lock is neither relevant nor available.
1635 : ...
1636 : ... IMPORTANT SAFETY TIP! DO NOT RETURN FROM THIS BLOCK.
1637 :
1638 : } MAP_CRIT_END; */
1639 :
1640 17998194 : #define MAP_CRIT(c,b) do { \
1641 17998194 : ulong volatile * _vc = (ulong volatile *)&(c)->ver_cnt; \
1642 17998194 : int _b = (b); \
1643 17998194 : int retain_lock = 0; \
1644 17998194 : for(;;) { \
1645 17998194 : ulong ver_cnt = *_vc; \
1646 17998194 : /* use a test-and-test-and-set style to reduce atomic contention */ \
1647 17998194 : if( FD_LIKELY( !(ver_cnt & (1UL<<MAP_CNT_WIDTH)) ) ) { /* opt for low contention */ \
1648 17998194 : ver_cnt = MAP_(private_fetch_and_or)( _vc, 1UL<<MAP_CNT_WIDTH ); \
1649 17998194 : if( FD_LIKELY( !(ver_cnt & (1UL<<MAP_CNT_WIDTH)) ) ) { /* opt for low contention */ \
1650 17998194 : FD_COMPILER_MFENCE(); \
1651 17998194 : do
1652 :
1653 : #define MAP_CRIT_BLOCKED \
1654 17998194 : while(0); \
1655 17998194 : FD_COMPILER_MFENCE(); \
1656 17998194 : if( !retain_lock ) *_vc = ver_cnt+(2UL<<MAP_CNT_WIDTH); /* likely compile time */ \
1657 17998194 : FD_COMPILER_MFENCE(); \
1658 7495800 : break; \
1659 17998194 : } \
1660 17998194 : } \
1661 17998194 : FD_COMPILER_MFENCE(); \
1662 0 : do
1663 :
1664 : #define MAP_CRIT_END \
1665 0 : while(0); \
1666 0 : FD_COMPILER_MFENCE(); \
1667 0 : if( !_b ) break; /* likely compile time */ \
1668 0 : FD_SPIN_PAUSE(); \
1669 0 : } \
1670 17998194 : } while(0)
1671 :
1672 : MAP_STATIC void *
1673 : MAP_(new)( void * shmem,
1674 : ulong chain_cnt,
1675 15 : ulong seed ) {
1676 :
1677 15 : if( FD_UNLIKELY( !shmem ) ) {
1678 3 : FD_LOG_WARNING(( "NULL shmem" ));
1679 3 : return NULL;
1680 3 : }
1681 :
1682 12 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, MAP_(align)() ) ) ) {
1683 3 : FD_LOG_WARNING(( "misaligned shmem" ));
1684 3 : return NULL;
1685 3 : }
1686 :
1687 9 : ulong footprint = MAP_(footprint)( chain_cnt );
1688 9 : if( FD_UNLIKELY( !footprint ) ) {
1689 6 : FD_LOG_WARNING(( "bad footprint" ));
1690 6 : return NULL;
1691 6 : }
1692 :
1693 : /* seed is arbitrary */
1694 :
1695 : /* Init the metadata */
1696 :
1697 3 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmem;
1698 :
1699 3 : map->seed = seed;
1700 3 : map->chain_cnt = chain_cnt;
1701 :
1702 : /* Set all the chains to version 0 and empty */
1703 :
1704 3 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, 0UL );
1705 1539 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
1706 1536 : chain[ chain_idx ].ver_cnt = MAP_(private_vcnt)( 0UL, 0UL );
1707 1536 : chain[ chain_idx ].head_cidx = MAP_(private_cidx)( MAP_(private_idx_null)() );
1708 1536 : }
1709 :
1710 3 : FD_COMPILER_MFENCE();
1711 3 : map->magic = MAP_MAGIC;
1712 3 : FD_COMPILER_MFENCE();
1713 :
1714 3 : return shmem;
1715 9 : }
1716 :
1717 : MAP_STATIC MAP_(t) *
1718 : MAP_(join)( void * ljoin,
1719 : void * shmap,
1720 : void * shele,
1721 24 : ulong ele_max ) {
1722 24 : MAP_(t) * join = (MAP_(t) *)ljoin;
1723 24 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmap;
1724 24 : MAP_ELE_T * ele = (MAP_ELE_T *)shele;
1725 :
1726 24 : if( FD_UNLIKELY( !join ) ) {
1727 3 : FD_LOG_WARNING(( "NULL ljoin" ));
1728 3 : return NULL;
1729 3 : }
1730 :
1731 21 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)join, alignof(MAP_(t)) ) ) ) {
1732 3 : FD_LOG_WARNING(( "misaligned ljoin" ));
1733 3 : return NULL;
1734 3 : }
1735 :
1736 18 : if( FD_UNLIKELY( !map ) ) {
1737 3 : FD_LOG_WARNING(( "NULL shmap" ));
1738 3 : return NULL;
1739 3 : }
1740 :
1741 15 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) ) ) {
1742 3 : FD_LOG_WARNING(( "misaligned shmap" ));
1743 3 : return NULL;
1744 3 : }
1745 :
1746 12 : if( FD_UNLIKELY( map->magic!=MAP_MAGIC ) ) {
1747 3 : FD_LOG_WARNING(( "bad magic" ));
1748 3 : return NULL;
1749 3 : }
1750 :
1751 9 : if( FD_UNLIKELY( (!ele) & (!!ele_max) ) ) {
1752 3 : FD_LOG_WARNING(( "NULL shele" ));
1753 3 : return NULL;
1754 3 : }
1755 :
1756 6 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)ele, alignof(MAP_ELE_T) ) ) ) {
1757 3 : FD_LOG_WARNING(( "misaligned shele" ));
1758 3 : return NULL;
1759 3 : }
1760 :
1761 3 : join->map = map;
1762 3 : join->ele = ele;
1763 3 : join->ele_max = ele_max;
1764 :
1765 3 : return join;
1766 6 : }
1767 :
1768 : MAP_STATIC void *
1769 6 : MAP_(leave)( MAP_(t) * join ) {
1770 :
1771 6 : if( FD_UNLIKELY( !join ) ) {
1772 3 : FD_LOG_WARNING(( "NULL join" ));
1773 3 : return NULL;
1774 3 : }
1775 :
1776 3 : return (void *)join;
1777 6 : }
1778 :
1779 : MAP_STATIC void *
1780 12 : MAP_(delete)( void * shmap ) {
1781 12 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmap;
1782 :
1783 12 : if( FD_UNLIKELY( !map ) ) {
1784 3 : FD_LOG_WARNING(( "NULL shmap" ));
1785 3 : return NULL;
1786 3 : }
1787 :
1788 9 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) ) ) {
1789 3 : FD_LOG_WARNING(( "misaligned shmap" ));
1790 3 : return NULL;
1791 3 : }
1792 :
1793 6 : if( FD_UNLIKELY( map->magic!=MAP_MAGIC ) ) {
1794 3 : FD_LOG_WARNING(( "bad magic" ));
1795 3 : return NULL;
1796 3 : }
1797 :
1798 3 : FD_COMPILER_MFENCE();
1799 3 : map->magic = 0UL;
1800 3 : FD_COMPILER_MFENCE();
1801 :
1802 3 : return (void *)map;
1803 6 : }
1804 :
1805 : MAP_STATIC int
1806 : MAP_(insert)( MAP_(t) * join,
1807 : MAP_ELE_T * ele,
1808 7508535 : int flags ) {
1809 :
1810 : /* Determine the element index (fastest if ele are power-of-two) and
1811 : the chain that should hold ele */
1812 :
1813 7508535 : ulong ele_idx = (ulong)(ele - join->ele);
1814 7508535 : if( FD_UNLIKELY( ele_idx>=join->ele_max ) ) return FD_MAP_ERR_INVAL;
1815 :
1816 1873887 : MAP_(shmem_t) * map = join->map;
1817 :
1818 1873887 : ulong hash = MAP_(key_hash)( &ele->MAP_KEY, map->seed );
1819 1873887 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
1820 :
1821 : /* Insert element at the head of chain. If chain is already locked,
1822 : signal to try again later. */
1823 :
1824 1873887 : int err;
1825 :
1826 3747774 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1827 1873887 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
1828 1873887 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1829 :
1830 1873887 : ele->MAP_NEXT = chain->head_cidx;
1831 : # if MAP_MEMOIZE
1832 1873887 : ele->MAP_MEMO = hash;
1833 : # endif
1834 1873887 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
1835 1873887 : ver_cnt = MAP_(private_vcnt)( version, ele_cnt+1UL ); /* version updated on exit */
1836 1873887 : err = FD_MAP_SUCCESS;
1837 :
1838 1873887 : } MAP_CRIT_BLOCKED {
1839 :
1840 0 : err = FD_MAP_ERR_AGAIN;
1841 :
1842 0 : } MAP_CRIT_END;
1843 :
1844 1873887 : return err;
1845 7508535 : }
1846 :
1847 : MAP_STATIC int
1848 : MAP_(remove)( MAP_(t) * join,
1849 : MAP_KEY_T const * key,
1850 : MAP_ELE_T const * sentinel,
1851 : MAP_(query_t) * query,
1852 3752916 : int flags ) {
1853 :
1854 : /* Determine the chain that should hold key */
1855 :
1856 3752916 : MAP_(shmem_t) * map = join->map;
1857 3752916 : MAP_ELE_T * ele = join->ele;
1858 3752916 : ulong ele_max = join->ele_max;
1859 :
1860 3752916 : ulong hash = MAP_(key_hash)( key, map->seed );
1861 3752916 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
1862 :
1863 : /* Find the key on the chain. If found, remove it. If not found,
1864 : corrupt or blocked, fail the operation. */
1865 :
1866 3752916 : query->ele = (MAP_ELE_T *)sentinel;
1867 3752916 : query->chain = chain;
1868 :
1869 3752916 : int err;
1870 :
1871 7505832 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1872 3752916 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
1873 3752916 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1874 :
1875 3752916 : query->ver_cnt = ver_cnt;
1876 :
1877 3752916 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
1878 0 : err = FD_MAP_ERR_CORRUPT;
1879 0 : goto done;
1880 0 : }
1881 :
1882 3752916 : MAP_IDX_T * cur = &chain->head_cidx;
1883 6673335 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
1884 4800003 : ulong ele_idx = MAP_(private_idx)( *cur );
1885 4800003 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
1886 0 : err = FD_MAP_ERR_CORRUPT;
1887 0 : goto done;
1888 0 : }
1889 :
1890 4800003 : if(
1891 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
1892 4800003 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
1893 4800003 : # endif
1894 4800003 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
1895 :
1896 1879584 : *cur = ele[ ele_idx ].MAP_NEXT;
1897 1879584 : ver_cnt = MAP_(private_vcnt)( version, ele_cnt-1UL ); /* version updated on exit */
1898 1879584 : query->ele = &ele[ ele_idx ];
1899 1879584 : err = FD_MAP_SUCCESS;
1900 1879584 : goto done;
1901 1879584 : }
1902 :
1903 2920419 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
1904 2920419 : }
1905 :
1906 : /* Key was not found */
1907 :
1908 1873332 : ulong ele_idx = MAP_(private_idx)( *cur );
1909 1873332 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not corrupt */
1910 0 : err = FD_MAP_ERR_CORRUPT;
1911 0 : goto done;
1912 0 : }
1913 :
1914 1873332 : err = FD_MAP_ERR_KEY;
1915 :
1916 3752916 : done: /* silly language restriction */;
1917 :
1918 3752916 : } MAP_CRIT_BLOCKED {
1919 :
1920 0 : query->ver_cnt = ver_cnt;
1921 0 : err = FD_MAP_ERR_AGAIN;
1922 :
1923 0 : } MAP_CRIT_END;
1924 :
1925 3752916 : return err;
1926 3752916 : }
1927 :
1928 : MAP_STATIC int
1929 : MAP_(modify_try)( MAP_(t) * join,
1930 : MAP_KEY_T const * key,
1931 : MAP_ELE_T * sentinel,
1932 : MAP_(query_t) * query,
1933 3742884 : int flags ) {
1934 :
1935 : /* Determine which chain might hold key */
1936 :
1937 3742884 : MAP_(shmem_t) * map = join->map;
1938 3742884 : MAP_ELE_T * ele = join->ele;
1939 3742884 : ulong ele_max = join->ele_max;
1940 :
1941 3742884 : ulong hash = MAP_(key_hash)( key, map->seed );
1942 3742884 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
1943 :
1944 : /* Search for the key on chain. If found, retain the chain lock
1945 : and return the found element. If not found, corrupt or blocked,
1946 : fail. */
1947 :
1948 3742884 : query->ele = (MAP_ELE_T *)sentinel;
1949 3742884 : query->chain = chain;
1950 :
1951 3742884 : int err;
1952 :
1953 7485768 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1954 :
1955 3742884 : query->ver_cnt = ver_cnt;
1956 :
1957 3742884 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1958 3742884 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
1959 0 : err = FD_MAP_ERR_CORRUPT;
1960 0 : goto done;
1961 0 : }
1962 :
1963 3742884 : MAP_IDX_T * cur = &chain->head_cidx;
1964 6657618 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
1965 4785810 : ulong ele_idx = MAP_(private_idx)( *cur );
1966 4785810 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
1967 0 : err = FD_MAP_ERR_CORRUPT;
1968 0 : goto done;
1969 0 : }
1970 :
1971 4785810 : if(
1972 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
1973 4785810 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
1974 4785810 : # endif
1975 4785810 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
1976 1871076 : if( flags & FD_MAP_FLAG_ADAPTIVE ) {
1977 935613 : *cur = ele[ ele_idx ].MAP_NEXT;
1978 935613 : ele[ ele_idx ].MAP_NEXT = chain->head_cidx;
1979 935613 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
1980 935613 : }
1981 1871076 : query->ele = &ele[ ele_idx ];
1982 1871076 : err = FD_MAP_SUCCESS;
1983 1871076 : retain_lock = 1;
1984 1871076 : goto done;
1985 1871076 : }
1986 :
1987 2914734 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
1988 2914734 : }
1989 :
1990 1871808 : ulong ele_idx = MAP_(private_idx)( *cur );
1991 1871808 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not corrupt */
1992 0 : err = FD_MAP_ERR_CORRUPT;
1993 0 : goto done;
1994 0 : }
1995 :
1996 1871808 : err = FD_MAP_ERR_KEY;
1997 :
1998 3742884 : done: /* silly language restriction */;
1999 :
2000 3742884 : } MAP_CRIT_BLOCKED {
2001 :
2002 0 : query->ver_cnt = ver_cnt;
2003 0 : err = FD_MAP_ERR_AGAIN;
2004 :
2005 0 : } MAP_CRIT_END;
2006 :
2007 3742884 : return err;
2008 3742884 : }
2009 :
2010 : MAP_STATIC int
2011 : MAP_(query_try)( MAP_(t) const * join,
2012 : MAP_KEY_T const * key,
2013 : MAP_ELE_T const * sentinel,
2014 3743067 : MAP_(query_t) * query ) {
2015 :
2016 : /* Determine which chain might hold key */
2017 :
2018 3743067 : MAP_(shmem_t) const * map = join->map;
2019 3743067 : MAP_ELE_T const * ele = join->ele;
2020 3743067 : ulong ele_max = join->ele_max;
2021 :
2022 3743067 : ulong hash = MAP_(key_hash)( key, map->seed );
2023 3743067 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( map, hash );
2024 :
2025 : /* Determine the version of the chain we are querying. Then
2026 : speculatively read and validate the number of elements on the chain
2027 : at that version. If the chain is locked, tell the user to try
2028 : again later. If the number of elements in the chain is invalid,
2029 : tell user the map is corrupt. */
2030 :
2031 3743067 : ulong volatile const * _vc = &chain->ver_cnt;
2032 :
2033 3743067 : FD_COMPILER_MFENCE();
2034 3743067 : ulong then = *_vc;
2035 3743067 : FD_COMPILER_MFENCE();
2036 :
2037 3743067 : ulong ele_cnt = MAP_(private_vcnt_cnt)( then );
2038 :
2039 3743067 : FD_COMPILER_MFENCE();
2040 3743067 : ulong now = *_vc;
2041 3743067 : FD_COMPILER_MFENCE();
2042 :
2043 3743067 : query->ele = (MAP_ELE_T *) sentinel;
2044 3743067 : query->chain = (MAP_(shmem_private_chain_t) *)chain;
2045 3743067 : query->ver_cnt = then;
2046 :
2047 3743067 : if( FD_UNLIKELY( (now!=then) | (!!(then & (1UL<<MAP_CNT_WIDTH))) ) ) return FD_MAP_ERR_AGAIN;
2048 3743067 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) return FD_MAP_ERR_CORRUPT;
2049 :
2050 : /* Search the chain for key. Since we know the numer of elements on
2051 : the chain, we can bound this search to avoid corruption causing out
2052 : of bound reads, infinite loops and such. */
2053 :
2054 3743067 : MAP_IDX_T const * cur = &chain->head_cidx;
2055 6651426 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2056 :
2057 : /* Speculatively read the index of the chain, speculate if a valid
2058 : index and, if so, speculate if the chain element matches the
2059 : query. Note that this assumes element keys have a lifetime of at
2060 : least that of the element. A sufficient (but not a necessary,
2061 : see rant) condition for this is that key is a plain-old-data
2062 : fields in the element. */
2063 :
2064 4778022 : FD_COMPILER_MFENCE();
2065 4778022 : ulong ele_idx = MAP_(private_idx)( *cur );
2066 4778022 : FD_COMPILER_MFENCE();
2067 :
2068 4778022 : int corrupt = (ele_idx>=ele_max);
2069 4778022 : int found = ( FD_LIKELY( !corrupt ) &&
2070 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2071 4778022 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
2072 : # endif
2073 4778022 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) ? 1 : 0;
2074 :
2075 : /* Validate the speculation. If validation fails (e.g. the chain
2076 : was modified behind our back), tell the user to try again later.
2077 : If the element index was not valid, tell the user the map has
2078 : been corrupted. If key was found at element, tell the user they
2079 : can speculate element ele_idx contains key. */
2080 :
2081 4778022 : FD_COMPILER_MFENCE();
2082 4778022 : now = *_vc;
2083 4778022 : FD_COMPILER_MFENCE();
2084 :
2085 4778022 : if( FD_UNLIKELY( now!=then ) ) return FD_MAP_ERR_AGAIN;
2086 4778022 : if( FD_UNLIKELY( corrupt ) ) return FD_MAP_ERR_CORRUPT;
2087 :
2088 4778022 : if( FD_LIKELY( found ) ) { /* Optimize for found */
2089 1869663 : query->ele = (MAP_ELE_T *)&ele[ ele_idx ];
2090 1869663 : return FD_MAP_SUCCESS;
2091 1869663 : }
2092 :
2093 : /* The chain element didn't hold the key ... move to next element */
2094 :
2095 2908359 : cur = &ele[ ele_idx ].MAP_NEXT;
2096 2908359 : }
2097 :
2098 : /* At this point, the chain didn't hold the key. We could probably
2099 : return immediately but we speculative read the tail pointer,
2100 : validate it as an additional integrity check. If these checks
2101 : pass, we are confident the whole chain looked valid and did not
2102 : hold key between now and then. */
2103 :
2104 1873404 : ulong ele_idx = MAP_(private_idx)( *cur );
2105 :
2106 1873404 : FD_COMPILER_MFENCE();
2107 1873404 : now = *_vc;
2108 1873404 : FD_COMPILER_MFENCE();
2109 :
2110 1873404 : if( FD_UNLIKELY( now!=then ) ) return FD_MAP_ERR_AGAIN;
2111 1873404 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) return FD_MAP_ERR_CORRUPT;
2112 :
2113 1873404 : return FD_MAP_ERR_KEY;
2114 1873404 : }
2115 :
2116 : /* Note: txn_add is currently optimized for reasonably small number
2117 : of keys per transaction. For a huge number of transaction keys (e.g.
2118 : an iterator over all keys for all keys), probably should use the
2119 : iterator API. For a moderate number of transaction keys, probably
2120 : should consider data structures where set insert/remove/test are
2121 : sublinear time. Similarly, if MAP_KEY_HASH is costly, might be
2122 : useful to stash the key hashes in the transaction, memoize it in the
2123 : elements, etc. */
2124 :
2125 : MAP_STATIC int
2126 : MAP_(txn_add)( MAP_(txn_t) * txn,
2127 : MAP_KEY_T const * key,
2128 8424324 : int lock ) {
2129 :
2130 : /* Unpack txn fields */
2131 :
2132 8424324 : MAP_(shmem_t) * map = txn->map;
2133 8424324 : ulong info_max = txn->info_max;
2134 8424324 : ulong lock_cnt = txn->lock_cnt;
2135 8424324 : ulong spec_cnt = txn->spec_cnt;
2136 :
2137 8424324 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2138 8424324 : MAP_(txn_private_info_t) * spec_info = lock_info + (info_max - spec_cnt);
2139 :
2140 : /* Determine which chain manages this key */
2141 :
2142 8424324 : ulong hash = MAP_(key_hash)( key, map->seed );
2143 8424324 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
2144 :
2145 : /* If this chain already needs to be locked for this transaction,
2146 : nothing to do. */
2147 :
2148 18112536 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ )
2149 9739155 : if( FD_UNLIKELY( chain==lock_info[ lock_idx ].chain ) ) return FD_MAP_SUCCESS;
2150 :
2151 8373381 : if( FD_UNLIKELY( !lock ) ) { /* optimize for locked key, possible compile time */
2152 :
2153 : /* At this point, key is used speculatively by the transaction and
2154 : its managing chain isn't in the locked set. If this chain is
2155 : already in the speculative set, nothing to do. */
2156 :
2157 3355446 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ )
2158 800454 : if( FD_UNLIKELY( chain==spec_info[ spec_idx ].chain ) ) return FD_MAP_SUCCESS;
2159 :
2160 : /* Add the chain to the speculative set. If we don't have any room,
2161 : fail. */
2162 :
2163 2554992 : ulong free_cnt = info_max - lock_cnt - spec_cnt;
2164 2554992 : if( FD_UNLIKELY( !free_cnt ) ) return FD_MAP_ERR_INVAL; /* Impossible if less than key_max keys added */
2165 1617765 : spec_info[-1].chain = chain;
2166 1617765 : txn->spec_cnt = spec_cnt + 1UL;
2167 :
2168 5811903 : } else {
2169 :
2170 : /* At this point, key is used locked by the transaction and its
2171 : managing chain isn't in the locked set. If this chain is
2172 : currently in the speculative set, move it to the locked
2173 : set. */
2174 :
2175 8196489 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ )
2176 2398788 : if( FD_UNLIKELY( chain==spec_info[ spec_idx ].chain ) ) {
2177 14202 : spec_info[ spec_idx ].chain = spec_info[ 0 ].chain; /* Fill the hole at spec_idx, making a hole at 0 */
2178 14202 : lock_info[ lock_cnt ].chain = chain; /* Either uses unused entry or fills hole at 0 */
2179 14202 : txn->spec_cnt = spec_cnt - 1UL;
2180 14202 : txn->lock_cnt = lock_cnt + 1UL;
2181 14202 : return FD_MAP_SUCCESS;
2182 14202 : }
2183 :
2184 : /* Add the chain to the locked set. If we don't have any room,
2185 : fail. */
2186 :
2187 5797701 : ulong free_cnt = info_max - lock_cnt - spec_cnt;
2188 5797701 : if( FD_UNLIKELY( !free_cnt ) ) return FD_MAP_ERR_INVAL; /* Impossible if less than key_max keys added */
2189 4862205 : lock_info[lock_cnt].chain = chain;
2190 4862205 : txn->lock_cnt = lock_cnt + 1UL;
2191 :
2192 4862205 : }
2193 :
2194 6479970 : return FD_MAP_SUCCESS;
2195 8373381 : }
2196 :
2197 : MAP_STATIC int
2198 : MAP_(txn_try)( MAP_(txn_t) * txn,
2199 1870965 : int flags ) {
2200 1870965 : int non_blocking = !(flags & FD_MAP_FLAG_BLOCKING);
2201 :
2202 : /* Unpack txn fields */
2203 :
2204 1870965 : ulong info_max = txn->info_max;
2205 1870965 : ulong lock_cnt = txn->lock_cnt;
2206 1870965 : ulong spec_cnt = txn->spec_cnt;
2207 :
2208 1870965 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2209 1870965 : MAP_(txn_private_info_t) * spec_info = lock_info + info_max - spec_cnt;
2210 :
2211 1870965 : ulong backoff_exp = (1UL<<32); /* See iter_lock for details */
2212 1870965 : ulong backoff_seed = ((ulong)(uint)flags)>>2;
2213 :
2214 1870965 : int err;
2215 :
2216 1870965 : for(;; ) {
2217 :
2218 1870965 : err = FD_MAP_SUCCESS;
2219 :
2220 1870965 : FD_COMPILER_MFENCE();
2221 :
2222 : /* Get the chain versions for all keys in the speculative set.
2223 : If any are locked, set AGAIN if any are locked. */
2224 :
2225 3474528 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ ) {
2226 1603563 : ulong ver_cnt = spec_info[ spec_idx ].chain->ver_cnt;
2227 1603563 : if( FD_UNLIKELY( ver_cnt & (1UL<<MAP_CNT_WIDTH) ) ) { /* Already locked */
2228 0 : err = FD_MAP_ERR_AGAIN;
2229 0 : break;
2230 0 : }
2231 1603563 : spec_info[ spec_idx ].ver_cnt = ver_cnt;
2232 1603563 : }
2233 :
2234 1870965 : if( FD_LIKELY( !err ) ) {
2235 :
2236 : /* At this point, all the chains we are speculating on were
2237 : unlocked and we have have recorded their versions. Try to lock
2238 : all the chains for the locked key. */
2239 : /* FIXME: consider reordering like iter_lock? */
2240 :
2241 6747372 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) {
2242 :
2243 9752814 : MAP_CRIT( lock_info[ lock_idx ].chain, 0 ) { /* non-blocking */
2244 :
2245 : /* Got the lock ... save the version and retain the lock for
2246 : test. */
2247 :
2248 4876407 : lock_info[ lock_idx ].ver_cnt = ver_cnt;
2249 4876407 : retain_lock = 1;
2250 :
2251 4876407 : } MAP_CRIT_BLOCKED {
2252 :
2253 : /* We hit contention for this lock. Unlock the any chains
2254 : we already locked to prevent possible deadlock (see
2255 : iter_lock) */
2256 :
2257 0 : for( ulong unlock_idx=0UL; unlock_idx<lock_idx; unlock_idx++ )
2258 0 : lock_info[ unlock_idx ].chain->ver_cnt = lock_info[ unlock_idx ].ver_cnt + (2UL<<MAP_CNT_WIDTH);
2259 :
2260 0 : err = FD_MAP_ERR_AGAIN;
2261 :
2262 0 : } MAP_CRIT_END;
2263 :
2264 4876407 : if( FD_UNLIKELY( err ) ) break;
2265 :
2266 4876407 : }
2267 :
2268 1870965 : }
2269 :
2270 1870965 : FD_COMPILER_MFENCE();
2271 :
2272 1870965 : if( FD_LIKELY( (!err) | non_blocking ) ) break;
2273 :
2274 : /* At this point, we hit contention and are blocking (need to try
2275 : again). Do a random backoff (see iter_lock for details). */
2276 :
2277 0 : ulong scale = fd_ulong_min( (fd_ulong_min( lock_cnt+spec_cnt, (1UL<<16)-1UL )*backoff_exp) >> 16, (1UL<<32)-1UL );
2278 0 : backoff_exp = fd_ulong_min( backoff_exp + (backoff_exp>>2) + (backoff_exp>>4), (1UL<<48)-1UL );
2279 0 : MAP_(backoff)( scale, backoff_seed );
2280 0 : }
2281 :
2282 : /* At this point, if we don't have an error, we have the chain
2283 : versions for txn keys used speculatively and they were unlocked and
2284 : we have locks on the chains for txn keys used locked. Otherwise,
2285 : this is a non-blocking call and we return AGAIN. */
2286 :
2287 1870965 : return err;
2288 1870965 : }
2289 :
2290 : MAP_STATIC int
2291 1870965 : MAP_(txn_test)( MAP_(txn_t) * txn ) {
2292 :
2293 : /* Unpack txn fields */
2294 :
2295 1870965 : ulong info_max = txn->info_max;
2296 1870965 : ulong lock_cnt = txn->lock_cnt;
2297 1870965 : ulong spec_cnt = txn->spec_cnt;
2298 :
2299 1870965 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2300 1870965 : MAP_(txn_private_info_t) * spec_info = lock_info + info_max - spec_cnt;
2301 :
2302 : /* Unlock all chains locked for this transaction. Then test if any
2303 : keys used speculatively could have changed in locking / trying /
2304 : unlocking. If so, tell user to retry later. */
2305 :
2306 1870965 : int err = FD_MAP_SUCCESS;
2307 :
2308 1870965 : FD_COMPILER_MFENCE();
2309 :
2310 6747372 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) lock_info[ lock_idx ].chain->ver_cnt += (1UL<<MAP_CNT_WIDTH);
2311 :
2312 3474528 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ ) {
2313 1603563 : MAP_(shmem_private_chain_t) const * chain = spec_info[ spec_idx ].chain;
2314 1603563 : ulong ver_cnt = spec_info[ spec_idx ].ver_cnt;
2315 1603563 : if( FD_UNLIKELY( chain->ver_cnt!=ver_cnt ) ) {
2316 0 : err = FD_MAP_ERR_AGAIN;
2317 0 : break;
2318 0 : }
2319 1603563 : }
2320 :
2321 1870965 : FD_COMPILER_MFENCE();
2322 :
2323 1870965 : return err;
2324 1870965 : }
2325 :
2326 : MAP_STATIC int
2327 : MAP_(txn_insert)( MAP_(t) * join,
2328 6552183 : MAP_ELE_T * ele ) {
2329 :
2330 : /* Determine the element index (fastest if ele are power-of-two) and
2331 : the chain that should hold ele */
2332 :
2333 6552183 : MAP_(shmem_t) * map = join->map;
2334 6552183 : ulong ele_max = join->ele_max;
2335 :
2336 6552183 : ulong ele_idx = (ulong)(ele - join->ele);
2337 6552183 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_INVAL;
2338 :
2339 1636707 : ulong hash = MAP_(key_hash)( &ele->MAP_KEY, map->seed );
2340 1636707 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
2341 :
2342 : /* Insert ele_idx at head of chain. */
2343 :
2344 1636707 : ulong ver_cnt = chain->ver_cnt;
2345 1636707 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2346 1636707 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2347 :
2348 1636707 : ele->MAP_NEXT = chain->head_cidx;
2349 : # if MAP_MEMOIZE
2350 1636707 : ele->MAP_MEMO = hash;
2351 : # endif
2352 1636707 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2353 1636707 : chain->ver_cnt = MAP_(private_vcnt)( version, ele_cnt+1UL );
2354 :
2355 1636707 : return FD_MAP_SUCCESS;
2356 6552183 : }
2357 :
2358 : MAP_STATIC int
2359 : MAP_(txn_remove)( MAP_(t) * join,
2360 : MAP_KEY_T const * key,
2361 : MAP_ELE_T const * sentinel,
2362 1636611 : MAP_(query_t) * query ) {
2363 :
2364 : /* Determine the chain that should hold key */
2365 :
2366 1636611 : MAP_(shmem_t) * map = join->map;
2367 1636611 : MAP_ELE_T * ele = join->ele;
2368 1636611 : ulong ele_max = join->ele_max;
2369 :
2370 1636611 : ulong hash = MAP_(key_hash)( key, map->seed );
2371 1636611 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
2372 :
2373 : /* Find the key on the chain and remove it */
2374 :
2375 1636611 : ulong ver_cnt = chain->ver_cnt;
2376 1636611 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2377 1636611 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2378 :
2379 1636611 : query->ele = (MAP_ELE_T *)sentinel;
2380 1636611 : query->chain = chain;
2381 1636611 : query->ver_cnt = ver_cnt;
2382 :
2383 1636611 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2384 :
2385 1636611 : MAP_IDX_T * cur = &chain->head_cidx;
2386 2483469 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
2387 2477868 : ulong ele_idx = MAP_(private_idx)( *cur );
2388 2477868 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2389 :
2390 2477868 : if(
2391 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2392 2477868 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
2393 2477868 : # endif
2394 2477868 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2395 1631010 : *cur = ele[ ele_idx ].MAP_NEXT;
2396 1631010 : chain->ver_cnt = MAP_(private_vcnt)( version, ele_cnt-1UL );
2397 1631010 : query->ele = &ele[ ele_idx ];
2398 1631010 : return FD_MAP_SUCCESS;
2399 1631010 : }
2400 :
2401 846858 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
2402 846858 : }
2403 :
2404 5601 : ulong ele_idx = MAP_(private_idx)( *cur );
2405 5601 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not found */
2406 5601 : return FD_MAP_ERR_KEY;
2407 5601 : }
2408 :
2409 : MAP_STATIC int
2410 : MAP_(txn_modify)( MAP_(t) * join,
2411 : MAP_KEY_T const * key,
2412 : MAP_ELE_T * sentinel,
2413 : MAP_(query_t) * query,
2414 3276498 : int flags ) {
2415 :
2416 : /* Determine which chain might hold key */
2417 :
2418 3276498 : MAP_(shmem_t) * map = join->map;
2419 3276498 : MAP_ELE_T * ele = join->ele;
2420 3276498 : ulong ele_max = join->ele_max;
2421 :
2422 3276498 : ulong hash = MAP_(key_hash)( key, map->seed );
2423 3276498 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
2424 :
2425 : /* Search the chain for key */
2426 :
2427 3276498 : ulong ver_cnt = chain->ver_cnt;
2428 3276498 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2429 :
2430 3276498 : query->ele = sentinel;
2431 3276498 : query->chain = chain;
2432 3276498 : query->ver_cnt = ver_cnt;
2433 :
2434 3276498 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2435 :
2436 3276498 : MAP_IDX_T * cur = &chain->head_cidx;
2437 4972839 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2438 4961877 : ulong ele_idx = MAP_(private_idx)( *cur );
2439 :
2440 4961877 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2441 :
2442 4961877 : if(
2443 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2444 4961877 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
2445 4961877 : # endif
2446 4961877 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2447 3265536 : if( flags & FD_MAP_FLAG_ADAPTIVE ) {
2448 816303 : *cur = ele[ ele_idx ].MAP_NEXT;
2449 816303 : ele[ ele_idx ].MAP_NEXT = chain->head_cidx;
2450 816303 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2451 816303 : }
2452 3265536 : query->ele = &ele[ ele_idx ];
2453 3265536 : return FD_MAP_SUCCESS;
2454 3265536 : }
2455 :
2456 1696341 : cur = &ele[ ele_idx ].MAP_NEXT;
2457 1696341 : }
2458 :
2459 10962 : ulong ele_idx = MAP_(private_idx)( *cur );
2460 10962 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2461 :
2462 10962 : return FD_MAP_ERR_KEY;
2463 10962 : }
2464 :
2465 : MAP_STATIC int
2466 : MAP_(iter_lock)( MAP_(t) * join,
2467 : ulong * lock_seq,
2468 : ulong lock_cnt,
2469 1878468 : int flags ) {
2470 1878468 : if( FD_UNLIKELY( !lock_cnt ) ) return FD_MAP_SUCCESS; /* nothing to do */
2471 1878459 : if( FD_UNLIKELY( (!join) | (!lock_seq) ) ) return FD_MAP_ERR_INVAL;
2472 :
2473 1878453 : int non_blocking = !(flags & FD_MAP_FLAG_BLOCKING);
2474 :
2475 1878453 : MAP_(shmem_t) * map = join->map;
2476 :
2477 1878453 : ulong chain_cnt = map->chain_cnt;
2478 1878453 : if( FD_UNLIKELY( lock_cnt>chain_cnt ) ) return FD_MAP_ERR_INVAL;
2479 :
2480 1878450 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( join->map, 0UL );
2481 :
2482 1878450 : int err;
2483 :
2484 1878450 : ulong backoff = 1UL<<32; /* in [1,2^16)*2^32 */
2485 1878450 : ulong backoff_seed = ((ulong)(uint)flags)>>2; /* 0 usually fine */
2486 1878450 : ulong lock_idx = 0UL;
2487 1878450 : ulong locked_cnt = 0UL;
2488 3752100 : for(;;) {
2489 :
2490 3752100 : err = FD_MAP_SUCCESS;
2491 :
2492 : /* At this point, we've acquired locks [0,locked_cnt), we need to
2493 : acquire locks [locked_cnt,lock_cnt), [locked_cnt,lock_cnt) is non
2494 : empty and i is in [locked_cnt,lock_cnt). Try to acquire lock
2495 : lock_idx this iteration. */
2496 :
2497 3752100 : ulong chain_idx = lock_seq[ lock_idx ];
2498 :
2499 3752100 : if( FD_UNLIKELY( chain_idx>=chain_cnt ) ) { /* optimize for valid lock_seq */
2500 0 : for( ulong unlock_idx=0UL; unlock_idx<locked_cnt; unlock_idx++ )
2501 0 : chain[ lock_seq[ unlock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2502 0 : locked_cnt = 0UL;
2503 0 : err = FD_MAP_ERR_AGAIN;
2504 0 : break;
2505 0 : }
2506 :
2507 7504200 : MAP_CRIT( chain + chain_idx, 0 ) {
2508 :
2509 : /* At this point, we got the lock. Swap lock at locked_cnt and
2510 : lock_idx and increment locked_cnt to move lock_idx to the
2511 : locked set as the most recently acquired lock. Since we
2512 : increment lock_idx below, when locked_cnt<lock_idx (i.e. we had
2513 : contention for lock locked_cnt recently), this will move the
2514 : next attempt to lock locked_cnt as far as possible from now of
2515 : the remaining locks to acquire. When locked_cnt==lock_idx,
2516 : this is a branchless no-op (and the increment of lock_idx below
2517 : will guarantee lock_idx will be at least locked_cnt next
2518 : iteration, preserving the invariant that lock_idx is in
2519 : [locked_cnt,lock_cnt) on the next iteration if there is one. */
2520 :
2521 3752100 : ulong chain_idx_tmp = lock_seq[ locked_cnt ];
2522 3752100 : lock_seq[ lock_idx ] = chain_idx_tmp;
2523 3752100 : lock_seq[ locked_cnt ] = chain_idx;
2524 3752100 : locked_cnt++;
2525 :
2526 3752100 : retain_lock = 1;
2527 :
2528 3752100 : } MAP_CRIT_BLOCKED {
2529 :
2530 : /* We failed to get lock lock_idx. To avoid deadlock with the
2531 : thread that has this lock and is trying get a lock we already
2532 : have, we unlock the chains we've already locked (note that we
2533 : need to unlock here in non-blocking operation too). Quick
2534 : experiments in extreme contention scenarios found more
2535 : incremental approaches in blocking operation could take an
2536 : excessively long time to resolve so we bulk unlock. */
2537 :
2538 0 : for( ulong unlock_idx=0UL; unlock_idx<locked_cnt; unlock_idx++ )
2539 0 : chain[ lock_seq[ unlock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2540 0 : locked_cnt = 0UL;
2541 :
2542 0 : err = FD_MAP_ERR_AGAIN;
2543 :
2544 0 : } MAP_CRIT_END;
2545 :
2546 3752100 : if( FD_UNLIKELY( (locked_cnt==lock_cnt ) | /* all locks acquired */
2547 3752100 : ((!!err) & non_blocking) ) ) break; /* or hit contention and are non-blocking */
2548 :
2549 : /* Move to the next lock. Everytime we wrap around, we hit
2550 : contention since the last wrap / iter start. We do a random
2551 : exponential backoff with saturation on wrapping to minimize
2552 : contention with other threads hitting these locks. Normalizing
2553 : out fixed point scalings baked into the below, we spin pause a
2554 : uniform IID random number of times in [0,unlocked_cnt*backoff]
2555 : where backoff is 1 on the first wrap and increases by ~30% each
2556 : time to a maximum of 2^16 (i.e. hundreds microseconds per
2557 : remaining lock for typical CPU speeds and spin pause delays at
2558 : maximum backoff). */
2559 :
2560 1873650 : lock_idx++;
2561 1873650 : if( FD_UNLIKELY( lock_idx==lock_cnt ) ) { /* optimize for lots of locks */
2562 0 : lock_idx = locked_cnt;
2563 0 : ulong scale = fd_ulong_min( (fd_ulong_min( lock_cnt-locked_cnt, (1UL<<16)-1UL )*backoff) >> 16, (1UL<<32)-1UL );
2564 0 : backoff = fd_ulong_min( backoff + (backoff>>2) + (backoff>>4), (1UL<<48)-1UL );
2565 0 : MAP_(backoff)( scale, backoff_seed );
2566 0 : }
2567 1873650 : }
2568 :
2569 1878450 : return err;
2570 1878453 : }
2571 :
2572 : MAP_STATIC void
2573 : MAP_(iter_unlock)( MAP_(t) * join,
2574 : ulong const * lock_seq,
2575 3752100 : ulong lock_cnt ) {
2576 3752100 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( join->map, 0UL );
2577 :
2578 3752100 : FD_COMPILER_MFENCE();
2579 7504200 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ )
2580 3752100 : chain[ lock_seq[ lock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2581 3752100 : FD_COMPILER_MFENCE();
2582 3752100 : }
2583 :
2584 : MAP_STATIC void
2585 3 : MAP_(reset)( MAP_(t) * join ) {
2586 3 : MAP_(shmem_t) * map = join->map;
2587 :
2588 3 : ulong chain_cnt = map->chain_cnt;
2589 3 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, 0UL );
2590 :
2591 1539 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
2592 1536 : ulong ver_cnt = chain[ chain_idx ].ver_cnt;
2593 1536 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2594 1536 : chain[ chain_idx ].ver_cnt = MAP_(private_vcnt)( version+2UL, 0UL );
2595 1536 : chain[ chain_idx ].head_cidx = MAP_(private_cidx)( MAP_(private_idx_null)() );
2596 1536 : }
2597 3 : }
2598 :
2599 : MAP_STATIC int
2600 3 : MAP_(verify)( MAP_(t) const * join ) {
2601 :
2602 3102 : # define MAP_TEST(c) do { \
2603 3102 : if( FD_UNLIKELY( !(c) ) ) { FD_LOG_WARNING(( "FAIL: %s", #c )); return FD_MAP_ERR_INVAL; } \
2604 3102 : } while(0)
2605 :
2606 : /* Validate join */
2607 :
2608 3 : MAP_TEST( join );
2609 3 : MAP_TEST( fd_ulong_is_aligned( (ulong)join, alignof(MAP_(t)) ) );
2610 :
2611 3 : MAP_(shmem_t) const * map = join->map;
2612 3 : MAP_ELE_T const * ele = join->ele;
2613 3 : ulong ele_max = join->ele_max;
2614 :
2615 3 : MAP_TEST( map );
2616 3 : MAP_TEST( fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) );
2617 :
2618 3 : MAP_TEST( (!!ele) | (!ele_max) );
2619 3 : MAP_TEST( fd_ulong_is_aligned( (ulong)ele, alignof(MAP_ELE_T) ) );
2620 :
2621 3 : MAP_TEST( ele_max<=MAP_(ele_max_max)() );
2622 :
2623 : /* Validate map metadata */
2624 :
2625 3 : ulong magic = map->magic;
2626 3 : ulong seed = map->seed;
2627 3 : ulong chain_cnt = map->chain_cnt;
2628 :
2629 3 : MAP_TEST( magic==MAP_MAGIC );
2630 : /* seed is arbitrary */
2631 3 : MAP_TEST( fd_ulong_is_pow2( chain_cnt ) );
2632 3 : MAP_TEST( chain_cnt<=MAP_(chain_max)() );
2633 :
2634 3 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( map, 0UL );
2635 :
2636 : /* Validate the map chains */
2637 :
2638 3 : ulong unmapped_ele_cnt = ele_max;
2639 1539 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
2640 :
2641 : /* Validate the chain length */
2642 :
2643 1536 : ulong ver_cnt = chain[ chain_idx ].ver_cnt;
2644 :
2645 1536 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2646 1536 : MAP_TEST( ele_cnt<=unmapped_ele_cnt );
2647 1536 : unmapped_ele_cnt -= ele_cnt;
2648 :
2649 : /* Validate chain linkage, element membership and element uniqueness */
2650 :
2651 1536 : ulong head_idx = MAP_(private_idx)( chain[ chain_idx ].head_cidx );
2652 1536 : ulong cur_idx = head_idx;
2653 1536 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2654 0 : MAP_TEST( cur_idx<ele_max ); /* In element store */
2655 :
2656 0 : MAP_KEY_T const * key = &ele[ cur_idx ].MAP_KEY;
2657 :
2658 0 : ulong hash = MAP_(key_hash)( key, seed );
2659 0 : ulong ele_chain_idx = hash & (chain_cnt-1UL);
2660 0 : MAP_TEST( ele_chain_idx==chain_idx ); /* On correct chain */
2661 : # if MAP_MEMOIZE
2662 0 : MAP_TEST( ele[ cur_idx ].MAP_MEMO==hash );
2663 0 : # endif
2664 :
2665 : /* Note that we've already validated linkage from head_idx to
2666 : cur_idx so pointer chasing here is safe. */
2667 :
2668 0 : ulong prv_idx = head_idx;
2669 0 : while( prv_idx!=cur_idx ) {
2670 0 : MAP_TEST( !MAP_(key_eq)( &ele[ prv_idx ].MAP_KEY, key ) ); /* Unique */
2671 0 : prv_idx = MAP_(private_idx)( ele[ prv_idx ].MAP_NEXT );
2672 0 : }
2673 :
2674 0 : cur_idx = MAP_(private_idx)( ele[ cur_idx ].MAP_NEXT );
2675 0 : }
2676 :
2677 1536 : MAP_TEST( MAP_(private_idx_is_null)( cur_idx ) );
2678 1536 : }
2679 :
2680 : /* At this point, we know the sum of the chain lengths do not exceed
2681 : the size of the element store, each chain is of their stated
2682 : length, each chain element is in element store, and that every
2683 : element on a chain belongs on that chain (which precludes the
2684 : possibility of two chains merging into one) and that every element
2685 : on a chain is unique (which implies unique among all chains since
2686 : elements with each key maps to a single chain).
2687 :
2688 : That is, join is a current local join to a valid shared mapping of
2689 : unique keys to unique elements in the element store.
2690 :
2691 : We don't know anything about unmapped elements in the element store
2692 : and cannot do any verification of them (here be dragons). But
2693 : that's kinda the point ... what's in the unmapped elements depends
2694 : on how the application is managing those. */
2695 :
2696 3 : # undef MAP_TEST
2697 :
2698 3 : return FD_MAP_SUCCESS;
2699 3 : }
2700 :
2701 : MAP_STATIC char const *
2702 18 : MAP_(strerror)( int err ) {
2703 18 : switch( err ) {
2704 3 : case FD_MAP_SUCCESS: return "success";
2705 3 : case FD_MAP_ERR_INVAL: return "bad input";
2706 3 : case FD_MAP_ERR_AGAIN: return "try again";
2707 3 : case FD_MAP_ERR_CORRUPT: return "corruption detected";
2708 3 : case FD_MAP_ERR_KEY: return "key not found";
2709 3 : default: break;
2710 18 : }
2711 3 : return "unknown";
2712 18 : }
2713 :
2714 : #undef MAP_CRIT_END
2715 : #undef MAP_CRIT_BLOCKED
2716 : #undef MAP_CRIT
2717 :
2718 : #endif
2719 :
2720 : #undef MAP_
2721 : #undef MAP_STATIC
2722 : #undef MAP_VER_WIDTH
2723 :
2724 : #undef MAP_IMPL_STYLE
2725 : #undef MAP_MAGIC
2726 : #undef MAP_ALIGN
2727 : #undef MAP_CNT_WIDTH
2728 : #undef MAP_KEY_EQ_IS_SLOW
2729 : #undef MAP_MEMO
2730 : #undef MAP_MEMOIZE
2731 : #undef MAP_KEY_HASH
2732 : #undef MAP_KEY_EQ
2733 : #undef MAP_NEXT
2734 : #undef MAP_IDX_T
2735 : #undef MAP_KEY
2736 : #undef MAP_KEY_T
2737 : #undef MAP_ELE_T
2738 : #undef MAP_NAME
|