Line data Source code
1 : /* Generate prototypes, inlines and/or implementations for concurrent
2 : persistent shared maps based on chaining. A map can store a
3 : practically unbounded number of elements. If sized on creation for
4 : the maximum number of mapped elements, typical map operations are a
5 : fast O(1) time and map element overhead is a small O(1) space.
6 : Further, large numbers of composite map operations can be done
7 : concurrently with very low risk of conflicts.
8 :
9 : In the current implementation, each map chain has a version number.
10 : Operations that require changing a chain's connectivity (e.g.
11 : inserting or removing an element from a chain) or modifying an
12 : element managed by that chain, the chain's version number is
13 : increased by one (atomic fetch-and-or based) such that other
14 : potential users of keys managed by that chain detect and react
15 : appropriately to a potentially concurrent conflicting operation is in
16 : progress. When an operation completes, the chain version number is
17 : increased by one again to notify other users the operation is no
18 : longer in progress and that the set of keys managed by that chain
19 : and/or values associated with those keys has potentially changed
20 : since the previous version. For example, lockfree queries can
21 : interoperate with this via a zero-copy
22 : try-speculatively-process-then-test pattern similar to that used in
23 : fd_tango for high throughput message processing.
24 :
25 : As such, there can be an arbitrary number of concurrent readers
26 : processing map keys. These readers will not interfere with each
27 : other and will not block any concurrent insert / remove / modify
28 : operations. Insert / remove / modify operations can potentially
29 : block each other. Since there are typically O(1) keys per chain, the
30 : probability of concurrent insert / remove / modify operations
31 : involving different keys blocking each other is small. Further, this
32 : controllable a priori by provisioning the number of chains
33 : appropriately. Concurrent operations on the same key are serialized
34 : (as they necessarily would be any implementation). Since the
35 : operations are HPC implementations, collisions are resolved as fast
36 : as is practical. The upshot is that the map supports massive
37 : concurrency while preserving concurrent operation serializability.
38 :
39 : Version numbers are stored with chain head pointers such that the
40 : cache traffic required for managing chain versioning is covered by
41 : the same cache traffic required for managing chains in a
42 : non-concurrent implementation (e.g. fd_map_chain). Operations do
43 : many internal integrity checking / bounds checking for use in high
44 : reliability applications.
45 :
46 : Lastly, fine grained versioning allows for concurrent execution of
47 : complex operations involving multiple keys simultaneously. This
48 : allows using the map as a concurrent transactional memory and for
49 : serialization of all map elements at a consistent point in time while
50 : minimizing impact on ongoing concurrent operations (e.g. snapshotting
51 : all the elements in the map).
52 :
53 : The main drawback of chain versioning is the extra memory footprint
54 : required for chain metadata storage. The current implementation
55 : supports indexing compression and uses atomic bit field techniques to
56 : minimize this overhead.
57 :
58 : Concurrent operation requires FD_HAS_ATOMIC. This will still work on
59 : platforms without FD_HAS_ATOMIC but concurrent operations will not be
60 : safe.
61 :
62 : In short, if you need a concurrent map, this is a lot better than
63 : protecting a non-concurrent implementation with a global lock. And,
64 : if you don't, it will be comparably performant to a non-concurrent
65 : implementation.
66 :
67 : This generator is designed for ultra tight coupling with pools,
68 : treaps, heaps, lists, other maps, etc. Likewise, a map can be
69 : persisted beyond the lifetime of the creating process, be used
70 : inter-process, relocated in memory, be naively
71 : serialized/deserialized, be moved between hosts, use index
72 : compression for cache and memory bandwidth efficiency, etc.
73 : Concurrency and flexibility are prioritized.
74 :
75 : Typical usage:
76 :
77 : struct myele {
78 : ulong key; // Technically "MAP_KEY_T MAP_KEY" (default is ulong key), managed by mymap when the element is in the mymap
79 : ulong next; // Technically "MAP_IDX_T MAP_NEXT" (default is ulong next), managed by mymap when the element is in the mymap
80 :
81 : ... key and next can be located arbitrarily in the element and
82 : ... can be reused for other purposes when the element is not in a
83 : ... mymap. The mapping of a key to an element in the element
84 : ... store is arbitrary. An element should not be moved /
85 : ... released from the element store while in the mymap.
86 :
87 : };
88 :
89 : typedef struct myele myele_t;
90 :
91 : #define MAP_NAME mymap
92 : #define MAP_ELE_T myele_t
93 : #include "tmpl/fd_map_para.c"
94 :
95 : will declare the following APIs as a header only style library in the
96 : compilation unit:
97 :
98 : // A mymap_t is a stack declaration friendly quasi-opaque local
99 : // object used to hold the state of a local join to a mymap.
100 : // Similarly, a mymap_query_t and a mymap_iter_t hold the local
101 : // state of an ongoing local query and local iteration
102 : // respectively. E.g. it is fine to do mymap_t join[1];" to
103 : // allocate a mymap_t but the contents should not be used directly.
104 :
105 : typedef struct mymap_private mymap_t;
106 : typedef struct mymap_query_private mymap_query_t;
107 : typedef struct mymap_iter_private mymap_iter_t;
108 :
109 : // mymap_ele_max_max returns the maximum element store capacity
110 : // compatible with a mymap.
111 :
112 : ulong mymap_ele_max_max( void );
113 :
114 : // mymap_chain_max returns the maximum number of chains supported
115 : // by a mymap. Will be an integer power-of-two.
116 :
117 : ulong mymap_chain_max( void );
118 :
119 : // mymap_chain_cnt_est returns a reasonable number of chains to use
120 : // for a map that is expected to hold up to ele_max_est elements.
121 : // ele_max_est will be clamped to be in [1,mymap_ele_max_max()] and
122 : // the return value will be a integer power-of-two in
123 : // [1,mymap_chain_max()].
124 :
125 : ulong mymap_chain_cnt_est( ulong ele_max_est );
126 :
127 : // mymap_{align,footprint} returns the alignment and footprint
128 : // needed for a memory region to be used as a mymap. align will be
129 : // an integer power-of-two and footprint will be a multiple of
130 : // align. footprint returns 0 if chain_cnt is not an integer
131 : // power-of-two in [1,mymap_chain_max()].
132 : //
133 : // mymap_new formats a memory region with the required alignment
134 : // and footprint into a mymap. shmem points in the caller's
135 : // address space to the memory region to use. Returns shmem on
136 : // success (mymap has ownership of the memory region) and NULL on
137 : // failure (no changes, logs details). The caller is not joined on
138 : // return. The mymap will be empty with all map chains at version
139 : // 0 (unlocked).
140 : //
141 : // mymap_join joins the caller to an existing mymap. ljoin points
142 : // to a mymap_t compatible memory region in the caller's address
143 : // space, shmap points in the caller's address space to the memory
144 : // region containing the mymap, shele points in the caller's
145 : // address space to mymap's element store and ele_max gives the
146 : // element store's capacity. Returns a handle to the caller's
147 : // local join on success (join has ownership of the ljoin region)
148 : // and NULL on failure (no changes, logs details).
149 : //
150 : // mymap_leave leaves a mymap join. join points to a current local
151 : // join. Returns the memory region used for the join on success
152 : // (caller has ownership on return and the caller is no longer
153 : // joined) and NULL on failure (no changes, logs details). Use the
154 : // join accessors before leaving to get shmap, shele and ele_max
155 : // used by the join if needed.
156 : //
157 : // mymap_delete unformats a memory region used as a mymap. Assumes
158 : // shmap points in the caller's address space to a memory region
159 : // containing the mymap and that there are no joins. Returns shmem
160 : // on success (caller has ownership of the memory region, any
161 : // remaining elements still in the mymap are released to the caller
162 : // implicitly) and NULL on failure (no changes, logs details).
163 :
164 : ulong mymap_align ( void );
165 : ulong mymap_footprint( ulong chain_cnt );
166 : void * mymap_new ( void * shmem, ulong chain_cnt, ulong seed );
167 : mymap_t * mymap_join ( void * ljoin, void * shmap, void * shele, ulong ele_max );
168 : void * mymap_leave ( mymap_t * join );
169 : void * mymap_delete ( void * shmap );
170 :
171 : // mymap_{chain_cnt,seed} return the mymap configuration. Assumes
172 : // join is a current local join. The values will be valid for the
173 : // mymap lifetime.
174 :
175 : ulong mymap_chain_cnt( mymap_t const * join );
176 : ulong mymap_seed ( mymap_t const * join );
177 :
178 : // mymap_{shmap,shele,ele_max} return join details. Assumes join
179 : // is a current local join. The values will be valid for the join
180 : // lifetime. mymap_{shmap_const,shele_const} are const correct
181 : // versions.
182 :
183 : void const * mymap_shmap_const( mymap_t const * join );
184 : void const * mymap_shele_const( mymap_t const * join );
185 : ulong mymap_ele_max ( mymap_t const * join );
186 :
187 : void * mymap_shmap( mymap_t * join );
188 : void * mymap_shele( mymap_t * join );
189 :
190 : // mymap_key_{eq,hash} expose the provided MAP_KEY_{EQ,HASH} macros
191 : // as inlines with strict semantics. They assume that the provided
192 : // pointers are in the caller's address space to keys that will not
193 : // be changed during the call. They retain no interest in any keys
194 : // on return.
195 : //
196 : // mymap_key_eq returns 1 if *k0 and *k1 are equal and 0 otherwise.
197 : //
198 : // mymap_key_hash returns the hash of *key using the hash function
199 : // seed. Should ideally be a random mapping from a MAP_KEY_T to a
200 : // ulong but this depends on what the user actually used for
201 : // MAP_KEY_HASH. The seed used by a particular mymap innstance can
202 : // be obtained above.
203 :
204 : int mymap_key_eq ( ulong * k0, ulong * k1 );
205 : ulong mymap_key_hash( ulong * key, ulong seed );
206 :
207 : // mymap_backoff does FD_SPIN_PAUSE a random number of times. The
208 : // number of pauses is an approximately uniform IID random number
209 : // in [0,scale/2^16] where scale is in [0,2^32). Specifically, the
210 : // number of pauses is:
211 : //
212 : // floor( scale r / 2^48 )
213 : //
214 : // where r is a non-deterministic 32-bit uniform IID random number.
215 : // Under the hood, r is generated by hashing the user provided seed
216 : // and the least significant 32-bits of the CPU tickcounter.
217 : // Ideally, seed is a 32-bit globally unique identifer for the
218 : // logical thread of execution but this is up to the application to
219 : // specify and rarely matters in practice. This is a useful
220 : // building block for random exponential backoffs.
221 :
222 : void mymap_backoff( ulong scale, ulong seed );
223 :
224 : // mymap_query_ele returns a pointer in the caller's address space
225 : // to the element store element associated with the query or a
226 : // sentinel value. The sentinel value is application dependent and
227 : // thus arbitrary (e.g. not necessarily in the element store,
228 : // including NULL, a local temporary used as a bit bucket, etc).
229 : // Assumes query is valid. The lifetime of the returned pointer
230 : // depends on the query. mymap_query_ele_const is a const correct
231 : // version.
232 :
233 : myele_t const * mymap_query_ele_const( mymap_query_t const * query );
234 : myele_t * mymap_query_ele ( mymap_query_t * query );
235 :
236 : // mymap_insert inserts into a mymap a mapping from a key to an
237 : // element store element. ele points in the caller's address space
238 : // to the element and ele->key is initialized to the key. flags is
239 : // a bit-or of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set /
240 : // clear in flags, this is allowed / not allowed to block the
241 : // caller. Assumes join is a current local join, element is not in
242 : // the mymap and the key is not currently mapped to anything in the
243 : // mymap. This is a non-blocking fast O(1) and supports highly
244 : // concurrent operation.
245 : //
246 : // Returns FD_MAP_SUCCESS (0) on success and a FD_MAP_ERR
247 : // (negative) on failure. On success, ele was inserted into the
248 : // mymap at some point during the call (mymap took ownership of the
249 : // element at that time but the application is free to manage all
250 : // fields of the element except ele->key, ele->next and, if the
251 : // mymap is memoized, ele->hash ... insert will initialize the hash
252 : // field to mymap_key_hash( key, seed ) and this will be stable
253 : // while ele is in mymap). On failure, the mymap was not modified
254 : // by the call, no changes of ownership occurred in the call and
255 : // returns:
256 : //
257 : // - FD_MAP_ERR_INVAL: ele is not a pointer to an element store
258 : // element.
259 : //
260 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
261 : // progress at some point during the call. Try again later (e.g.
262 : // after a random exponential backoff). Specifically, this
263 : // operation requires locking the map chain associated with key.
264 : // Since there are typically O(1) keys per chain, the probability
265 : // of getting AGAIN due to a false conflict is negligible even
266 : // under highly concurrent loads. Since insert / remove are fast
267 : // O(1) operations, any remaining conflicts, real or imagined,
268 : // are typically very short lived. Never returned for a blocking
269 : // call.
270 : //
271 : // IMPORTANT SAFETY TIP! Do not use inside a modify try/test,
272 : // query try/test, txn try/test or iter lock/unlock.
273 :
274 : int mymap_insert( mymap_t * join, myele_t * ele, int flags );
275 :
276 : // mymap_remove removes the mapping (if any) for key from the
277 : // mymap. On return, query will contain information about the
278 : // removed mapping. sentinel gives the query element pointer value
279 : // (arbitrary) to pass through when this did not remove a mapping
280 : // for any reason. flags is a bit-or of FD_MAP_FLAG flags. If
281 : // FD_MAP_FLAG_BLOCKING is set / clear in flags, this is allowed /
282 : // not allowed to block the caller. Assumes join is a current
283 : // local join and key is valid for the duration of the call.
284 : // Retains no interest in key, sentinel or query. This is a
285 : // non-blocking fast O(1) and supports highly concurrent operation.
286 : //
287 : // Returns FD_MAP_SUCCESS (0) on success and a FD_MAP_ERR
288 : // (negative) on failure. On success, key's mapping was removed at
289 : // some point during the call. mymap_query_ele( query ) will point
290 : // in the caller's address space to the element store element where
291 : // key mapped just before it was removed (element ownership
292 : // transferred to the caller at that time). On failure, no changes
293 : // were made by this call, mymap_query_ele( query ) will be
294 : // sentinel and:
295 : //
296 : // - FD_MAP_ERR_KEY: Key was not found in the mymap at some point
297 : // during the call.
298 : //
299 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
300 : // progress at some point during the call. Same considerations
301 : // as insert above. Never returned for a blocking call.
302 : //
303 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
304 : // point during the call.
305 : //
306 : // IMPORTANT SAFETY TIP! Do not use inside a modify try/test,
307 : // query try/test, txn try/test or iter lock/unlock.
308 :
309 : int
310 : mymap_remove( mymap_t * join,
311 : ulong const * key,
312 : myele_t const * sentinel,
313 : mymap_query_t * query,
314 : int flags );
315 :
316 : // mymap_modify_try tries to start modification of the mymap
317 : // element corresponding to key. On return, query will hold
318 : // information about the try. sentinel gives the query element
319 : // pointer value (arbitrary) to pass through when it is not safe to
320 : // try. flags is a bit-or of FD_MAP_FLAG flags. If
321 : // FD_MAP_FLAG_BLOCKING is set / clear, this call is allowed / not
322 : // allowed to block the caller. If FD_MAP_FLAG_ADAPTIVE is set /
323 : // clear, this call should / should not adapt the mymap to
324 : // accelerate future operations on this key. Adaptation for a key
325 : // can potentially slow future operations for other keys. The
326 : // marginal benefit of adaptation for a key grows linearly with the
327 : // number of keys managed by the key's chain. Assumes join is a
328 : // current local join and key is valid for the duration of the
329 : // call. Retains no interest in key, sentinel or query. This is a
330 : // non-blocking fast O(1) and supports highly concurrent operation.
331 : //
332 : // Returns FD_MAP_SUCCESS (0) on success and a FD_MAP_ERR
333 : // (negative) on failure. On success, mymap_query_ele( query )
334 : // will point in the caller's address space to the element to
335 : // modify and the chain that manages key will be locked. The mymap
336 : // retains ownership of this element and management of the key and
337 : // next fields. The caller is free to modify any other fields. On
338 : // failure, the mymap was not modified by this call,
339 : // mymap_query_ele( query ) will be sentinel and returns:
340 : //
341 : // - FD_MAP_ERR_KEY: Key was not found in the mymap in some point
342 : // during the call.
343 : //
344 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
345 : // progress at some point during the try. Same considerations as
346 : // insert above. Never returned for a blocking call.
347 : //
348 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
349 : // point during the call.
350 : //
351 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a query
352 : // try/test, txn try/test or iter_lock/unlock on the same thread.
353 :
354 : int
355 : mymap_modify_try( mymap_t * join,
356 : ulong const * key,
357 : myele_t * sentinel,
358 : mymap_query_t * query,
359 : int flags );
360 :
361 : // mymap_modify_test finishes an in-progress modification. Assumes
362 : // query is valid and the caller is in a modify try. Returns
363 : // FD_MAP_SUCCESS (0). On return, the caller will no longer be in
364 : // a modify try. Guaranteed to succeed.
365 : //
366 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a query
367 : // try/test, txn try/test or iter_lock/unlock on the same thread.
368 :
369 : int mymap_modify_test( mymap_query_t * query );
370 :
371 : // mymap_query_try tries to speculatively query a mymap for key.
372 : // On return, query will hold information about the try. sentinel
373 : // gives the query element pointer value (arbitrary) to pass
374 : // through when it is not safe to try the query. Assumes join is a
375 : // current local join and key is valid for the duration of the
376 : // call. Does not modify the mymap and retains no interest in key,
377 : // sentinel or query. This is a non-blocking fast O(1) and
378 : // supports highly concurrent operation.
379 : //
380 : // Returns FD_MAP_SUCCESS (0) on success and a FD_MAP_ERR
381 : // (negative) on failure. On success, key mapped to an element in
382 : // the element store at some point during the call.
383 : // mymap_query_ele( query ) will point in the caller's address
384 : // space to the element store element where key mapped at that
385 : // time. The mymap retains ownership of this element but the
386 : // caller can zero copy speculatively process the element. On
387 : // failure, mymap_query_ele( query ) will be sentinel and returns:
388 : //
389 : // - FD_MAP_ERR_KEY: Key was not found in the mymap in some point
390 : // during the call.
391 : //
392 : // - FD_MAP_ERR_AGAIN: A potentially conflicting operation was in
393 : // progress at some point during the call. Try again later (e.g.
394 : // after a random exponential backoff). Unlike insert and
395 : // remove, this call does _not_ require a lock on the chain
396 : // associated with key. As such, AGAIN can only be caused by
397 : // concurrent operations that require a lock on the chain that
398 : // manages key (with similar considerations as insert and remove)
399 : // and this will never interfere with any other concurrent
400 : // operation. Among the many implications, a query will never
401 : // delay a concurrent query and AGAIN will never be returned if
402 : // only concurrent queries are in progress.
403 : //
404 : // - FD_MAP_ERR_CORRUPT: Memory corruption was detected at some
405 : // point during the call.
406 : //
407 : // IMPORTANT SAFETY TIP! THE CALLER SHOULD BE PREPARED TO HANDLE
408 : // ARBITRARY AND/OR INCONSISTENT VALUES FOR ELEMENT FIELDS DURING
409 : // SPECULATIVE PROCESSING. CALLERS SHOULD NOT COMMIT ANY RESULTS
410 : // OF SPECULATIVE PROCESSING UNTIL IT TESTS THE QUERY WAS
411 : // SUCCESSFUL.
412 : //
413 : // The simplest form of speculative processing is to copy the
414 : // element from the element store into a local temporary, test that
415 : // the speculation was valid, and then process the local temporary
416 : // copy at its leisure. Zero copy, more selective copying and/or
417 : // writing speculative results into local tempoaries are more
418 : // advanced examples of speculative processing.
419 : //
420 : // Use mymap_modify to do a blocking (non-speculative) and/or
421 : // adaptive query (just don't actually modify the element).
422 : //
423 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a modify
424 : // try/test, txn try/test or iter_lock/unlock on the same thread.
425 :
426 : int
427 : mymap_query_try( mymap_t const * join,
428 : ulong const * key,
429 : myele_t const * sentinel,
430 : mymap_query_t * query );
431 :
432 : // mymap_query_test tests if an in-progress query is still valid.
433 : // Assumes query is valid, we are still in a query try and chain
434 : // version numbers have not wrapped since we started the try.
435 : // Returns FD_MAP_SUCCESS (0) if the query is still valid and
436 : // FD_MAP_ERR_AGAIN (negative) if a potentially conflicting
437 : // operation was in progress at some point during the try.
438 : //
439 : // IMPORTANT SAFETY TIP! Do not interleave or nest with a modify
440 : // try/test, txn try/test or iter_lock/unlock on the same thread.
441 :
442 : int mymap_query_test( mymap_query_t const * query );
443 :
444 : // mymap_txn_key_max_max() returns the theoretical maximum number
445 : // of keys that can be in a transaction. (Practically unbounded.)
446 :
447 : ulong mymap_txn_key_max_max( void );
448 :
449 : // mymap_txn_{align,footprint} return the alignment and footprint
450 : // required for a mymap_txn_t that can support at least key_max
451 : // keys. align will be an integer power of two. footprint will be
452 : // a multiple of align. Returns 0 if key_max > key_max_max.
453 : //
454 : // mymap_txn_init formats a memory region with the required
455 : // alignment and footprint as a txn_t that can support at least
456 : // key_max keys. ltxn points in the caller's address space to the
457 : // memory region to use. Assumes join is a current local join.
458 : // On success, returns ltxn (txn will have ownership of the memory
459 : // region, txn will be valid with empty speculative and locked key
460 : // sets). The lifetime of the join should be at least the lifetime
461 : // of the txn. On failure (obviously bad inputs), returns NULL (no
462 : // changes).
463 : //
464 : // mymap_txn_fini unformats a memory region as a txn_t and returns
465 : // a pointer to the underlying memory. On success, returns ltxn.
466 : // The caller has ownership of the memory region on return. On
467 : // failure (e.g. NULL input), returns NULL (no changes).
468 :
469 : ulong mymap_txn_align ( void );
470 : ulong mymap_txn_footprint( ulong key_max );
471 : mymap_txn_t * mymap_txn_init ( void * ltxn, mymap_t * join, ulong key_max );
472 : void * mymap_txn_fini ( mymap_txn_t * txn );
473 :
474 : // mymap_txn_add indicates that key may be used in a txn. Assumes
475 : // txn is valid and not in a try and key is valid for duration of
476 : // the call. Retains no interest in key. A zero value for lock
477 : // indicates the key will be operated on speculatively. A non-zero
478 : // value indicates the key will potentially be inserted / removed /
479 : // modified by the transaction. It is okay to have a mixture of
480 : // speculative and locked keys in a transaction. Further, it is
481 : // okay to add the same key multiple times (though not particularly
482 : // efficient), including as a mixture of speculative and locked (if
483 : // _any_ adds of same key are locked, it will be treated as a
484 : // locked key for the txn overall). Returns FD_MAP_SUCCESS (zero)
485 : // on success (txn is valid and not in a try) and FD_MAP_ERR_INVAL
486 : // (negative) on failure (txn is valid and not in a try but key was
487 : // not added). INVAL is only possible when more than key_max adds
488 : // have been done since init.
489 :
490 : int mymap_txn_add( mymap_txn_t * txn, mymap_key_t const * key, int lock );
491 :
492 : // mymap_txn_try returns FD_MAP_SUCCESS (zero) if it is safe to try
493 : // the transaction and FD_MAP_ERR_AGAIN (negative) if the user
494 : // should try again later (e.g. after a random exponential
495 : // backoff). flags is a bit-of of FD_MAP_FLAG flags. If
496 : // FD_MAP_FLAG_BLOCKING is set / clear, this call is allowed / not
497 : // allowed to block the caller. The upper 30-bits of flags can be
498 : // used to provide a seed for random backoffs (but this is up to
499 : // the application and rarely matters in practice). Assumes txn is
500 : // valid and not in a try. On success, txn will be valid and in a
501 : // try. On an failure, txn will be valid and not in a try.
502 : //
503 : // IMPORTANT SAFETY TIP! Do not interleave or nest with modify
504 : // try/test, query try/test or iter_lock/unlock on the same thread.
505 : //
506 : // To better under the restrictions on nesting and interleaving,
507 : // mymap_{insert,remove,query_try,modify_try,query_try} will fail
508 : // with FD_MAP_ERR_AGAIN for any key managed by a chain locked by
509 : // the txn but can succeed for keys on managed by other chains.
510 : // This behavior is unpredictable as it depends on the keys in the
511 : // txn, keys not in the transaction, map seed, map chain count and
512 : // user provided key hash function. Interleaving a query_test,
513 : // modify_test, iter_unlock can be similarly unpredictable. Worse,
514 : // an interleaved modify_test or iter_unlock can muck up the chain
515 : // locks and used by the txn try. Similarly for other cases.
516 : //
517 : // IMPORTANT SAFETY TIP! If some txn keys were speculative, the
518 : // caller should not rely on any reads from the corresponding
519 : // element until the transaction tests successfully. Similar
520 : // considerations as mymap_query_try.
521 :
522 : int mymap_txn_try( mymap_txn_t * txn, int flags );
523 :
524 : // mymap_txn_{insert,remove} behave _identically_ to
525 : // mymap_{insert,remove} from the caller's point of view but
526 : // assumes we are in a txn try and key was added to the txn as
527 : // locked. These will never return FD_MAP_ERR_AGAIN.
528 : //
529 : // Similarly, mymap_txn_query behaves _identically_ to
530 : // mymap_query_try from the caller's point of view but assumes we
531 : // are in a txn try and key was added to txn as either speculative
532 : // or locked. Will never return FD_MAP_ERR_AGAIN.
533 : //
534 : // Likewise, mymap_txn_modify behaves _identically_ to
535 : // mymap_modify_try from the caller's point of view but assumes we
536 : // are in a txn try and key was added to txn as locked. It will
537 : // never return FD_MAP_ERR_AGAIN.
538 : //
539 : // There is no mymap_query_test or mymap_modify_test because these
540 : // are part of the overall txn test.
541 : //
542 : // IMPORTANT SAFETY TIP!
543 : //
544 : // These never should be used outside a txn try.
545 : //
546 : // IMPORTANT SAFETY TIP!
547 : //
548 : // For a speculative txn key, mymap_query can return FD_MAP_ERR_KEY
549 : // and/or FD_MAP_ERR_CORRUPT if there are locked concurrent
550 : // operations on the chain managing key (e.g a concurrent remove of
551 : // a key that happens to be on the same chain). When such
552 : // operations are possible, these errors can be canaries that the
553 : // transaction has already failed (testing the txn is still
554 : // necessary to it "official"). CORRUPT in this situation is most
555 : // likely an "unofficial" failure than memory corruption.
556 : // Similarly, while mymap_txn_query is guaranteed give a pointer to
557 : // an element store element on success, there is no guarantee it
558 : // will be to the correct element, a well formed element (or even
559 : // to the same location if used multiple times in the try). When
560 : // such concurrent operations are not possible (e.g. single
561 : // threaded operation), SUCCESS, KEY, CORRUPT and the element
562 : // pointer returned have their usual interpretations.
563 : //
564 : // TL;DR speculative txn keys are best used for commmon stable
565 : // constant-ish read-only data to minimize concurrent complex
566 : // transactions using these common keys from unnecessarily blocking
567 : // each other.
568 : //
569 : // TL;DR resolve all speculative txn keys to elements at
570 : // transaction start exactly once for sanity.
571 : //
572 : // TL;DR avoid using speculative txn keys at all unless very well
573 : // versed in lockfree programming idioms and gotchas.
574 :
575 : int mymap_txn_insert( mymap_t * join, myele_t * ele );
576 : int mymap_txn_remove( mymap_t * join, ulong const * key, myele_t const * sentinel, mymap_query_t * query );
577 : int mymap_txn_modify( mymap_t * join, ulong const * key, myele_t * sentinel, mymap_query_t * query, int flags );
578 : int mymap_txn_query ( mymap_t const * join, ulong const * key, myele_t const * sentinel, mymap_query_t * query );
579 :
580 : // mymap_txn_test returns FD_MAP_SUCCESS (zero) if the txn try
581 : // succeeded and FD_MAP_AGAIN (negative) if it failed (e.g. the
582 : // test detected a potentially conflicting concurrent operation
583 : // during the try). On success, any results from processing of
584 : // keys marked as speculative can be trusted. On failure, the
585 : // mymap was not changed by the try. Regardless of return value,
586 : // txn will _not_ be in a try on return _but_ will still be valid.
587 : // As such, if a transaction fails, it can be retried (e.g. after a
588 : // random exponential backoff) without needing to recreate it (e.g.
589 : // no need to fini then init/add again). Assumes txn is in a try
590 : // and, for any txn speculative keys, no wrapping of txn version
591 : // numbers has occurred since the try started..
592 : //
593 : // IMPORTANT SAFETY TIP! This is guaranteed to succeed if no keys
594 : // were added to the transaction as speculative.
595 : //
596 : // IMPORTANT SAFETY TIP! Do not interleave or nest with modify
597 : // try/test, query try/test or iter_lock/unlock on the same thread.
598 :
599 : int mymap_txn_test( mymap_txn_t * txn );
600 :
601 : // mymap_iter_lock locks zero or more map chains. Assumes join is
602 : // a current local join. On input, lock_seq[i] for i in
603 : // [0,lock_cnt) gives the set of chains to lock. flags is a bit-or
604 : // of FD_MAP_FLAG flags. If FD_MAP_FLAG_BLOCKING is set / not set,
605 : // this call is allowed / not allowed to block the caller. Assumes
606 : // join, lock_seq and lock_cnt are valid and the caller does not
607 : // already have any of these locks. The upper 30-bits of flags
608 : // can be used to provide a seed for random backoffs (but this is
609 : // up to the application and rarely necessary). In particular,
610 : // lock_seq should contain unique values in [0,chain_cnt), which
611 : // also implies lock_cnt is at most chain_cnt. Retains no interest
612 : // in lock_seq. Returns FD_MAP_SUCCESS (zero) on success and
613 : // FD_MAP_ERR_AGAIN (negative) on failure. On return:
614 : //
615 : // FD_MAP_SUCCESS: lock_seq will be a permutation of the input
616 : // giving the actual order (from oldest to newest) in which the
617 : // locks were acquired. This can be used, for example, to unlock
618 : // in the same order and can be used by the caller to optimize
619 : // the order for iterating over keys to reduce the amount of
620 : // contention with other concurrent operations. If there were no
621 : // potentially conflicting concurrent operations during the call,
622 : // lock_seq will be in the input order.
623 : //
624 : // FD_MAP_ERR_AGAIN: a potentially conflicting operation was in
625 : // progress at some point during the call. lock_seq might have
626 : // been changed (but will still be a permutation of the input).
627 : // The mymap itself wasn't changed by the call.
628 : //
629 : // FD_MAP_ERR_INVAL: join, lock_seq and/or lock_cnt were
630 : // obviously invalid. Same considerations as FD_MAP_ERR_AGAIN.
631 : //
632 : // Guaranteed to succeed if blocking (but will not return to the
633 : // caller until all the requested chains are locked).
634 : //
635 : // IMPORTANT SAFETY TIP! Do not use interleave or nest with modify
636 : // try/test, query try/test or txn try/test on the same thread.
637 :
638 : int
639 : mymap_iter_lock( mymap_t * join,
640 : ulong * lock_seq,
641 : ulong lock_cnt,
642 : int flags );
643 :
644 : // mymap_iter_unlock unlocks chains lock_seq[i] for i in
645 : // [0,lock_cnt) in that order. Assumes join is a current local
646 : // join, lock_seq and lock_cnt are valid (same requirements as
647 : // mymap_iter_lock) and the caller has a lock on those chains.
648 : // Retains no interest in lock_seq. Guaranteed to succeed.
649 : //
650 : // IMPORTANT SAFETY TIP! Do not use interleave or nest with modify
651 : // try/test, query try/test or txn try/test on the same thread.
652 :
653 : void
654 : mymap_iter_unlock( mymap_t * join,
655 : ulong const * lock_seq,
656 : ulong lock_cnt );
657 :
658 : // mymap_iter_chain_idx returns the index of the map chain that
659 : // manages key. Useful for iterating over groups of related keys
660 : // when the map hash function is designed to group all related keys
661 : // onto the same chain.
662 :
663 : ulong
664 : mymap_iter_chain_idx( mymap_t const * join,
665 : ulong const * key );
666 :
667 : // mymap_{iter,iter_done,iter_next,iter_ele,iter_ele_const} iterate
668 : // over a single map chain. Assumes join is a current local join,
669 : // chain_idx is in [0,mymap_chain_cnt(join)) and the caller lock on
670 : // chain idx or the chain is otherwise known to be idle.
671 : //
672 : // These are building blocks for concurrent parallel iteration. As
673 : // the locking and ordering requirements for such an iterator are
674 : // very application specific, no default global iterators are
675 : // provided (i.e. a generic global iterator will need to be so
676 : // conservative on locking than typical application requirements,
677 : // it is practically more mischievious than useful). E.g. a mymap
678 : // snapshot might lock all chains to get the state of the entire
679 : // mymap at a consistent point in time. For each chain (in the
680 : // order given by the lock acquisition), the snapshot would
681 : // serialize all keys on that chain and then unlock it
682 : // incrementally.
683 :
684 : mymap_iter_t mymap_iter ( mymap_t const * join, ulong chain_idx );
685 : mymap_iter_t mymap_iter_done ( mymap_iter_t iter );
686 : mymap_iter_t mymap_iter_next ( mymap_iter_t iter );
687 : myele_t const * mymap_iter_ele_const( mymap_iter_t iter );
688 : myele_t * mymap_iter_ele ( mymap_iter_t iter );
689 :
690 : // mymap_reset removes all elements from the mymap. Caller has
691 : // ownership of all items removed on return. Assumes that join is
692 : // a current local join and the caller has a lock on all map chains
693 : // or the map is otherwise known to be idle.
694 :
695 : void mymap_reset( mymap_t * join );
696 :
697 : // mymap_verify returns FD_MAP_SUCCESS (0) if the join, underlying
698 : // map and underlying element store give a valid mapping of unique
699 : // keys to unique elements in the element store. Assumes that
700 : // caller has a lock on all map chains or the map is otherwise
701 : // known to be idle. Returns FD_MAP_ERR_INVAL (negative) otherwise
702 : // (no changes by this call, logs details).
703 :
704 : int mymap_verify( mymap_t const * join );
705 :
706 : Do this as often as desired in a compilation unit to get different
707 : types of concurrent maps. Options exist for generating library
708 : header prototypes and/or library implementations for concurrent maps
709 : usable across multiple compilation units. Additional options exist
710 : to use index compression, different hashing functions, key comparison
711 : functions, etc as detailed below.
712 :
713 : To better understand the insert/remove/{modify,query}_{try,test}
714 : APIs:
715 :
716 : ... basic insert
717 :
718 : myele_t * ele = ... acquire an unused element from the element store
719 :
720 : ... populate ele appropriately, including
721 :
722 : ele->key = ... key associated with this element
723 :
724 : int err = mymap_insert( join, err, FD_MAP_FLAG_BLOCKING );
725 :
726 : if( FD_UNLIKELY( err ) ) { // Not possible in this example
727 :
728 : ... If err is FD_MAP_ERR_INVAL, ele did not point at an element
729 : ... store element.
730 : ...
731 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
732 : ... conflicting operation in progress on the mymap during the
733 : ... call. We can try again later (e.g. after a random backoff or
734 : ... doing other non-conflicting work).
735 :
736 : } else {
737 :
738 : ... At this point, a mapping from key to the element store
739 : ... element pointed to by ele in our address space was added
740 : ... during the call. ele->key will be stable while in the mymap.
741 : ... Neither ele->key nor ele->next should be modified by the
742 : ... application while in the mymap. The application is free to
743 : ... manage all other fields of the element as desired.
744 :
745 : }
746 :
747 : ... basic remove
748 :
749 : ulong key = ... key to remove
750 :
751 : mymap_query_t query[1];
752 : int err = mymap_remove( join, &key, NULL, query, FD_MAP_FLAG_BLOCKING );
753 : mymap_ele_t * ele = mymap_query_ele( query );
754 :
755 : if( FD_UNLIKELY( err ) ) {
756 :
757 : ... At this point, ele==sentinel==NULL.
758 : ...
759 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
760 : ... point during the remove.
761 : ...
762 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
763 : ... conflicting operation in progress during the remove. We can
764 : ... try again later (e.g. after a random backoff or doing other
765 : ... non-conflicting work). (Not possible in this example.)
766 : ...
767 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
768 : ... at some point during the call. (Usually abortive.)
769 :
770 : } else {
771 :
772 : ... At this point, ele points into the element store (non-NULL),
773 : ... ele->key matches key, key mapped to that element before the
774 : ... remove, and we have ownership of that element.
775 :
776 : ... release ele to the element store
777 :
778 : }
779 :
780 : ... basic modify
781 :
782 : ulong key = ... key to modify
783 :
784 : mymap_query_t query[1];
785 : int err = mymap_modify_try( join, &key, NULL, query, FD_MAP_FLAG_BLOCKING );
786 : mymap_ele_t * ele = mymap_query_ele( query );
787 :
788 : if( FD_UNLIKELY( err ) ) {
789 :
790 : ... At this point, ele==sentinel==NULL.
791 : ...
792 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
793 : ... point during the try.
794 : ...
795 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
796 : ... conflicting operation in progress during the try. We can try
797 : ... again later (e.g. after a random backoff or doing other
798 : ... non-conflicting work). (Not possible in this example.)
799 : ...
800 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
801 : ... at some point during the call. (Usually abortive.)
802 :
803 : } else {
804 :
805 : ... At this point, ele points in our address space to an element
806 : ... store element, ele->key matches key and we are in a modify try
807 : ... such that it is safe to modify fields ele not managed by the
808 : ... mymap.
809 :
810 : ... Modify application managed fields of ele here.
811 :
812 : ... IMPORTANT SAFETY TIP! IF THE USER WANTS TO SUPPORT ROLLING
813 : ... BACK A MODIFICATION AT THIS POINT, THEY CAN DO SO BY SAVING
814 : ... THE ORIGINAL VALUE OF ELE BEFORE MODIFYING ANY FIELDS AND
815 : ... THEN RESTORING IT HERE.
816 :
817 : ... Finish the modification (guaranteed to succeed)
818 :
819 : mymap_modify_test( query );
820 :
821 : ... At this point, the modification is done and we are no
822 : ... longer in a try.
823 :
824 : }
825 :
826 : ... basic speculative query
827 :
828 : ulong key = ... key to query
829 :
830 : mymap_query_t query[1];
831 : int err = mymap_query_try( join, &key, NULL, query );
832 : mymap_ele_t const * ele = mymap_query_ele_const( query );
833 :
834 : if( FD_UNLIKELY( err ) ) {
835 :
836 : ... At this point, ele==sentinel==NULL.
837 : ...
838 : ... If err is FD_MAP_ERR_KEY, key was not in the mymap at some
839 : ... point during the try.
840 : ...
841 : ... If err is FD_MAP_ERR_AGAIN, there was a potentially
842 : ... conflicting operation in progress during the try and we can
843 : ... try again later (e.g. after a random backoff or doing other
844 : ... non-conflicting work).
845 : ...
846 : ... If err is FD_MAP_ERR_CORRUPT, memory corruption was detected
847 : ... during the call. (Usually abortive.)
848 :
849 : } else {
850 :
851 : ... At this point, ele points in our address space to an element
852 : ... in the element store (non-NULL) and ele->key matched key at
853 : ... some point during the try.
854 :
855 : ... Speculatively process ele here.
856 : ...
857 : ... DO NOT TRUST ANY RESULTS OF THIS SPECULATIVE PROCESSING YET.
858 : ... THERE IS NO GUARANTEE YET THAT A CONCURRENT USER HASN'T
859 : ... CHANGED THE MYMAP IN A WAY THAT COULD YIELD ARBITRARY AND
860 : ... INCONSISTENT RESULTS.
861 : ...
862 : ... The simplest and most common form of speculative processing
863 : ... is to copy the needed portions of ele into a local stack
864 : ... temp.
865 : ...
866 : ... Note: concurrent operations could include removing key from
867 : ... the mymap (and maybe multiple cycles of inserting and
868 : ... removing it and then at potentially different element store
869 : ... locations). That's not an issue practically as the ele
870 : ... pointer here will be to an element compatible memory region
871 : ... that will continue to exist regardless and we shouldn't be
872 : ... trusting any query reads yet (the query test will detect if
873 : ... if these can be trusted).
874 : ...
875 : ... Rant: If ele is more complex than plain-old-data, so long ele
876 : ... is using allocators like fd_alloc and fd_wksp for dynamically
877 : ... allocated fields (e.g. not using the giant steaming pile of
878 : ... page based memory virtual memory, operating system, language
879 : ... and standard library fail that is heap based allocation ala
880 : ... malloc/free), concurrent removes are _still_ fine for the
881 : ... exact same reason. That is, the memory that actually backed
882 : ... dynamically allocated fields will still continue to exist
883 : ... post remove ... you know ... just like reality (turns out,
884 : ... surprise, "free" doesn't actually uninstall any DIMMs and
885 : ... malloc/free are the worst possible abstraction for resource
886 : ... management).
887 : ...
888 : ... The concurrent remove case actually demonstrates why fd_alloc
889 : ... / fd_wksp / fd_shmem / etc exist in the first place. Beyond
890 : ... being faster, simpler, more concurrent and more reliable
891 : ... (especially in cases like this), they are more flexible (e.g.
892 : ... sharing and persisting the data structure asynchronously
893 : ... across multiple processes in different address spaces) and
894 : ... more secure (e.g. can easily bounds check memory accesses
895 : ... and then use the memory subsystem to sandbox different
896 : ... components from touching memory they shouldn't, actually
897 : ... using a modern virtual memory subsystem for something useful
898 : ... for a change instead of bending over backwards to provide
899 : ... exactly the wrong abstraction of the real world). Common
900 : ... hardware and software practices have turned computers into an
901 : ... unreliable and insecure Tower of Babel. Had virtual memory
902 : ... been better abstracted and better implemented all levels of
903 : ... the stack, life would be much easier (and things like fast
904 : ... persistent memories might have seen a lot more commerical
905 : ... success). In the meantime, dispelling the magical thinking
906 : ... encourged by the conventional abstractions, the key lessons
907 : ... are:
908 : ...
909 : ... * Friends don't let friends malloc.
910 : ... * Lockfree is not a synonym for garbage collection.
911 : ... * Real world computers aren't infinite tape Turing machines.
912 : ... * Real world memory doesn't magically disappear.
913 :
914 : ... At this point, we are done with speculative processing (or we
915 : ... don't want to do any more speculative processing if the try
916 : ... has already failed).
917 :
918 : err = mymap_query_test( query );
919 : if( FD_UNLKELY( err ) ) {
920 :
921 : ... At this point, err will be FD_MAP_ERR_AGAIN and a
922 : ... potentially conflicting operation in the try was detected
923 : ... by the test.
924 :
925 : ... Application specific handling here (e.g. try again after a
926 : ... random backoff or doing other non-conflicting work).
927 :
928 : } else {
929 :
930 : ... At this point, the results of the speculation thus far can
931 : ... be trusted and can be considered to have been computed at
932 : ... some point in time between try and test.
933 :
934 : }
935 : }
936 :
937 : To better understand the txn API:
938 :
939 : ... allocate a txn
940 :
941 : ulong align = mymap_txn_align();
942 : ulong footprint = mymap_txn_footprint( key_max );
943 : void * ltxn = ... allocate align/footprint local scratch memory
944 : mymap_txn_t * txn = mymap_txn_init( ltxn, join, key_max );
945 :
946 : ... add at most key_max keys to the transaction as locked
947 :
948 : for( ... all keys involved in the transaction ... ) mymap_txn_add( txn, key, 1 ); // guaranteed to succeed for this example
949 :
950 : ... try to do the transaction
951 :
952 : int err = mymap_txn_try( txn, FD_MAP_FLAG_BLOCKING );
953 :
954 : if( FD_UNLIKELY( err ) ) { // Not possible in this example
955 :
956 : ... At this point, err is FD_MAP_ERR_AGAIN and there was a
957 : ... potentially conflicting operation in progress during the try.
958 : ... We can should try again later (e.g. after a random backoff or
959 : ... doing other non-conflicting work). We are no longer in a try
960 : ... but we could reuse the txn as-is to retry.
961 :
962 : } else {
963 :
964 : ... At this point, it is safe to try the transaction.
965 :
966 : ... Do the transaction here. Since all keys are locked in this
967 : ... example, we don't need to worry about any changing behind our
968 : ... back (i.e. the try is guaranteed to succeed).
969 :
970 : ... Like modify, if we wants to rollback the transaction at this
971 : ... point, we should save the state of all locked keys involved
972 : ... to local temporaries before we do the transaction and then
973 : ... restore the state here.
974 :
975 : ... Finish the try (guaranteed to succeed for this example)
976 :
977 : mymap_txn_test( txn );
978 :
979 : ... At this point, we are no longer in a txn try but the txn is
980 : ... valid such that we could reuse the txn as-is for another
981 : ... transaction involving the same keys.
982 :
983 : mymap_txn_fini( txn );
984 :
985 : ... At this point, txn is no longer valid and we have ownership of
986 : ... the ltxn memory region
987 :
988 : ... free ltxn
989 :
990 : }
991 :
992 : To better understand the iter API:
993 :
994 : ... basic mymap element snapshot (i.e. iterate over all elements in
995 : ... the mymap at a globally consistent point in time while
996 : ... minimizing contension with other concurrent operations)
997 :
998 : ulong lock_cnt = mymap_chain_cnt( join );
999 :
1000 : ulong * lock_seq = ... allocate lock_cnt ulong scratch ...
1001 :
1002 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) lock_seq[ lock_idx ] = lock_idx;
1003 :
1004 : mymap_iter_lock( join, lock_seq, lock_cnt, FD_MAP_FLAG_BLOCKING );
1005 :
1006 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) {
1007 : ulong chain_idx = lock_seq[ lock_idx ]; // process chains in the order they were locked
1008 :
1009 : for( mymap_iter_t iter = mymap_iter( join, chain_idx ); !mymap_iter_done( iter ); iter = mymap_iter_next( iter ) ) {
1010 : myele_t const * ele = mymap_iter_ele_const( iter );
1011 :
1012 : ... append ele to snapshot here (ele will be appended in
1013 : ... no particular order for this example). Note that, as
1014 : ... the caller has a lock on the chain that manages ele,
1015 : ... the caller is free to modify the fields of ele it
1016 : ... manages.
1017 :
1018 : }
1019 :
1020 : mymap_iter_unlock( lock_seq + lock_idx, 1UL ); // unlock incrementally
1021 : }
1022 :
1023 : ... free lock_seq here
1024 : */
1025 :
1026 : /* FIXME: consider adding a parallel verify that operates on a
1027 : locked/idle subset of the chains. */
1028 :
1029 : /* MAP_NAME gives the API prefix to use for map */
1030 :
1031 : #ifndef MAP_NAME
1032 : #error "Define MAP_NAME"
1033 : #endif
1034 :
1035 : /* MAP_ELE_T is the map element type */
1036 :
1037 : #ifndef MAP_ELE_T
1038 : #error "Define MAP_ELE_T"
1039 : #endif
1040 :
1041 : /* MAP_KEY_T is the map key type */
1042 :
1043 : #ifndef MAP_KEY_T
1044 : #define MAP_KEY_T ulong
1045 : #endif
1046 :
1047 : /* MAP_KEY is the MAP_ELE_T key field */
1048 :
1049 : #ifndef MAP_KEY
1050 : #define MAP_KEY key
1051 : #endif
1052 :
1053 : /* MAP_IDX_T is the map next index type. Should be a primitive unsigned
1054 : integer type large enough to represent the largest capacity element
1055 : store of interest. (E.g. if ushort, the maximum element store
1056 : capacity compatible with the map will be 65535 elements.) */
1057 :
1058 : #ifndef MAP_IDX_T
1059 : #define MAP_IDX_T ulong
1060 : #endif
1061 :
1062 : /* MAP_NEXT is the MAP_ELE_T next field */
1063 :
1064 : #ifndef MAP_NEXT
1065 : #define MAP_NEXT next
1066 : #endif
1067 :
1068 : /* MAP_KEY_EQ returns 0/1 if *k0 is the same/different as *k1 */
1069 :
1070 : #ifndef MAP_KEY_EQ
1071 22516869 : #define MAP_KEY_EQ(k0,k1) ((*(k0))==(*(k1)))
1072 : #endif
1073 :
1074 : /* MAP_KEY_HASH returns a random mapping of *key into ulong. The
1075 : mapping is parameterized by the 64-bit ulong seed. */
1076 :
1077 : #ifndef MAP_KEY_HASH
1078 : #define MAP_KEY_HASH(key,seed) fd_ulong_hash( (*(key)) ^ (seed) )
1079 : #endif
1080 :
1081 : /* If MAP_MEMOIZE is defined to non-zero, elements have a field that
1082 : can be used while in the map to hold the MAP_KEY_HASH for an
1083 : element's key. This is useful for accelerating user code that might
1084 : need a hash and various map operations. */
1085 :
1086 : #ifndef MAP_MEMOIZE
1087 : #define MAP_MEMOIZE 0
1088 : #endif
1089 :
1090 : /* If MAP_MEMOIZE is non-zero, MAP_MEMO is the memo element field.
1091 : Should be a ulong. Like MAP_KEY and MAP_NEXT, when an element is in
1092 : the map, this value is managed by the map and will contain the
1093 : MAP_KEY_HASH of the element's key and the map's seed. */
1094 :
1095 : #ifndef MAP_MEMO
1096 : #define MAP_MEMO memo
1097 : #endif
1098 :
1099 : /* If MAP_MEMOIZE is defined to non-zero, a non-zero MAP_KEY_EQ_IS_SLOW
1100 : indicates the MAP_MEMO field should be used to accelerate MAP_KEY_EQ
1101 : operations. This is useful when MAP_KEY_EQ is non-trivial (e.g.
1102 : variable length string compare, large buffer compares, etc). */
1103 :
1104 : #ifndef MAP_KEY_EQ_IS_SLOW
1105 : #define MAP_KEY_EQ_IS_SLOW 0
1106 : #endif
1107 :
1108 : /* MAP_CNT_WIDTH gives the number of bits in a ulong to reserve for
1109 : encoding the count in a versioned count. Element store capacity
1110 : should be representable in this width. Default is 43 bits (e.g.
1111 : enough to support a ~1 PiB element store of 128 byte elements). The
1112 : versioning width will be 64-MAP_CNT_WIDTH. Since the least
1113 : significant bit of the version is used to indicate locked, versioning
1114 : width should be at least 2 and ideally as large as possible. With
1115 : the 43 default, a chain's version number will not be reused until
1116 : 2^20 individual operations on a chain have been done. Version
1117 : numbers only impact speculative operations. If not using speculative
1118 : operations, version width can be reduced to the minimum. */
1119 :
1120 : #ifndef MAP_CNT_WIDTH
1121 91250517 : #define MAP_CNT_WIDTH (43)
1122 : #endif
1123 :
1124 : /* MAP_ALIGN gives the alignment required for the map shared memory.
1125 : Default is 128 for double cache line alignment. Should be at least
1126 : ulong alignment. */
1127 :
1128 : #ifndef MAP_ALIGN
1129 : #define MAP_ALIGN (128UL)
1130 : #endif
1131 :
1132 : /* MAP_MAGIC is the shared memory magic number to aid in persistent
1133 : and/or interprocess usage. */
1134 :
1135 : #ifndef MAP_MAGIC
1136 3 : #define MAP_MAGIC (0xf17eda2c37c3a900UL) /* firedancer cmap version 0 */
1137 : #endif
1138 :
1139 : /* MAP_IMPL_STYLE controls what to generate:
1140 : 0 - header only library
1141 : 1 - library header declaration
1142 : 2 - library implementation */
1143 :
1144 : #ifndef MAP_IMPL_STYLE
1145 : #define MAP_IMPL_STYLE 0
1146 : #endif
1147 :
1148 : /* Commom map error codes (FIXME: probably should get around to making
1149 : unified error codes, error strings and/or flags across util at least
1150 : so we don't have to do this in the generator itself) */
1151 :
1152 31813848 : #define FD_MAP_SUCCESS (0)
1153 12422859 : #define FD_MAP_ERR_INVAL (-1)
1154 7488210 : #define FD_MAP_ERR_AGAIN (-2)
1155 3 : #define FD_MAP_ERR_CORRUPT (-3)
1156 : //#define FD_MAP_ERR_EMPTY (-4)
1157 : //#define FD_MAP_ERR_FULL (-5)
1158 5635110 : #define FD_MAP_ERR_KEY (-6)
1159 :
1160 3749418 : #define FD_MAP_FLAG_BLOCKING (1)
1161 5136612 : #define FD_MAP_FLAG_ADAPTIVE (2)
1162 :
1163 : /* Implementation *****************************************************/
1164 :
1165 39328212 : #define MAP_VER_WIDTH (64-MAP_CNT_WIDTH)
1166 :
1167 : #if MAP_IMPL_STYLE==0 /* local use only */
1168 : #define MAP_STATIC FD_FN_UNUSED static
1169 : #else /* library header and/or implementation */
1170 : #define MAP_STATIC
1171 : #endif
1172 :
1173 296464467 : #define MAP_(n) FD_EXPAND_THEN_CONCAT3(MAP_NAME,_,n)
1174 :
1175 : #if MAP_IMPL_STYLE!=2 /* need header */
1176 :
1177 : #include "../bits/fd_bits.h"
1178 :
1179 : /* Note: we don't overalign chain metadata to reduce on map metadata
1180 : footprint requirements. Though this can cause cache false sharing
1181 : for concurrent operations on different keys that are managed
1182 : different chains that share a cache line, this risk can be controlled
1183 : by overprovisioning chain_cnt. That is, for a fixed map metadata
1184 : footprint, this false sharing seems preferable to using fewer chains
1185 : as that would lead to an equivalent increase in the amount of locking
1186 : necessary to avoid potential conflicts for keys managed by the same
1187 : chain (i.e. the former makes good use of the padding that would be
1188 : otherwise wasted if overaligning this). */
1189 :
1190 : struct MAP_(shmem_private_chain) {
1191 : ulong ver_cnt; /* versioned count, cnt is in [0,ele_max] in lsb, ver in msb, odd: chain locked, even: chain unlocked */
1192 : MAP_IDX_T head_cidx; /* compressed index of the first element on the chain */
1193 : };
1194 :
1195 : typedef struct MAP_(shmem_private_chain) MAP_(shmem_private_chain_t);
1196 :
1197 : struct __attribute__((aligned(MAP_ALIGN))) MAP_(shmem_private) {
1198 :
1199 : /* FIXME: consider having a memo of the chain in which an element is
1200 : stored and/or using doubly linked list chains (maybe with the xor
1201 : trick)? We could do faster variants of remove and maybe amortize
1202 : some hash calcs. */
1203 :
1204 : ulong magic; /* == MAP_MAGIC */
1205 : ulong seed; /* Hash seed, arbitrary */
1206 : ulong chain_cnt; /* Number of chains, positive integer power-of-two */
1207 :
1208 : /* Padding to MAP_ALIGN alignment here */
1209 :
1210 : /* MAP_(shmem_private_chain_t) chain[ chain_cnt ] here */
1211 : };
1212 :
1213 : typedef struct MAP_(shmem_private) MAP_(shmem_t);
1214 :
1215 : struct MAP_(private) {
1216 : MAP_(shmem_t) * map; /* Location of the map in the local address space */
1217 : MAP_ELE_T * ele; /* Location of the element store in the local address space */
1218 : ulong ele_max; /* Capacity of the element store, in [0,ele_max_max] */
1219 : };
1220 :
1221 : typedef struct MAP_(private) MAP_(t);
1222 :
1223 : struct MAP_(query_private) {
1224 : MAP_ELE_T * ele; /* Points to the operation element in the local address space (or a sentinel) */
1225 : MAP_(shmem_private_chain_t) * chain; /* Points to the chain that manages element in the local address space */
1226 : ulong ver_cnt; /* Versioned count of the chain at operation try */
1227 : };
1228 :
1229 : typedef struct MAP_(query_private) MAP_(query_t);
1230 :
1231 : struct MAP_(txn_private_info) {
1232 : MAP_(shmem_private_chain_t) * chain; /* Points to the chain that manages one or more txn keys (set by txn_add) */
1233 : ulong ver_cnt; /* Versioned count of the chain at the transaction start (set by txn_try) */
1234 : };
1235 :
1236 : typedef struct MAP_(txn_private_info) MAP_(txn_private_info_t);
1237 :
1238 : struct MAP_(txn_private) {
1239 : MAP_(shmem_t) * map; /* Map used by this transaction */
1240 : ulong info_max; /* Number of chains possible for this transaction */
1241 : ulong lock_cnt; /* Number of chains in the locked set, in [0,info_max] */
1242 : ulong spec_cnt; /* Number of chains in the speculative set, in [0,info_max], lock_cnt + spec_cnt <= info_max */
1243 :
1244 : /* MAP_(txn_private_info_t) info[ info_max ] here (obligatory sigh
1245 : about lagging C++ support for 0 sized structure array footers).
1246 :
1247 : The locked set is at indices [0,lock_cnt), lock_cnt infos.
1248 : The free set is at indices [lock_cnt,info_max-spec_cnt), free_cnt = info_max-spec_cnt-lock_cnt infos.
1249 : The speculative set is at indices [info_max-spec_cnt,info_max), spec_cnt infos.
1250 :
1251 : A chain will appear at most once in a set. A chain will not appear
1252 : in both sets.
1253 :
1254 : Note that it would be trivial to make this shared memory persistent
1255 : though not obvious if that would be useful. (A precomputed
1256 : template for a common transaction done by multiple threads is a
1257 : possibility but the versions would still need to be local.) */
1258 :
1259 : };
1260 :
1261 : typedef struct MAP_(txn_private) MAP_(txn_t);
1262 :
1263 : struct MAP_(iter_private) {
1264 : MAP_ELE_T const * ele; /* Pointer to the element store in the caller's address space */
1265 : ulong ele_idx; /* Current iteration element store index (or the null index) */
1266 : };
1267 :
1268 : typedef struct MAP_(iter_private) MAP_(iter_t);
1269 :
1270 : FD_PROTOTYPES_BEGIN
1271 :
1272 : /* map_private_vcnt pack ver and cnt into a versioned cnt. ver is
1273 : masked to fit into MAP_VER_WIDTH bits. cnt is assumed in
1274 : [0,ele_max_max].
1275 :
1276 : map_private_vcnt_{ver,cnt} extract the {version,index} from a
1277 : versioned index. Return will fit into {MAP_VER_WIDTH,MAP_CNT_WIDTH}
1278 : bits. */
1279 :
1280 7024260 : FD_FN_CONST static inline ulong MAP_(private_vcnt)( ulong ver, ulong cnt ) { return (ver<<MAP_CNT_WIDTH) | cnt; }
1281 :
1282 8901657 : FD_FN_CONST static inline ulong MAP_(private_vcnt_ver)( ulong ver_cnt ) { return ver_cnt >> MAP_CNT_WIDTH; }
1283 19664106 : FD_FN_CONST static inline ulong MAP_(private_vcnt_cnt)( ulong ver_cnt ) { return (ver_cnt << MAP_VER_WIDTH) >> MAP_VER_WIDTH; }
1284 :
1285 : /* map_shmem_private_chain returns the location in the caller's address
1286 : space of the map chain metadata associated with hash. The chain
1287 : associated with hash 0 is the first chain. Assumes map is valid.
1288 : map_shmem_private_chain_const is a const correct version. */
1289 :
1290 : FD_FN_PURE static inline MAP_(shmem_private_chain_t) *
1291 : MAP_(shmem_private_chain)( MAP_(shmem_t) * map,
1292 29974383 : ulong hash ) {
1293 29974383 : return (MAP_(shmem_private_chain_t) *)(map+1) + (hash & (map->chain_cnt-1UL));
1294 29974383 : }
1295 :
1296 : FD_FN_PURE static inline MAP_(shmem_private_chain_t) const *
1297 : MAP_(shmem_private_chain_const)( MAP_(shmem_t) const * map,
1298 7495170 : ulong hash ) {
1299 7495170 : return (MAP_(shmem_private_chain_t) const *)(map+1) + (hash & (map->chain_cnt-1UL));
1300 7495170 : }
1301 :
1302 : /* map_txn_private_info returns the location in the caller's address
1303 : space of the txn info. Assumes txn is valid. */
1304 :
1305 : FD_FN_CONST static inline MAP_(txn_private_info_t) *
1306 12166254 : MAP_(txn_private_info)( MAP_(txn_t) * txn ) {
1307 12166254 : return (MAP_(txn_private_info_t) *)(txn+1);
1308 12166254 : }
1309 :
1310 : /* map_private_{cidx,idx} compress / decompress 64-bit in-register
1311 : indices to/from their in-memory representations. */
1312 :
1313 5265582 : FD_FN_CONST static inline MAP_IDX_T MAP_(private_cidx)( ulong idx ) { return (MAP_IDX_T)idx; }
1314 35081052 : FD_FN_CONST static inline ulong MAP_(private_idx) ( MAP_IDX_T cidx ) { return (ulong) cidx; }
1315 :
1316 : /* map_private_idx_null returns the element storage index that
1317 : represents NULL. */
1318 :
1319 0 : FD_FN_CONST static inline ulong MAP_(private_idx_null)( void ) { return (ulong)(MAP_IDX_T)(~0UL); }
1320 :
1321 : /* map_private_idx_is_null returns 1 if idx is the NULL map index and 0
1322 : otherwise. */
1323 :
1324 13277472 : FD_FN_CONST static inline int MAP_(private_idx_is_null)( ulong idx ) { return idx==(ulong)(MAP_IDX_T)(~0UL); }
1325 :
1326 : /* map_private_fetch_and_or does a ulong FD_ATOMIC_FETCH_AND_OR when the
1327 : target has FD_HAS_ATOMIC and emulates it when not. When emulated,
1328 : the map will not be safe to use concurrently but will still work. */
1329 :
1330 : static inline ulong
1331 : MAP_(private_fetch_and_or)( ulong volatile * p,
1332 17998194 : ulong b ) {
1333 17998194 : ulong x;
1334 17998194 : FD_COMPILER_MFENCE();
1335 17998194 : # if FD_HAS_ATOMIC
1336 17998194 : x = FD_ATOMIC_FETCH_AND_OR( p, b );
1337 : # else
1338 : x = *p;
1339 : *p = x | b;
1340 : # endif
1341 17998194 : FD_COMPILER_MFENCE();
1342 17998194 : return x;
1343 17998194 : }
1344 :
1345 0 : FD_FN_CONST static inline ulong MAP_(ele_max_max)( void ) { return (ulong)(MAP_IDX_T)(ULONG_MAX >> MAP_VER_WIDTH); }
1346 :
1347 : FD_FN_CONST static inline ulong
1348 0 : MAP_(chain_max)( void ) {
1349 0 : return fd_ulong_pow2_dn( (ULONG_MAX - sizeof(MAP_(shmem_t)) - alignof(MAP_(shmem_t)) + 1UL) /
1350 0 : sizeof(MAP_(shmem_private_chain_t)) );
1351 0 : }
1352 :
1353 : FD_FN_CONST static inline ulong
1354 3000015 : MAP_(chain_cnt_est)( ulong ele_max_est ) {
1355 :
1356 : /* Clamp to be in [1,ele_max_max] (as ele_max_est 0 is degenerate and
1357 : as the map is guaranteed to hold at most ele_max_max keys). */
1358 :
1359 3000015 : ele_max_est = fd_ulong_min( fd_ulong_max( ele_max_est, 1UL ), MAP_(ele_max_max)() );
1360 :
1361 : /* Compute the number of chains as the power of 2 that makes the
1362 : average chain length between ~1 and ~2 when ele_max_est are stored
1363 : in the map and then clamp to the chain max. */
1364 :
1365 3000015 : ulong chain_min = (ele_max_est>>1) + (ele_max_est&1UL); /* chain_min = ceil(ele_max_est/2), in [1,2^63], computed w/o overflow */
1366 3000015 : ulong chain_cnt = fd_ulong_pow2_up( chain_min ); /* Power of 2 in [1,2^63] */
1367 :
1368 3000015 : return fd_ulong_min( chain_cnt, MAP_(chain_max)() );
1369 3000015 : }
1370 :
1371 0 : FD_FN_CONST static inline ulong MAP_(align)( void ) { return alignof(MAP_(shmem_t)); }
1372 :
1373 : FD_FN_CONST static inline ulong
1374 18 : MAP_(footprint)( ulong chain_cnt ) {
1375 18 : if( !(fd_ulong_is_pow2( chain_cnt ) & (chain_cnt<=MAP_(chain_max)())) ) return 0UL;
1376 : /* Note: assumes shmem_t and shmem_private_chain_t have compatible alignments */
1377 6 : return fd_ulong_align_up( sizeof(MAP_(shmem_t)) + chain_cnt*sizeof(MAP_(shmem_private_chain_t)),
1378 6 : alignof(MAP_(shmem_t)) ); /* no overflow */
1379 18 : }
1380 :
1381 3 : FD_FN_PURE static inline ulong MAP_(seed) ( MAP_(t) const * join ) { return join->map->seed; }
1382 6 : FD_FN_PURE static inline ulong MAP_(chain_cnt)( MAP_(t) const * join ) { return join->map->chain_cnt; }
1383 :
1384 3 : FD_FN_PURE static inline void const * MAP_(shmap_const)( MAP_(t) const * join ) { return join->map; }
1385 3 : FD_FN_PURE static inline void const * MAP_(shele_const)( MAP_(t) const * join ) { return join->ele; }
1386 3 : FD_FN_PURE static inline ulong MAP_(ele_max) ( MAP_(t) const * join ) { return join->ele_max; }
1387 :
1388 3 : FD_FN_PURE static inline void * MAP_(shmap)( MAP_(t) * join ) { return join->map; }
1389 3 : FD_FN_PURE static inline void * MAP_(shele)( MAP_(t) * join ) { return join->ele; }
1390 :
1391 : FD_FN_PURE static inline int
1392 : MAP_(key_eq)( MAP_KEY_T const * k0,
1393 22516869 : MAP_KEY_T const * k1 ) {
1394 22516869 : return !!(MAP_KEY_EQ( (k0), (k1) ));
1395 22516869 : }
1396 :
1397 : FD_FN_PURE static inline ulong
1398 : MAP_(key_hash)( MAP_KEY_T const * key,
1399 31086894 : ulong seed ) {
1400 31086894 : return (MAP_KEY_HASH( (key), (seed) ));
1401 31086894 : }
1402 :
1403 : static inline void
1404 : MAP_(backoff)( ulong scale,
1405 0 : ulong seed ) {
1406 0 : ulong r = (ulong)(uint)fd_ulong_hash( seed ^ (((ulong)fd_tickcount())<<32) );
1407 0 : for( ulong rem=(scale*r)>>48; rem; rem-- ) FD_SPIN_PAUSE();
1408 0 : }
1409 :
1410 5382963 : FD_FN_PURE static inline MAP_ELE_T const * MAP_(query_ele_const)( MAP_(query_t) const * query ) { return query->ele; }
1411 10769013 : FD_FN_PURE static inline MAP_ELE_T * MAP_(query_ele )( MAP_(query_t) * query ) { return query->ele; }
1412 :
1413 : static inline int
1414 1871076 : MAP_(modify_test)( MAP_(query_t) * query ) {
1415 1871076 : MAP_(shmem_private_chain_t) * chain = query->chain;
1416 1871076 : ulong ver_cnt = query->ver_cnt;
1417 1871076 : FD_COMPILER_MFENCE();
1418 1871076 : chain->ver_cnt = ver_cnt + (2UL<<MAP_CNT_WIDTH);
1419 1871076 : FD_COMPILER_MFENCE();
1420 1871076 : return FD_MAP_SUCCESS;
1421 1871076 : }
1422 :
1423 : static inline int
1424 1869663 : MAP_(query_test)( MAP_(query_t) const * query ) {
1425 1869663 : MAP_(shmem_private_chain_t) const * chain = query->chain;
1426 1869663 : ulong ver_cnt = query->ver_cnt;
1427 1869663 : FD_COMPILER_MFENCE();
1428 1869663 : ulong _ver_cnt = chain->ver_cnt;
1429 1869663 : FD_COMPILER_MFENCE();
1430 1869663 : return fd_int_if( ver_cnt==_ver_cnt, FD_MAP_SUCCESS, FD_MAP_ERR_AGAIN );
1431 1869663 : }
1432 :
1433 : FD_FN_CONST static inline ulong
1434 0 : MAP_(txn_key_max_max)( void ) {
1435 0 : return (ULONG_MAX - sizeof(MAP_(txn_t)) - alignof(MAP_(txn_t)) + 1UL) / sizeof( MAP_(txn_private_info_t) );
1436 0 : }
1437 :
1438 0 : FD_FN_CONST static inline ulong MAP_(txn_align)( void ) { return alignof(MAP_(txn_t)); }
1439 :
1440 : FD_FN_CONST static inline ulong
1441 6 : MAP_(txn_footprint)( ulong key_max ) {
1442 6 : if( key_max > MAP_(txn_key_max_max)() ) return 0UL;
1443 3 : return sizeof(MAP_(txn_t)) + key_max*sizeof(MAP_(txn_private_info_t)); /* no overflow */
1444 6 : }
1445 :
1446 : static inline MAP_(txn_t) *
1447 : MAP_(txn_init)( void * mem,
1448 : MAP_(t) * join,
1449 3743700 : ulong key_max ) {
1450 3743700 : MAP_(txn_t) * txn = (MAP_(txn_t) *)mem;
1451 3743700 : if( FD_UNLIKELY( (!mem ) |
1452 3743700 : (!fd_ulong_is_aligned( (ulong)mem, MAP_(txn_align)() )) |
1453 3743700 : (!join ) |
1454 3743700 : (key_max > MAP_(txn_key_max_max)() ) ) ) return NULL;
1455 3743688 : txn->map = join->map;
1456 3743688 : txn->info_max = key_max; /* Worst case number of chains impacted by this transaction */
1457 3743688 : txn->lock_cnt = 0UL;
1458 3743688 : txn->spec_cnt = 0UL;
1459 3743688 : return txn;
1460 3743700 : }
1461 :
1462 3743691 : FD_FN_CONST static inline void * MAP_(txn_fini)( MAP_(txn_t) * txn ) { return (void *)txn; }
1463 :
1464 : FD_FN_PURE static inline ulong
1465 : MAP_(iter_chain_idx)( MAP_(t) const * join,
1466 0 : MAP_KEY_T const * key ) {
1467 0 : MAP_(shmem_t) const * map = join->map;
1468 0 : return MAP_(key_hash)( key, map->seed ) & (map->chain_cnt-1UL);
1469 0 : }
1470 :
1471 : FD_FN_PURE static inline MAP_(iter_t)
1472 : MAP_(iter)( MAP_(t) const * join,
1473 3752100 : ulong chain_idx ) {
1474 : /* FIXME: consider iter = {NULL,NULL} if chain_idx >= join->map->chain_cnt? */
1475 3752100 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( join->map, 0UL ) + chain_idx;
1476 3752100 : MAP_(iter_t) iter;
1477 3752100 : iter.ele = join->ele;
1478 3752100 : iter.ele_idx = MAP_(private_idx)( chain->head_cidx );
1479 3752100 : return iter;
1480 3752100 : }
1481 :
1482 7640829 : FD_FN_CONST static inline int MAP_(iter_done)( MAP_(iter_t) iter ) { return MAP_(private_idx_is_null)( iter.ele_idx ); }
1483 :
1484 : FD_FN_PURE static inline MAP_(iter_t)
1485 3888729 : MAP_(iter_next)( MAP_(iter_t) iter ) {
1486 3888729 : MAP_ELE_T const * ele = iter.ele + iter.ele_idx;
1487 3888729 : iter.ele_idx = MAP_(private_idx)( ele->MAP_NEXT );
1488 3888729 : return iter;
1489 3888729 : }
1490 :
1491 : FD_FN_CONST static inline MAP_ELE_T *
1492 0 : MAP_(iter_ele)( MAP_(iter_t) iter ) {
1493 0 : return (MAP_ELE_T *)(iter.ele + iter.ele_idx);
1494 0 : }
1495 :
1496 : FD_FN_CONST static inline MAP_ELE_T const *
1497 3888729 : MAP_(iter_ele_const)( MAP_(iter_t) iter ) {
1498 3888729 : return iter.ele + iter.ele_idx;
1499 3888729 : }
1500 :
1501 : MAP_STATIC void * MAP_(new) ( void * shmem, ulong chain_cnt, ulong seed );
1502 : MAP_STATIC MAP_(t) * MAP_(join) ( void * ljoin, void * shmap, void * shele, ulong ele_max );
1503 : MAP_STATIC void * MAP_(leave) ( MAP_(t) * join );
1504 : MAP_STATIC void * MAP_(delete)( void * shmap );
1505 :
1506 : MAP_STATIC int MAP_(insert)( MAP_(t) * join, MAP_ELE_T * ele, int flags );
1507 :
1508 : MAP_STATIC int
1509 : MAP_(remove)( MAP_(t) * join,
1510 : MAP_KEY_T const * key,
1511 : MAP_ELE_T const * sentinel,
1512 : MAP_(query_t) * query,
1513 : int flags );
1514 :
1515 : MAP_STATIC int
1516 : MAP_(modify_try)( MAP_(t) * join,
1517 : MAP_KEY_T const * key,
1518 : MAP_ELE_T * sentinel,
1519 : MAP_(query_t) * query,
1520 : int flags );
1521 :
1522 : MAP_STATIC int
1523 : MAP_(query_try)( MAP_(t) const * join,
1524 : MAP_KEY_T const * key,
1525 : MAP_ELE_T const * sentinel,
1526 : MAP_(query_t) * query );
1527 :
1528 : MAP_STATIC int MAP_(txn_add)( MAP_(txn_t) * txn, MAP_KEY_T const * key, int lock );
1529 :
1530 : MAP_STATIC int MAP_(txn_try)( MAP_(txn_t) * txn, int flags );
1531 :
1532 : MAP_STATIC int
1533 : MAP_(txn_modify)( MAP_(t) * join,
1534 : MAP_KEY_T const * key,
1535 : MAP_ELE_T * sentinel,
1536 : MAP_(query_t) * query,
1537 : int flags );
1538 :
1539 : static inline int
1540 : MAP_(txn_query)( MAP_(t) const * join,
1541 : MAP_KEY_T const * key,
1542 : MAP_ELE_T const * sentinel,
1543 1639896 : MAP_(query_t) * query ) {
1544 1639896 : return MAP_(txn_modify)( (MAP_(t) *)join, key, (MAP_ELE_T *)sentinel, query, 0 );
1545 1639896 : }
1546 :
1547 : MAP_STATIC int MAP_(txn_test)( MAP_(txn_t) * txn );
1548 :
1549 : MAP_STATIC int
1550 : MAP_(iter_lock)( MAP_(t) * join,
1551 : ulong * lock_seq,
1552 : ulong lock_cnt,
1553 : int flags );
1554 :
1555 : MAP_STATIC void
1556 : MAP_(iter_unlock)( MAP_(t) * join,
1557 : ulong const * lock_seq,
1558 : ulong lock_cnt );
1559 :
1560 : MAP_STATIC void MAP_(reset)( MAP_(t) * join );
1561 :
1562 : MAP_STATIC FD_FN_PURE int MAP_(verify)( MAP_(t) const * join );
1563 :
1564 : MAP_STATIC FD_FN_CONST char const * MAP_(strerror)( int err );
1565 :
1566 : FD_PROTOTYPES_END
1567 :
1568 : #endif
1569 :
1570 : #if MAP_IMPL_STYLE!=1 /* need implementations (assumes header already included) */
1571 :
1572 : #include "../log/fd_log.h" /* Used by constructors and verify (FIXME: Consider making a compile time option) */
1573 :
1574 : /* MAP_CRIT_{BEGIN,BLOCKED,END} handle virtually all atomic boilerplate
1575 : for operations that require modifying a map chain's structure or
1576 : elements managed by that chain. Usage:
1577 :
1578 : MAP_CRIT( chain, blocking ) {
1579 :
1580 : ... At this point, we have a lock on the chain and the "ulong"
1581 : ... ver_cnt contains the chain's versioned count just before we
1582 : ... took the lock. The "int" retain_lock is zero.
1583 : ...
1584 : ... Do locked operations on the map chain here
1585 : ...
1586 : ... On exiting this block, if retain_lock is non-zero, we resume
1587 : ... execution immediately after MAP_CRIT_END. This is used for
1588 : ... "try" style operations where a "test" operation is done to
1589 : ... unlock the chain after the caller does their try/test work.
1590 : ... Otherwise, we will update the version number, unlock the
1591 : ... chain and then resume execution after MAP_CRIT_END.
1592 : ...
1593 : ... Because compiler memory fences are done just before entering
1594 : ... and after exiting this block, there is typically no need to
1595 : ... use any atomics / volatile / fencing here. That is, we can
1596 : ... just write "normal" code on platforms where writes to memory
1597 : ... become visible to other threads in the order in which they
1598 : ... were issued in the machine code (e.g. x86) as the version
1599 : ... update and unlock writes are after the changes done here
1600 : ... and others will not proceed until they see the new version
1601 : ... and unlock. YMMV for non-x86 platforms (probably need
1602 : ... additional hardware store fences in these macros).
1603 : ...
1604 : ... It is safe to use "break" and/or "continue" within this
1605 : ... block. The overall MAP_CRIT will exit with the appropriate
1606 : ... compiler fencing, version update and unlocking and then
1607 : ... execution will resume immediately after MAP_CRIT_END.
1608 : ...
1609 : ... IMPORTANT SAFETY TIP! DO NOT RETURN FROM THIS BLOCK.
1610 : ...
1611 : ... IMPORTANT SAFETY TIP! OPERATIONS THAT CHANGE THE CHAIN
1612 : ... ELEMENT COUNT SHOULD UPDATE VER_CNT's COUNT WHILE HOLDING
1613 : ... THE VERSION CONSTANT.
1614 :
1615 : } MAP_CRIT_BLOCKED {
1616 :
1617 : ... At this point, somebody else had a lock on the chain when we
1618 : ... tried to take the lock.
1619 : ...
1620 : ... Handle blocked here.
1621 : ...
1622 : ... On exiting this block, if blocking was zero in MAP_CRIT, we
1623 : ... will resume execution immediately after MAP_CRIT_END. If
1624 : ... blocking was non-zero, we will resume execution immediately
1625 : ... before MAP_CRIT (e.g. we will retry again after a short spin
1626 : ... pause). Similar considerations to the above for compiler
1627 : ... memory fences, "break" and "continue". As we do not have the
1628 : ... lock here, retain_lock is neither relevant nor available.
1629 : ...
1630 : ... IMPORTANT SAFETY TIP! DO NOT RETURN FROM THIS BLOCK.
1631 :
1632 : } MAP_CRIT_END; */
1633 :
1634 17998194 : #define MAP_CRIT(c,b) do { \
1635 17998194 : ulong volatile * _vc = (ulong volatile *)&(c)->ver_cnt; \
1636 17998194 : int _b = (b); \
1637 17998194 : int retain_lock = 0; \
1638 17998194 : for(;;) { \
1639 17998194 : ulong ver_cnt = *_vc; \
1640 17998194 : /* use a test-and-test-and-set style to reduce atomic contention */ \
1641 17998194 : if( FD_LIKELY( !(ver_cnt & (1UL<<MAP_CNT_WIDTH)) ) ) { /* opt for low contention */ \
1642 17998194 : ver_cnt = MAP_(private_fetch_and_or)( _vc, 1UL<<MAP_CNT_WIDTH ); \
1643 17998194 : if( FD_LIKELY( !(ver_cnt & (1UL<<MAP_CNT_WIDTH)) ) ) { /* opt for low contention */ \
1644 17998194 : FD_COMPILER_MFENCE(); \
1645 17998194 : do
1646 :
1647 : #define MAP_CRIT_BLOCKED \
1648 17998194 : while(0); \
1649 17998194 : FD_COMPILER_MFENCE(); \
1650 17998194 : if( !retain_lock ) *_vc = ver_cnt+(2UL<<MAP_CNT_WIDTH); /* likely compile time */ \
1651 17998194 : FD_COMPILER_MFENCE(); \
1652 7495800 : break; \
1653 17998194 : } \
1654 17998194 : } \
1655 17998194 : FD_COMPILER_MFENCE(); \
1656 0 : do
1657 :
1658 : #define MAP_CRIT_END \
1659 0 : while(0); \
1660 0 : FD_COMPILER_MFENCE(); \
1661 0 : if( !_b ) break; /* likely compile time */ \
1662 0 : FD_SPIN_PAUSE(); \
1663 0 : } \
1664 17998194 : } while(0)
1665 :
1666 : MAP_STATIC void *
1667 : MAP_(new)( void * shmem,
1668 : ulong chain_cnt,
1669 15 : ulong seed ) {
1670 :
1671 15 : if( FD_UNLIKELY( !shmem ) ) {
1672 3 : FD_LOG_WARNING(( "NULL shmem" ));
1673 3 : return NULL;
1674 3 : }
1675 :
1676 12 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, MAP_(align)() ) ) ) {
1677 3 : FD_LOG_WARNING(( "misaligned shmem" ));
1678 3 : return NULL;
1679 3 : }
1680 :
1681 9 : ulong footprint = MAP_(footprint)( chain_cnt );
1682 9 : if( FD_UNLIKELY( !footprint ) ) {
1683 6 : FD_LOG_WARNING(( "bad footprint" ));
1684 6 : return NULL;
1685 6 : }
1686 :
1687 : /* seed is arbitrary */
1688 :
1689 : /* Init the metadata */
1690 :
1691 3 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmem;
1692 :
1693 3 : map->seed = seed;
1694 3 : map->chain_cnt = chain_cnt;
1695 :
1696 : /* Set all the chains to version 0 and empty */
1697 :
1698 3 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, 0UL );
1699 1539 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
1700 1536 : chain[ chain_idx ].ver_cnt = MAP_(private_vcnt)( 0UL, 0UL );
1701 1536 : chain[ chain_idx ].head_cidx = MAP_(private_cidx)( MAP_(private_idx_null)() );
1702 1536 : }
1703 :
1704 3 : FD_COMPILER_MFENCE();
1705 3 : map->magic = MAP_MAGIC;
1706 3 : FD_COMPILER_MFENCE();
1707 :
1708 3 : return shmem;
1709 9 : }
1710 :
1711 : MAP_STATIC MAP_(t) *
1712 : MAP_(join)( void * ljoin,
1713 : void * shmap,
1714 : void * shele,
1715 24 : ulong ele_max ) {
1716 24 : MAP_(t) * join = (MAP_(t) *)ljoin;
1717 24 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmap;
1718 24 : MAP_ELE_T * ele = (MAP_ELE_T *)shele;
1719 :
1720 24 : if( FD_UNLIKELY( !join ) ) {
1721 3 : FD_LOG_WARNING(( "NULL ljoin" ));
1722 3 : return NULL;
1723 3 : }
1724 :
1725 21 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)join, alignof(MAP_(t)) ) ) ) {
1726 3 : FD_LOG_WARNING(( "misaligned ljoin" ));
1727 3 : return NULL;
1728 3 : }
1729 :
1730 18 : if( FD_UNLIKELY( !map ) ) {
1731 3 : FD_LOG_WARNING(( "NULL shmap" ));
1732 3 : return NULL;
1733 3 : }
1734 :
1735 15 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) ) ) {
1736 3 : FD_LOG_WARNING(( "misaligned shmap" ));
1737 3 : return NULL;
1738 3 : }
1739 :
1740 12 : if( FD_UNLIKELY( map->magic!=MAP_MAGIC ) ) {
1741 3 : FD_LOG_WARNING(( "bad magic" ));
1742 3 : return NULL;
1743 3 : }
1744 :
1745 9 : if( FD_UNLIKELY( (!ele) & (!!ele_max) ) ) {
1746 3 : FD_LOG_WARNING(( "NULL shele" ));
1747 3 : return NULL;
1748 3 : }
1749 :
1750 6 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)ele, alignof(MAP_ELE_T) ) ) ) {
1751 3 : FD_LOG_WARNING(( "misaligned shele" ));
1752 3 : return NULL;
1753 3 : }
1754 :
1755 3 : join->map = map;
1756 3 : join->ele = ele;
1757 3 : join->ele_max = ele_max;
1758 :
1759 3 : return join;
1760 6 : }
1761 :
1762 : MAP_STATIC void *
1763 6 : MAP_(leave)( MAP_(t) * join ) {
1764 :
1765 6 : if( FD_UNLIKELY( !join ) ) {
1766 3 : FD_LOG_WARNING(( "NULL join" ));
1767 3 : return NULL;
1768 3 : }
1769 :
1770 3 : return (void *)join;
1771 6 : }
1772 :
1773 : MAP_STATIC void *
1774 12 : MAP_(delete)( void * shmap ) {
1775 12 : MAP_(shmem_t) * map = (MAP_(shmem_t) *)shmap;
1776 :
1777 12 : if( FD_UNLIKELY( !map ) ) {
1778 3 : FD_LOG_WARNING(( "NULL shmap" ));
1779 3 : return NULL;
1780 3 : }
1781 :
1782 9 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) ) ) {
1783 3 : FD_LOG_WARNING(( "misaligned shmap" ));
1784 3 : return NULL;
1785 3 : }
1786 :
1787 6 : if( FD_UNLIKELY( map->magic!=MAP_MAGIC ) ) {
1788 3 : FD_LOG_WARNING(( "bad magic" ));
1789 3 : return NULL;
1790 3 : }
1791 :
1792 3 : FD_COMPILER_MFENCE();
1793 3 : map->magic = 0UL;
1794 3 : FD_COMPILER_MFENCE();
1795 :
1796 3 : return (void *)map;
1797 6 : }
1798 :
1799 : MAP_STATIC int
1800 : MAP_(insert)( MAP_(t) * join,
1801 : MAP_ELE_T * ele,
1802 7508535 : int flags ) {
1803 :
1804 : /* Determine the element index (fastest if ele are power-of-two) and
1805 : the chain that should hold ele */
1806 :
1807 7508535 : ulong ele_idx = (ulong)(ele - join->ele);
1808 7508535 : if( FD_UNLIKELY( ele_idx>=join->ele_max ) ) return FD_MAP_ERR_INVAL;
1809 :
1810 1873887 : MAP_(shmem_t) * map = join->map;
1811 :
1812 1873887 : ulong hash = MAP_(key_hash)( &ele->MAP_KEY, map->seed );
1813 1873887 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
1814 :
1815 : /* Insert element at the head of chain. If chain is already locked,
1816 : signal to try again later. */
1817 :
1818 1873887 : int err;
1819 :
1820 3747774 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1821 1873887 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
1822 1873887 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1823 :
1824 1873887 : ele->MAP_NEXT = chain->head_cidx;
1825 1873887 : # if MAP_MEMOIZE
1826 1873887 : ele->MAP_MEMO = hash;
1827 1873887 : # endif
1828 1873887 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
1829 1873887 : ver_cnt = MAP_(private_vcnt)( version, ele_cnt+1UL ); /* version updated on exit */
1830 1873887 : err = FD_MAP_SUCCESS;
1831 :
1832 1873887 : } MAP_CRIT_BLOCKED {
1833 :
1834 0 : err = FD_MAP_ERR_AGAIN;
1835 :
1836 0 : } MAP_CRIT_END;
1837 :
1838 1873887 : return err;
1839 7508535 : }
1840 :
1841 : MAP_STATIC int
1842 : MAP_(remove)( MAP_(t) * join,
1843 : MAP_KEY_T const * key,
1844 : MAP_ELE_T const * sentinel,
1845 : MAP_(query_t) * query,
1846 3752916 : int flags ) {
1847 :
1848 : /* Determine the chain that should hold key */
1849 :
1850 3752916 : MAP_(shmem_t) * map = join->map;
1851 3752916 : MAP_ELE_T * ele = join->ele;
1852 3752916 : ulong ele_max = join->ele_max;
1853 :
1854 3752916 : ulong hash = MAP_(key_hash)( key, map->seed );
1855 3752916 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
1856 :
1857 : /* Find the key on the chain. If found, remove it. If not found,
1858 : corrupt or blocked, fail the operation. */
1859 :
1860 3752916 : query->ele = (MAP_ELE_T *)sentinel;
1861 3752916 : query->chain = chain;
1862 :
1863 3752916 : int err;
1864 :
1865 7505832 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1866 3752916 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
1867 3752916 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1868 :
1869 3752916 : query->ver_cnt = ver_cnt;
1870 :
1871 3752916 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
1872 0 : err = FD_MAP_ERR_CORRUPT;
1873 0 : goto done;
1874 0 : }
1875 :
1876 3752916 : MAP_IDX_T * cur = &chain->head_cidx;
1877 6673335 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
1878 4800003 : ulong ele_idx = MAP_(private_idx)( *cur );
1879 4800003 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
1880 0 : err = FD_MAP_ERR_CORRUPT;
1881 0 : goto done;
1882 0 : }
1883 :
1884 4800003 : if(
1885 4800003 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
1886 4800003 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
1887 4800003 : # endif
1888 4800003 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
1889 :
1890 1879584 : *cur = ele[ ele_idx ].MAP_NEXT;
1891 1879584 : ver_cnt = MAP_(private_vcnt)( version, ele_cnt-1UL ); /* version updated on exit */
1892 1879584 : query->ele = &ele[ ele_idx ];
1893 1879584 : err = FD_MAP_SUCCESS;
1894 1879584 : goto done;
1895 1879584 : }
1896 :
1897 2920419 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
1898 2920419 : }
1899 :
1900 : /* Key was not found */
1901 :
1902 1873332 : ulong ele_idx = MAP_(private_idx)( *cur );
1903 1873332 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not corrupt */
1904 0 : err = FD_MAP_ERR_CORRUPT;
1905 0 : goto done;
1906 0 : }
1907 :
1908 1873332 : err = FD_MAP_ERR_KEY;
1909 :
1910 3752916 : done: /* silly language restriction */;
1911 :
1912 3752916 : } MAP_CRIT_BLOCKED {
1913 :
1914 0 : query->ver_cnt = ver_cnt;
1915 0 : err = FD_MAP_ERR_AGAIN;
1916 :
1917 0 : } MAP_CRIT_END;
1918 :
1919 3752916 : return err;
1920 3752916 : }
1921 :
1922 : MAP_STATIC int
1923 : MAP_(modify_try)( MAP_(t) * join,
1924 : MAP_KEY_T const * key,
1925 : MAP_ELE_T * sentinel,
1926 : MAP_(query_t) * query,
1927 3742884 : int flags ) {
1928 :
1929 : /* Determine which chain might hold key */
1930 :
1931 3742884 : MAP_(shmem_t) * map = join->map;
1932 3742884 : MAP_ELE_T * ele = join->ele;
1933 3742884 : ulong ele_max = join->ele_max;
1934 :
1935 3742884 : ulong hash = MAP_(key_hash)( key, map->seed );
1936 3742884 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
1937 :
1938 : /* Search for the key on chain. If found, retain the chain lock
1939 : and return the found element. If not found, corrupt or blocked,
1940 : fail. */
1941 :
1942 3742884 : query->ele = (MAP_ELE_T *)sentinel;
1943 3742884 : query->chain = chain;
1944 :
1945 3742884 : int err;
1946 :
1947 7485768 : MAP_CRIT( chain, flags & FD_MAP_FLAG_BLOCKING ) {
1948 :
1949 3742884 : query->ver_cnt = ver_cnt;
1950 :
1951 3742884 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
1952 3742884 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) { /* optimize for not corrupt */
1953 0 : err = FD_MAP_ERR_CORRUPT;
1954 0 : goto done;
1955 0 : }
1956 :
1957 3742884 : MAP_IDX_T * cur = &chain->head_cidx;
1958 6657618 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
1959 4785810 : ulong ele_idx = MAP_(private_idx)( *cur );
1960 4785810 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) { /* optimize for not corrupt */
1961 0 : err = FD_MAP_ERR_CORRUPT;
1962 0 : goto done;
1963 0 : }
1964 :
1965 4785810 : if(
1966 4785810 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
1967 4785810 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
1968 4785810 : # endif
1969 4785810 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
1970 1871076 : if( flags & FD_MAP_FLAG_ADAPTIVE ) {
1971 935613 : *cur = ele[ ele_idx ].MAP_NEXT;
1972 935613 : ele[ ele_idx ].MAP_NEXT = chain->head_cidx;
1973 935613 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
1974 935613 : }
1975 1871076 : query->ele = &ele[ ele_idx ];
1976 1871076 : err = FD_MAP_SUCCESS;
1977 1871076 : retain_lock = 1;
1978 1871076 : goto done;
1979 1871076 : }
1980 :
1981 2914734 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
1982 2914734 : }
1983 :
1984 1871808 : ulong ele_idx = MAP_(private_idx)( *cur );
1985 1871808 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) { /* optimize for not corrupt */
1986 0 : err = FD_MAP_ERR_CORRUPT;
1987 0 : goto done;
1988 0 : }
1989 :
1990 1871808 : err = FD_MAP_ERR_KEY;
1991 :
1992 3742884 : done: /* silly language restriction */;
1993 :
1994 3742884 : } MAP_CRIT_BLOCKED {
1995 :
1996 0 : query->ver_cnt = ver_cnt;
1997 0 : err = FD_MAP_ERR_AGAIN;
1998 :
1999 0 : } MAP_CRIT_END;
2000 :
2001 3742884 : return err;
2002 3742884 : }
2003 :
2004 : MAP_STATIC int
2005 : MAP_(query_try)( MAP_(t) const * join,
2006 : MAP_KEY_T const * key,
2007 : MAP_ELE_T const * sentinel,
2008 3743067 : MAP_(query_t) * query ) {
2009 :
2010 : /* Determine which chain might hold key */
2011 :
2012 3743067 : MAP_(shmem_t) const * map = join->map;
2013 3743067 : MAP_ELE_T const * ele = join->ele;
2014 3743067 : ulong ele_max = join->ele_max;
2015 :
2016 3743067 : ulong hash = MAP_(key_hash)( key, map->seed );
2017 3743067 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( map, hash );
2018 :
2019 : /* Determine the version of the chain we are querying. Then
2020 : speculatively read and validate the number of elements on the chain
2021 : at that version. If the chain is locked, tell the user to try
2022 : again later. If the number of elements in the chain is invalid,
2023 : tell user the map is corrupt. */
2024 :
2025 3743067 : ulong volatile const * _vc = &chain->ver_cnt;
2026 :
2027 3743067 : FD_COMPILER_MFENCE();
2028 3743067 : ulong then = *_vc;
2029 3743067 : FD_COMPILER_MFENCE();
2030 :
2031 3743067 : ulong ele_cnt = MAP_(private_vcnt_cnt)( then );
2032 :
2033 3743067 : FD_COMPILER_MFENCE();
2034 3743067 : ulong now = *_vc;
2035 3743067 : FD_COMPILER_MFENCE();
2036 :
2037 3743067 : query->ele = (MAP_ELE_T *) sentinel;
2038 3743067 : query->chain = (MAP_(shmem_private_chain_t) *)chain;
2039 3743067 : query->ver_cnt = then;
2040 :
2041 3743067 : if( FD_UNLIKELY( (now!=then) | (!!(then & (1UL<<MAP_CNT_WIDTH))) ) ) return FD_MAP_ERR_AGAIN;
2042 3743067 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) return FD_MAP_ERR_CORRUPT;
2043 :
2044 : /* Search the chain for key. Since we know the numer of elements on
2045 : the chain, we can bound this search to avoid corruption causing out
2046 : of bound reads, infinite loops and such. */
2047 :
2048 3743067 : MAP_IDX_T const * cur = &chain->head_cidx;
2049 6651426 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2050 :
2051 : /* Speculatively read the index of the chain, speculate if a valid
2052 : index and, if so, speculate if the chain element matches the
2053 : query. Note that this assumes element keys have a lifetime of at
2054 : least that of the element. A sufficient (but not a necessary,
2055 : see rant) condition for this is that key is a plain-old-data
2056 : fields in the element. */
2057 :
2058 4778022 : FD_COMPILER_MFENCE();
2059 4778022 : ulong ele_idx = MAP_(private_idx)( *cur );
2060 4778022 : FD_COMPILER_MFENCE();
2061 :
2062 4778022 : int corrupt = (ele_idx>=ele_max);
2063 4778022 : int found = ( FD_LIKELY( !corrupt ) &&
2064 4778022 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2065 4778022 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
2066 4778022 : # endif
2067 4778022 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) ? 1 : 0;
2068 :
2069 : /* Validate the speculation. If validation fails (e.g. the chain
2070 : was modified behind our back), tell the user to try again later.
2071 : If the element index was not valid, tell the user the map has
2072 : been corrupted. If key was found at element, tell the user they
2073 : can speculate element ele_idx contains key. */
2074 :
2075 4778022 : FD_COMPILER_MFENCE();
2076 4778022 : now = *_vc;
2077 4778022 : FD_COMPILER_MFENCE();
2078 :
2079 4778022 : if( FD_UNLIKELY( now!=then ) ) return FD_MAP_ERR_AGAIN;
2080 4778022 : if( FD_UNLIKELY( corrupt ) ) return FD_MAP_ERR_CORRUPT;
2081 :
2082 4778022 : if( FD_LIKELY( found ) ) { /* Optimize for found */
2083 1869663 : query->ele = (MAP_ELE_T *)&ele[ ele_idx ];
2084 1869663 : return FD_MAP_SUCCESS;
2085 1869663 : }
2086 :
2087 : /* The chain element didn't hold the key ... move to next element */
2088 :
2089 2908359 : cur = &ele[ ele_idx ].MAP_NEXT;
2090 2908359 : }
2091 :
2092 : /* At this point, the chain didn't hold the key. We could probably
2093 : return immediately but we speculative read the tail pointer,
2094 : validate it as an additional integrity check. If these checks
2095 : pass, we are confident the whole chain looked valid and did not
2096 : hold key between now and then. */
2097 :
2098 1873404 : ulong ele_idx = MAP_(private_idx)( *cur );
2099 :
2100 1873404 : FD_COMPILER_MFENCE();
2101 1873404 : now = *_vc;
2102 1873404 : FD_COMPILER_MFENCE();
2103 :
2104 1873404 : if( FD_UNLIKELY( now!=then ) ) return FD_MAP_ERR_AGAIN;
2105 1873404 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) return FD_MAP_ERR_CORRUPT;
2106 :
2107 1873404 : return FD_MAP_ERR_KEY;
2108 1873404 : }
2109 :
2110 : /* Note: txn_add is currently optimized for reasonably small number
2111 : of keys per transaction. For a huge number of transaction keys (e.g.
2112 : an iterator over all keys for all keys), probably should use the
2113 : iterator API. For a moderate number of transaction keys, probably
2114 : should consider data structures where set insert/remove/test are
2115 : sublinear time. Similarly, if MAP_KEY_HASH is costly, might be
2116 : useful to stash the key hashes in the transaction, memoize it in the
2117 : elements, etc. */
2118 :
2119 : MAP_STATIC int
2120 : MAP_(txn_add)( MAP_(txn_t) * txn,
2121 : MAP_KEY_T const * key,
2122 8424324 : int lock ) {
2123 :
2124 : /* Unpack txn fields */
2125 :
2126 8424324 : MAP_(shmem_t) * map = txn->map;
2127 8424324 : ulong info_max = txn->info_max;
2128 8424324 : ulong lock_cnt = txn->lock_cnt;
2129 8424324 : ulong spec_cnt = txn->spec_cnt;
2130 :
2131 8424324 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2132 8424324 : MAP_(txn_private_info_t) * spec_info = lock_info + (info_max - spec_cnt);
2133 :
2134 : /* Determine which chain manages this key */
2135 :
2136 8424324 : ulong hash = MAP_(key_hash)( key, map->seed );
2137 8424324 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
2138 :
2139 : /* If this chain already needs to be locked for this transaction,
2140 : nothing to do. */
2141 :
2142 18112536 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ )
2143 9739155 : if( FD_UNLIKELY( chain==lock_info[ lock_idx ].chain ) ) return FD_MAP_SUCCESS;
2144 :
2145 8373381 : if( FD_UNLIKELY( !lock ) ) { /* optimize for locked key, possible compile time */
2146 :
2147 : /* At this point, key is used speculatively by the transaction and
2148 : its managing chain isn't in the locked set. If this chain is
2149 : already in the speculative set, nothing to do. */
2150 :
2151 3355446 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ )
2152 800454 : if( FD_UNLIKELY( chain==spec_info[ spec_idx ].chain ) ) return FD_MAP_SUCCESS;
2153 :
2154 : /* Add the chain to the speculative set. If we don't have any room,
2155 : fail. */
2156 :
2157 2554992 : ulong free_cnt = info_max - lock_cnt - spec_cnt;
2158 2554992 : if( FD_UNLIKELY( !free_cnt ) ) return FD_MAP_ERR_INVAL; /* Impossible if less than key_max keys added */
2159 1617765 : spec_info[-1].chain = chain;
2160 1617765 : txn->spec_cnt = spec_cnt + 1UL;
2161 :
2162 5811903 : } else {
2163 :
2164 : /* At this point, key is used locked by the transaction and its
2165 : managing chain isn't in the locked set. If this chain is
2166 : currently in the speculative set, move it to the locked
2167 : set. */
2168 :
2169 8196489 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ )
2170 2398788 : if( FD_UNLIKELY( chain==spec_info[ spec_idx ].chain ) ) {
2171 14202 : spec_info[ spec_idx ].chain = spec_info[ 0 ].chain; /* Fill the hole at spec_idx, making a hole at 0 */
2172 14202 : lock_info[ lock_cnt ].chain = chain; /* Either uses unused entry or fills hole at 0 */
2173 14202 : txn->spec_cnt = spec_cnt - 1UL;
2174 14202 : txn->lock_cnt = lock_cnt + 1UL;
2175 14202 : return FD_MAP_SUCCESS;
2176 14202 : }
2177 :
2178 : /* Add the chain to the locked set. If we don't have any room,
2179 : fail. */
2180 :
2181 5797701 : ulong free_cnt = info_max - lock_cnt - spec_cnt;
2182 5797701 : if( FD_UNLIKELY( !free_cnt ) ) return FD_MAP_ERR_INVAL; /* Impossible if less than key_max keys added */
2183 4862205 : lock_info[lock_cnt].chain = chain;
2184 4862205 : txn->lock_cnt = lock_cnt + 1UL;
2185 :
2186 4862205 : }
2187 :
2188 6479970 : return FD_MAP_SUCCESS;
2189 8373381 : }
2190 :
2191 : MAP_STATIC int
2192 : MAP_(txn_try)( MAP_(txn_t) * txn,
2193 1870965 : int flags ) {
2194 1870965 : int non_blocking = !(flags & FD_MAP_FLAG_BLOCKING);
2195 :
2196 : /* Unpack txn fields */
2197 :
2198 1870965 : ulong info_max = txn->info_max;
2199 1870965 : ulong lock_cnt = txn->lock_cnt;
2200 1870965 : ulong spec_cnt = txn->spec_cnt;
2201 :
2202 1870965 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2203 1870965 : MAP_(txn_private_info_t) * spec_info = lock_info + info_max - spec_cnt;
2204 :
2205 1870965 : ulong backoff_exp = (1UL<<32); /* See iter_lock for details */
2206 1870965 : ulong backoff_seed = ((ulong)(uint)flags)>>2;
2207 :
2208 1870965 : int err;
2209 :
2210 1870965 : for(;; ) {
2211 :
2212 1870965 : err = FD_MAP_SUCCESS;
2213 :
2214 1870965 : FD_COMPILER_MFENCE();
2215 :
2216 : /* Get the chain versions for all keys in the speculative set.
2217 : If any are locked, set AGAIN if any are locked. */
2218 :
2219 3474528 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ ) {
2220 1603563 : ulong ver_cnt = spec_info[ spec_idx ].chain->ver_cnt;
2221 1603563 : if( FD_UNLIKELY( ver_cnt & (1UL<<MAP_CNT_WIDTH) ) ) { /* Already locked */
2222 0 : err = FD_MAP_ERR_AGAIN;
2223 0 : break;
2224 0 : }
2225 1603563 : spec_info[ spec_idx ].ver_cnt = ver_cnt;
2226 1603563 : }
2227 :
2228 1870965 : if( FD_LIKELY( !err ) ) {
2229 :
2230 : /* At this point, all the chains we are speculating on were
2231 : unlocked and we have have recorded their versions. Try to lock
2232 : all the chains for the locked key. */
2233 : /* FIXME: consider reordering like iter_lock? */
2234 :
2235 6747372 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) {
2236 :
2237 9752814 : MAP_CRIT( lock_info[ lock_idx ].chain, 0 ) { /* non-blocking */
2238 :
2239 : /* Got the lock ... save the version and retain the lock for
2240 : test. */
2241 :
2242 4876407 : lock_info[ lock_idx ].ver_cnt = ver_cnt;
2243 4876407 : retain_lock = 1;
2244 :
2245 4876407 : } MAP_CRIT_BLOCKED {
2246 :
2247 : /* We hit contention for this lock. Unlock the any chains
2248 : we already locked to prevent possible deadlock (see
2249 : iter_lock) */
2250 :
2251 0 : for( ulong unlock_idx=0UL; unlock_idx<lock_idx; unlock_idx++ )
2252 0 : lock_info[ unlock_idx ].chain->ver_cnt = lock_info[ unlock_idx ].ver_cnt + (2UL<<MAP_CNT_WIDTH);
2253 :
2254 0 : err = FD_MAP_ERR_AGAIN;
2255 :
2256 0 : } MAP_CRIT_END;
2257 :
2258 4876407 : if( FD_UNLIKELY( err ) ) break;
2259 :
2260 4876407 : }
2261 :
2262 1870965 : }
2263 :
2264 1870965 : FD_COMPILER_MFENCE();
2265 :
2266 1870965 : if( FD_LIKELY( (!err) | non_blocking ) ) break;
2267 :
2268 : /* At this point, we hit contention and are blocking (need to try
2269 : again). Do a random backoff (see iter_lock for details). */
2270 :
2271 0 : ulong scale = fd_ulong_min( (fd_ulong_min( lock_cnt+spec_cnt, (1UL<<16)-1UL )*backoff_exp) >> 16, (1UL<<32)-1UL );
2272 0 : backoff_exp = fd_ulong_min( backoff_exp + (backoff_exp>>2) + (backoff_exp>>4), (1UL<<48)-1UL );
2273 0 : mymap_backoff( scale, backoff_seed );
2274 0 : }
2275 :
2276 : /* At this point, if we don't have an error, we have the chain
2277 : versions for txn keys used speculatively and they were unlocked and
2278 : we have locks on the chains for txn keys used locked. Otherwise,
2279 : this is a non-blocking call and we return AGAIN. */
2280 :
2281 1870965 : return err;
2282 1870965 : }
2283 :
2284 : MAP_STATIC int
2285 1870965 : MAP_(txn_test)( MAP_(txn_t) * txn ) {
2286 :
2287 : /* Unpack txn fields */
2288 :
2289 1870965 : ulong info_max = txn->info_max;
2290 1870965 : ulong lock_cnt = txn->lock_cnt;
2291 1870965 : ulong spec_cnt = txn->spec_cnt;
2292 :
2293 1870965 : MAP_(txn_private_info_t) * lock_info = MAP_(txn_private_info)( txn );
2294 1870965 : MAP_(txn_private_info_t) * spec_info = lock_info + info_max - spec_cnt;
2295 :
2296 : /* Unlock all chains locked for this transaction. Then test if any
2297 : keys used speculatively could have changed in locking / trying /
2298 : unlocking. If so, tell user to retry later. */
2299 :
2300 1870965 : int err = FD_MAP_SUCCESS;
2301 :
2302 1870965 : FD_COMPILER_MFENCE();
2303 :
2304 6747372 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ ) lock_info[ lock_idx ].chain->ver_cnt += (1UL<<MAP_CNT_WIDTH);
2305 :
2306 3474528 : for( ulong spec_idx=0UL; spec_idx<spec_cnt; spec_idx++ ) {
2307 1603563 : MAP_(shmem_private_chain_t) const * chain = spec_info[ spec_idx ].chain;
2308 1603563 : ulong ver_cnt = spec_info[ spec_idx ].ver_cnt;
2309 1603563 : if( FD_UNLIKELY( chain->ver_cnt!=ver_cnt ) ) {
2310 0 : err = FD_MAP_ERR_AGAIN;
2311 0 : break;
2312 0 : }
2313 1603563 : }
2314 :
2315 1870965 : FD_COMPILER_MFENCE();
2316 :
2317 1870965 : return err;
2318 1870965 : }
2319 :
2320 : MAP_STATIC int
2321 : MAP_(txn_insert)( MAP_(t) * join,
2322 6552183 : MAP_ELE_T * ele ) {
2323 :
2324 : /* Determine the element index (fastest if ele are power-of-two) and
2325 : the chain that should hold ele */
2326 :
2327 6552183 : MAP_(shmem_t) * map = join->map;
2328 6552183 : ulong ele_max = join->ele_max;
2329 :
2330 6552183 : ulong ele_idx = (ulong)(ele - join->ele);
2331 6552183 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_INVAL;
2332 :
2333 1636707 : ulong hash = MAP_(key_hash)( &ele->MAP_KEY, map->seed );
2334 1636707 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
2335 :
2336 : /* Insert ele_idx at head of chain. */
2337 :
2338 1636707 : ulong ver_cnt = chain->ver_cnt;
2339 1636707 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2340 1636707 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2341 :
2342 1636707 : ele->MAP_NEXT = chain->head_cidx;
2343 1636707 : # if MAP_MEMOIZE
2344 1636707 : ele->MAP_MEMO = hash;
2345 1636707 : # endif
2346 1636707 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2347 1636707 : chain->ver_cnt = MAP_(private_vcnt)( version, ele_cnt+1UL );
2348 :
2349 1636707 : return FD_MAP_SUCCESS;
2350 6552183 : }
2351 :
2352 : MAP_STATIC int
2353 : MAP_(txn_remove)( MAP_(t) * join,
2354 : MAP_KEY_T const * key,
2355 : MAP_ELE_T const * sentinel,
2356 1636611 : MAP_(query_t) * query ) {
2357 :
2358 : /* Determine the chain that should hold key */
2359 :
2360 1636611 : MAP_(shmem_t) * map = join->map;
2361 1636611 : MAP_ELE_T * ele = join->ele;
2362 1636611 : ulong ele_max = join->ele_max;
2363 :
2364 1636611 : ulong hash = MAP_(key_hash)( key, map->seed );
2365 1636611 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
2366 :
2367 : /* Find the key on the chain and remove it */
2368 :
2369 1636611 : ulong ver_cnt = chain->ver_cnt;
2370 1636611 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2371 1636611 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2372 :
2373 1636611 : query->ele = (MAP_ELE_T *)sentinel;
2374 1636611 : query->chain = chain;
2375 1636611 : query->ver_cnt = ver_cnt;
2376 :
2377 1636611 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2378 :
2379 1636611 : MAP_IDX_T * cur = &chain->head_cidx;
2380 2483469 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) { /* guarantee bounded exec under corruption */
2381 2477868 : ulong ele_idx = MAP_(private_idx)( *cur );
2382 2477868 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2383 :
2384 2477868 : if(
2385 2477868 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2386 2477868 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
2387 2477868 : # endif
2388 2477868 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2389 1631010 : *cur = ele[ ele_idx ].MAP_NEXT;
2390 1631010 : chain->ver_cnt = MAP_(private_vcnt)( version, ele_cnt-1UL );
2391 1631010 : query->ele = &ele[ ele_idx ];
2392 1631010 : return FD_MAP_SUCCESS;
2393 1631010 : }
2394 :
2395 846858 : cur = &ele[ ele_idx ].MAP_NEXT; /* Retain the pointer to next so we can rewrite it on found */
2396 846858 : }
2397 :
2398 5601 : ulong ele_idx = MAP_(private_idx)( *cur );
2399 5601 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not found */
2400 5601 : return FD_MAP_ERR_KEY;
2401 5601 : }
2402 :
2403 : MAP_STATIC int
2404 : MAP_(txn_modify)( MAP_(t) * join,
2405 : MAP_KEY_T const * key,
2406 : MAP_ELE_T * sentinel,
2407 : MAP_(query_t) * query,
2408 3276498 : int flags ) {
2409 :
2410 : /* Determine which chain might hold key */
2411 :
2412 3276498 : MAP_(shmem_t) * map = join->map;
2413 3276498 : MAP_ELE_T * ele = join->ele;
2414 3276498 : ulong ele_max = join->ele_max;
2415 :
2416 3276498 : ulong hash = MAP_(key_hash)( key, map->seed );
2417 3276498 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, hash );
2418 :
2419 : /* Search the chain for key */
2420 :
2421 3276498 : ulong ver_cnt = chain->ver_cnt;
2422 3276498 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2423 :
2424 3276498 : query->ele = sentinel;
2425 3276498 : query->chain = chain;
2426 3276498 : query->ver_cnt = ver_cnt;
2427 :
2428 3276498 : if( FD_UNLIKELY( ele_cnt>ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2429 :
2430 3276498 : MAP_IDX_T * cur = &chain->head_cidx;
2431 4972839 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2432 4961877 : ulong ele_idx = MAP_(private_idx)( *cur );
2433 :
2434 4961877 : if( FD_UNLIKELY( ele_idx>=ele_max ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2435 :
2436 4961877 : if(
2437 4961877 : # if MAP_MEMOIZE && MAP_KEY_EQ_IS_SLOW
2438 4961877 : FD_LIKELY( ele[ ele_idx ].MAP_MEMO==hash ) &&
2439 4961877 : # endif
2440 4961877 : FD_LIKELY( MAP_(key_eq)( &ele[ ele_idx ].MAP_KEY, key ) ) ) { /* optimize for found */
2441 3265536 : if( flags & FD_MAP_FLAG_ADAPTIVE ) {
2442 816303 : *cur = ele[ ele_idx ].MAP_NEXT;
2443 816303 : ele[ ele_idx ].MAP_NEXT = chain->head_cidx;
2444 816303 : chain->head_cidx = MAP_(private_cidx)( ele_idx );
2445 816303 : }
2446 3265536 : query->ele = &ele[ ele_idx ];
2447 3265536 : return FD_MAP_SUCCESS;
2448 3265536 : }
2449 :
2450 1696341 : cur = &ele[ ele_idx ].MAP_NEXT;
2451 1696341 : }
2452 :
2453 10962 : ulong ele_idx = MAP_(private_idx)( *cur );
2454 10962 : if( FD_UNLIKELY( !MAP_(private_idx_is_null( ele_idx ) ) ) ) return FD_MAP_ERR_CORRUPT; /* optimize for not corrupt */
2455 :
2456 10962 : return FD_MAP_ERR_KEY;
2457 10962 : }
2458 :
2459 : MAP_STATIC int
2460 : MAP_(iter_lock)( MAP_(t) * join,
2461 : ulong * lock_seq,
2462 : ulong lock_cnt,
2463 1878468 : int flags ) {
2464 1878468 : if( FD_UNLIKELY( !lock_cnt ) ) return FD_MAP_SUCCESS; /* nothing to do */
2465 1878459 : if( FD_UNLIKELY( (!join) | (!lock_seq) ) ) return FD_MAP_ERR_INVAL;
2466 :
2467 1878453 : int non_blocking = !(flags & FD_MAP_FLAG_BLOCKING);
2468 :
2469 1878453 : MAP_(shmem_t) * map = join->map;
2470 :
2471 1878453 : ulong chain_cnt = map->chain_cnt;
2472 1878453 : if( FD_UNLIKELY( lock_cnt>chain_cnt ) ) return FD_MAP_ERR_INVAL;
2473 :
2474 1878450 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( join->map, 0UL );
2475 :
2476 1878450 : int err;
2477 :
2478 1878450 : ulong backoff = 1UL<<32; /* in [1,2^16)*2^32 */
2479 1878450 : ulong backoff_seed = ((ulong)(uint)flags)>>2; /* 0 usually fine */
2480 1878450 : ulong lock_idx = 0UL;
2481 1878450 : ulong locked_cnt = 0UL;
2482 3752100 : for(;;) {
2483 :
2484 3752100 : err = FD_MAP_SUCCESS;
2485 :
2486 : /* At this point, we've acquired locks [0,locked_cnt), we need to
2487 : acquire locks [locked_cnt,lock_cnt), [locked_cnt,lock_cnt) is non
2488 : empty and i is in [locked_cnt,lock_cnt). Try to acquire lock
2489 : lock_idx this iteration. */
2490 :
2491 3752100 : ulong chain_idx = lock_seq[ lock_idx ];
2492 :
2493 3752100 : if( FD_UNLIKELY( chain_idx>=chain_cnt ) ) { /* optimize for valid lock_seq */
2494 0 : for( ulong unlock_idx=0UL; unlock_idx<locked_cnt; unlock_idx++ )
2495 0 : chain[ lock_seq[ unlock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2496 0 : locked_cnt = 0UL;
2497 0 : err = FD_MAP_ERR_AGAIN;
2498 0 : break;
2499 0 : }
2500 :
2501 7504200 : MAP_CRIT( chain + chain_idx, 0 ) {
2502 :
2503 : /* At this point, we got the lock. Swap lock at locked_cnt and
2504 : lock_idx and increment locked_cnt to move lock_idx to the
2505 : locked set as the most recently acquired lock. Since we
2506 : increment lock_idx below, when locked_cnt<lock_idx (i.e. we had
2507 : contention for lock locked_cnt recently), this will move the
2508 : next attempt to lock locked_cnt as far as possible from now of
2509 : the remaining locks to acquire. When locked_cnt==lock_idx,
2510 : this is a branchless no-op (and the increment of lock_idx below
2511 : will guarantee lock_idx will be at least locked_cnt next
2512 : iteration, preserving the invariant that lock_idx is in
2513 : [locked_cnt,lock_cnt) on the next iteration if there is one. */
2514 :
2515 3752100 : ulong chain_idx_tmp = lock_seq[ locked_cnt ];
2516 3752100 : lock_seq[ lock_idx ] = chain_idx_tmp;
2517 3752100 : lock_seq[ locked_cnt ] = chain_idx;
2518 3752100 : locked_cnt++;
2519 :
2520 3752100 : retain_lock = 1;
2521 :
2522 3752100 : } MAP_CRIT_BLOCKED {
2523 :
2524 : /* We failed to get lock lock_idx. To avoid deadlock with the
2525 : thread that has this lock and is trying get a lock we already
2526 : have, we unlock the chains we've already locked (note that we
2527 : need to unlock here in non-blocking operation too). Quick
2528 : experiments in extreme contention scenarios found more
2529 : incremental approaches in blocking operation could take an
2530 : excessively long time to resolve so we bulk unlock. */
2531 :
2532 0 : for( ulong unlock_idx=0UL; unlock_idx<locked_cnt; unlock_idx++ )
2533 0 : chain[ lock_seq[ unlock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2534 0 : locked_cnt = 0UL;
2535 :
2536 0 : err = FD_MAP_ERR_AGAIN;
2537 :
2538 0 : } MAP_CRIT_END;
2539 :
2540 3752100 : if( FD_UNLIKELY( (locked_cnt==lock_cnt ) | /* all locks acquired */
2541 3752100 : ((!!err) & non_blocking) ) ) break; /* or hit contention and are non-blocking */
2542 :
2543 : /* Move to the next lock. Everytime we wrap around, we hit
2544 : contention since the last wrap / iter start. We do a random
2545 : exponential backoff with saturation on wrapping to minimize
2546 : contention with other threads hitting these locks. Normalizing
2547 : out fixed point scalings baked into the below, we spin pause a
2548 : uniform IID random number of times in [0,unlocked_cnt*backoff]
2549 : where backoff is 1 on the first wrap and increases by ~30% each
2550 : time to a maximum of 2^16 (i.e. hundreds microseconds per
2551 : remaining lock for typical CPU speeds and spin pause delays at
2552 : maximum backoff). */
2553 :
2554 1873650 : lock_idx++;
2555 1873650 : if( FD_UNLIKELY( lock_idx==lock_cnt ) ) { /* optimize for lots of locks */
2556 0 : lock_idx = locked_cnt;
2557 0 : ulong scale = fd_ulong_min( (fd_ulong_min( lock_cnt-locked_cnt, (1UL<<16)-1UL )*backoff) >> 16, (1UL<<32)-1UL );
2558 0 : backoff = fd_ulong_min( backoff + (backoff>>2) + (backoff>>4), (1UL<<48)-1UL );
2559 0 : mymap_backoff( scale, backoff_seed );
2560 0 : }
2561 1873650 : }
2562 :
2563 1878450 : return err;
2564 1878453 : }
2565 :
2566 : MAP_STATIC void
2567 : MAP_(iter_unlock)( MAP_(t) * join,
2568 : ulong const * lock_seq,
2569 3752100 : ulong lock_cnt ) {
2570 3752100 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( join->map, 0UL );
2571 :
2572 3752100 : FD_COMPILER_MFENCE();
2573 7504200 : for( ulong lock_idx=0UL; lock_idx<lock_cnt; lock_idx++ )
2574 3752100 : chain[ lock_seq[ lock_idx ] ].ver_cnt += (1UL<<MAP_CNT_WIDTH);
2575 3752100 : FD_COMPILER_MFENCE();
2576 3752100 : }
2577 :
2578 : MAP_STATIC void
2579 3 : MAP_(reset)( MAP_(t) * join ) {
2580 3 : MAP_(shmem_t) * map = join->map;
2581 :
2582 3 : ulong chain_cnt = map->chain_cnt;
2583 3 : MAP_(shmem_private_chain_t) * chain = MAP_(shmem_private_chain)( map, 0UL );
2584 :
2585 1539 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
2586 1536 : ulong ver_cnt = chain[ chain_idx ].ver_cnt;
2587 1536 : ulong version = MAP_(private_vcnt_ver)( ver_cnt );
2588 1536 : chain[ chain_idx ].ver_cnt = MAP_(private_vcnt)( version+2UL, 0UL );
2589 1536 : chain[ chain_idx ].head_cidx = MAP_(private_cidx)( MAP_(private_idx_null)() );
2590 1536 : }
2591 3 : }
2592 :
2593 : MAP_STATIC int
2594 3 : MAP_(verify)( MAP_(t) const * join ) {
2595 :
2596 3102 : # define MAP_TEST(c) do { \
2597 3102 : if( FD_UNLIKELY( !(c) ) ) { FD_LOG_WARNING(( "FAIL: %s", #c )); return FD_MAP_ERR_INVAL; } \
2598 3102 : } while(0)
2599 :
2600 : /* Validate join */
2601 :
2602 3 : MAP_TEST( join );
2603 3 : MAP_TEST( fd_ulong_is_aligned( (ulong)join, alignof(MAP_(t)) ) );
2604 :
2605 3 : MAP_(shmem_t) const * map = join->map;
2606 3 : MAP_ELE_T const * ele = join->ele;
2607 3 : ulong ele_max = join->ele_max;
2608 :
2609 3 : MAP_TEST( map );
2610 3 : MAP_TEST( fd_ulong_is_aligned( (ulong)map, MAP_(align)() ) );
2611 :
2612 3 : MAP_TEST( (!!ele) | (!ele_max) );
2613 3 : MAP_TEST( fd_ulong_is_aligned( (ulong)ele, alignof(MAP_ELE_T) ) );
2614 :
2615 3 : MAP_TEST( ele_max<=MAP_(ele_max_max)() );
2616 :
2617 : /* Validate map metadata */
2618 :
2619 3 : ulong magic = map->magic;
2620 3 : ulong seed = map->seed;
2621 3 : ulong chain_cnt = map->chain_cnt;
2622 :
2623 3 : MAP_TEST( magic==MAP_MAGIC );
2624 : /* seed is arbitrary */
2625 3 : MAP_TEST( fd_ulong_is_pow2( chain_cnt ) );
2626 3 : MAP_TEST( chain_cnt<=MAP_(chain_max)() );
2627 :
2628 3 : MAP_(shmem_private_chain_t) const * chain = MAP_(shmem_private_chain_const)( map, 0UL );
2629 :
2630 : /* Validate the map chains */
2631 :
2632 3 : ulong unmapped_ele_cnt = ele_max;
2633 1539 : for( ulong chain_idx=0UL; chain_idx<chain_cnt; chain_idx++ ) {
2634 :
2635 : /* Validate the chain length */
2636 :
2637 1536 : ulong ver_cnt = chain[ chain_idx ].ver_cnt;
2638 :
2639 1536 : ulong ele_cnt = MAP_(private_vcnt_cnt)( ver_cnt );
2640 1536 : MAP_TEST( ele_cnt<=unmapped_ele_cnt );
2641 1536 : unmapped_ele_cnt -= ele_cnt;
2642 :
2643 : /* Validate chain linkage, element membership and element uniqueness */
2644 :
2645 1536 : ulong head_idx = MAP_(private_idx)( chain[ chain_idx ].head_cidx );
2646 1536 : ulong cur_idx = head_idx;
2647 1536 : for( ulong ele_rem=ele_cnt; ele_rem; ele_rem-- ) {
2648 0 : MAP_TEST( cur_idx<ele_max ); /* In element store */
2649 :
2650 0 : MAP_KEY_T const * key = &ele[ cur_idx ].MAP_KEY;
2651 :
2652 0 : ulong hash = MAP_(key_hash)( key, seed );
2653 0 : ulong ele_chain_idx = hash & (chain_cnt-1UL);
2654 0 : MAP_TEST( ele_chain_idx==chain_idx ); /* On correct chain */
2655 0 : # if MAP_MEMOIZE
2656 0 : MAP_TEST( ele[ cur_idx ].MAP_MEMO==hash );
2657 0 : # endif
2658 :
2659 : /* Note that we've already validated linkage from head_idx to
2660 : cur_idx so pointer chasing here is safe. */
2661 :
2662 0 : ulong prv_idx = head_idx;
2663 0 : while( prv_idx!=cur_idx ) {
2664 0 : MAP_TEST( !MAP_(key_eq)( &ele[ prv_idx ].MAP_KEY, key ) ); /* Unique */
2665 0 : prv_idx = MAP_(private_idx)( ele[ prv_idx ].MAP_NEXT );
2666 0 : }
2667 :
2668 0 : cur_idx = MAP_(private_idx)( ele[ cur_idx ].MAP_NEXT );
2669 0 : }
2670 :
2671 1536 : MAP_TEST( MAP_(private_idx_is_null)( cur_idx ) );
2672 1536 : }
2673 :
2674 : /* At this point, we know the sum of the chain lengths do not exceed
2675 : the size of the element store, each chain is of their stated
2676 : length, each chain element is in element store, and that every
2677 : element on a chain belongs on that chain (which precludes the
2678 : possibility of two chains merging into one) and that every element
2679 : on a chain is unique (which implies unique among all chains since
2680 : elements with each key maps to a single chain).
2681 :
2682 : That is, join is a current local join to a valid shared mapping of
2683 : unique keys to unique elements in the element store.
2684 :
2685 : We don't know anything about unmapped elements in the element store
2686 : and cannot do any verification of them (here be dragons). But
2687 : that's kinda the point ... what's in the unmapped elements depends
2688 : on how the application is managing those. */
2689 :
2690 3 : # undef MAP_TEST
2691 :
2692 3 : return FD_MAP_SUCCESS;
2693 3 : }
2694 :
2695 : MAP_STATIC char const *
2696 18 : MAP_(strerror)( int err ) {
2697 18 : switch( err ) {
2698 3 : case FD_MAP_SUCCESS: return "success";
2699 3 : case FD_MAP_ERR_INVAL: return "bad input";
2700 3 : case FD_MAP_ERR_AGAIN: return "try again";
2701 3 : case FD_MAP_ERR_CORRUPT: return "corruption detected";
2702 3 : case FD_MAP_ERR_KEY: return "key not found";
2703 3 : default: break;
2704 18 : }
2705 3 : return "unknown";
2706 18 : }
2707 :
2708 : #undef MAP_CRIT_END
2709 : #undef MAP_CRIT_BLOCKED
2710 : #undef MAP_CRIT
2711 :
2712 : #endif
2713 :
2714 : #undef MAP_
2715 : #undef MAP_STATIC
2716 : #undef MAP_VER_WIDTH
2717 :
2718 : #undef MAP_IMPL_STYLE
2719 : #undef MAP_MAGIC
2720 : #undef MAP_ALIGN
2721 : #undef MAP_CNT_WIDTH
2722 : #undef MAP_KEY_EQ_IS_SLOW
2723 : #undef MAP_MEMO
2724 : #undef MAP_MEMOIZE
2725 : #undef MAP_KEY_HASH
2726 : #undef MAP_KEY_EQ
2727 : #undef MAP_NEXT
2728 : #undef MAP_IDX_T
2729 : #undef MAP_KEY
2730 : #undef MAP_KEY_T
2731 : #undef MAP_ELE_T
2732 : #undef MAP_NAME
|