Line data Source code
1 : /* Declare ultra high performance dynamic key-val maps of bounded
2 : compile time size. Typical usage:
3 :
4 : struct mymap {
5 : ulong key; // Technically "MAP_KEY_T MAP_KEY;" (default is ulong key)
6 : uint hash; // Technically "MAP_HASH_T MAP_HASH;" (default is uint hash), ==mymap_hash(key)
7 : ... key and hash can be located arbitrarily in struct
8 : ... hash is not required if MAP_MEMOIZE is zero
9 : ... the rest of the struct is POD state/values associated with key
10 : ... the mapping of a key to a map slot is arbitrary and might
11 : ... change over the lifetime of the key
12 : };
13 :
14 : typedef struct mymap mymap_t;
15 :
16 : #define MAP_NAME mymap
17 : #define MAP_T mymap_t
18 : #define MAP_LG_SLOT_CNT 12
19 : #include "util/tmpl/fd_map.c"
20 :
21 : will declare the following static inline APIs as a header only style
22 : library in the compilation unit:
23 :
24 : // align/footprint - Return the alignment/footprint required for a
25 : // memory region to be used as mymap.
26 : //
27 : // new - Format a memory region pointed to by shmem into a mymap.
28 : // Assumes shmem points to a region with the required alignment and
29 : // footprint not in use by anything else. Caller is not joined on
30 : // return. Returns shmem.
31 : //
32 : // join - Join a mymap. Assumes shmap points at a region formatted
33 : // as a mymap. Returns a handle of the callers join.
34 : //
35 : // leave - Leave a mymap. Assumes mymap points to a current join.
36 : // Returns a pointer to the shared memory region the join.
37 : //
38 : // delete - Unformat a memory region used as a mymap. Assumes
39 : // shmymap points to a formatted region with no current joins.
40 : // Returns a pointer to the unformatted memory region.
41 :
42 : ulong mymap_align ( void );
43 : ulong mymap_footprint( void );
44 : void * mymap_new ( void * shmem );
45 : mymap_t * mymap_join ( void * shmymap );
46 : void * mymap_leave ( mymap_t * mymap );
47 : void * mymap_delete ( void * shmymap );
48 :
49 : // Return the maximum number of keys that can be inserted into a
50 : // mymap. The mymap will become increasingly inefficient the more
51 : // keys there are. Recommend not using more than around half this
52 : // value.
53 :
54 : ulong mymap_key_max( void ); // == 2^MAP_LG_SLOT_CNT - 1
55 :
56 : // Return the number of slots in a mymap. This is to facilitate
57 : // iterating / listing all contents of a mymap (this process is not
58 : // algorithmically ideal for a sparse mymap). E.g.
59 : // mymap[slot_idx].key for slot_idx in [0,mymap_slot_cnt()] when
60 : // key!=0 is the set of all current key-vals in the mymap.
61 :
62 : ulong mymap_slot_cnt( void ); // == 2^MAP_LG_SLOT_CNT
63 :
64 : // Returns the index of the slot (allows communicating locations of
65 : // map entries between users of mymap in different address spaces).
66 : // Might imply a division (probably via magic multiply) if
67 : // sizeof(mymap_t) is not a power of two (as such, power-of-2
68 : // sizeof(mymap_t) have good Feng Shui). Assumes that mymap is a
69 : // current join and slot points to a slot in that join.
70 :
71 : ulong mymap_slot_idx( mymap_t const * mymap, mymap_t const * slot );
72 :
73 : // Returns the "null" key, which is the canonical key that will
74 : // never be inserted (typically zero).
75 :
76 : ulong mymap_key_null( void ); // == MAP_KEY_NULL
77 :
78 : // Return the 1/0 if key is a key that will never/might be inserted.
79 :
80 : int mymap_key_inval( ulong key );
81 :
82 : // Return the 1/0 if k0 and k1 are keys that are the same
83 :
84 : int mymap_key_equal( ulong k0, ulong k1 );
85 :
86 : // Return the hash of key used by the map. Should ideally be a
87 : // random mapping from MAP_KEY_T -> MAP_HASH_T.
88 :
89 : uint mymap_key_hash( ulong key );
90 :
91 : // Insert key into the map, fast O(1). Returns a pointer to the the
92 : // map entry with key on success and NULL on failure (i.e. key is
93 : // already in the map). The returned pointer lifetime is until
94 : // _any_ map remove or map leave. The caller should not change the
95 : // values in key or hash but is free to modify other fields in the
96 : // entry on return. Assumes map is a current join, there are less
97 : // than mymap_key_max() keys in the map and key is not a value that
98 : // will never be inserted.
99 :
100 : mymap_t * mymap_insert( mymap_t * map, ulong key );
101 :
102 : // Remove entry from map, fast O(1). Assumes map is a current join
103 : // and that entry points to a full entry currently in the map. When
104 : // the function returns, the entry pointed to by entry may be
105 : // clobbered.
106 : // Removal performance very slightly more optimal if sizeof(mymap_t)
107 : // is a power of two.
108 :
109 : void mymap_remove( mymap_t * map, mymap_t * entry );
110 :
111 : // Remove all entries from the map. O(map size).
112 :
113 : void mymap_clear( mymap_t * map );
114 :
115 : // Query map for key, fast O(1). Returns a pointer to the map slot
116 : // holding key or null if key not in map. The returned pointer
117 : // lifetime is until the next map remove or map leave. The caller
118 : // should not change key or hash but is free to modify other fields
119 : // in the entry. Assumes map is a current join and that key is
120 : // non-zero. mymap_query_const is a const correct version.
121 :
122 : mymap_t * mymap_query ( mymap_t * map, ulong key, mymap_t * null );
123 : mymap_t const * mymap_query_const( mymap_t const * map, ulong key, mymap_t const * null );
124 :
125 : You can do this as often as you like in a compilation unit to get
126 : different types of maps. Since it is all static inline, it is fine
127 : to do this in a header too. Further, options exist to use different
128 : hashing functions, variable length keys, etc as detailed below.
129 :
130 : For use with non-POD C++ structs, map_new assumes that a slot can be
131 : initialized to empty by assigning its key to MAP_KEY_NULL. Map insert
132 : will use MAP_KEY_MOVE to move the user provided key into the slot key
133 : value. Map remove will MAP_MOVE slots as necessary to preserve their
134 : probe sequences and assumes the user has already cleaned up the entry
135 : to remove enough so that all that needs to be done to eliminate the
136 : entry from the map is assign the entry's key to MAP_KEY_NULL.
137 :
138 : mymap_t * slot = mymap_insert( map, cxx_key );
139 : if( FD_UNLIKELY( !slot ) ) ... handle failure (cxx_key was not moved)
140 : else {
141 : ... mymap_insert did a MAP_KEY_MOVE of cxx_key into slot->key
142 : ... clean cxx_key's shell here as necessary here
143 : ... initialize other slot fields as necessary
144 : }
145 :
146 : and on removal:
147 :
148 : ... clean up other slot fields as necessary
149 : ... clean up slot->key as necessary
150 : mymap_remove( map, slot );
151 : ... the mapping of keys to map slots might have been changed by the
152 : ... mymap_remove. Any motion of slots was done via:
153 : ... MAP_MOVE( dst_slot, src_slot )
154 :
155 : fd_map align and footprint are declaration friendly. E.g.
156 :
157 : mymap_t map[ ... map slot cnt ... ];
158 :
159 : will have the appropriate alignment and footprint for a mymap_t map. */
160 :
161 : #include "../bits/fd_bits.h"
162 :
163 : #ifndef MAP_NAME
164 : #error "Define MAP_NAME"
165 : #endif
166 :
167 : /* A MAP_T should be something something reasonable to shallow copy with
168 : the fields described above. */
169 :
170 : #ifndef MAP_T
171 : #error "Define MAP_T struct"
172 : #endif
173 :
174 : /* MAP_HASH_T should be an unsigned integral type. */
175 :
176 : #ifndef MAP_HASH_T
177 1849858416 : #define MAP_HASH_T uint
178 : #endif
179 :
180 : /* MAP_HASH is the MAP_T hash field name. Defaults to hash. */
181 :
182 : #ifndef MAP_HASH
183 223146 : #define MAP_HASH hash
184 : #endif
185 :
186 : /* MAP_LG_SLOT_CNT is the log2 of the number of slots in the map. */
187 :
188 : #ifndef MAP_LG_SLOT_CNT
189 : #error "Define MAP_LG_SLOT_CNT, should be at least 1 and much less than 63"
190 : #endif
191 :
192 : /* MAP_KEY_T should be something reasonable to pass to a static inline
193 : by value, assign to MAP_KEY_NULL, compare for equality and copy.
194 : E.g. a uint, ulong, __m128i, etc. */
195 :
196 : #ifndef MAP_KEY_T
197 433544166 : #define MAP_KEY_T ulong
198 : #else
199 : #if !defined(MAP_KEY_NULL) || !defined(MAP_KEY_INVAL) || !defined(MAP_KEY_EQUAL) || !defined(MAP_KEY_EQUAL_IS_SLOW) || !defined(MAP_KEY_HASH)
200 : #error "Define MAP_KEY_NULL, MAP_KEY_INVAL, MAP_KEY_EQUAL, MAP_KEY_EQUAL_IS_SLOW, and MAP_KEY_HASH if using a custom MAP_KEY_T"
201 : #endif
202 : #endif
203 :
204 : /* MAP_KEY is the MAP_T key field name. Defaults to key. */
205 :
206 : #ifndef MAP_KEY
207 5522816364 : #define MAP_KEY key
208 : #endif
209 :
210 : /* MAP_KEY_NULL is a key that will never be inserted. */
211 :
212 : #ifndef MAP_KEY_NULL
213 529308 : #define MAP_KEY_NULL 0UL
214 : #endif
215 :
216 : /* MAP_KEY_INVAL returns 1 if k0 is key that will never be inserted
217 : and zero otherwise. Note that MAP_KEY_INVAL( MAP_KEY_NULL ) should
218 : be true. This should be generally fast. */
219 :
220 : #ifndef MAP_KEY_INVAL
221 433545702 : #define MAP_KEY_INVAL(k) !(k)
222 : #endif
223 :
224 : /* MAP_KEY_EQUAL returns 0/1 if k0 is the same/different. Note that
225 : this function may also be called with MAP_KEY_NULL. */
226 :
227 : #ifndef MAP_KEY_EQUAL
228 470285517 : #define MAP_KEY_EQUAL(k0,k1) (k0)==(k1)
229 : #endif
230 :
231 : /* If MAP_KEY_EQUAL_IS_SLOW is slow (e.g. variable length string
232 : compare, large buffer compares, etc), set MAP_KEY_EQUAL_IS_SLOW to
233 : non-zero. Then, if MAP_MEMOIZE (below) is set, precomputed key hashes
234 : will be used accelerate key insert and key query. */
235 :
236 : #ifndef MAP_KEY_EQUAL_IS_SLOW
237 : #define MAP_KEY_EQUAL_IS_SLOW 0
238 : #endif
239 :
240 : /* MAP_KEY_HASH takes a key and maps it into MAP_HASH_T uniform pseudo
241 : randomly. */
242 :
243 : #ifndef MAP_KEY_HASH
244 121091733 : #define MAP_KEY_HASH(key) (MAP_HASH_T)fd_ulong_hash( key )
245 : #endif
246 :
247 : /* MAP_KEY_MOVE moves the contents from src to dst. Non-POD key types
248 : need to customize this accordingly (and handle the case of
249 : ks==MAP_KEY_NULL). Defaults to shallow copy. */
250 :
251 : #ifndef MAP_KEY_MOVE
252 1603988046 : #define MAP_KEY_MOVE(kd,ks) (kd)=(ks)
253 : #endif
254 :
255 : /* MAP_MOVE moves the contents of a MAP_T from src to dst. Non-POD key
256 : types need to customize this accordingly. Defaults to shallow copy. */
257 :
258 : #ifndef MAP_MOVE
259 374472 : #define MAP_MOVE(d,s) (d)=(s)
260 : #endif
261 :
262 : /* If MAP_MEMOIZE is defined to non-zero, the MAP_T requires a hash
263 : field that will hold the value of the MAP_KEY_HASH of the MAP_T's key
264 : field when the map slot is not empty (undefined otherwise). This is
265 : useful for accelerating user operations that might need a hash of the
266 : key and for accelerating remove operations. It is also potentially
267 : useful as a way to accelerate slow key comparison operations (see
268 : MAP_KEY_EQUAL_IS_SLOW). */
269 :
270 : #ifndef MAP_MEMOIZE
271 : #define MAP_MEMOIZE 1
272 : #endif
273 :
274 : /* MAP_QUERY_OPT allows the user to specify how the map query function
275 : should be optimized.
276 : 0 -> optimize for low fill ratio
277 : 1 -> optimize for low fill ratio and extremely rare query failure
278 : 2 -> optimize for low fill ratio and extremely rare query success */
279 :
280 : #ifndef MAP_QUERY_OPT
281 : #define MAP_QUERY_OPT 0
282 : #endif
283 :
284 : /* Implementation *****************************************************/
285 :
286 10059519141 : #define MAP_(n) FD_EXPAND_THEN_CONCAT3(MAP_NAME,_,n)
287 4476956601 : #define MAP_SLOT_CNT (1UL<<(MAP_LG_SLOT_CNT))
288 4019163225 : #define MAP_SLOT_MASK (MAP_SLOT_CNT-1UL)
289 :
290 : FD_PROTOTYPES_BEGIN
291 :
292 : /* Private APIs *******************************************************/
293 :
294 : /* Get the linear probing starting slot for a key and the slot to probe
295 : after a given slot */
296 :
297 1849858500 : FD_FN_CONST static inline ulong MAP_(private_start)( MAP_HASH_T hash ) { return (ulong)(hash & (MAP_HASH_T)MAP_SLOT_MASK); }
298 2169304725 : FD_FN_CONST static inline ulong MAP_(private_next) ( ulong slot ) { return (++slot) & MAP_SLOT_MASK; }
299 :
300 : /* Public APIS ********************************************************/
301 :
302 0 : FD_FN_CONST static inline ulong MAP_(align) ( void ) { return alignof(MAP_T); }
303 0 : FD_FN_CONST static inline ulong MAP_(footprint)( void ) { return sizeof(MAP_T)*MAP_SLOT_CNT; }
304 :
305 : static inline void *
306 20478 : MAP_(new)( void * shmem ) {
307 20478 : MAP_T * map = (MAP_T *)shmem;
308 455716734 : for( ulong slot_idx=0UL; slot_idx<MAP_SLOT_CNT; slot_idx++ ) map[ slot_idx ].MAP_KEY = (MAP_KEY_NULL);
309 20478 : return map;
310 20478 : }
311 :
312 69455101 : static inline MAP_T * MAP_(join) ( void * shmap ) { return (MAP_T *)shmap; }
313 69455101 : static inline void * MAP_(leave) ( MAP_T * map ) { return map; }
314 3627 : static inline void * MAP_(delete)( void * shmap ) { return shmap; }
315 :
316 0 : FD_FN_CONST static inline ulong MAP_(key_max) ( void ) { return MAP_SLOT_MASK; }
317 0 : FD_FN_CONST static inline ulong MAP_(slot_cnt)( void ) { return MAP_SLOT_CNT; }
318 :
319 1603454943 : FD_FN_CONST static inline ulong MAP_(slot_idx)( MAP_T const * map, MAP_T const * entry ) { return (ulong)(entry - map); }
320 :
321 0 : FD_FN_CONST static inline MAP_KEY_T MAP_(key_null)( void ) { return (MAP_KEY_NULL); }
322 :
323 : /* These are FD_FN_PURE instead of FD_FN_CONST in case a non-POD
324 : MAP_KEY_T. FIXME: CONSIDER LETTING THE COMPILER SORT THIS OUT? */
325 :
326 5628111879 : FD_FN_PURE static inline int MAP_(key_inval)( MAP_KEY_T k0 ) { return (MAP_KEY_INVAL(k0)); }
327 603950394 : FD_FN_PURE static inline int MAP_(key_equal)( MAP_KEY_T k0, MAP_KEY_T k1 ) { return (MAP_KEY_EQUAL(k0,k1)); }
328 :
329 1849858500 : FD_FN_PURE static inline MAP_HASH_T MAP_(key_hash)( MAP_KEY_T key ) { return (MAP_KEY_HASH(key)); }
330 :
331 : FD_FN_UNUSED static MAP_T * /* Work around -Winline */
332 : MAP_(insert)( MAP_T * map,
333 1604238885 : MAP_KEY_T key ) {
334 1604238885 : MAP_HASH_T hash = MAP_(key_hash)( key );
335 1604238885 : ulong slot = MAP_(private_start)( hash );
336 1604238885 : MAP_T * m;
337 1738195473 : for(;;) {
338 1738195473 : m = map + slot;
339 1738195473 : MAP_KEY_T map_key = m->MAP_KEY;
340 1738195473 : if( FD_LIKELY( MAP_(key_inval)( map_key ) ) ) break; /* Optimize for not found */
341 : # if MAP_MEMOIZE && MAP_KEY_EQUAL_IS_SLOW /* ... and then for searching */
342 0 : if( FD_UNLIKELY( m->MAP_HASH==hash && MAP_(key_equal)( map_key, key ) ) ) return NULL;
343 : # else
344 134207427 : if( FD_UNLIKELY( MAP_(key_equal)( map_key, key ) ) ) return NULL;
345 133956588 : # endif
346 133956588 : slot = MAP_(private_next)( slot );
347 133956588 : }
348 1603988046 : MAP_KEY_MOVE( m->MAP_KEY, key );
349 : # if MAP_MEMOIZE
350 1584 : m->MAP_HASH = hash;
351 : # endif
352 1603988046 : return m;
353 1604238885 : }
354 :
355 : static inline void
356 : MAP_(remove)( MAP_T * map,
357 1603453407 : MAP_T * entry ) {
358 1603453407 : ulong slot = MAP_(slot_idx)( map, entry );
359 :
360 1603827879 : for(;;) {
361 :
362 : /* Make a hole at slot */
363 :
364 1603827879 : map[slot].MAP_KEY = (MAP_KEY_NULL);
365 1603827879 : ulong hole = slot;
366 :
367 : /* The creation of a hole at slot might have disrupted the probe
368 : sequence involving the keys in any contiguously occupied map
369 : entry after slot. */
370 :
371 1727311473 : for(;;) {
372 1727311473 : slot = MAP_(private_next)( slot );
373 :
374 : /* At this point, map entries (hole,slot) (cyclic) are occupied
375 : and the probe sequence for these has been confirmed to be
376 : intact. If slot is empty, then all probe sequences are intact. */
377 :
378 1727311473 : MAP_KEY_T key = map[slot].MAP_KEY;
379 1727311473 : if( MAP_(key_inval)(key) ) return;
380 :
381 : /* slot is occupied. If a probe looking for the key at slot does
382 : not start its scan in (hole,slot] (cyclic), its scan will fail
383 : erroneously due to the hole just made. In this case, we move
384 : slot to hole to restore the probe sequence for the key at slot
385 : and then make a new hole at slot. As the new hole could break
386 : the other probe sequences, we start over on the new hole. */
387 :
388 : # if MAP_MEMOIZE
389 0 : MAP_HASH_T hash = map[slot].MAP_HASH;
390 : # else
391 123858066 : MAP_HASH_T hash = MAP_(key_hash)( key );
392 : # endif
393 123858066 : ulong start = MAP_(private_start)( hash );
394 123858066 : if( !(((hole<start) & (start<=slot)) | ((hole>slot) & ((hole<start) | (start<=slot)))) ) break;
395 123858066 : }
396 :
397 374472 : MAP_MOVE( map[hole], map[slot] );
398 374472 : }
399 : /* never get here */
400 1603453407 : }
401 :
402 : static inline void
403 16098 : MAP_(clear)( MAP_T * map ) {
404 2076642 : for( ulong slot_idx=0UL; slot_idx<MAP_SLOT_CNT; slot_idx++ ) map[ slot_idx ].MAP_KEY = (MAP_KEY_NULL);
405 16098 : }
406 :
407 : FD_FN_PURE FD_FN_UNUSED static MAP_T * /* Work around -Winline */
408 : MAP_(query)( MAP_T * map,
409 : MAP_KEY_T key,
410 121761549 : MAP_T * null ) {
411 121761549 : MAP_HASH_T hash = MAP_(key_hash)( key );
412 121761549 : ulong slot = MAP_(private_start)( hash );
413 121761549 : MAP_T * m;
414 429798213 : for(;;) {
415 429798213 : m = map + slot;
416 429798213 : MAP_KEY_T map_key = m->MAP_KEY;
417 :
418 : # if MAP_MEMOIZE && MAP_KEY_EQUAL_IS_SLOW
419 110781 : # define MAP_IMPL_QUERY_FOUND (hash==m->MAP_HASH && MAP_(key_equal)( map_key, key ))
420 : # else
421 429687432 : # define MAP_IMPL_QUERY_FOUND MAP_(key_equal)( map_key, key )
422 : # endif
423 :
424 : # if MAP_QUERY_OPT==0
425 429798213 : int found = MAP_IMPL_QUERY_FOUND;
426 429798213 : int empty = MAP_(key_inval)( map_key );
427 : int done = found | empty;
428 429798213 : if( empty ) m = null; /* cmov */
429 429798213 : FD_COMPILER_FORGET( done );
430 429798213 : if( FD_LIKELY( done ) ) break;
431 : # elif MAP_QUERY_OPT==1
432 0 : if( FD_LIKELY( MAP_IMPL_QUERY_FOUND ) ) break;
433 0 : if( FD_LIKELY( MAP_(key_inval)( map_key ) ) ) return null;
434 : # else
435 : if( FD_LIKELY( MAP_(key_inval)( map_key ) ) ) return null;
436 : if( FD_LIKELY( MAP_IMPL_QUERY_FOUND ) ) break;
437 : # endif
438 :
439 0 : # undef MAP_IMPL_QUERY_FOUND
440 :
441 308036664 : slot = MAP_(private_next)( slot );
442 308036664 : }
443 0 : return m;
444 121761549 : }
445 :
446 : FD_FN_PURE static inline MAP_T const *
447 : MAP_(query_const)( MAP_T const * map,
448 : MAP_KEY_T key,
449 136740 : MAP_T const * null ) {
450 136740 : return (MAP_T const *)MAP_(query)( (MAP_T *)map, key, (MAP_T *)null ); /* query doesn't actual change any memory */
451 136740 : }
452 :
453 : FD_PROTOTYPES_END
454 :
455 : #undef MAP_SLOT_MASK
456 : #undef MAP_SLOT_CNT
457 : #undef MAP_
458 :
459 : /* End implementation *************************************************/
460 :
461 : #undef MAP_QUERY_OPT
462 : #undef MAP_MEMOIZE
463 : #undef MAP_MOVE
464 : #undef MAP_KEY_MOVE
465 : #undef MAP_KEY_HASH
466 : #undef MAP_KEY_EQUAL_IS_SLOW
467 : #undef MAP_KEY_EQUAL
468 : #undef MAP_KEY_INVAL
469 : #undef MAP_KEY_NULL
470 : #undef MAP_KEY
471 : #undef MAP_KEY_T
472 : #undef MAP_LG_SLOT_CNT
473 : #undef MAP_HASH
474 : #undef MAP_HASH_T
475 : #undef MAP_T
476 : #undef MAP_NAME
477 :
|