Line data Source code
1 : /* Declare a bunch of functions for fast manipulation of index sets that
2 : can contain a large compile time bounded number of elements and that
3 : can be shared between processes. The implementation is optimized for
4 : dense-ish sets with a largish maximum element contains (thousands+).
5 : Example:
6 :
7 : #define SET_NAME my_set
8 : #define SET_MAX 65536 ... Should be in [1,2^31-64]
9 : #include "util/tmpl/fd_set.c"
10 :
11 : Will implement and expose the following header only library in the
12 : local compilation unit:
13 :
14 : my_set_t // opaque handle type for the set
15 :
16 : // interprocess shared memory constructors / destructors
17 : // these obey the usual conventions
18 :
19 : ulong my_set_align ( void ); // required byte alignment of a my_set_t
20 : ulong my_set_footprint( void ); // required byte footprint of a my_set_t
21 : void * my_set_new ( void * shmem ); // format memory region into a my_set, my_set will be empty
22 : // (caller not joined on return, mem has required align/footprint, etc)
23 : my_set_t * my_set_join ( void * shset ); // join a my_set_t (unlimited joins, etc)
24 : void * my_set_leave ( my_set_t * set ); // leave a my_set_t (matched with join, etc)
25 : void * my_set_delete ( void * shset ); // unformat memory (no active joins, etc)
26 :
27 : // it is also possible to do declaration based allocation via
28 :
29 : my_set_t set[ my_set_word_cnt ];
30 :
31 : // Returns 1 if set appears to be point to a valid set, 0 otherwise
32 : // (e.g. set is NULL, there's corruption in the set zero padding,
33 : // etc).
34 :
35 : int my_set_valid( my_set_t const * set )
36 :
37 : // Returns 1 if idx is a valid set element index (i.e. in [0,max)
38 : // (It is okay to pass NULL for set here as it is technically
39 : // ignored).
40 :
41 : int my_set_valid_idx( my_set_t const * set, ulong idx )
42 :
43 : // Return the maximum number of elements this set can contain. Set
44 : // elements are indexed [0,max). (It is okay to pass NULL for set
45 : // here as it is technically ignored.)
46 :
47 : ulong my_set_max( my_set_t const * set ); // Return positive
48 :
49 : // Return the current number of elements this set contains
50 :
51 : ulong my_set_cnt( my_set_t const * set ); // Return in [0,max]
52 :
53 : // Return 1 if set contains no elements and 0 if not
54 :
55 : int my_set_is_null( my_set_t const * set );
56 :
57 : // Return 1 if set contains all elements and 0 if not
58 :
59 : int my_set_is_full( my_set_t const * set );
60 :
61 : // Return the lowest indexed element in the set
62 :
63 : ulong my_set_first( my_set_t const * set ); // Return in [0,max) on success, >=max if empty set
64 :
65 : // Two pair of functions for writing efficient iterators over all
66 : // members of sparse sets. The first pair is a destructive
67 : // iterator:
68 : //
69 : // for( ulong idx=my_set_iter_init( set ); !my_set_iter_done( idx ); idx=my_set_iter_next( set, idx ) ) {
70 : // ... idx is the next element of the set, will be in [0,max)
71 : // ... set elements will be iterated over in increasing order
72 : // ... do not modify set, modify idx; there are no elements
73 : // ... in set before idx at this point
74 : // }
75 : // ... set will be empty at this point
76 : //
77 : // The second pair is a non-destructive iterator:
78 : //
79 : // for( ulong idx=my_set_const_iter_init( set ); !my_set_const_iter_done( idx ); idx=my_set_const_iter_next( set, idx ) ) {
80 : // ... idx is the next element of the set, will be in [0,max)
81 : // ... set elements will be iterated over in increasing order
82 : // ... do not modify set or modify idx; set is unchanged
83 : // ... at this point
84 : // }
85 : // ... set is unchanged at this point
86 : //
87 : // The performance difference between the two styles are situation
88 : // dependent (depends on the sizes of the set, cache conditions,
89 : // distribution of elements in the set, etc) but not expected to be
90 : // large. In general though, the above iterators are best for very
91 : // sparse sets. For dense sets, the idiom:
92 : //
93 : // ulong max = my_set_max( set );
94 : // for( ulong idx=0UL; idx<max; idx++ ) {
95 : // if( !my_set_test( set, idx ) ) continue;
96 : // ... idx is the next element of the set, will be in [0,max)
97 : // ... set elements will be iterated over in increasing order
98 : // ... do not modify set or modify idx; set is unchanged
99 : // ... at this point
100 : // }
101 : // ... set is unchanged at this point
102 : //
103 : // or is more predictable branchless speculative variant:
104 : //
105 : // ulong max = my_set_max( set );
106 : // for( ulong idx=0UL; idx<max; idx++ ) {
107 : // ... speculate idx is in the set, will be in [0,max)
108 : // ... set elements will be iterated over in increasing order
109 : // ... do not modify set or modify idx; set is unchanged
110 : // ... at this point
111 : // ... commit speculation when my_set_test( set, idx ) is true
112 : // }
113 : // ... set is unchanged at this point
114 : //
115 : // might be preferable.
116 :
117 : ulong my_set_iter_init( my_set_t * set );
118 : int my_set_iter_done( ulong idx );
119 : ulong my_set_iter_next( my_set_t * set, ulong idx );
120 :
121 : ulong my_set_const_iter_init( my_set_t const * set );
122 : int my_set_const_iter_done( ulong idx );
123 : ulong my_set_const_iter_next( my_set_t const * set, ulong idx );
124 :
125 : // Insert/remove element idx to a set if not already present (no-op
126 : // otherwise). Returns set.
127 :
128 : my_set_t * my_set_insert( my_set_t * set, ulong idx ); // in [0,max).
129 : my_set_t * my_set_remove( my_set_t * set, ulong idx ); // in [0,max).
130 :
131 : // Fast version of "c ? my_set_{insert,remove}( set, idx ) : set".
132 :
133 : my_set_t * my_set_insert_if( my_set_t * set, int c, ulong idx ); // in [0,max).
134 : my_set_t * my_set_remove_if( my_set_t * set, int c, ulong idx ); // in [0,max).
135 :
136 : // Return 1 if idx is in set and 0 otherwise
137 :
138 : int my_set_test( my_set_t const * set, ulong idx ); // in [0,max).
139 :
140 : // Returns 1 if x and y contain the same elements, 0 otherwise. In
141 : // place is fine.
142 :
143 : int my_set_eq( my_set_t const * x, my_set_t const * y );
144 :
145 : // Returns 1 if x is a subset of y (including x==y), 0 otherwise.
146 : // In place is fine.
147 :
148 : int my_set_subset( my_set_t const * x, my_set_t const * y );
149 :
150 : // Operations that populate an output set. All of these return z
151 : // and inplace operation is fine.
152 :
153 : my_set_t * my_set_null ( my_set_t * z ); // z = {}
154 : my_set_t * my_set_full ( my_set_t * z ); // z = ~{}
155 : my_set_t * my_set_full_if ( my_set_t * z, int c ); // z = c ? {idx} : {}
156 : my_set_t * my_set_ele ( my_set_t * z, ulong idx ); // z = {idx}
157 : my_set_t * my_set_ele_if ( my_set_t * z, int c, ulong idx ); // z = c ? {idx} : {}
158 : my_set_t * my_set_copy ( my_set_t * z, my_set_t const * x ); // z = x
159 : my_set_t * my_set_complement( my_set_t * z, my_set_t const * x ); // z = ~x
160 : my_set_t * my_set_union ( my_set_t * z, my_set_t const * x, my_set_t const * y ); // z = x u y
161 : my_set_t * my_set_intersect ( my_set_t * z, my_set_t const * x, my_set_t const * y ); // z = x n y
162 : my_set_t * my_set_subtract ( my_set_t * z, my_set_t const * x, my_set_t const * y ); // z = x - y
163 : my_set_t * my_set_xor ( my_set_t * z, my_set_t const * x, my_set_t const * y ); // z = (x u y) - (x n y)
164 : my_set_t * my_set_if ( my_set_t * z, int c, my_set_t const * x, my_set_t const * y ); // z = c ? x : y
165 :
166 : With the exception of my_set_valid_idx and my_set_valid, all of these
167 : assume the inputs are value and will produce strictly valid outputs
168 : unless otherwise explicitly noted. */
169 :
170 : #include "../bits/fd_bits.h"
171 :
172 : #ifndef SET_NAME
173 : #error "Define SET_NAME"
174 : #endif
175 :
176 : #ifndef SET_MAX
177 : #error "Define SET_MAX"
178 : #endif
179 :
180 : FD_STATIC_ASSERT( 1<=SET_MAX && SET_MAX<=2147483584 /* 2^31-64 */, invalid_set_max );
181 :
182 : /* Implementation *****************************************************/
183 :
184 1843829442 : #define SET_(x) FD_EXPAND_THEN_CONCAT3(SET_NAME,_,x)
185 :
186 : typedef ulong SET_(t);
187 :
188 : enum { SET_(word_cnt) = (((int)(SET_MAX))+63)>>6 };
189 :
190 : FD_PROTOTYPES_BEGIN
191 :
192 : /* Private APIs *******************************************************/
193 :
194 : FD_FN_CONST static inline ulong
195 0 : SET_(private_full_last_word)( void ) {
196 0 : return fd_ulong_mask_lsb( SET_MAX - ((SET_(word_cnt)-1)<<6) );
197 0 : }
198 :
199 : /* Public APIs ********************************************************/
200 :
201 0 : FD_FN_CONST static inline ulong SET_(align) ( void ) { return alignof(ulong); }
202 0 : FD_FN_CONST static inline ulong SET_(footprint)( void ) { return 8UL*(ulong)SET_(word_cnt); }
203 :
204 369 : static inline void * SET_(new) ( void * shmem ) { return fd_memset( shmem, 0, SET_(footprint)() ); }
205 369 : static inline SET_(t) * SET_(join) ( void * shset ) { return (SET_(t) *)shset; }
206 369 : static inline void * SET_(leave) ( SET_(t) * set ) { return (void *)set; }
207 369 : static inline void * SET_(delete)( void * shset ) { return shset; }
208 :
209 0 : FD_FN_CONST static inline ulong SET_(max)( SET_(t) const * set ) { (void)set; return (ulong)SET_MAX; }
210 :
211 : FD_FN_PURE static inline int
212 333315 : SET_(valid)( SET_(t) const * set ) {
213 333315 : if( FD_UNLIKELY( !set ) ) return 0; /* FIXME: TEST ALIGNMENT TOO? */
214 333315 : return !(set[ SET_(word_cnt)-1 ] & ~SET_(private_full_last_word()));
215 333315 : }
216 :
217 : FD_FN_CONST static inline int
218 : SET_(valid_idx)( SET_(t) const * set,
219 74070 : ulong idx ) {
220 74070 : (void)set;
221 74070 : return idx<(ulong)SET_MAX;
222 74070 : }
223 :
224 : FD_FN_PURE static inline ulong
225 149463 : SET_(cnt)( SET_(t) const * set ) {
226 149463 : ulong word_cnt = (ulong)SET_(word_cnt);
227 149463 : ulong cnt = 0UL;
228 28761651 : for( ulong i=0UL; i<word_cnt; i++ ) cnt += (ulong)fd_ulong_popcnt( set[i] );
229 149463 : return cnt;
230 149463 : }
231 :
232 : FD_FN_PURE static inline int
233 296286 : SET_(is_null)( SET_(t) const * set ) {
234 296286 : ulong word_cnt = (ulong)SET_(word_cnt);
235 39589563 : for( ulong i=0UL; i<word_cnt; i++ ) if( set[i] ) return 0;
236 185181 : return 1;
237 296286 : }
238 :
239 : FD_FN_PURE static inline int
240 148146 : SET_(is_full)( SET_(t) const * set ) {
241 148146 : ulong word_cnt = ((ulong)SET_(word_cnt))-1UL;
242 10813362 : for( ulong i=0UL; i<word_cnt; i++ ) if( ~set[i] ) return 0;
243 37212 : return set[word_cnt]==SET_(private_full_last_word());
244 148146 : }
245 :
246 : FD_FN_PURE static inline ulong
247 150600 : SET_(first)( SET_(t) const * set ) {
248 150600 : ulong word_cnt = (ulong)SET_(word_cnt);
249 10851699 : for( ulong i=0UL; i<word_cnt; i++ ) {
250 10814664 : ulong w = set[i];
251 10814664 : if( w ) return (i<<6) + (ulong)fd_ulong_find_lsb( w );
252 10814664 : }
253 37035 : return ~0UL;
254 150600 : }
255 :
256 : FD_FN_UNUSED static ulong /* Work around -Winline */
257 : SET_(iter_next)( SET_(t) * set,
258 914542542 : ulong j ) { /* We've considered all bits up to and including j */
259 914542542 : j++; /* Lowest bit we haven't considered */
260 914542542 : ulong word_cnt = (ulong)SET_(word_cnt);
261 928917780 : for( ulong i=(j>>6); i<word_cnt; i++ ) { /* For all words with bits we haven't considered */
262 928769634 : ulong w = set[i]; /* Get the bits we haven't considered for the current word */
263 928769634 : if( w ) { /* If any bits are set in this word */
264 914394396 : set[i] = fd_ulong_pop_lsb( w ); /* Clear the lsb */
265 914394396 : return (i<<6) + (ulong)fd_ulong_find_lsb( w ); /* And return the index */
266 914394396 : }
267 928769634 : }
268 148146 : return ~0UL; /* No more bits to consider */
269 914542542 : }
270 148224 : static inline ulong SET_(iter_init)( SET_(t) * set ) { return SET_(iter_next)( set, ~0UL ); }
271 914542542 : FD_FN_CONST static inline ulong SET_(iter_done)( ulong j ) { return !~j; }
272 :
273 : FD_FN_PURE FD_FN_UNUSED static ulong /* Work around -Winline */
274 : SET_(const_iter_next)( SET_(t) const * set,
275 914542290 : ulong j ) { /* We've considered all bits up to and including j */
276 914542290 : j++; /* Lowest bit we haven't considered */
277 914542290 : ulong m = (1UL<<(j&63UL))-1UL; /* Bits in first word that have been considered */
278 914542290 : ulong word_cnt = (ulong)SET_(word_cnt);
279 928911870 : for( ulong i=(j>>6); i<word_cnt; i++ ) { /* For all words with bits we haven't considered */
280 928763730 : ulong w = set[i] & ~m; /* Get the bits we haven't considered for the current word */
281 928763730 : if( w ) return (i<<6) + (ulong)fd_ulong_find_lsb( w ); /* If any bit is set in this word, return its index */
282 14369580 : m = 0UL; /* Otherwise, continue to next word (haven't considered any bits) */
283 14369580 : }
284 148140 : return ~0UL; /* No more bits to consider */
285 914542290 : }
286 148140 : FD_FN_PURE static inline ulong SET_(const_iter_init)( SET_(t) const * set ) { return SET_(const_iter_next)( set, ~0UL ); }
287 914542290 : FD_FN_CONST static inline ulong SET_(const_iter_done)( ulong j ) { return !~j; }
288 :
289 : static inline SET_(t) *
290 : SET_(insert)( SET_(t) * set,
291 223785 : ulong idx ) {
292 223785 : set[ idx >> 6 ] |= 1UL << (idx & 63UL);
293 223785 : return set;
294 223785 : }
295 :
296 : static inline SET_(t) *
297 : SET_(remove)( SET_(t) * set,
298 547164 : ulong idx ) {
299 547164 : set[ idx >> 6 ] &= ~(1UL << (idx & 63UL));
300 547164 : return set;
301 547164 : }
302 :
303 : static inline SET_(t) *
304 : SET_(insert_if)( SET_(t) * set,
305 : int c,
306 376194 : ulong idx ) {
307 376194 : set[ idx >> 6 ] |= ((ulong)!!c) << (idx & 63UL);
308 376194 : return set;
309 376194 : }
310 :
311 : static inline SET_(t) *
312 : SET_(remove_if)( SET_(t) * set,
313 : int c,
314 296280 : ulong idx ) {
315 296280 : set[ idx >> 6 ] &= ~(((ulong)!!c) << (idx & 63UL));
316 296280 : return set;
317 296280 : }
318 :
319 : FD_FN_PURE static inline int
320 : SET_(test)( SET_(t) const * set,
321 341073 : ulong idx ) {
322 341073 : return (int)((set[ idx >> 6 ] >> (idx & 63UL)) & 1UL);
323 341073 : }
324 :
325 : FD_FN_PURE static inline int
326 : SET_(eq)( SET_(t) const * x,
327 5555271 : SET_(t) const * y ) {
328 5555271 : ulong word_cnt = (ulong)SET_(word_cnt);
329 1006160187 : for( ulong i=0UL; i<word_cnt; i++ ) if( x[i]!=y[i] ) return 0;
330 5110848 : return 1;
331 5555271 : }
332 :
333 : FD_FN_PURE static inline int
334 : SET_(subset)( SET_(t) const * x,
335 592560 : SET_(t) const * y ) {
336 592560 : ulong word_cnt = (ulong)SET_(word_cnt);
337 75582387 : for( ulong i=0UL; i<word_cnt; i++ ) if( x[i]!=(y[i] & x[i]) ) return 0;
338 333315 : return 1;
339 592560 : }
340 :
341 : static inline SET_(t) *
342 111132 : SET_(null)( SET_(t) * z ) {
343 111132 : ulong word_cnt = (ulong)SET_(word_cnt);
344 21554829 : for( ulong i=0UL; i<word_cnt; i++ ) z[i] = 0UL;
345 111132 : return z;
346 111132 : }
347 :
348 : static inline SET_(t) *
349 93 : SET_(full)( SET_(t) * z ) {
350 93 : ulong word_cnt = ((ulong)SET_(word_cnt))-1UL;
351 54573 : for( ulong i=0UL; i<word_cnt; i++ ) z[i] = ~0UL;
352 93 : z[word_cnt] = SET_(private_full_last_word)();
353 93 : return z;
354 93 : }
355 :
356 : static inline SET_(t) *
357 : SET_(full_if)( SET_(t) * z,
358 6 : int c ) {
359 6 : ulong word_cnt = ((ulong)SET_(word_cnt))-1UL;
360 6 : ulong word = ((ulong)!c)-1UL;
361 1158 : for( ulong i=0UL; i<word_cnt; i++ ) z[i] = word;
362 6 : z[word_cnt] = word & SET_(private_full_last_word)();
363 6 : return z;
364 6 : }
365 :
366 : static inline SET_(t) *
367 : SET_(ele)( SET_(t) * z,
368 37035 : ulong idx ) {
369 37035 : return SET_(insert)( SET_(null)( z ), idx );
370 37035 : }
371 :
372 : static inline SET_(t) *
373 : SET_(ele_if)( SET_(t) * z,
374 : int c,
375 74070 : ulong idx ) {
376 74070 : return SET_(insert_if)( SET_(null)( z ), c, idx );
377 74070 : }
378 :
379 : static inline SET_(t) *
380 : SET_(copy)( SET_(t) * z,
381 2518380 : SET_(t) const * x ) {
382 2518380 : ulong word_cnt = (ulong)SET_(word_cnt);
383 488565720 : for( ulong i=0UL; i<word_cnt; i++ ) z[i] = x[i];
384 2518380 : return z;
385 2518380 : }
386 :
387 : static inline SET_(t) *
388 : SET_(complement)( SET_(t) * z,
389 185175 : SET_(t) const * x ) {
390 185175 : ulong word_cnt = ((ulong)SET_(word_cnt))-1UL;
391 35738775 : for( ulong i=0UL; i<word_cnt; i++ ) z[i] = ~x[i];
392 185175 : z[word_cnt] = (~x[word_cnt]) & SET_(private_full_last_word)();
393 185175 : return z;
394 185175 : }
395 :
396 : static inline SET_(t) *
397 : SET_(union)( SET_(t) * z,
398 : SET_(t) const * x,
399 629598 : SET_(t) const * y ) {
400 629598 : ulong word_cnt = (ulong)SET_(word_cnt);
401 122141481 : for( ulong i=0UL; i<word_cnt; i++ ) z[i] = x[i] | y[i];
402 629598 : return z;
403 629598 : }
404 :
405 : static inline SET_(t) *
406 : SET_(intersect)( SET_(t) * z,
407 : SET_(t) const * x,
408 592563 : SET_(t) const * y ) {
409 592563 : ulong word_cnt = (ulong)SET_(word_cnt);
410 114956691 : for( ulong i=0UL; i<word_cnt; i++ ) z[i] = x[i] & y[i];
411 592563 : return z;
412 592563 : }
413 :
414 : static inline SET_(t) *
415 : SET_(subtract)( SET_(t) * z,
416 : SET_(t) const * x,
417 629595 : SET_(t) const * y ) {
418 629595 : ulong word_cnt = (ulong)SET_(word_cnt);
419 122141430 : for( ulong i=0UL; i<word_cnt; i++ ) z[i] = x[i] & ~y[i];
420 629595 : return z;
421 629595 : }
422 :
423 : static inline SET_(t) *
424 : SET_(xor)( SET_(t) * z,
425 : SET_(t) const * x,
426 592563 : SET_(t) const * y ) {
427 592563 : ulong word_cnt = (ulong)SET_(word_cnt);
428 114956691 : for( ulong i=0UL; i<word_cnt; i++ ) z[i] = x[i] ^ y[i];
429 592563 : return z;
430 592563 : }
431 :
432 : static inline SET_(t) *
433 : SET_(if)( SET_(t) * z,
434 : int c,
435 : SET_(t) const * x,
436 1185120 : SET_(t) const * y ) {
437 1185120 : return SET_(copy)( z, c ? x : y );
438 1185120 : }
439 :
440 : FD_PROTOTYPES_END
441 :
442 : #undef SET_
443 :
444 : #undef SET_MAX
445 : #undef SET_NAME
446 :
|