Line data Source code
1 : #ifndef HEADER_fd_src_tango_tcache_fd_tcache_h
2 : #define HEADER_fd_src_tango_tcache_fd_tcache_h
3 :
4 : /* A fd_tcache_t is a cache of the most recently observed unique 64-bit
5 : tags. It is useful for, among other things, deduplication of traffic
6 : based on a thumbprint / hash / signature. Makes no demands on the
7 : application on tagging scheme except that there be a "null" tag (a
8 : tag value that will never occur).
9 :
10 : The amount of history ("depth") of a tcache is theoretically
11 : unlimited but the implementation below was optimized for large-ish
12 : depths (e.g. millions) on platforms where memory footprint is
13 : reasonable cheap. The implementation was also optimized for
14 : situations where heavily duplication is common and temporally
15 : localized. Lastly, the implementation is optimized for the case that
16 : tags behave like IID random values (e.g. a tag is a hash of a packet
17 : payload).
18 :
19 : It is strongly recommend that the tcache be backed by a single NUMA
20 : page (e.g. in a gigantic page backed workspace) to avoid TLB
21 : thrashing if used in performance critical contexts. */
22 :
23 : #include "../fd_tango_base.h"
24 :
25 : /* FD_TCACHE_{ALIGN,FOOTPRINT} specify the alignment and footprint
26 : needed for a tcache with depth history and a tag key-only map with
27 : map_cnt slots. ALIGN is at least double cache line to mitigate
28 : various kinds of false sharing. depth and map_cnt are assumed to be
29 : valid (i.e. depth is positive, map_cnt is an integer power of 2 of at
30 : least depth+2 and the combination will not require a footprint larger
31 : than ULONG_MAX). These are provided to facilitate compile time
32 : declarations. */
33 :
34 700569 : #define FD_TCACHE_ALIGN (128UL)
35 : #define FD_TCACHE_FOOTPRINT( depth, map_cnt ) \
36 : FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_INIT, \
37 : FD_TCACHE_ALIGN, (4UL + (depth) + (map_cnt))*sizeof(ulong) ), \
38 : FD_TCACHE_ALIGN )
39 :
40 : /* FD_TCACHE_TAG_NULL is a tag value that will never be inserted. */
41 :
42 882425664 : #define FD_TCACHE_TAG_NULL (0UL)
43 :
44 : /* FD_TCACHE_SPARSE_DEFAULT specifies how sparse a default map_cnt
45 : tcache map should be. Must be a positive and << 64. After startup, a
46 : tcache with a large depth with a default map size will have fixed
47 : fill ratio somewhere between ~2^-SPARSE_DEFAULT and
48 : ~2^-(SPARSE_DEFAULT-1) (i.e. for SPARSE_DEFAULT=2, the fill ratio
49 : will be in the range 25% to 50%). This trades off between tcache
50 : memory footprint efficiency and tcache computational efficiency.
51 : Larger values are more computationally efficient but with an
52 : exponentially rapidly diminishing return and an exponentially
53 : increasing memory footprint. SPARSE_DEFAULT of 1 is usually a bad
54 : idea as operational costs can spike exponentially if the worst fill
55 : ratio for the depth ends up close to 1. */
56 :
57 93783 : #define FD_TCACHE_SPARSE_DEFAULT (2)
58 :
59 : /* fd_tcache_t is an opaque handle of a tcache object. Details are
60 : exposed here to facilitate usage of tcache in performance critical
61 : contexts. */
62 :
63 18 : #define FD_TCACHE_MAGIC (0xf17eda2c377ca540UL) /* firedancer tcash ver 0 */
64 :
65 : struct __attribute((aligned(FD_TCACHE_ALIGN))) fd_tcache_private {
66 : ulong magic; /* ==FD_TCACHE_MAGIC */
67 : ulong depth; /* The tcache will maintain a history of the most recent depth tags */
68 : ulong map_cnt;
69 : ulong oldest; /* oldest is in [0,depth) */
70 :
71 : /* depth ulong (ring):
72 :
73 : After the tcache has started up (i.e. at least depth unique tags
74 : have been inserted), ring[oldest] will be contain the oldest tag in
75 : the tcache. As strongly hinted by the name, ring is cyclic: the
76 : ring entry before oldest (cyclic) is the newest tag in the tcache
77 : and the ring entry after oldest (cyclic) is the 2nd oldest tag in
78 : the tcache. During startup (the first depth-1 unique tags
79 : inserted), ring[oldest] will be FD_TCACHE_NULL. In high
80 : performance operation, only the slots around oldest will be in
81 : active use / occupy local cache and the access pattern will be
82 : highly sequential. */
83 :
84 : /* map_cnt ulong (map):
85 :
86 : This is a sparse linear probed key-only map of tags currently in
87 : the tcache. Since it is sparse, probe collisions are rare (and thus
88 : the branches involved in various cache operations are highly
89 : predictable. While the sparsity makes the map reasonably
90 : inefficient from a memory footprint point of view, memory footprint
91 : is quite cheap in practice and the actual cache utilization is
92 : quite mild. Specifically, if tag duplication is rare, only the
93 : slots around the newest and oldest tags will be in use typically.
94 : Further, if any tag duplication is temporally clustered (as is
95 : commonly the case), duplicate tags will be a cache hit against the
96 : (still cached because of recent use) original insertion.
97 :
98 : In the typical case of randomized tags, this randomly accesses the
99 : map aggressively. The NUMA and TLB thrashing impacts of that can
100 : be reduced / eliminated by backing the tcache with a huge /
101 : gigantic page shared workspace on a NUMA node nearby the tcache
102 : using threads. */
103 :
104 : /* Padding to FD_TCACHE align */
105 : };
106 :
107 : typedef struct fd_tcache_private fd_tcache_t;
108 :
109 : FD_PROTOTYPES_BEGIN
110 :
111 : /* fd_tcache_map_cnt_default returns the default map_cnt to use for the
112 : given depth. Returns 0 if the depth is invalid / results in a
113 : map_cnt larger than ULONG_MAX. */
114 :
115 : FD_FN_CONST static inline ulong
116 93882 : fd_tcache_map_cnt_default( ulong depth ) {
117 :
118 93882 : if( FD_UNLIKELY( !depth ) ) return 0UL; /* depth must be positive */
119 :
120 93783 : if( FD_UNLIKELY( depth==ULONG_MAX ) ) return 0UL; /* overflow */
121 93783 : int lg_map_cnt = fd_ulong_find_msb( depth + 1UL ) + FD_TCACHE_SPARSE_DEFAULT; /* no overflow */
122 93783 : if( FD_UNLIKELY( lg_map_cnt>63 ) ) return 0UL; /* depth too large */
123 :
124 : /* At this point:
125 :
126 : 2^(lg_map_cnt-s) <= depth+n < 2^(lg_map_cnt-s+1)
127 :
128 : where s is SPARSE_DEFAULT > 0 and n is 1.
129 :
130 : map_cnt/2^s - n <= depth < map_cnt/2^(s-1) - n
131 : 1/2^s - n/map_cnt <= depth/map_cnt < 1/2^(s-1) - n/map_cnt
132 :
133 : For asymptotically large depth / map_cnt, the worst case map fill
134 : ratio will asymptote to something in ~( 1/2^s, 1/2^(s-1) ).
135 : Flipping this around, we also have:
136 :
137 : -> 2^(s-1) (depth+n) < map_cnt <= 2^s (depth+n)
138 :
139 : In the worst case, s==1, depth+1 < map_cnt -> map_cnt>=depth+2. */
140 :
141 93783 : return 1UL << lg_map_cnt;
142 93783 : }
143 :
144 : /* fd_tcache_{align,footprint} return the required alignment and
145 : footprint of a memory region suitable for use as a tcache.
146 : fd_tcache_align returns FD_TCACHE_ALIGN. For fd_tcache_footprint, a
147 : map_cnt of 0 indicates to use fd_tcache_map_cnt_default above. If
148 : depth is not positive, map_cnt is not a power of 2 of at least
149 : depth+2 and/or the required footprint would be larger than ULONG_MAX,
150 : footprint will silently return 0 (and thus can be used by the caller
151 : to validate the tcache configuration parameters). Otherwise, it
152 : returns FD_TCACHE_FOOTPRINT for actual value of map_cnt used. */
153 :
154 : FD_FN_CONST ulong
155 : fd_tcache_align( void );
156 :
157 : FD_FN_CONST ulong
158 : fd_tcache_footprint( ulong depth,
159 : ulong map_cnt );
160 :
161 : /* fd_tcache_new formats an unused memory region for use as a tcache.
162 : shmem is a non-NULL pointer to this region in the local address space
163 : with the required footprint and alignment. depth is the number of
164 : unique tags that can be stored in the tcache and should be positive
165 : (positive integer powers of 2 minus 2 have good memory footprint Feng
166 : Shui and positive integer powers of 2 minus 1 have good computational
167 : efficiency Feng Shui). map_cnt is the number of slots to use for the
168 : map. A map_cnt of 0 indicates to fd_tcache_map_cnt_default above.
169 :
170 : Returns shmem (and the memory region it points to will be formatted
171 : as a tcache, caller is not joined, tcache will be empty) on success
172 : and NULL on failure (logs details). Reasons for failure include
173 : obviously bad shmem, bad depth or bad map_cnt. */
174 :
175 : void *
176 : fd_tcache_new( void * shmem,
177 : ulong depth,
178 : ulong map_cnt );
179 :
180 : /* fd_tcache_join joins the caller to the tcache. _tcache points to the
181 : first byte of the memory region backing the tcache in the caller's
182 : address space.
183 :
184 : Returns a pointer in the local address space to the tcache's entries
185 : on success (this is not necessarily just a cast of _tcache) and NULL
186 : on failure (logs details). Reasons for failure are that _tcache is
187 : obviously not a pointer to memory region holding a tcache. Every
188 : successful join should have a matching leave. The lifetime of the
189 : join is until the matching leave or thread group is terminated. */
190 :
191 : fd_tcache_t *
192 : fd_tcache_join( void * _tcache );
193 :
194 : /* fd_tcache_leave leaves a current local join. Returns a pointer to
195 : the underlying shared memory region on success (this is not
196 : necessarily just a cast of _tcache) and NULL on failure (logs
197 : details). Reasons for failure include tcache is NULL. */
198 :
199 : void *
200 : fd_tcache_leave( fd_tcache_t * tcache );
201 :
202 : /* fd_tcache_delete unformats a memory region used as a tcache. Assumes
203 : nobody is joined to the region. Returns a pointer to the underlying
204 : shared memory region or NULL if used obviously in error (e.g.
205 : _tcache is obviously not a tcache ... logs details). The ownership
206 : of the memory region is transferred to the caller. */
207 :
208 : void *
209 : fd_tcache_delete( void * _tcache );
210 :
211 : /* fd_tcache_{depth,map_cnt,oldest_laddr,ring_laddr,map_laddr} return
212 : various properties of the tcache. These assume tcache is a valid
213 : local join. Since tcache is used in performance critical code paths,
214 : typical usage will unpack tcache ring and map pointers into registers
215 : and the current value for oldest will be tracked in a register as
216 : well. It is the responsibility of users to update the value at
217 : oldest_laddr at termination to do clean restarts on an in progress
218 : tcache. */
219 :
220 18 : FD_FN_PURE static inline ulong fd_tcache_depth ( fd_tcache_t const * tcache ) { return tcache->depth; }
221 18 : FD_FN_PURE static inline ulong fd_tcache_map_cnt ( fd_tcache_t const * tcache ) { return tcache->map_cnt; }
222 :
223 15 : FD_FN_CONST static inline ulong * fd_tcache_oldest_laddr( fd_tcache_t * tcache ) { return &tcache->oldest; }
224 36 : FD_FN_CONST static inline ulong * fd_tcache_ring_laddr ( fd_tcache_t * tcache ) { return ((ulong *)tcache)+4UL; }
225 36 : FD_FN_PURE static inline ulong * fd_tcache_map_laddr ( fd_tcache_t * tcache ) { return ((ulong *)tcache)+4UL+tcache->depth; }
226 :
227 : /* fd_tcache_tag_is_null returns non-zero if tag is FD_TCACHE_TAG_NULL
228 : and zero otherwise. */
229 :
230 624657195 : FD_FN_CONST static inline int fd_tcache_tag_is_null( ulong tag ) { return tag==FD_TCACHE_TAG_NULL; }
231 :
232 : /* fd_tcache_reset resets a tcache to empty, the same state the tcache
233 : was in at creation. For performance critical usage, does no input
234 : argument checking, uses the unpacked tcache fields and returns the
235 : value to use for oldest. */
236 :
237 : static inline ulong
238 : fd_tcache_reset( ulong * ring,
239 : ulong depth,
240 : ulong * map,
241 42 : ulong map_cnt ) {
242 37752273 : for( ulong ring_idx=0UL; ring_idx<depth; ring_idx++ ) ring[ ring_idx ] = FD_TCACHE_TAG_NULL;
243 151009002 : for( ulong map_idx =0UL; map_idx <map_cnt; map_idx++ ) map [ map_idx ] = FD_TCACHE_TAG_NULL;
244 42 : return 0UL; /* ring_oldest */
245 42 : }
246 :
247 : /* fd_tcache_map_start returns the location in a tcache map to start
248 : probing for tag. Assumes tag is not null and map_cnt is a positive
249 : integer power of 2. Implementation here is optimized for the case
250 : where tags are randomized.
251 :
252 : fd_tcache_map_next returns the next location to probe given the
253 : current location. idx is assumed in [0,map_cnt) and map_cnt is
254 : assumed to be a positive integer power of 2. */
255 :
256 282078957 : FD_FN_CONST static inline ulong fd_tcache_map_start( ulong tag, ulong map_cnt ) { return tag & (map_cnt-1UL); }
257 143293986 : FD_FN_CONST static inline ulong fd_tcache_map_next ( ulong idx, ulong map_cnt ) { return (idx+1UL) & (map_cnt-1UL); }
258 :
259 : /* FD_TCACHE_QUERY searches for tag in a map with map_cnt slots. On
260 : return, map_idx will be in [0,map_cnt) and found will be in [0,1].
261 : If found is 0, map_idx is a suitable location where tag can be
262 : inserted into the map, assuming the map has at most map_cnt-2 entries
263 : currently in it. If found is is 1, map_idx is the index into the map
264 : where tag is currently located (this index will be valid until the
265 : next map remove or map destruction).
266 :
267 : For sparse fill ratios and properly randomized map_starts, this is a
268 : fast O(1).
269 :
270 : This is implemented as a macro to support multiple return values
271 : (found and map_idx), especially as this is used in performance
272 : critical contexts. Similarly, does no input argument checking and
273 : uses the unpacked fields of a tcache. Assumes map is non-NULL, map
274 : is indexed [0,map_cnt), map_cnt is a positive integer power-of-two
275 : and tag is not null.
276 :
277 : This macro is robust (e.g. evaluates its arguments a minimal number
278 : of times) and pure (i.e. found / map_idx will not change between
279 : calls given the same map / map[*] / tag). */
280 :
281 273709149 : #define FD_TCACHE_QUERY( found, map_idx, map, map_cnt, tag ) do { \
282 273709149 : ulong const * _ftq_map = (map); \
283 273709149 : ulong _ftq_map_cnt = (map_cnt); \
284 273709149 : ulong _ftq_tag = (tag); \
285 273709149 : int _ftq_found; \
286 273709149 : ulong _ftq_map_idx = fd_tcache_map_start( _ftq_tag, _ftq_map_cnt ); \
287 329984925 : for(;;) { \
288 329984925 : ulong _ftq_map_tag = _ftq_map[ _ftq_map_idx ]; \
289 329984925 : _ftq_found = (_ftq_tag==_ftq_map_tag); \
290 329984925 : if( FD_LIKELY( _ftq_found | fd_tcache_tag_is_null( _ftq_map_tag ) ) ) break; \
291 329984925 : _ftq_map_idx = fd_tcache_map_next( _ftq_map_idx, _ftq_map_cnt ); \
292 56275776 : } \
293 273709149 : (found) = _ftq_found; \
294 273709149 : (map_idx) = _ftq_map_idx; \
295 273709149 : } while(0)
296 :
297 : /* fd_tcache_remove removes tag in a map with map_cnt slots. For
298 : sparsely populated maps and properly randomized tags, this is a fast
299 : O(1). This does not remove tag from the ring, so oldest may still
300 : take a value that was removed. As this is used in performance
301 : critical contexts, does no input argument checking and uses the
302 : unpacked fields of a tcache. Assumes map is non-NULL, map is indexed
303 : [0,map_cnt) and map_cnt is a positive integer power-of-two. Does
304 : nothing if tag is null or if tag is not currently in the map. */
305 :
306 : FD_FN_UNUSED static void /* Work around -Winline */
307 : fd_tcache_remove( ulong * map,
308 : ulong map_cnt,
309 66065508 : ulong tag ) {
310 :
311 : /* If tag is a null tag (e.g. less than depth unique tags have been
312 : inserted into the tcache), nothing to do. */
313 :
314 66065508 : if( FD_LIKELY( !fd_tcache_tag_is_null( tag ) ) ) {
315 :
316 : /* Look up tag in the tcache. If not found, nothing to do. (This
317 : should always succeed at this point in typical tcache usage but we
318 : keep the check for paranoia / minimize risk of silent corruption.) */
319 :
320 53482584 : int found;
321 53482584 : ulong slot;
322 53482584 : FD_TCACHE_QUERY( found, slot, map, map_cnt, tag );
323 53482584 : if( FD_LIKELY( found ) ) {
324 :
325 : /* slot contains the tag to remove. Remove it. See util/fd_map*
326 : for details how this works. */
327 :
328 69007278 : for(;;) {
329 69007278 : map[ slot ] = FD_TCACHE_TAG_NULL;
330 69007278 : ulong hole = slot;
331 87018210 : for(;;) {
332 87018210 : slot = fd_tcache_map_next( slot, map_cnt );
333 87018210 : tag = map[ slot ];
334 87018210 : if( FD_LIKELY( fd_tcache_tag_is_null( tag ) ) ) return;
335 33535626 : ulong start = fd_tcache_map_start( tag, map_cnt );
336 33535626 : if( !(((hole<start) & (start<=slot)) | ((hole>slot) & ((hole<start) | (start<=slot)))) ) break;
337 33535626 : }
338 15524694 : map[ hole ] = tag;
339 15524694 : }
340 53482584 : }
341 53482584 : }
342 66065508 : }
343 :
344 : /* FD_TCACHE_INSERT inserts tag into the tcache in fast O(1) operations.
345 : On return, if dup is non-zero, tag is already in the tcache and the
346 : tcache in unchanged. If dup is zero, tag was inserted and, if the
347 : tcache was full (e.g. has had depth values previously inserted), the
348 : oldest tag in the tcache will have been evicted.
349 :
350 : This is implemented as a macro to support multiple return values (dup
351 : and oldest in particular), especially as this is used in performance
352 : critical contexts. Similarly, does no input argument checking.
353 : Assumes oldest is in [0,depth), ring is non-NULL and indexed
354 : [0,depth), depth is positive, map is non-NULL, map is indexed
355 : [0,map_cnt), map_cnt is an integer power-of-two of at least depth+2,
356 : and tag is not null.
357 :
358 : Note that, given a duplicate tag, insertion is _not_ LRU-like (i.e.
359 : does not make the duplicate tag the most recently used tag in the
360 : tcache). This is usually the desired behavior in deduplication and
361 : it is cheaper as well. LRU-like behavior is still possible with
362 : fast-ish O(1) and more memory footprint. Specifically, the map
363 : entries would store the location in the ring structure and ring
364 : structure would need to be made a doubly linked list. On a duplicate
365 : tag, insert would move the duplicate tag from its current location in
366 : the ring (given from the query itself) to one immediately before the
367 : oldest tag in the ring and update the map entry (and similar for
368 : unique tag insert).
369 :
370 : This macro is robust (e.g. evaluates its arguments a minimal number
371 : of times). */
372 :
373 106980342 : #define FD_TCACHE_INSERT( dup, oldest, ring, depth, map, map_cnt, tag ) do { \
374 106980342 : ulong _fti_oldest = (oldest); \
375 106980342 : ulong * _fti_ring = (ring); \
376 106980342 : ulong _fti_depth = (depth); \
377 106980342 : ulong * _fti_map = (map); \
378 106980342 : ulong _fti_map_cnt = (map_cnt); \
379 106980342 : ulong _fti_tag = (tag); \
380 106980342 : \
381 106980342 : int _fti_dup; \
382 106980342 : ulong _fti_map_idx; \
383 106980342 : FD_TCACHE_QUERY( _fti_dup, _fti_map_idx, _fti_map, _fti_map_cnt, _fti_tag ); \
384 106980342 : if( !_fti_dup ) { /* application dependent branch probability */ \
385 53482599 : \
386 53482599 : /* Insert tag into the map (assumes depth <= map_cnt-2) */ \
387 53482599 : /* map has at most map_cnt-2 entries here */ \
388 53482599 : _fti_map[ _fti_map_idx ] = _fti_tag; \
389 53482599 : /* map has at most map_cnt-1 entries here */ \
390 53482599 : \
391 53482599 : /* Evict oldest tag / insert tag into ring */ \
392 53482599 : ulong _fti_tag_oldest = _fti_ring[ _fti_oldest ]; \
393 53482599 : _fti_ring[ _fti_oldest ] = _fti_tag; \
394 53482599 : _fti_oldest++; \
395 53482599 : if( _fti_oldest >= _fti_depth ) _fti_oldest = 0UL; /* cmov */ \
396 53482599 : \
397 53482599 : /* Remove oldest tag from map */ \
398 53482599 : /* _fti_tag_oldest will be null at startup but remove handles that case */ \
399 53482599 : fd_tcache_remove( _fti_map, _fti_map_cnt, _fti_tag_oldest ); \
400 53482599 : /* Map has at most map_cnt-2 entries here */ \
401 53482599 : } \
402 106980342 : (dup) = _fti_dup; \
403 106980342 : (oldest) = _fti_oldest; \
404 106980342 : } while(0)
405 :
406 : FD_PROTOTYPES_END
407 :
408 : #endif /* HEADER_fd_src_tango_tcache_fd_tcache_h */
409 :
|