#ifndef HEADER_fd_src_tango_lru_fd_lru_h
#define HEADER_fd_src_tango_lru_fd_lru_h

/* fd_lru_t is very similar to fd_tcache_t. The main differences are:
   1. instead of a ring, it uses a doubly linked list.
   2. instead of a map of tag -> index, it uses a map of tag -> node.

   Keeping in mind these differences, the API and documentation are
   otherwise based on `fd_tcache.h`.

   A fd_lru_t is a cache of the most recently observed unique 64-bit
   tags. It is useful for, among other things, deduplication of traffic
   based on a thumbprint / hash / signature. It makes no demands on the
   application's tagging scheme except that there be a "null" tag (a
   tag value that will never occur).

   The amount of history ("depth") of a lru is theoretically unlimited
   but the implementation below was optimized for large-ish depths
   (e.g. millions) on platforms where memory footprint is reasonably
   cheap. The implementation was also optimized for situations where
   heavy duplication is common and temporally localized. Lastly, the
   implementation is optimized for the case that tags behave like IID
   random values (e.g. a tag is a hash of a packet payload).

   It is strongly recommended that the lru be backed by a single NUMA
   page (e.g. in a gigantic page backed workspace) to avoid TLB
   thrashing if used in performance critical contexts. */
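
/* A minimal usage sketch (illustrative, not normative: assumes mymem
   is a caller-provided region with fd_lru_align() alignment and
   fd_lru_footprint( depth, 0UL ) footprint, e.g. from a workspace
   allocation, and that tag is never FD_LRU_TAG_NULL):

     ulong      depth = 1048575UL;  ... tags of history
     fd_lru_t * lru   = fd_lru_join( fd_lru_new( mymem, depth, 0UL ) );

     int         dup;
     fd_list_t * evicted = fd_lru_upsert( lru, tag, &dup );
     ... dup is non-zero if tag was already cached; evicted is
     ... non-NULL if inserting tag evicted the oldest tag from a
     ... full lru

     fd_lru_delete( fd_lru_leave( lru ) );
*/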

#include "../fd_tango_base.h"
#include "fd_list.h"

/* FD_LRU_ALIGN specifies the alignment needed for a lru with depth
   history and a tag key-only map with map_cnt slots (the corresponding
   footprint is given by fd_lru_footprint below). ALIGN is at least
   double cache line to mitigate various kinds of false sharing. depth
   and map_cnt are assumed to be valid (i.e. depth is positive, map_cnt
   is an integer power of 2 of at least depth+2 and the combination
   will not require a footprint larger than ULONG_MAX). */
#define FD_LRU_ALIGN ( 128UL )

#define FD_LRU_TAG_NULL ( 0UL )

#define FD_LRU_SPARSE_DEFAULT ( 2 )

#define FD_LRU_MAGIC ( 0xf17eda2c3712C0UL ) /* firedancer lru ver 0 */

struct __attribute( ( aligned( FD_LRU_ALIGN ) ) ) fd_lru_private {
  ulong magic;    /* ==FD_LRU_MAGIC */
  ulong depth;    /* The lru will maintain a history of the most recent depth tags */
  ulong free_top; /* Index, relative to the sentinel, of the top of the internal free list (0 when the lru is full) */
  ulong map_cnt;  /* Number of slots in the map */

  /* depth+1 fd_list_t (doubly linked list with sentinel):

     After the lru has started up (i.e. at least depth unique tags
     have been inserted), the list head will contain the oldest tag in
     the lru. This is a circular doubly linked list with a sentinel:
     the entry before the sentinel (cyclic) is the newest tag in the
     lru and the entry after the head (cyclic) is the 2nd oldest tag
     in the lru. During startup (the first depth-1 unique tags
     inserted), not-yet-used nodes sit on the internal free list. In
     high performance operation, only the nodes around the head and
     tail will be in active use / occupy local cache and the access
     pattern will be highly sequential. */

  /* map_cnt fd_list_t * (map):

     This is a sparse linear probed key-only map of tags currently in
     the lru. Since it is sparse, probe collisions are rare (and thus
     the branches involved in various cache operations are highly
     predictable). While the sparsity makes the map reasonably
     inefficient from a memory footprint point of view, memory
     footprint is quite cheap in practice and the actual cache
     utilization is quite mild. Specifically, if tag duplication is
     rare, only the slots around the newest and oldest tags will be in
     use typically. Further, if any tag duplication is temporally
     clustered (as is commonly the case), duplicate tags will be a
     cache hit against the (still cached because of recent use)
     original insertion.

     In the typical case of randomized tags, this randomly accesses the
     map aggressively. The NUMA and TLB thrashing impacts of that can
     be reduced / eliminated by backing the lru with a huge / gigantic
     page shared workspace on a NUMA node nearby the threads using the
     lru. */

  /* Padding to FD_LRU_ALIGN */
};

typedef struct fd_lru_private fd_lru_t;

FD_PROTOTYPES_BEGIN

/* fd_lru_map_cnt_default returns the default map_cnt to use for the
   given depth. Returns 0 if the depth is invalid / results in a
   map_cnt larger than ULONG_MAX. */

FD_FN_CONST static inline ulong
fd_lru_map_cnt_default( ulong depth ) {

  if ( FD_UNLIKELY( !depth ) ) return 0UL; /* depth must be positive */

  if ( FD_UNLIKELY( depth == ULONG_MAX ) ) return 0UL; /* overflow */
  int lg_map_cnt = fd_ulong_find_msb( depth + 1UL ) + FD_LRU_SPARSE_DEFAULT; /* no overflow */
  if ( FD_UNLIKELY( lg_map_cnt > 63 ) ) return 0UL; /* depth too large */

  /* At this point:

     2^(lg_map_cnt-s) <= depth+n < 2^(lg_map_cnt-s+1)

     where s is SPARSE_DEFAULT > 0 and n is 1.

     map_cnt/2^s - n <= depth < map_cnt/2^(s-1) - n
     1/2^s - n/map_cnt <= depth/map_cnt < 1/2^(s-1) - n/map_cnt

     For asymptotically large depth / map_cnt, the worst case map fill
     ratio will asymptote to something in ~( 1/2^s, 1/2^(s-1) ).
     Flipping this around, we also have:

     -> 2^(s-1) (depth+n) < map_cnt <= 2^s (depth+n)

     In the worst case, s==1, depth+1 < map_cnt -> map_cnt>=depth+2. */

  return 1UL << lg_map_cnt;
}
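
/* For example (a worked instance of the bounds above, not an
   additional requirement): depth==6 gives fd_ulong_find_msb( 7UL )==2,
   lg_map_cnt==2+FD_LRU_SPARSE_DEFAULT==4 and thus map_cnt==16, for a
   worst case fill ratio of 6/16. */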

/* fd_lru_{align,footprint} return the required alignment and footprint
   of a memory region suitable for use as a lru. fd_lru_align returns
   FD_LRU_ALIGN. For fd_lru_footprint, a map_cnt of 0 indicates to use
   fd_lru_map_cnt_default above. If depth is not positive, map_cnt is
   not a power of 2 of at least depth+2 and/or the required footprint
   would be larger than ULONG_MAX, footprint will silently return 0
   (and thus can be used by the caller to validate the lru
   configuration parameters). Otherwise, it returns the footprint for
   the actual value of map_cnt used. */

FD_FN_CONST ulong
fd_lru_align( void );

FD_FN_CONST ulong
fd_lru_footprint( ulong depth, ulong map_cnt );

/* fd_lru_new formats an unused memory region for use as a lru. shmem
   is a non-NULL pointer to this region in the local address space with
   the required footprint and alignment. depth is the number of unique
   tags that can be stored in the lru and should be positive (positive
   integer powers of 2 minus 2 have good memory footprint Feng Shui and
   positive integer powers of 2 minus 1 have good computational
   efficiency Feng Shui). map_cnt is the number of slots to use for the
   map. A map_cnt of 0 indicates to use fd_lru_map_cnt_default above.

   Returns shmem (and the memory region it points to will be formatted
   as a lru, caller is not joined, lru will be empty) on success and
   NULL on failure (logs details). Reasons for failure include
   obviously bad shmem, bad depth or bad map_cnt. */

void *
fd_lru_new( void * shmem, ulong depth, ulong map_cnt );

/* fd_lru_join joins the caller to the lru. _lru points to the first
   byte of the memory region backing the lru in the caller's address
   space.

   Returns a pointer in the local address space to the lru's entries on
   success (this is not necessarily just a cast of _lru) and NULL on
   failure (logs details). Reasons for failure include _lru is
   obviously not a pointer to a memory region holding a lru. Every
   successful join should have a matching leave. The lifetime of the
   join is until the matching leave or the thread group is terminated. */

fd_lru_t *
fd_lru_join( void * _lru );

/* fd_lru_leave leaves a current local join. Returns a pointer to the
   underlying shared memory region on success (this is not necessarily
   just a cast of _lru) and NULL on failure (logs details). Reasons
   for failure include lru is NULL. */

void *
fd_lru_leave( fd_lru_t * lru );

/* fd_lru_delete unformats a memory region used as a lru. Assumes
   nobody is joined to the region. Returns a pointer to the underlying
   shared memory region or NULL if used obviously in error (e.g. _lru
   is obviously not a lru ... logs details). The ownership of the
   memory region is transferred to the caller. */

void *
fd_lru_delete( void * _lru );

/* fd_lru_{depth,free_top,map_cnt,list_laddr,map_laddr} return various
   properties of the lru. These assume lru is a valid local join.
   Since lru is used in performance critical code paths, typical usage
   will unpack the lru list and map pointers into registers and track
   the current value of free_top in a register as well. It is the
   responsibility of users to write back the free_top value at
   termination to allow clean restarts of an in-progress lru. */

FD_FN_PURE static inline ulong
fd_lru_depth( fd_lru_t const * lru ) {
  return lru->depth;
}
FD_FN_PURE static inline ulong
fd_lru_free_top( fd_lru_t const * lru ) {
  return lru->free_top;
}
FD_FN_PURE static inline ulong
fd_lru_map_cnt( fd_lru_t const * lru ) {
  return lru->map_cnt;
}

FD_FN_CONST static inline fd_list_t *
fd_lru_list_laddr( fd_lru_t * lru ) {
  return ( (fd_list_t *)fd_type_pun( lru ) ) + 1UL;
} /* both the metadata header and a fd_list_t are 32 bytes */

FD_FN_PURE static inline fd_list_t **
fd_lru_map_laddr( fd_lru_t * lru ) {
  return (fd_list_t **)fd_type_pun( ( (fd_list_t *)lru ) + 1UL /*metadata*/ + 1UL /*sentinel*/ +
                                    lru->depth );
}
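
/* The accessors above imply the following memory layout (a sketch
   reconstructed from the pointer arithmetic, with the metadata header
   and fd_list_t both 32 bytes):

     [ metadata : 1 fd_list_t sized header ]
     [ sentinel : 1 fd_list_t              ] <- fd_lru_list_laddr
     [ nodes    : depth fd_list_t          ]
     [ map      : map_cnt fd_list_t *      ] <- fd_lru_map_laddr
*/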

/* fd_lru_tag_is_null returns non-zero if tag is FD_LRU_TAG_NULL and
   zero otherwise. */

FD_FN_CONST static inline int
fd_lru_tag_is_null( ulong tag ) {
  return tag == FD_LRU_TAG_NULL;
}

/* fd_lru_reset resets a lru to empty, the same state the lru was in at
   creation. For performance critical usage, does no input argument
   checking, uses the unpacked lru fields and returns the value to use
   for the oldest index. Note that, per the fd_tcache.h API this file
   is based on, this operates on raw tag arrays rather than on the
   fd_list_t structures used elsewhere in this header. */

static inline ulong
fd_lru_reset( ulong * list, ulong depth, ulong * map, ulong map_cnt ) {
  for ( ulong list_idx = 0UL; list_idx < depth; list_idx++ )
    list[list_idx] = FD_LRU_TAG_NULL;
  for ( ulong map_idx = 0UL; map_idx < map_cnt; map_idx++ )
    map[map_idx] = FD_LRU_TAG_NULL;
  return 0UL; /* list_oldest */
}

/* fd_lru_map_start returns the location in a lru map to start probing
   for tag. Assumes tag is not null and map_cnt is a positive integer
   power of 2. Implementation here is optimized for the case where
   tags are randomized.

   fd_lru_map_next returns the next location to probe given the current
   location. idx is assumed in [0,map_cnt) and map_cnt is assumed to
   be a positive integer power of 2. */

FD_FN_CONST static inline ulong
fd_lru_map_start( ulong tag, ulong map_cnt ) {
  return tag & ( map_cnt - 1UL );
}
FD_FN_CONST static inline ulong
fd_lru_map_next( ulong idx, ulong map_cnt ) {
  return ( idx + 1UL ) & ( map_cnt - 1UL );
}

/* FD_LRU_QUERY searches for tag in a map with map_cnt slots. On
   return, map_idx will be in [0,map_cnt) and found will be in [0,1].
   If found is 0, map_idx is a suitable location where tag can be
   inserted into the map, assuming the map has at most map_cnt-2
   entries currently in it. If found is 1, map_idx is the index into
   the map where tag is currently located (this index will be valid
   until the next map remove or map destruction).

   For sparse fill ratios and properly randomized map_starts, this is a
   fast O(1).

   This is implemented as a macro to support multiple return values
   (found and map_idx), especially as this is used in performance
   critical contexts. Similarly, does no input argument checking and
   uses the unpacked fields of a lru. Assumes map is non-NULL, map is
   indexed [0,map_cnt), map_cnt is a positive integer power-of-two and
   tag is not null.

   This macro is robust (e.g. evaluates its arguments a minimal number
   of times) and pure (i.e. found / map_idx will not change between
   calls given the same map / map[*] / tag). */

#define FD_LRU_QUERY( found, map_idx, map, map_cnt, tag )                              \
  do {                                                                                 \
    fd_list_t * const * _flq_map     = ( map );                                        \
    ulong               _flq_map_cnt = ( map_cnt );                                    \
    ulong               _flq_tag     = ( tag );                                        \
    int                 _flq_found   = 0;                                              \
    ulong               _flq_map_idx = fd_lru_map_start( _flq_tag, _flq_map_cnt );     \
    for ( ;; ) {                                                                       \
      fd_list_t * _flq_map_slot = _flq_map[_flq_map_idx];                              \
      if ( _flq_map_slot == NULL ) break;                                              \
      _flq_found = ( _flq_tag == _flq_map_slot->tag );                                 \
      if ( FD_LIKELY( _flq_found | fd_lru_tag_is_null( _flq_map_slot->tag ) ) ) break; \
      _flq_map_idx = fd_lru_map_next( _flq_map_idx, _flq_map_cnt );                    \
    }                                                                                  \
    ( found )   = _flq_found;                                                          \
    ( map_idx ) = _flq_map_idx;                                                        \
  } while ( 0 )
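
/* A minimal FD_LRU_QUERY usage sketch (lru assumed to be a valid local
   join and tag assumed non-null; this mirrors how fd_lru_remove and
   fd_lru_upsert below use the macro):

     int          found;
     ulong        map_idx;
     fd_list_t ** map = fd_lru_map_laddr( lru );
     FD_LRU_QUERY( found, map_idx, map, lru->map_cnt, tag );
     if ( found ) ... map[ map_idx ] is tag's list node ...
     else         ... map_idx is a suitable insertion slot ...
*/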

/* fd_lru_remove removes tag from a map with map_cnt slots. For
   sparsely populated maps and properly randomized tags, this is a fast
   O(1). This does not remove tag from the list, so the user is
   responsible for removing the value from the list. As this is used
   in performance critical contexts, does no input argument checking
   and uses the unpacked fields of a lru. Assumes map is non-NULL, map
   is indexed [0,map_cnt) and map_cnt is a positive integer
   power-of-two. Does nothing if tag is null or if tag is not
   currently in the map. */

FD_FN_UNUSED static void /* Work around -Winline */
fd_lru_remove( fd_lru_t * lru, ulong tag ) {

  /* Look up tag in the lru. If not found, nothing to do. (This
     should always succeed at this point in typical lru usage but we
     keep the check for paranoia / minimize risk of silent corruption.) */

  int          found;
  ulong        slot;
  fd_list_t ** map = fd_lru_map_laddr( lru );
  FD_LRU_QUERY( found, slot, map, lru->map_cnt, tag );
  if ( FD_LIKELY( found ) ) {

    /* slot contains the tag to remove. Remove it. See util/fd_map*
       for details how this works. */

    for ( ;; ) {
      map[slot] = NULL;
      ulong       hole = slot;
      fd_list_t * next;
      for ( ;; ) {
        slot = fd_lru_map_next( slot, lru->map_cnt );
        next = map[slot];
        if ( FD_LIKELY( next == NULL || fd_lru_tag_is_null( next->tag ) ) ) return;
        /* next fills the hole only if doing so does not break next's
           probe sequence, i.e. only if next's home slot is not
           cyclically in (hole,slot]. Note the home slot is derived
           from next's own tag, not from the removed tag. */
        ulong start = fd_lru_map_start( next->tag, lru->map_cnt );
        if ( !( ( ( hole < start ) & ( start <= slot ) ) |
                ( ( hole > slot ) & ( ( hole < start ) | ( start <= slot ) ) ) ) )
          break;
      }
      map[hole] = next;
    }
  }
}

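/* fd_lru_list_acquire pops the top node off the lru's internal free
   list and returns it (assumes the free list is non-empty, i.e.
   lru->free_top is non-zero). fd_lru_list_release pushes curr onto
   the free list. free_top holds the index of the top free node
   relative to the sentinel; 0 (the sentinel itself) means the free
   list is empty, i.e. the lru is full. */
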
static inline fd_list_t *
fd_lru_list_acquire( fd_lru_t * lru ) {
  fd_list_t * sentinel = fd_lru_list_laddr( lru );
  fd_list_t * free_top = sentinel + lru->free_top;
  lru->free_top        = free_top->next;
  return fd_list_remove( free_top );
}

/* caller is responsible for ensuring curr has already been removed
   from the list */
static inline void
fd_lru_list_release( fd_lru_t * lru, fd_list_t * curr ) {
  fd_list_t * sentinel = fd_lru_list_laddr( lru );
  fd_list_t * free_top = sentinel + lru->free_top;
  fd_list_insert( fd_list_prev( free_top ), curr );
  lru->free_top = curr->curr;
}

static inline fd_list_t *
fd_lru_list_head( fd_lru_t * lru ) {
  return fd_list_next( fd_lru_list_laddr( lru ) );
}

static inline fd_list_t *
fd_lru_list_tail( fd_lru_t * lru ) {
  return fd_list_prev( fd_lru_list_laddr( lru ) + lru->free_top );
}

/* fd_lru_upsert upserts tag into the lru in fast O(1) operations. On
   return, if tag is already in the lru, the tag will have been moved
   to the most recent position (tail). If tag is not in the lru, tag
   will have been inserted and, if the lru was full (i.e. already
   contained depth tags), the oldest tag in the lru will have been
   evicted.

   Returns the evicted element (if any) or NULL if no element was
   evicted. Note that the evicted node is recycled for the new
   insertion, so by return its tag field already holds the new tag;
   treat a non-NULL return as an eviction signal.

   Assumes lru is a valid local join, depth is positive, map_cnt is an
   integer power-of-two of at least depth+2, and tag is not null.

   Map entries store the location of a tag in the list structure. On a
   duplicate tag, upsert moves the duplicate from its current location
   in the list (given by the query itself) to the tail of the list,
   leaving the map entry pointing at the relinked node (and similar
   for a unique tag insert). */

static inline fd_list_t *
fd_lru_upsert( fd_lru_t * lru, ulong tag, int * dup ) {
  ulong        map_idx;
  fd_list_t ** map = fd_lru_map_laddr( lru );
  int          found;
  FD_LRU_QUERY( found, map_idx, map, lru->map_cnt, tag );
  *dup = found;
  fd_list_t * evicted = NULL;

  /* LRU insert */
  if ( !*dup ) { /* application dependent branch probability */
    /* Evict oldest tag / insert tag into list */
    if ( FD_LIKELY( lru->free_top == 0 ) ) { /* free list empty, i.e. lru is full */
      fd_list_t * remove = fd_list_remove( fd_lru_list_head( lru ) );
      fd_lru_list_release( lru, remove );
      fd_lru_remove( lru, remove->tag );
      evicted = remove;
    }
    fd_list_t * insert = fd_lru_list_acquire( lru );
    insert->tag        = tag;
    fd_list_insert( fd_lru_list_tail( lru ), insert );

    /* Insert tag into the map (assumes depth <= map_cnt-2) */
    /* map has at most map_cnt-2 entries here */
    map[map_idx] = insert;
    /* map has at most map_cnt-1 entries here */

    /* LRU update */
  } else {
    /* Relink the duplicate's node at the tail (most recent position);
       the map entry is unchanged. */
    fd_list_t * next = fd_list_remove( map[map_idx] );
    fd_list_t * tail = fd_lru_list_tail( lru );
    fd_list_insert( tail, next );
  }
  return evicted;
}
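
/* A minimal eviction-handling sketch for fd_lru_upsert (illustrative;
   sig is an application-provided non-null tag):

     int         dup;
     fd_list_t * evicted = fd_lru_upsert( lru, sig, &dup );
     if ( dup )          ... sig was already cached, now most recent ...
     else if ( evicted ) ... lru was full and its oldest tag was
                         ... dropped to make room for sig (the returned
                         ... node itself has been recycled for sig)
     else                ... sig inserted, lru not yet full ...
*/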

FD_PROTOTYPES_END

#endif /* HEADER_fd_src_tango_lru_fd_lru_h */