LCOV - code coverage report
Current view: top level - tango/lru - fd_lru.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 118 125 94.4 %
Date: 2024-11-13 11:58:15 Functions: 17 32 53.1 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_tango_lru_fd_lru_h
       2             : #define HEADER_fd_src_tango_lru_fd_lru_h
       3             : 
       4             : /* fd_lru_t is very similar to fd_tcache_t. The main differences are:
       5             :       1. instead of a ring, it uses a doubly linked list.
       6             :       2. instead of a map of tag -> index, it uses a map of tag -> node.
       7             : 
       8             :    Keeping in mind these differences, the API and documentation is otherwise
       9             :    based on `fd_tcache.h`.
      10             : 
      11             :    A fd_lru_t is a cache of the most recently observed unique 64-bit
      12             :    tags.  It is useful for, among other things, deduplication of traffic
      13             :    based on a thumbprint / hash / signature.  Makes no demands on the
      14             :    application on tagging scheme except that there be a "null" tag (a
      15             :    tag value that will never occur).
      16             : 
      17             :    The amount of history ("depth") of a lru is theoretically
      18             :    unlimited but the implementation below was optimized for large-ish
      19             :    depths (e.g. millions) on platforms where memory footprint is
   reasonably cheap.  The implementation was also optimized for
   situations where heavy duplication is common and temporally
      22             :    localized.  Lastly, the implementation is optimized for the case that
      23             :    tags behave like IID random values (e.g. a tag is a hash of a packet
      24             :    payload).
      25             : 
   It is strongly recommended that the lru be backed by a single NUMA
      27             :    page (e.g. in a gigantic page backed workspace) to avoid TLB
      28             :    thrashing if used in performance critical contexts. */
      29             : 
      30             : #include "../fd_tango_base.h"
      31             : #include "fd_list.h"
      32             : 
/* FD_LRU_ALIGN specifies the alignment needed for a lru with depth
   history and a tag key-only map with map_cnt slots.  ALIGN is at
   least double cache line to mitigate various kinds of false sharing.
   depth and map_cnt are assumed to be valid (i.e. depth is positive,
   map_cnt is an integer power of 2 of at least depth+2 and the
   combination will not require a footprint larger than ULONG_MAX). */
#define FD_LRU_ALIGN ( 128UL ) /* double cache line; see note above */

/* Reserved "null" tag value; callers must never insert it. */
#define FD_LRU_TAG_NULL ( 0UL )

/* lg2 of the default map sparsity: the default map_cnt is roughly
   2^FD_LRU_SPARSE_DEFAULT times larger than depth (see
   fd_lru_map_cnt_default below). */
#define FD_LRU_SPARSE_DEFAULT ( 2 )

#define FD_LRU_MAGIC ( 0xf17eda2c3712C0UL ) /* firedancer lru ver 0 */
      47             : 
/* Private lru metadata header.  The doubly linked list nodes and the
   tag map are stored immediately after this header in the same memory
   region (see fd_lru_list_laddr / fd_lru_map_laddr below). */
struct __attribute( ( aligned( FD_LRU_ALIGN ) ) ) fd_lru_private {
  ulong magic; /* ==FD_LRU_MAGIC */
  ulong depth; /* The lru will maintain a history of the most recent depth tags */
  ulong free_top; /* index (in fd_list_t units from the list base) of the top of the
                     free list; 0 (the sentinel) when no free nodes remain */
  ulong map_cnt; /* number of slots in the tag map; positive integer power of 2 */

  /* depth fd_list_t (doubly linked list):

     This is a circular doubly linked list with a sentinel.  Once the
     lru has started up (i.e. at least depth unique tags have been
     inserted), the entry after the sentinel (cyclic) is the oldest tag
     in the lru and the entry before the free list top (cyclic) is the
     newest tag.  In high performance operation, only the nodes around
     the head and tail will be in active use / occupy local cache and
     the access pattern will be highly sequential. */

  /* map_cnt fd_list_t * (map):

     This is a sparse linear probed key-only map of tags currently in
     the lru; each occupied slot points at the list node holding that
     tag.  Since it is sparse, probe collisions are rare (and thus the
     branches involved in various cache operations are highly
     predictable).  While the sparsity makes the map reasonably
     inefficient from a memory footprint point of view, memory footprint
     is quite cheap in practice and the actual cache utilization is
     quite mild.  Specifically, if tag duplication is rare, only the
     slots around the newest and oldest tags will be in use typically.
     Further, if any tag duplication is temporally clustered (as is
     commonly the case), duplicate tags will be a cache hit against the
     (still cached because of recent use) original insertion.

     In the typical case of randomized tags, this randomly accesses the
     map aggressively.  The NUMA and TLB thrashing impacts of that can
     be reduced / eliminated by backing the lru with a huge / gigantic
     page shared workspace on a NUMA node nearby the threads using the
     lru. */

  /* Padding to FD_LRU_ALIGN */
};

typedef struct fd_lru_private fd_lru_t;
      90             : 
      91             : FD_PROTOTYPES_BEGIN
      92             : 
      93             : /* fd_lru_map_cnt_default returns the default map_cnt to use for the
      94             :    given depth.  Returns 0 if the depth is invalid / results in a
      95             :    map_cnt larger than ULONG_MAX. */
      96             : 
      97             : FD_FN_CONST static inline ulong
      98          27 : fd_lru_map_cnt_default( ulong depth ) {
      99             : 
     100          27 :   if ( FD_UNLIKELY( !depth ) ) return 0UL; /* depth must be positive */
     101             : 
     102          24 :   if ( FD_UNLIKELY( depth == ULONG_MAX ) ) return 0UL;                       /* overflow */
     103          24 :   int lg_map_cnt = fd_ulong_find_msb( depth + 1UL ) + FD_LRU_SPARSE_DEFAULT; /* no overflow */
     104          24 :   if ( FD_UNLIKELY( lg_map_cnt > 63 ) ) return 0UL;                          /* depth too large */
     105             : 
     106             :   /* At this point:
     107             : 
     108             :        2^(lg_map_cnt-s) <= depth+n < 2^(lg_map_cnt-s+1)
     109             : 
     110             :      where s is SPARSE_DEFAULT > 0 and n is 1.
     111             : 
     112             :        map_cnt/2^s - n <= depth < map_cnt/2^(s-1) - n
     113             :        1/2^s - n/map_cnt <= depth/map_cnt < 1/2^(s-1) - n/map_cnt
     114             : 
     115             :      For asymptotically large depth / map_cnt, the worst case map fill
     116             :      ratio will asymptote to something in ~( 1/2^s, 1/2^(s-1) ).
     117             :      Flipping this around, we also have:
     118             : 
     119             :        -> 2^(s-1) (depth+n) < map_cnt <= 2^s (depth+n)
     120             : 
     121             :      In the worst case, s==1, depth+1 < map_cnt -> map_cnt>=depth+2. */
     122             : 
     123          24 :   return 1UL << lg_map_cnt;
     124          24 : }
     125             : 
/* fd_lru_{align,footprint} return the required alignment and
   footprint of a memory region suitable for use as a lru.
   fd_lru_align returns FD_LRU_ALIGN.  For fd_lru_footprint, a
   map_cnt of 0 indicates to use fd_lru_map_cnt_default above.  If
   depth is not positive, map_cnt is not a power of 2 of at least
   depth+2 and/or the required footprint would be larger than ULONG_MAX,
   footprint will silently return 0 (and thus can be used by the caller
   to validate the lru configuration parameters).  Otherwise, it
   returns the footprint for the actual value of map_cnt used. */

FD_FN_CONST ulong
fd_lru_align( void );

FD_FN_CONST ulong
fd_lru_footprint( ulong depth, ulong map_cnt );

/* fd_lru_new formats an unused memory region for use as a lru.
   shmem is a non-NULL pointer to this region in the local address space
   with the required footprint and alignment.  depth is the number of
   unique tags that can be stored in the lru and should be positive
   (positive integer powers of 2 minus 2 have good memory footprint Feng
   Shui and positive integer powers of 2 minus 1 have good computational
   efficiency Feng Shui).  map_cnt is the number of slots to use for the
   map.  A map_cnt of 0 indicates to use fd_lru_map_cnt_default above.

   Returns shmem (and the memory region it points to will be formatted
   as a lru, caller is not joined, lru will be empty) on success
   and NULL on failure (logs details).  Reasons for failure include
   obviously bad shmem, bad depth or bad map_cnt. */

void *
fd_lru_new( void * shmem, ulong depth, ulong map_cnt );

/* fd_lru_join joins the caller to the lru.  _lru points to the
   first byte of the memory region backing the lru in the caller's
   address space.

   Returns a pointer in the local address space to the lru on success
   (this is not necessarily just a cast of _lru) and NULL on failure
   (logs details).  Reasons for failure are that _lru is obviously not
   a pointer to a memory region holding a lru.  Every successful join
   should have a matching leave.  The lifetime of the join is until the
   matching leave or the thread group is terminated. */

fd_lru_t *
fd_lru_join( void * _lru );

/* fd_lru_leave leaves a current local join.  Returns a pointer to
   the underlying shared memory region on success (this is not
   necessarily just a cast of _lru) and NULL on failure (logs
   details).  Reasons for failure include lru is NULL. */

void *
fd_lru_leave( fd_lru_t * lru );

/* fd_lru_delete unformats a memory region used as a lru.  Assumes
   nobody is joined to the region.  Returns a pointer to the underlying
   shared memory region or NULL if used obviously in error (e.g.
   _lru is obviously not a lru ... logs details).  The ownership
   of the memory region is transferred to the caller. */

void *
fd_lru_delete( void * _lru );
     189             : 
/* fd_lru_{depth,free_top,map_cnt,list_laddr,map_laddr} return
   various properties of the lru.  These assume lru is a valid
   local join.  Since lru is used in performance critical code paths,
   typical usage will unpack the lru list and map pointers into
   registers. */
     198             : 
     199             : FD_FN_PURE static inline ulong
     200           3 : fd_lru_depth( fd_lru_t const * lru ) {
     201           3 :   return lru->depth;
     202           3 : }
     203             : FD_FN_PURE static inline ulong
     204      196611 : fd_lru_free_top( fd_lru_t const * lru ) {
     205      196611 :   return lru->free_top;
     206      196611 : }
     207             : FD_FN_PURE static inline ulong
     208           3 : fd_lru_map_cnt( fd_lru_t const * lru ) {
     209           3 :   return lru->map_cnt;
     210           3 : }
     211             : 
/* fd_lru_list_laddr returns a pointer in the caller's address space to
   the lru's list region (the sentinel node, immediately followed by the
   depth entry nodes).  The metadata header occupies the space of one
   fd_list_t, hence the +1UL.  NOTE(review): this relies on the four
   ulong metadata fields (32 bytes) matching sizeof(fd_list_t) even
   though fd_lru_t itself is aligned to FD_LRU_ALIGN (128) -- confirm
   this layout against fd_lru_footprint / fd_lru_new. */
FD_FN_CONST static inline fd_list_t *
fd_lru_list_laddr( fd_lru_t * lru ) {
  return ( (fd_list_t *)fd_type_pun( lru ) ) + 1UL;
} /* both metadata and fd_list_t are 32-byte */

/* fd_lru_map_laddr returns a pointer in the caller's address space to
   the lru's map region: map_cnt fd_list_t * slots located after the
   metadata, the list sentinel and the depth entry nodes. */
FD_FN_PURE static inline fd_list_t **
fd_lru_map_laddr( fd_lru_t * lru ) {
  return (fd_list_t **)fd_type_pun( ( (fd_list_t *)lru ) + 1UL /*metadata*/ + 1UL /*sentinel*/ +
                                    lru->depth );
}
     222             : 
     223             : /* fd_lru_tag_is_null returns non-zero if tag is FD_LRU_TAG_NULL
     224             :    and zero otherwise. */
     225             : 
     226             : FD_FN_CONST static inline int
     227 12901250901 : fd_lru_tag_is_null( ulong tag ) {
     228 12901250901 :   return tag == FD_LRU_TAG_NULL;
     229 12901250901 : }
     230             : 
/* fd_lru_reset resets a lru to empty, the same state the lru
   was in at creation.  For performance critical usage, does no input
   argument checking, uses the unpacked lru fields and returns the
   value to use for oldest.

   NOTE(review): this helper still takes the tcache-style flat ulong
   list / ulong map representation, while the rest of this file stores
   fd_list_t nodes and fd_list_t * map slots (see fd_lru_list_laddr /
   fd_lru_map_laddr).  It appears unexercised here -- confirm any
   caller passes compatible arrays before relying on it. */

static inline ulong
fd_lru_reset( ulong * list, ulong depth, ulong * map, ulong map_cnt ) {
  /* Null out every list entry and every map slot (FD_LRU_TAG_NULL==0UL) */
  for ( ulong list_idx = 0UL; list_idx < depth; list_idx++ )
    list[list_idx] = FD_LRU_TAG_NULL;
  for ( ulong map_idx = 0UL; map_idx < map_cnt; map_idx++ )
    map[map_idx] = FD_LRU_TAG_NULL;
  return 0UL; /* list_oldest */
}
     244             : 
     245             : /* fd_lru_map_start returns the location in a lru map to start
     246             :    probing for tag.  Assumes tag is not null and map_cnt is a positive
     247             :    integer power of 2.  Implementation here is optimized for the case
     248             :    where tags are randomized.
     249             : 
     250             :    fd_lru_map_next returns the next location to probe given the
     251             :    current location.  idx is assumed in [0,map_cnt) and map_cnt is
     252             :    assumed to be a positive integer power of 2. */
     253             : 
     254             : FD_FN_CONST static inline ulong
     255     1966083 : fd_lru_map_start( ulong tag, ulong map_cnt ) {
     256     1966083 :   return tag & ( map_cnt - 1UL );
     257     1966083 : }
     258             : FD_FN_CONST static inline ulong
     259 12900071241 : fd_lru_map_next( ulong idx, ulong map_cnt ) {
     260 12900071241 :   return ( idx + 1UL ) & ( map_cnt - 1UL );
     261 12900071241 : }
     262             : 
     263             : /* FD_LRU_QUERY searches for tag in a map with map_cnt slots.  On
     264             :    return, map_idx will be in [0,map_cnt) and found will be in [0,1].
     265             :    If found is 0, map_idx is a suitable location where tag can be
     266             :    inserted into the map, assuming the map has at most map_cnt-2 entries
     267             :    currently in it.  If found is is 1, map_idx is the index into the map
     268             :    where tag is currently located (this index will be valid until the
     269             :    next map remove or map destruction).
     270             : 
     271             :    For sparse fill ratios and properly randomized map_starts, this is a
     272             :    fast O(1).
     273             : 
     274             :    This is implemented as a macro to support multiple return values
     275             :    (found and map_idx), especially as this is used in performance
     276             :    critical contexts.  Similarly, does no input argument checking and
     277             :    uses the unpacked fields of a lru.  Assumes map is non-NULL, map
     278             :    is indexed [0,map_cnt), map_cnt is a positive integer power-of-two
     279             :    and tag is not null.
     280             : 
     281             :    This macro is robust (e.g. evaluates its arguments a minimal number
     282             :    of times) and pure (i.e. found / map_idx will not change between
     283             :    calls given the same map / map[*] / tag). */
     284             : 
#define FD_LRU_QUERY( found, map_idx, map, map_cnt, tag )                                          \
  do {                                                                                             \
    /* snapshot the arguments once (robust against side effects) */                                \
    fd_list_t * const * _flq_map     = ( map );                                                    \
    ulong               _flq_map_cnt = ( map_cnt );                                                \
    ulong               _flq_tag     = ( tag );                                                    \
    int                 _flq_found   = 0;                                                          \
    ulong               _flq_map_idx = fd_lru_map_start( _flq_tag, _flq_map_cnt );                 \
    /* linear probe from the tag's start slot */                                                   \
    for ( ;; ) {                                                                                   \
      fd_list_t * _flq_map_slot = _flq_map[_flq_map_idx];                                          \
      /* NULL slot -> tag not in map; _flq_map_idx is a valid insert slot */                       \
      if ( _flq_map_slot == NULL ) break;                                                          \
      _flq_found = ( _flq_tag == _flq_map_slot->tag );                                             \
      /* stop on a match or on an empty (null tag) slot */                                         \
      if ( FD_LIKELY( _flq_found | fd_lru_tag_is_null( _flq_map_slot->tag ) ) ) break;             \
      _flq_map_idx = fd_lru_map_next( _flq_map_idx, _flq_map_cnt );                                \
    }                                                                                              \
    ( found )   = _flq_found;                                                                      \
    ( map_idx ) = _flq_map_idx;                                                                    \
  } while ( 0 )
     302             : 
     303             : /* fd_lru_remove removes tag in a map with map_cnt slots.  For
     304             :    sparsely populated maps and properly randomized tags, this is a fast
     305             :    O(1).  This does not remove tag from the list, so the user is responsible
     306             :    for the removing the value from the list.  As this is used in performance
     307             :    critical contexts, does no input argument checking and uses the
     308             :    unpacked fields of a lru.  Assumes map is non-NULL, map is indexed
     309             :    [0,map_cnt) and map_cnt is a positive integer power-of-two.  Does
     310             :    nothing if tag is null or if tag is not currently in the map. */
     311             : 
     312             : FD_FN_UNUSED static void /* Work around -Winline */
     313      196608 : fd_lru_remove( fd_lru_t * lru, ulong tag ) {
     314             : 
     315             :   /* Look up tag in the lru.  If not found, nothing to do.  (This
     316             :      should always succeed at this point in typical lru usage but we
     317             :      keep the check for paranoia / minimize risk of silent corruption.) */
     318             : 
     319      196608 :   int          found;
     320      196608 :   ulong        slot;
     321      196608 :   fd_list_t ** map = fd_lru_map_laddr( lru );
     322      196608 :   FD_LRU_QUERY( found, slot, map, lru->map_cnt, tag );
     323      196608 :   if ( FD_LIKELY( found ) ) {
     324             : 
     325             :     /* slot contains the tag to remove.  Remove it.  See util/fd_map*
     326             :        for details how this works. */
     327             : 
     328      196608 :     for ( ;; ) {
     329      196608 :       map[slot]        = FD_LRU_TAG_NULL;
     330      196608 :       ulong       hole = slot;
     331      196608 :       fd_list_t * next;
     332      196608 :       for ( ;; ) {
     333      196608 :         slot = fd_lru_map_next( slot, lru->map_cnt );
     334      196608 :         next = map[slot];
     335      196608 :         if ( FD_LIKELY( next == NULL || fd_lru_tag_is_null( next->tag ) ) ) return;
     336      196605 :         ulong start = fd_lru_map_start( tag, lru->map_cnt );
     337      196605 :         if ( !( ( ( hole < start ) & ( start <= slot ) ) |
     338      196605 :                 ( ( hole > slot ) & ( ( hole < start ) | ( start <= slot ) ) ) ) )
     339      196605 :           break;
     340      196605 :       }
     341      196605 :       map[hole] = next;
     342      196605 :     }
     343           3 :   }
     344      196608 : }
     345             : 
     346             : static inline fd_list_t *
     347      393216 : fd_lru_list_acquire( fd_lru_t * lru ) {
     348      393216 :   fd_list_t * sentinel = fd_lru_list_laddr( lru );
     349      393216 :   fd_list_t * free_top = sentinel + lru->free_top;
     350      393216 :   lru->free_top        = free_top->next;
     351      393216 :   return fd_list_remove( free_top );
     352      393216 : }
     353             : 
     354             : /* user is responsible for ensuring curr is removed */
     355             : static inline void
     356      196608 : fd_lru_list_release( fd_lru_t * lru, fd_list_t * curr ) {
     357      196608 :   fd_list_t * sentinel = fd_lru_list_laddr( lru );
     358      196608 :   fd_list_t * free_top = sentinel + lru->free_top;
     359      196608 :   fd_list_insert( fd_list_prev( free_top ), curr );
     360      196608 :   lru->free_top = curr->curr;
     361      196608 : }
     362             : 
     363             : static inline fd_list_t *
     364      786435 : fd_lru_list_head( fd_lru_t * lru ) {
     365      786435 :   return fd_list_next( fd_lru_list_laddr( lru ) );
     366      786435 : }
     367             : 
     368             : static inline fd_list_t *
     369      983049 : fd_lru_list_tail( fd_lru_t * lru ) {
     370      983049 :   return fd_list_prev( fd_lru_list_laddr( lru ) + lru->free_top );
     371      983049 : }
     372             : 
     373             : /* fd_lru_upsert upserts tag into the lru in fast O(1) operations.
     374             :    On return, if tag is already in the lru, the tag will be moved to
     375             :    most recent position (back). If tag is not in the lru, tag was inserted,
     376             :    and if the lru was full (i.e. had already contained depth values), the
     377             :    oldest tag in the lru will have been evicted.
     378             : 
     379             :    Returns the evicted element (if any) or NULL if no element was evicted.
     380             : 
     381             :    Assumes oldest is in [0,depth), list is non-NULL and indexed
     382             :    [0,depth), depth is positive, map is non-NULL, map is indexed
     383             :    [0,map_cnt), map_cnt is an integer power-of-two of at least depth+2,
     384             :    and tag is not null.
     385             : 
     386             :    Map entries store the location in the list structure. On a duplicate
     387             :    tag, insert will move the duplicate tag from its current location in
     388             :    the list (given from the query itself) to one immediately before the
     389             :    oldest tag in the list and update the map entry (and similar for
     390             :    unique tag insert). */
     391             : 
     392             : static inline fd_list_t *
     393      589830 : fd_lru_upsert( fd_lru_t * lru, ulong tag, int * dup ) {
     394      589830 :   ulong        map_idx;
     395      589830 :   fd_list_t ** map = fd_lru_map_laddr( lru );
     396      589830 :   int          found;
     397      589830 :   FD_LRU_QUERY( found, map_idx, map, lru->map_cnt, tag );
     398      589830 :   *dup                = found;
     399      589830 :   fd_list_t * evicted = NULL;
     400             : 
     401             :   /* LRU insert*/
     402      589830 :   if ( !*dup ) { /* application dependent branch probability */
     403             :     /* Evict oldest tag / insert tag into list */
     404      393216 :     if ( FD_LIKELY( lru->free_top == 0 ) ) {
     405      196608 :       fd_list_t * remove = fd_list_remove( fd_lru_list_head( lru ) );
     406      196608 :       fd_lru_list_release( lru, remove );
     407      196608 :       fd_lru_remove( lru, remove->tag );
     408      196608 :       evicted = remove;
     409      196608 :     }
     410      393216 :     fd_list_t * insert = fd_lru_list_acquire( lru );
     411      393216 :     insert->tag        = tag;
     412      393216 :     fd_list_insert( fd_lru_list_tail( lru ), insert );
     413             : 
     414             :     /* Insert tag into the map (assumes depth <= map_cnt-2) */
     415             :     /* map has at most map_cnt-2 entries here */
     416      393216 :     map[map_idx] = insert;
     417             :     /* map has at most map_cnt-1 entries here */
     418             : 
     419             :     /* LRU update */
     420      393216 :   } else {
     421      196614 :     fd_list_t * next = fd_list_remove( map[map_idx] );
     422      196614 :     fd_list_t * tail = fd_lru_list_tail( lru );
     423      196614 :     fd_list_insert( tail, next );
     424      196614 :   }
     425      589830 :   return evicted;
     426      589830 : }
     427             : 
     428             : FD_PROTOTYPES_END
     429             : 
     430             : #endif /* HEADER_fd_src_tango_lru_fd_lru_h */

Generated by: LCOV version 1.14