#ifndef HEADER_fd_src_tango_dcache_fd_dcache_h
#define HEADER_fd_src_tango_dcache_fd_dcache_h

#include "../fd_tango_base.h"

/* FD_DCACHE_{ALIGN,FOOTPRINT} specify the alignment and footprint
   needed for a dcache with a data region of data_sz bytes and an
   application region of app_sz bytes.  ALIGN is at least FD_CHUNK_ALIGN
   and recommended to be at least double cache line to mitigate various
   kinds of false sharing.  data_sz and app_sz are assumed to be valid
   (e.g. will not require a footprint larger than ULONG_MAX).  These are
   provided to facilitate compile time dcache declarations. */

#define FD_DCACHE_ALIGN (4096UL)
#define FD_DCACHE_FOOTPRINT( data_sz, app_sz )                                                            \
  FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT, \
    FD_DCACHE_SLOT_ALIGN, 128UL                     ), /* hdr   */                                        \
    FD_DCACHE_SLOT_ALIGN, FD_DCACHE_GUARD_FOOTPRINT ), /* guard */                                        \
    FD_DCACHE_ALIGN,      (data_sz)                 ), /* data  */                                        \
    FD_DCACHE_ALIGN,      (app_sz)                  ), /* app   */                                        \
    FD_DCACHE_ALIGN )

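/* Usage sketch (illustrative, not part of this header): a compile time
   declaration of backing memory for a dcache, e.g. for single thread
   group tests.  MY_DATA_SZ and MY_APP_SZ are hypothetical sizes chosen
   by the application (MY_DATA_SZ would typically come from
   FD_DCACHE_REQ_DATA_SZ below).

     #define MY_DATA_SZ (199680UL)
     #define MY_APP_SZ  (  4096UL)

     static uchar __attribute__((aligned(FD_DCACHE_ALIGN)))
       my_dcache_mem[ FD_DCACHE_FOOTPRINT( MY_DATA_SZ, MY_APP_SZ ) ];
   */
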
/* FD_DCACHE_GUARD_FOOTPRINT specifies the footprint of the guard region
   immediately before the dcache data region.  The guard region
   footprint is FD_DCACHE_SLOT_ALIGN aligned and a FD_DCACHE_SLOT_ALIGN
   multiple.  It provides flexibility (up to the magnitude of the
   footprint) to align how a producer might write directly into a dcache
   such that the frag payload alignment a consumer sees is consistent
   regardless of the details of the underlying producer. */

#define FD_DCACHE_GUARD_FOOTPRINT (3968UL)

/* FD_DCACHE_SLOT_FOOTPRINT returns the footprint of a
   FD_DCACHE_SLOT_ALIGN aligned slot sufficient to hold a frag payload
   of up to mtu bytes.  Returns 0 if mtu is not valid (i.e. so large
   that the required slot size is larger than ULONG_MAX). */

#define FD_DCACHE_SLOT_ALIGN            (128UL)
#define FD_DCACHE_SLOT_FOOTPRINT( mtu ) FD_ULONG_ALIGN_UP( (mtu), FD_DCACHE_SLOT_ALIGN )

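/* Worked example (illustrative): a 1500 byte frag payload needs a 1536
   byte slot, the smallest FD_DCACHE_SLOT_ALIGN (128 byte) multiple that
   covers it:

     FD_DCACHE_SLOT_FOOTPRINT( 1500UL )==1536UL
   */
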
/* FD_DCACHE_REQ_DATA_SZ returns the size of a data region in bytes
   sufficient for a dcache whose producer writes frag payloads up to mtu
   (should be positive) bytes in size, that can have up to depth (should
   be positive) frag payloads visible to consumers while the producer
   can be concurrently preparing up to burst (should be positive) frag
   payloads.  Assumes mtu, depth, burst and compact are valid and
   payload footprints are rounded up to at most a FD_DCACHE_SLOT_ALIGN
   multiple when written by a producer.  Note that payloads written by a
   producer will generally be at least FD_CHUNK_ALIGN aligned to
   facilitate interoperability with fd_frag_meta_t chunk indexing.  Also
   note that for a compactly stored ring, it is usually not useful to
   use a burst larger than 1 (but not particularly harmful outside
   resulting in a data region larger than necessary ... might use it to
   quasi-batch publish frags). */

#define FD_DCACHE_REQ_DATA_SZ( mtu, depth, burst, compact ) (FD_DCACHE_SLOT_FOOTPRINT( mtu )*((depth)+(burst)+(ulong)!!(compact)))

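/* Worked example (illustrative): for a hypothetical producer with a
   1500 byte mtu, a depth of 128 consumer visible frags, a burst of 1
   and compact storage:

     FD_DCACHE_REQ_DATA_SZ( 1500UL, 128UL, 1UL, 1 )
       == FD_DCACHE_SLOT_FOOTPRINT( 1500UL )*(128UL+1UL+1UL)
       == 1536UL*130UL
       == 199680UL bytes
   */
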
FD_PROTOTYPES_BEGIN

/* Construction API */

/* fd_dcache_req_data_sz is the same as FD_DCACHE_REQ_DATA_SZ but does
   not assume valid arguments.  Returns the required data_sz on success
   or 0 on failure.  Reasons for failure include zero mtu, too large
   mtu, zero depth, zero burst or the required data_sz would be larger
   than ULONG_MAX. */

FD_FN_CONST ulong
fd_dcache_req_data_sz( ulong mtu,
                       ulong depth,
                       ulong burst,
                       int   compact );

/* fd_dcache_{align,footprint} return the required alignment and
   footprint of a memory region suitable for use as a dcache with a data
   region of data_sz bytes and an application region of app_sz bytes.
   align returns FD_DCACHE_ALIGN.  If data_sz or app_sz are invalid
   (e.g. the required footprint is larger than ULONG_MAX), footprint
   will silently return 0 (and thus can be used by the caller to
   validate dcache configuration parameters).  Zero is valid for data_sz
   and/or app_sz. */

FD_FN_CONST ulong
fd_dcache_align( void );

FD_FN_CONST ulong
fd_dcache_footprint( ulong data_sz,
                     ulong app_sz );

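/* Sizing sketch (illustrative, not part of this API): the mtu, depth,
   burst, compact and app_sz values below are hypothetical; both helpers
   return 0 on bad parameters, so the results should be checked.
   FD_UNLIKELY and FD_LOG_ERR are from fd_util.

     ulong data_sz   = fd_dcache_req_data_sz( 1500UL, 128UL, 1UL, 1 );
     ulong footprint = fd_dcache_footprint( data_sz, 4096UL );
     if( FD_UNLIKELY( (!data_sz) | (!footprint) ) )
       FD_LOG_ERR(( "bad dcache configuration" ));
   */
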
/* fd_dcache_new formats an unused memory region for use as a dcache.
   shmem is a non-NULL pointer to this region in the local address space
   with the required footprint and alignment.  The size of the dcache
   data region is data_sz bytes and the size of the application region
   is app_sz bytes.  Zero is valid for data_sz and/or app_sz.

   Returns shmem (and the memory region it points to will be formatted
   as a dcache with the application region initialized to zero, caller
   is not joined) on success and NULL on failure (logs details).
   Reasons for failure include obviously bad shmem, bad data_sz or bad
   app_sz. */

void *
fd_dcache_new( void * shmem,
               ulong  data_sz,
               ulong  app_sz );

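/* Construction sketch (illustrative): shmem is assumed to be an unused
   region with fd_dcache_align() alignment and fd_dcache_footprint(
   data_sz, app_sz ) footprint (e.g. a wksp allocation); error handling
   omitted for brevity.

     void *  shdcache = fd_dcache_new( shmem, data_sz, app_sz );
     uchar * dcache   = fd_dcache_join( shdcache );

   dcache then points at the data region, not at shdcache itself (see
   fd_dcache_join below). */
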
/* fd_dcache_join joins the caller to the dcache.  shdcache points to
   the first byte of the memory region backing the dcache in the
   caller's address space.

   Returns a pointer in the local address space to the dcache's data
   region on success (IMPORTANT! THIS IS NOT JUST A CAST OF SHDCACHE)
   and NULL on failure (logs details).  Reasons for failure include that
   shdcache does not obviously point to a memory region holding a
   dcache.  Every successful join should have a matching leave.  The
   lifetime of the join is until the matching leave or the thread group
   is terminated.

   This region will have a guard region of FD_DCACHE_GUARD_FOOTPRINT
   bytes just before it and data_sz bytes available after it. */

uchar *
fd_dcache_join( void * shdcache );

/* fd_dcache_leave leaves a current local join.  Returns a pointer to
   the underlying shared memory region on success (IMPORTANT!  THIS IS
   NOT JUST A CAST OF DCACHE) and NULL on failure (logs details).
   Reasons for failure include dcache is NULL. */

void *
fd_dcache_leave( uchar const * dcache );

/* fd_dcache_delete unformats a memory region used as a dcache.  Assumes
   nobody is joined to the region.  Returns a pointer to the underlying
   shared memory region or NULL if used obviously in error (e.g.
   shdcache is obviously not a dcache ...  logs details).  The ownership
   of the memory region is transferred to the caller. */

void *
fd_dcache_delete( void * shdcache );

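/* Teardown sketch (illustrative), reversing the construction sketch
   above; assumes all other joins have already left.

     void * shdcache = fd_dcache_leave ( dcache   );
     void * shmem    = fd_dcache_delete( shdcache );

   Ownership of shmem is then back with the caller, which can free or
   reuse it. */
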
/* Accessor API */

/* fd_dcache_{data_sz,app_sz} return the sizes of the {data,app}
   regions.  Assumes dcache is a current local join. */

FD_FN_PURE ulong fd_dcache_data_sz( uchar const * dcache );
FD_FN_PURE ulong fd_dcache_app_sz ( uchar const * dcache );

/* fd_dcache_app_laddr returns the location in the caller's local
   address space of memory set aside for application specific usage.
   Assumes dcache is a current local join.  The lifetime of the returned
   pointer is the same as the underlying join.  This region has
   FD_DCACHE_ALIGN alignment (at least double cache line) and is
   fd_dcache_app_sz( dcache ) in size.  laddr_const is a const-correct
   version. */

FD_FN_PURE uchar const * fd_dcache_app_laddr_const( uchar const * dcache );
FD_FN_PURE uchar *       fd_dcache_app_laddr      ( uchar *       dcache );

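/* Sketch (illustrative): stashing application-defined state in the app
   region of a joined dcache.  my_state_t and my_state are hypothetical;
   fd_memcpy is from fd_util.

     uchar * app    = fd_dcache_app_laddr( dcache );
     ulong   app_sz = fd_dcache_app_sz   ( dcache );
     if( FD_LIKELY( app_sz>=sizeof(my_state_t) ) )
       fd_memcpy( app, &my_state, sizeof(my_state_t) );
   */
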
/* fd_dcache_compact_is_safe returns whether the dcache can safely store
   frags compactly in a quasi ring like fashion as described for
   fd_dcache_compact_next below.

   Chunks are indexed relative to base (e.g. the wksp containing the
   dcache, to facilitate multiple dcaches written by multiple producers
   concurrently in the same wksp using a common chunk indexing scheme at
   consumers ... base==dcache is fine and implies chunks in this dcache
   region will be indexed starting from zero).

   base and dcache should be double chunk aligned, dcache should be a
   current local join, and base and dcache should be relatively spaced
   identically between different thread groups that might use the chunk
   indices and sufficiently close in the local address space that all
   data region chunk addresses can be losslessly compressed and shared
   via a 32-bit fd_frag_meta_t chunk field.

   mtu is the maximum frag payload size that a producer might write into
   this dcache.  It is assumed that the producer will round up the
   footprint of frags written into the dcache to double chunk aligned
   boundaries.

   depth is the maximum number of frag payloads in this dcache that
   might be concurrently visible to consumers.

   Returns 1 if the dcache is safe and 0 if not (with details logged). */

int
fd_dcache_compact_is_safe( void const * base,
                           void const * dcache,
                           ulong        mtu,
                           ulong        depth );

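/* Startup sketch (illustrative): a producer validating that a joined
   dcache can be used compactly before caching any chunk indexing
   parameters.  base is assumed to be the start of the wksp containing
   the dcache (base==dcache also works) and mtu / depth are the
   hypothetical values the dcache was sized with.

     if( FD_UNLIKELY( !fd_dcache_compact_is_safe( base, dcache, mtu, depth ) ) )
       FD_LOG_ERR(( "dcache not suitable for compact storage" ));
   */
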
/* fd_dcache_compact_{chunk0,chunk1,wmark} return the range of chunk
   indices [chunk0,chunk1) that, relative to the base address, are
   covered by the dcache's data region, and the watermark chunk index
   for use by fd_dcache_compact_next below.
   0<=chunk0<=wmark<=chunk1<=UINT_MAX.  These assume dcache is a current
   local join and that base / dcache pass fd_dcache_compact_is_safe
   above. */

FD_FN_CONST static inline ulong
fd_dcache_compact_chunk0( void const * base,
                          void const * dcache ) {
  return ((ulong)dcache - (ulong)base) >> FD_CHUNK_LG_SZ;
}

FD_FN_PURE static inline ulong
fd_dcache_compact_chunk1( void const * base,
                          void const * dcache ) {
  return ((ulong)dcache + fd_dcache_data_sz( (uchar const *)dcache ) - (ulong)base) >> FD_CHUNK_LG_SZ;
}

FD_FN_PURE static inline ulong
fd_dcache_compact_wmark( void const * base,
                         void const * dcache,
                         ulong        mtu ) {
  ulong chunk_mtu = ((mtu + 2UL*FD_CHUNK_SZ-1UL) >> (1+FD_CHUNK_LG_SZ)) << 1;
  return fd_dcache_compact_chunk1( base, dcache ) - chunk_mtu;
}

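/* Sketch (illustrative), continuing the startup sketch above: the chunk
   indexing parameters a producer typically computes once after the
   safety check and then reuses for every frag it publishes.

     ulong chunk0 = fd_dcache_compact_chunk0( base, dcache );
     ulong wmark  = fd_dcache_compact_wmark ( base, dcache, mtu );
     ulong chunk  = chunk0;

   chunk here tracks where the next frag payload will be written. */
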
/* fd_dcache_compact_next:

   Let a dcache have space for at least chunk_mtu*(depth+2)-1 chunks
   where chunks are indexed [chunk0,chunk1) and chunk_mtu is a
   sufficient number of chunks to hold the worst case frag size.
   Further, let the dcache's producer write frags into the dcache at
   chunk aligned positions with a footprint of at most chunk_mtu chunks
   (with one exception noted below).  Lastly, let the producer write
   frags contiguously into the dcache such that consumers do not need to
   do any special handling for frags that wrap around the end of the
   dcache.

   Since the producer does not necessarily know the size of a frag as it
   is producing it but does know a priori the maximum size of a frag it
   might produce, the producer can achieve this by making the first
   chunk of any frag it writes in:

     [chunk0,wmark]

   where:

     wmark = chunk1 - chunk_mtu

   This is equivalent to saying that, if there are at least chunk_mtu
   chunks until the end of the dcache after a frag, that frag's
   footprint will be enough contiguous chunks to cover the frag (up to
   chunk_mtu).  But if there are fewer than chunk_mtu chunks, that
   frag's footprint will extend to the end of the dcache.

   This implies that, in the worst case, there are at most depth frags
   with a chunk_mtu footprint (those not near the end) and 1 frag with a
   2*chunk_mtu-1 footprint (the one frag nearest the dcache end) in the
   dcache.  depth of these are exposed to consumers and 1 is in
   preparation by the producer.  It also implies that the set of chunks
   in the dcache in use is cyclically contiguous, starting from the
   oldest consumer exposed frag until the currently exposed frag.

   Note that the act of publishing the in-preparation frag also
   unpublishes the oldest exposed frag.  Given the above, this
   guarantees that there are at least chunk_mtu contiguous chunks
   available for use by the next frag so long as chunk_mtu is large
   enough to cover the worst case frag and the dcache has room for at
   least chunk_mtu*(depth+2)-1 chunks. */

FD_FN_CONST static inline ulong         /* Will be in [chunk0,wmark] */
fd_dcache_compact_next( ulong chunk,    /* Assumed in [chunk0,wmark] */
                        ulong sz,       /* Assumed in [0,mtu] */
                        ulong chunk0,   /* From fd_dcache_compact_chunk0 */
                        ulong wmark ) { /* From fd_dcache_compact_wmark */
  chunk += ((sz+(2UL*FD_CHUNK_SZ-1UL)) >> (1+FD_CHUNK_LG_SZ)) << 1; /* Advance to next chunk pair, no overflow if init checks passed */
  return fd_ulong_if( chunk>wmark, chunk0, chunk );                 /* If that goes over the high water mark, wrap to chunk0 */
}

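/* Producer loop sketch (illustrative), continuing the sketches above:
   writing each frag payload at the current chunk and then advancing.
   fd_chunk_to_laddr is from fd_tango_base.h; generate_frag and
   publish_frag are hypothetical stand-ins for however the producer
   fills in a payload of sz bytes (sz<=mtu) and announces it.

     for(;;) {
       uchar * payload = (uchar *)fd_chunk_to_laddr( base, chunk );
       ulong   sz      = generate_frag( payload, mtu );
       publish_frag( chunk, sz );
       chunk = fd_dcache_compact_next( chunk, sz, chunk0, wmark );
     }

   In tango, publish_frag would typically publish ( chunk, sz ) metadata
   via fd_frag_meta_t entries so consumers can locate the payload. */
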
FD_PROTOTYPES_END

#endif /* HEADER_fd_src_tango_dcache_fd_dcache_h */
