LCOV - code coverage report
Current view: top level - tango/dcache - fd_dcache.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 17 17 100.0 %
Date: 2025-01-08 12:08:44 Functions: 16 472 3.4 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_tango_dcache_fd_dcache_h
       2             : #define HEADER_fd_src_tango_dcache_fd_dcache_h
       3             : 
       4             : #include "../fd_tango_base.h"
       5             : 
       6             : /* FD_DCACHE_{ALIGN,FOOTPRINT} specify the alignment and footprint
       7             :    needed for a dcache with a data region of data_sz bytes and an
       8             :    application region of app_sz bytes.  ALIGN is at least FD_CHUNK_ALIGN
       9             :    and recommended to be at least double cache line to mitigate various
      10             :    kinds of false sharing.  data_sz and app_sz are assumed to be valid
      11             :    (e.g. will not require a footprint larger than ULONG_MAX).  These are
      12             :    provided to facilitate compile time dcache declarations. */
      13             : 
      14     6037266 : #define FD_DCACHE_ALIGN (128UL) /* Also used below as the alignment of every region appended to the layout */
      15             : #define FD_DCACHE_FOOTPRINT( data_sz, app_sz )                                                            \
      16             :   FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT, \
      17             :     FD_DCACHE_ALIGN, 128UL                     ), /* hdr   */                                             \
      18             :     FD_DCACHE_ALIGN, FD_DCACHE_GUARD_FOOTPRINT ), /* guard */                                             \
      19             :     FD_DCACHE_ALIGN, (data_sz)                 ), /* data  */                                             \
      20             :     FD_DCACHE_ALIGN, (app_sz)                  ), /* app   */                                             \
      21             :     FD_DCACHE_ALIGN ) /* Regions are appended in the order hdr, guard, data, app; FD_DCACHE_ALIGN is also the FD_LAYOUT_FINI argument */
      22             : 
      23             : /* FD_DCACHE_GUARD_FOOTPRINT specifies the footprint of the guard region
      24             :    immediately before the dcache data region.  The guard region
      25             :    footprint is FD_DCACHE_ALIGN aligned and a FD_DCACHE_ALIGN multiple.
      26             :    It provides flexibility (up to the magnitude of the footprint) to
      27             :    align how a producer might write directly into a dcache such that the
      28             :    frag payload alignment a consumer sees is consistent regardless of
      29             :    the details of the underlying producer. */
      30             : 
      31          12 : #define FD_DCACHE_GUARD_FOOTPRINT (128UL) /* Currently the same value as FD_DCACHE_ALIGN (i.e. one alignment unit) */
      32             : 
      33             : /* FD_DCACHE_SLOT_FOOTPRINT returns the footprint of a FD_DCACHE_ALIGN
      34             :    aligned slot sufficient to hold a frag payload of up to mtu bytes.
      35             :    Returns 0 if mtu is not valid (i.e. so large that the required slot
      36             :    size is larger than ULONG_MAX). */
      37             : 
      38     2878419 : #define FD_DCACHE_SLOT_FOOTPRINT( mtu ) FD_ULONG_ALIGN_UP( (mtu), FD_DCACHE_ALIGN ) /* Presumably rounds mtu up to the next FD_DCACHE_ALIGN multiple -- confirm against FD_ULONG_ALIGN_UP */
      39             : 
      40             : /* FD_DCACHE_REQ_DATA_SZ returns the size of a data region in bytes
      41             :    sufficient for a dcache whose producer writes frag payloads up to mtu
      42             :    (should be positive) bytes in size, that can have up to depth (should
      43             :    be positive) frag payloads visible to consumers while the producer
      44             :    can be concurrently preparing up to burst (should be positive) frag
      45             :    payloads.  Assumes mtu, depth, burst and compact are valid and
      46             :    payload footprints are rounded up to at most a FD_DCACHE_ALIGN
      47             :    multiple when written by a producer.  Note that payloads written by a
      48             :    producer will generally be at least FD_DCACHE_ALIGN aligned to
      49             :    facilitate interoperability with fd_frag_meta_t chunk indexing.  Also
      50             :    note that for a compactly stored ring, it is usually not useful to
      51             :    use a burst larger than 1 (but not particularly harmful aside from
      52             :    resulting in a data region larger than necessary ... might use it to
      53             :    quasi-batch publish frags). */
      54             : 
      55             : #define FD_DCACHE_REQ_DATA_SZ( mtu, depth, burst, compact ) (FD_DCACHE_SLOT_FOOTPRINT( mtu )*((depth)+(burst)+(ulong)!!(compact))) /* Slot footprint times (depth + burst + (1 if compact is non-zero, 0 otherwise)); no overflow checking is done here */
      56             : 
      57             : FD_PROTOTYPES_BEGIN
      58             : 
      59             : /* Construction API */
      60             : 
      61             : /* fd_dcache_req_data_sz is the same as FD_DCACHE_REQ_DATA_SZ but does
      62             :    not assume valid arguments.  Returns sz on success or 0 on failure.
      63             :    Reasons for failure include zero mtu, too large mtu, zero depth, zero
      64             :    burst or the required data_sz would be larger than ULONG_MAX. */
      65             : 
      66             : FD_FN_CONST ulong
      67             : fd_dcache_req_data_sz( ulong mtu,
      68             :                        ulong depth,
      69             :                        ulong burst,
      70             :                        int   compact );
      71             : 
      72             : /* fd_dcache_{align,footprint} return the required alignment and
      73             :    footprint of a memory region suitable for use as dcache with a data
      74             :    region of data_sz bytes and an application region of app_sz bytes.
      75             :    align returns FD_DCACHE_ALIGN.  If data_sz or app_sz are invalid
      76             :    (e.g. the required footprint is larger than ULONG_MAX), footprint
      77             :    will silently return 0 (and thus can be used by the caller to
      78             :    validate dcache configuration parameters).  Zero is valid for data_sz
      79             :    and/or app_sz. */
      80             : 
      81             : FD_FN_CONST ulong
      82             : fd_dcache_align( void );
      83             : 
      84             : FD_FN_CONST ulong
      85             : fd_dcache_footprint( ulong data_sz,
      86             :                      ulong app_sz );
      87             : 
      88             : /* fd_dcache_new formats an unused memory region for use as a dcache.
      89             :    shmem is a non-NULL pointer to this region in the local address space
      90             :    with the required footprint and alignment.  The size of the dcache
      91             :    data region is data_sz bytes and the size of the application
      92             :    region is app_sz bytes.  Zero is valid for data_sz and/or app_sz.
      93             : 
      94             :    Returns shmem (and the memory region it points to will be formatted
      95             :    as a dcache with the data and application regions initialized to
      96             :    zero, caller is not joined) on success and NULL on failure (logs
      97             :    details).  Reasons for failure include obviously bad shmem, bad
      98             :    data_sz or bad app_sz. */
      99             : 
     100             : void *
     101             : fd_dcache_new( void * shmem,
     102             :                ulong  data_sz,
     103             :                ulong  app_sz );
     104             : 
     105             : /* fd_dcache_join joins the caller to the dcache.  shdcache points to
     106             :    the first byte of the memory region backing the dcache in the
     107             :    caller's address space.
     108             : 
     109             :    Returns a pointer in the local address space to the dcache's data
     110             :    region on success (IMPORTANT! THIS IS NOT JUST A CAST OF SHDCACHE)
     111             :    and NULL on failure (logs details).  Reasons for failure are that
     112             :    shdcache is obviously not a pointer to a memory region holding a
     113             :    dcache.  Every successful join should have a matching leave.  The
     114             :    lifetime of the join is until the matching leave or the thread group
     115             :    is terminated.
     116             : 
     117             :    This region will have a guard region of FD_DCACHE_GUARD_FOOTPRINT
     118             :    just before it and data_sz bytes available after it. */
     119             : 
     120             : uchar *
     121             : fd_dcache_join( void * shdcache );
     122             : 
     123             : /* fd_dcache_leave leaves a current local join.  Returns a pointer to
     124             :    the underlying shared memory region on success (IMPORTANT!  THIS IS
     125             :    NOT JUST A CAST OF DCACHE) and NULL on failure (logs details).
     126             :    Reasons for failure include dcache is NULL. */
     127             : 
     128             : void *
     129             : fd_dcache_leave( uchar const * dcache );
     130             : 
     131             : /* fd_dcache_delete unformats a memory region used as a dcache.  Assumes
     132             :    nobody is joined to the region.  Returns a pointer to the underlying
     133             :    shared memory region or NULL if used obviously in error (e.g.
     134             :    shdcache is obviously not a dcache ...  logs details).  The ownership
     135             :    of the memory region is transferred to the caller. */
     136             : 
     137             : void *
     138             : fd_dcache_delete( void * shdcache );
     139             : 
     140             : /* Accessor API */
     141             : 
     142             : /* fd_dcache_{data_sz,app_sz} return the sizes of the {data,app}
     143             :    regions.  Assumes dcache is a current local join. */
     144             : 
     145             : FD_FN_PURE ulong fd_dcache_data_sz( uchar const * dcache );
     146             : FD_FN_PURE ulong fd_dcache_app_sz ( uchar const * dcache );
     147             : 
     148             : /* fd_dcache_app_laddr returns location in the caller's local address
     149             :    space of memory set aside for application specific usage.  Assumes
     150             :    dcache is a current local join.  The lifetime of the returned pointer
     151             :    is the same as the underlying join.  This region has FD_DCACHE_ALIGN
     152             :    alignment (double cache line) and is fd_dcache_app_sz( dcache ) in
     153             :    size.  laddr_const is a const-correct version. */
     154             : 
     155             : FD_FN_PURE uchar const * fd_dcache_app_laddr_const( uchar const * dcache );
     156             : FD_FN_PURE uchar *       fd_dcache_app_laddr      ( uchar *       dcache );
     157             : 
     158             : /* fd_dcache_compact_is_safe returns whether the dcache can safely
     159             :    store frags compactly in a quasi ring like fashion as described in
     160             :    fd_dcache_compact_next below.
     161             : 
     162             :    Chunks are indexed relative to base (e.g. the wksp containing the
     163             :    dcache to facilitate multiple dcaches written by multiple producers
     164             :    concurrently in the same wksp using a common chunk indexing scheme at
     165             :    consumers ... base==dcache is fine and implies chunks in this dcache
     166             :    region will be indexed starting from zero).
     167             : 
     168             :    base and dcache should be double chunk aligned, dcache should be
     169             :    current local join, base and dcache should be relatively spaced
     170             :    identically between different thread groups that might use the chunk
     171             :    indices and sufficiently close in the local address space that the
     172             :    all data region chunk addresses can be losslessly compressed and
     173             :    shared via a 32-bit fd_frag_meta_t chunk field.
     174             : 
     175             :    mtu is the maximum frag size that a producer might write into this
     176             :    dcache.  It is assumed that the producer will round up the footprint
     177             :    of frags written into the dcache to double chunk aligned boundaries.
     178             : 
     179             :    depth is the maximum number of frags in this dcache that might be
     180             :    concurrently accessed.
     181             : 
     182             :    Returns 1 if the dcache is safe and 0 if not (with details logged). */
     183             : 
     184             : int
     185             : fd_dcache_compact_is_safe( void const * base,
     186             :                            void const * dcache,
     187             :                            ulong        mtu,
     188             :                            ulong        depth );
     189             : 
     190             : /* fd_dcache_compact_{chunk0,chunk1,wmark} return the range of chunk
     191             :    indices [chunk0,chunk1), relative to the base address, covered by
     192             :    the dcache's data region and the watermark chunk index for use by
     193             :    fd_dcache_compact_next below.
     194             :    0<=chunk0<=wmark<=chunk1<=UINT_MAX.  These assume dcache is a current
     195             :    local join and the base / dcache pass fd_dcache_compact_is_safe
     196             :    above. */
     197             : 
     198             : FD_FN_CONST static inline ulong                   /* First chunk index of the range [chunk0,chunk1) */
     199             : fd_dcache_compact_chunk0( void const * base,      /* Address that chunk indices are measured from */
     200        2148 :                           void const * dcache ) { /* Assumed to be a current local join */
     201        2148 :   return ((ulong)dcache - (ulong)base) >> FD_CHUNK_LG_SZ; /* Byte offset of dcache from base, shifted down by FD_CHUNK_LG_SZ to get a chunk index */
     202        2148 : }
     203             : 
     204             : FD_FN_PURE static inline ulong                    /* End (exclusive) chunk index of the range [chunk0,chunk1) */
     205             : fd_dcache_compact_chunk1( void const * base,      /* Address that chunk indices are measured from */
     206        4284 :                           void const * dcache ) { /* Assumed to be a current local join (fd_dcache_data_sz is called on it) */
     207        4284 :   return ((ulong)dcache + fd_dcache_data_sz( (uchar const *)dcache ) - (ulong)base) >> FD_CHUNK_LG_SZ; /* Byte offset from base of the address data_sz bytes past dcache, shifted down by FD_CHUNK_LG_SZ */
     208        4284 : }
     209             : 
     210             : FD_FN_PURE static inline ulong                    /* Watermark chunk index, wmark = chunk1 - chunk_mtu */
     211             : fd_dcache_compact_wmark( void const * base,       /* Address that chunk indices are measured from */
     212             :                          void const * dcache,     /* Assumed to be a current local join */
     213        2145 :                          ulong        mtu ) {     /* Maximum frag size the producer might write */
     214        2145 :   ulong chunk_mtu = ((mtu + 2UL*FD_CHUNK_SZ-1UL) >> (1+FD_CHUNK_LG_SZ)) << 1; /* mtu expressed in chunks, rounded up to an even count (assumes FD_CHUNK_SZ==1UL<<FD_CHUNK_LG_SZ -- TODO confirm) */
     215        2145 :   return fd_dcache_compact_chunk1( base, dcache ) - chunk_mtu; /* No underflow check here; relies on the stated precondition that base / dcache passed the safety check */
     216        2145 : }
     217             : 
     218             : /* fd_dcache_compact_next:
     219             : 
     220             :    Let a dcache have space for at least chunk_mtu*(depth+2)-1 chunks
     221             :    where chunks are indexed [chunk0,chunk1) and chunk_mtu is a
     222             :    sufficient number of chunks to hold the worst case frag size.
     223             :    Further, let the dcache's producer write frags into the dcache at
     224             :    chunk aligned positions with a footprint of at most chunk_mtu chunks
     225             :    (with one exception noted below).  Lastly, let the producer write
     226             :    frags contiguously into the dcache such that consumers do not need to
     227             :    do any special handling for frags that wrap around the end of the
     228             :    dcache.
     229             : 
     230             :    Since the producer does not necessarily know the size of a frag as it
     231             :    is producing it but does know a priori the maximum size of a frag it
     232             :    might produce, the producer can achieve this by making the first
     233             :    chunk of any frag it writes in:
     234             : 
     235             :      [chunk0,wmark]
     236             : 
     237             :    where:
     238             : 
     239             :      wmark = chunk1 - chunk_mtu
     240             : 
     241             :    This is equivalent to saying that, if there are at least chunk_mtu
     242             :    chunks until the end of a dcache after a frag, that frag's footprint
     243             :    will be enough contiguous chunks to cover the frag (up to chunk_mtu).
     244             :    But if there are less than chunk_mtu chunks, that frag's footprint
     245             :    will be until the end of the dcache.
     246             : 
     247             :    This implies, in the worst case, there are at least depth+1 chunk_mtu
     248             :    footprint frags (those not near the end) and 1 frag with a
     249             :    2*chunk_mtu-1 footprint (the one frag nearest the dcache end) in the
     250             :    dcache.  depth of these are exposed to consumers and 1 in preparation
     251             :    by the producer.  It also implies that the set of chunks in the
     252             :    dcache in use is cyclically contiguous starting from the oldest
     253             :    consumer exposed frag until the currently exposed frag.
     254             : 
     255             :    Note that the act of publishing the in preparation frag also
     256             :    unpublishes the oldest exposed frag.  Given the above, this
     257             :    guarantees that there is at least chunk_mtu contiguous space
     258             :    available for use by the next frag so long as chunk_mtu is large
     259             :    enough to cover the worst case frag and the dcache has room at least
     260             :    for chunk_mtu*(depth+2)-1 chunks. */
     261             : 
     262             : FD_FN_CONST static inline ulong         /* Chunk index for the next frag, will be in [chunk0,wmark] */
     263             : fd_dcache_compact_next( ulong chunk,    /* Chunk index to advance from, assumed in [chunk0,wmark] */
     264             :                         ulong sz,       /* Frag size to advance past, assumed in [0,mtu] */
     265             :                         ulong chunk0,   /* From fd_dcache_compact_chunk0 */
     266    77099484 :                         ulong wmark ) { /* From fd_dcache_compact_wmark */
     267    77099484 :   chunk += ((sz+(2UL*FD_CHUNK_SZ-1UL)) >> (1+FD_CHUNK_LG_SZ)) << 1; /* Advance to the next chunk pair past sz, no overflow if init passed */
     268    77099484 :   return fd_ulong_if( chunk>wmark, chunk0, chunk );                 /* If that goes over the high water mark, wrap back to chunk0 (not necessarily zero) */
     269    77099484 : }
     270             : 
     271             : FD_PROTOTYPES_END
     272             : 
     273             : #endif /* HEADER_fd_src_tango_dcache_fd_dcache_h */
     274             : 

Generated by: LCOV version 1.14