LCOV - code coverage report
Current view: top level - tango - fd_tango_base.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 48 74 64.9 %
Date: 2025-01-08 12:08:44 Functions: 46 5244 0.9 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_tango_fd_tango_base_h
       2             : #define HEADER_fd_src_tango_fd_tango_base_h
       3             : 
       4             : /* Tango messaging concepts:
       5             : 
       6             :    - Each message comes from a single local origin.  Each origin has a
       7             :      13-bit id that uniquely identifies it within a set of message
       8             :      producers and consumers for the lifetime of the set.  Origins
       9             :      typically include a mixture of network receiving devices, local
      10             :      message publishers, etc.  Applications might restrict the set of
      11             :      origins / add additional context / structure to origin ids as
      12             :      needed.
      13             : 
      14             :    - Messages are partitioned into one or more disjoint fragments.  The
      15             :      number of message payload bytes in a message fragment is in
      16             :      [0,2^16).  That is, message fragment size is any 16-bit unsigned
      17             :      int (thus bounded and variable).  Zero sized fragments are
      18             :      legitimate (e.g. one use case for this is heartbeating a stalled
      19             :      send of a large multi-fragment message).  Note that this is large
      20             :      enough to allow a maximum size UDP payload to be published in a
      21             :      single message fragment.  Applications might choose to impose
      22             :      additional limitations on message fragmentation.
      23             : 
      24             :    - Each fragment has a 64-bit sequence number that is unique over a
      25             :      (potentially dynamic) set of communicating message producers and
      26             :      consumers for the lifetime of that set.  Note that the use of a
      27             :      64-bit sequence number means that sequence number reuse is not an
      28             :      issue practically (would take hundreds of years even at highly
      29             :      local unrealistic messaging rates from producers to consumers).
      30             :      Note also that it is possible to use a smaller sequence number and
      31             :      deal with the implications of sequence number reuse via a number of
      32             :      standard techniques (epochs, TCP timestamp style, etc ... possibly
      33             :      with some minor additional constraints).  This is not done here for
      34             :      code simplicity / robustness / flexibility.
      35             : 
      36             :    - Message fragment sequence numbers increase sequentially with no
      37             :      gaps over the set of all producers for the set's lifetime.  As
      38             :      such, if a consumer encounters a gap in fragment sequence numbers,
      39             :      it knows it was overrun and has lost a message fragment (but
      40             :      typically that consumer does not know the origin of the lost
      41             :      fragment and needs to react accordingly).
      42             : 
      43             :    - Within a message, fragment sequence numbers increase monotonically
      44             :      but not necessarily sequentially as the fragments from messages from
      45             :      different origins may be interleaved in fragment sequence number.
      46             : 
      47             :    - Each fragment is timestamped according to when its origin first
      48             :      started producing it (tsorig) and when it was made first available
      49             :      for consumers (tspub).  As these are used mostly for monitoring and
      50             :      diagnostic purposes, they are stored in a temporally and/or
      51             :      precision compressed representation to free up room for other
      52             :      metadata.
      53             : 
      54             :    - tsorig is measured on the origin's wallclock and the tspub is
      55             :      measured on the consumer facing publisher's wallclock (these are
      56             :      often the same wallclock).  As such, tsorig from the same origin
      57             :      will be monotonically increasing and tspub will be monotonically
      58             :      increasing across all fragments from all origins.
      59             : 
      60             :    - The wallclocks used for the timestamping should be reasonably well
      61             :      synchronized in the sense described in util/log.  As such
      62             :      timestamps measured by the same wallclocks will be exactly
      63             :      spatially comparable and approximately temporally comparable and
      64             :      timestamps measured by different wallclocks are both approximately
      65             :      spatially and temporally comparable.  Applications might choose to
      66             :      use things like preexisting host globally synchronized hardware
      67             :      tickcounters (e.g. RDTSC) for these instead of the system wallclock
      68             :      to reduce overheads.
      69             : 
      70             :    - Message fragments are distributed strictly in order.  There is no
      71             :      inherent limit to the number of fragments in a message.
      72             :      Applications might impose additional restrictions as appropriate
      73             :      for their needs.
      74             : 
      75             :    - To facilitate message reassembly, each fragment has a set of
      76             :      control bits that specify message boundaries and other conditions
      77             :      that might occur during message distribution.
      78             : 
      79             :      * SOM ("start-of-message"): This indicates this fragment starts a
      80             :        message from the fragment's origin.
      81             : 
      82             :      * EOM ("end-of-message"): This indicates this fragment ends a
      83             :        message from the fragment's origin.  If a consumer sees all the
      84             :        fragment sequence numbers between the sequence number of an SOM
      85             :        fragment from an origin to the sequence number of an EOM fragment
      86             :        from that origin inclusive, it knows that it has received all
      87             :        fragments for that message without loss from that origin.
      88             : 
      89             :      * ERR ("error"): This indicates that the _entire_ message to which
      90             :        the fragment belongs should be considered as corrupt (e.g. CRC
      91             :        checks that happen at the very end of network packet reception
      92             :        are the typical reason for this and these inherently cannot be
      93             :        checked until the last fragment).
      94             : 
      95             :    - To facilitate high performance message distribution, each fragment
      96             :      has a 64-bit message signature.  How the signature is used is
      97             :      application defined.  A typical use case is to have the first
      98             :      fragment of a message signify (in an application dependent way)
      99             :      which consumers are definitely known a priori to be uninterested in
     100             :      the message (such that those consumers don't have to spend any
     101             :      bandwidth or compute to reassemble or parse message payloads while
     102             :      still preserving common sequencing and ordering of all messages
     103             :      between all consumers).
     104             : 
     105             :    - For similar reasons, recent message fragments are typically stored
     106             :      in two separate caches:  A fragment metadata cache ("mcache", which
     107             :      behaves like a hybrid of a ring and a direct mapped cache ... it
     108             :      maps recently published fragment sequence numbers to fragment
     109             :      metadata) and a fragment payload cache (which is more flexibly
     110             :      allocated at "chunk" granularity as per the capabilities and needs
     111             :      of the individual origins). */
     112             : 
     113             : #include "../util/fd_util.h"
     114             : 
     115             : #if FD_HAS_SSE /* also covers FD_HAS_AVX */
     116             : #include <x86intrin.h>
     117             : #endif
     118             : 
     119             : /* FD_CHUNK_{LG_SZ,ALIGN,FOOTPRINT,SZ} describe the granularity of
     120             :    message fragment payload allocations.  ALIGN==FOOTPRINT==SZ==2^LG_SZ.
     121             :    Recommend this to be something like a cache line in practice. */
     122             : 
     123  1384445091 : #define FD_CHUNK_LG_SZ     (6) /* log2 of the chunk size in bytes */
     124          15 : #define FD_CHUNK_ALIGN     (64UL) /* == 2^FD_CHUNK_LG_SZ, explicit to work around compiler limitations */
     125             : #define FD_CHUNK_FOOTPRINT (64UL) /* == 2^FD_CHUNK_LG_SZ, explicit for the same reason */
     126    77418975 : #define FD_CHUNK_SZ        (64UL) /* == 2^FD_CHUNK_LG_SZ, explicit for the same reason */
     127             : 
     128             : /* FD_FRAG_META_{LG_SZ,ALIGN,FOOTPRINT,SZ} describe the coarse layout
     129             :    of message fragment metadata structures.
     130             :    sizeof(fd_frag_meta_t)==ALIGN==FOOTPRINT==SZ==2^LG_SZ.  Recommend
     131             :    this to be something like a positive integer multiple or an integer
     132             :    power of two divisor of a cache line size. */
     133             : 
     134             : #define FD_FRAG_META_LG_SZ     (5) /* log2 of sizeof(fd_frag_meta_t) */
     135             : #define FD_FRAG_META_ALIGN     (32UL) /* == 2^FD_FRAG_META_LG_SZ, explicit to work around compiler limitations */
     136             : #define FD_FRAG_META_FOOTPRINT (32UL) /* == 2^FD_FRAG_META_LG_SZ, explicit for the same reason */
     137             : #define FD_FRAG_META_SZ        (32UL) /* == 2^FD_FRAG_META_LG_SZ, explicit for the same reason */
     138             : 
     139             : /* FD_FRAG_META_ORIG_MAX specifies the maximum number of message origins
     140             :    that are supported.  Origin ids are in [0,FD_FRAG_META_ORIG_MAX). */
     141             : 
     142             : #define FD_FRAG_META_ORIG_MAX (8192UL) /* == 2^13, i.e. an origin id fits in 13 bits */
     143             : 
     144             : /* fd_frag_meta_t specifies the message fragment metadata.  The union overlays the scalar fields with SSE / AVX views of the same bytes. */
     145             : 
     146             : union __attribute__((aligned(FD_FRAG_META_ALIGN))) fd_frag_meta {
     147             : 
     148             :   struct {
     149             : 
     150             :     /* First aligned SSE word ... these are strictly updated atomically */
     151             : 
     152             :     ulong  seq; /* naturally atomic r/w, frag sequence number. */
     153             :     ulong  sig; /* naturally atomic r/w, application defined message signature for fast consumer side filtering.
     154             :                    Performance is best if this is updated atomically with seq. */
     155             : 
     156             :     /* Second aligned SSE word ... these are typically updated
     157             :        atomically but there is no guarantee both SSE words are jointly
     158             :        updated atomically. */
     159             : 
     160             :     uint   chunk;  /* naturally atomic r/w, compressed relative location of first byte of the frag in data region. */
     161             :     ushort sz;     /* naturally atomic r/w, Frag size in bytes. */
     162             :     ushort ctl;    /* naturally atomic r/w, Message reassembly control bits (origin/clock domain, SOM/EOM/ERR flags), see fd_frag_meta_ctl */
     163             :     uint   tsorig; /* naturally atomic r/w, Message diagnostic compressed timestamp (when the origin started producing the frag) */
     164             :     uint   tspub;  /* naturally atomic r/w, Message diagnostic compressed timestamp (when the frag was made available to consumers) */
     165             : 
     166             :   };
     167             : 
     168             : 
     169             :   /* Intel architecture manual 3A section 8.1.1 (April 2022):
     170             : 
     171             :        Processors that enumerate support for Intel AVX (by setting the
     172             :        feature flag CPUID.01H:ECX.AVX[bit 28]) guarantee that the
     173             :        16-byte memory operations performed by the following instructions
     174             :        will always be carried out atomically:
     175             : 
     176             :        - MOVAPD, MOVAPS, and MOVDQA.
     177             :        - VMOVAPD, VMOVAPS, and VMOVDQA when encoded with VEX.128.
     178             :        - VMOVAPD, VMOVAPS, VMOVDQA32, and VMOVDQA64 when encoded with
     179             :          EVEX.128 and k0 (masking disabled).
     180             : 
     181             :        (Note that these instructions require the linear addresses of
     182             :        their memory operands to be 16-byte aligned.)
     183             : 
     184             :      That is, accesses to "sse0" and "sse1" below are atomic when AVX
     185             :      support is available given the overall structure alignment,
     186             :      appropriate intrinsics and what not.  Accesses to avx are likely
     187             :      atomic on many x86 platforms but this is not guaranteed and, as
     188             :      such, atomicity of avx accesses should not be assumed. */
     189             : 
     190             : # if FD_HAS_SSE
     191             :   struct {
     192             :     __m128i sse0; /* naturally atomic r/w, covers seq and sig */
     193             :     __m128i sse1; /* naturally atomic r/w, covers chunk, sz, ctl, tsorig and tspub */
     194             :   };
     195             : # endif
     196             : 
     197             : # if FD_HAS_AVX
     198             :   __m256i avx; /* Possibly non-atomic but can hold the metadata in a single register */
     199             : # endif
     200             : 
     201             : };
     202             : 
     203             : typedef union fd_frag_meta fd_frag_meta_t;
     204             : 
     205             : FD_PROTOTYPES_BEGIN
     206             : 
     207             : /* fd_seq_{lt,le,eq,ne,ge,gt} compare 64-bit sequence numbers with
     208             :    proper handling of sequence number wrapping (e.g. if, for example, we
     209             :    decide to randomize the initial sequence numbers used by an
     210             :    application for security reasons and by chance pick a sequence number
     211             :    near 2^64 such that wrapping through sequence number 0 occurs).  That
     212             :    is, sequence number reuse is not an issue practically in a real world
     213             :    application but sequence number wrapping is if we want to support
     214             :    things like initial sequence number randomization for security.
     215             : 
     216             :    fd_seq_{inc,dec} returns the result of incrementing/decrementing
     217             :    sequence number a delta times.
     218             : 
     219             :    fd_seq_diff returns how many sequence numbers a is ahead of b.
     220             :    Positive/negative values means a is in the future/past of b.  Zero
     221             :    indicates a and b are the same.
     222             : 
     223             :    In general operations on sequence numbers are strongly encouraged to
     224             :    use these functions as such facilitates updating code to accommodate
     225             :    things like changing the width of a sequence number. */
     226             : 
     227  3076520523 : FD_FN_CONST static inline int fd_seq_lt( ulong a, ulong b ) { return ((long)(a-b))< 0L; } /* ordering is the sign of the wrapped difference a-b */
     228           0 : FD_FN_CONST static inline int fd_seq_le( ulong a, ulong b ) { return ((long)(a-b))<=0L; }
     229  3073425600 : FD_FN_CONST static inline int fd_seq_eq( ulong a, ulong b ) { return a==b;              }
     230 10447495920 : FD_FN_CONST static inline int fd_seq_ne( ulong a, ulong b ) { return a!=b;              }
     231     3000000 : FD_FN_CONST static inline int fd_seq_ge( ulong a, ulong b ) { return ((long)(a-b))>=0L; }
     232  3071759349 : FD_FN_CONST static inline int fd_seq_gt( ulong a, ulong b ) { return ((long)(a-b))> 0L; }
     233             : 
     234 23848728637 : FD_FN_CONST static inline ulong fd_seq_inc( ulong a, ulong delta ) { return a+delta; } /* unsigned arithmetic, wraps modulo 2^64 */
     235   874533581 : FD_FN_CONST static inline ulong fd_seq_dec( ulong a, ulong delta ) { return a-delta; } /* unsigned arithmetic, wraps modulo 2^64 */
     236             : 
     237 27723645238 : FD_FN_CONST static inline long fd_seq_diff( ulong a, ulong b ) { return (long)(a-b); } /* wrapped difference reinterpreted as signed */
     238             : 
     239             : /* fd_chunk_to_laddr: returns a pointer in the local address space to
     240             :    the first byte of the chunk with the given compressed relative
     241             :    address chunk given the pointer in the local address space of the
     242             :    chunk whose index is 0 (chunk0).  fd_chunk_to_laddr_const is for
     243             :    const-correctness.
     244             : 
     245             :    fd_laddr_to_chunk: the inverse, returns the chunk index of laddr relative to chunk0. */
     246             : 
     247             : FD_FN_CONST static inline void *    /* Will be aligned FD_CHUNK_ALIGN and in [ chunk0, chunk0 + FD_CHUNK_SZ*(UINT_MAX+1) ) */
     248             : fd_chunk_to_laddr( void * chunk0,   /* Assumed aligned FD_CHUNK_ALIGN */
     249    76799490 :                    ulong  chunk ) { /* Assumed in [0,UINT_MAX] */
     250    76799490 :   return (void *)(((ulong)chunk0) + (chunk << FD_CHUNK_LG_SZ)); /* byte offset of a chunk is chunk*2^FD_CHUNK_LG_SZ */
     251    76799490 : }
     252             : 
     253             : FD_FN_CONST static inline void const * /* Same computation as fd_chunk_to_laddr, for a const chunk0 */
     254             : fd_chunk_to_laddr_const( void const * chunk0,   /* Assumed aligned FD_CHUNK_ALIGN (as for fd_chunk_to_laddr) */
     255  1228790457 :                          ulong        chunk ) { /* Assumed in [0,UINT_MAX] (as for fd_chunk_to_laddr) */
     256  1228790457 :   return (void const *)(((ulong)chunk0) + (chunk << FD_CHUNK_LG_SZ)); /* byte offset of a chunk is chunk*2^FD_CHUNK_LG_SZ */
     257  1228790457 : }
     258             : 
     259             : FD_FN_CONST static inline ulong           /* Will be in [0,UINT_MAX] */
     260             : fd_laddr_to_chunk( void const * chunk0,   /* Assumed aligned FD_CHUNK_ALIGN */
     261       65841 :                    void const * laddr ) { /* Assumed aligned FD_CHUNK_ALIGN and in [ chunk0, chunk0 + FD_CHUNK_SZ*(UINT_MAX+1) ) */
     262       65841 :   return (((ulong)laddr)-((ulong)chunk0)) >> FD_CHUNK_LG_SZ; /* byte distance from chunk0 divided by 2^FD_CHUNK_LG_SZ */
     263       65841 : }
     264             : 
     265             : /* fd_frag_meta_seq_query returns the sequence number pointed to by meta
     266             :    as atomically observed at some point of time between when the call
     267             :    was made and the call returns.  Assumes meta is valid.  This acts as
     268             :    a compiler memory fence. */
     269             : 
     270             : static inline ulong
     271  1237790448 : fd_frag_meta_seq_query( fd_frag_meta_t const * meta ) { /* Assumed non-NULL */
     272  1237790448 :   FD_COMPILER_MFENCE(); /* compiler memory fence before the read */
     273  1237790448 :   ulong seq = FD_VOLATILE_CONST( meta->seq ); /* the one read of meta->seq */
     274  1237790448 :   FD_COMPILER_MFENCE(); /* compiler memory fence after the read */
     275  1237790448 :   return seq;
     276  1237790448 : }
     277             : 
     278             : #if FD_HAS_SSE
     279             : 
     280             : /* fd_frag_meta_seq_sig_query returns the sequence number and signature
     281             :    pointed to by meta in one atomic read (unpack the result with
     282             :    fd_frag_meta_sse0_{seq,sig}), same semantics as fd_frag_meta_seq_query. */
     283             : static inline __m128i
     284           0 : fd_frag_meta_seq_sig_query( fd_frag_meta_t const * meta ) { /* Assumed non-NULL */
     285           0 :   FD_COMPILER_MFENCE(); /* compiler memory fence before the read */
     286           0 :   __m128i sse0 = _mm_load_si128( &meta->sse0 ); /* aligned 16-byte load of the first SSE word (seq and sig) */
     287           0 :   FD_COMPILER_MFENCE(); /* compiler memory fence after the read */
     288           0 :   return sse0;
     289           0 : }
     290             : 
     291             : #endif
     292             : 
     293             : /* fd_frag_meta_ctl, fd_frag_meta_ctl_{orig,som,eom,err} pack and unpack
     294             :    the fd_frag message reassembly control bits. */
     295             : 
     296             : FD_FN_CONST static inline ulong  /* In [0,2^16) */
     297             : fd_frag_meta_ctl( ulong orig,    /* Assumed in [0,FD_FRAG_META_ORIG_MAX) */
     298             :                   int   som,     /* 0 for false, non-zero for true */
     299             :                   int   eom,     /* 0 for false, non-zero for true */
     300   860717540 :                   int   err ) {  /* 0 for false, non-zero for true */
     301   860717540 :   return ((ulong)!!som) | (((ulong)!!eom)<<1) | (((ulong)!!err)<<2) | (orig<<3); /* bit 0 SOM, bit 1 EOM, bit 2 ERR, bits 3 and up orig (!! normalizes flags to 0/1) */
     302   860717540 : }
     303             : 
     304           0 : FD_FN_CONST static inline ulong fd_frag_meta_ctl_orig( ulong ctl ) { return        ctl>>3;         } /* bits 3 and up: origin id (not masked) */
     305     3000000 : FD_FN_CONST static inline int   fd_frag_meta_ctl_som ( ulong ctl ) { return (int)( ctl     & 1UL); } /* bit 0: SOM flag, returns 0 or 1 */
     306     3000000 : FD_FN_CONST static inline int   fd_frag_meta_ctl_eom ( ulong ctl ) { return (int)((ctl>>1) & 1UL); } /* bit 1: EOM flag, returns 0 or 1 */
     307     3000000 : FD_FN_CONST static inline int   fd_frag_meta_ctl_err ( ulong ctl ) { return (int)((ctl>>2) & 1UL); } /* bit 2: ERR flag, returns 0 or 1 */
     308             : 
     309             : #if FD_HAS_SSE
     310             : /* fd_frag_meta_sse0 packs seq and sig into one SSE word (seq in the lower 64-bit lane, sig in the upper 64-bit lane). */
     311             : FD_FN_CONST static inline __m128i
     312             : fd_frag_meta_sse0( ulong seq,
     313          81 :                    ulong sig ) {
     314          81 :   return _mm_set_epi64x( (long)sig, (long)seq ); /* _mm_set_epi64x takes the upper 64-bit lane first */
     315          81 : }
     316             : /* fd_frag_meta_sse0_{seq,sig} unpack the fields of an SSE word packed as per fd_frag_meta_sse0. */
     317           0 : FD_FN_CONST static inline ulong fd_frag_meta_sse0_seq( __m128i sse0 ) { return (ulong)_mm_extract_epi64( sse0, 0 ); } /* lower 64-bit lane */
     318           0 : FD_FN_CONST static inline ulong fd_frag_meta_sse0_sig( __m128i sse0 ) { return (ulong)_mm_extract_epi64( sse0, 1 ); } /* upper 64-bit lane */
     319             : /* fd_frag_meta_sse1 packs chunk, sz, ctl, tsorig and tspub into one SSE word.  Inputs are ORed together unmasked, so each must fit its assumed width. */
     320             : FD_FN_CONST static inline __m128i
     321             : fd_frag_meta_sse1( ulong chunk,    /* Assumed 32-bit */
     322             :                    ulong sz,       /* Assumed 16-bit */
     323             :                    ulong ctl,      /* Assumed 16-bit */
     324             :                    ulong tsorig,   /* Assumed 32-bit */
     325          81 :                    ulong tspub ) { /* Assumed 32-bit */
     326          81 :   return _mm_set_epi64x( (long)(tsorig | (tspub<<32)), /* upper 64-bit lane: tsorig in bits [0,32), tspub in bits [32,64) */
     327          81 :                          (long)(chunk | (sz<<32) | (ctl<<48)) ); /* lower 64-bit lane: chunk in bits [0,32), sz in [32,48), ctl in [48,64) */
     328          81 : }
     329             : /* fd_frag_meta_sse1_{chunk,sz,ctl,tsorig,tspub} unpack the fields of an SSE word packed as per fd_frag_meta_sse1. */
     330           0 : FD_FN_CONST static inline ulong fd_frag_meta_sse1_chunk ( __m128i sse1 ) { return (ulong)(uint  )_mm_extract_epi32( sse1, 0 ); } /* 32-bit lane 0 */
     331           0 : FD_FN_CONST static inline ulong fd_frag_meta_sse1_sz    ( __m128i sse1 ) { return (ulong)(ushort)_mm_extract_epi16( sse1, 2 ); } /* 16-bit lane 2 */
     332           0 : FD_FN_CONST static inline ulong fd_frag_meta_sse1_ctl   ( __m128i sse1 ) { return (ulong)(ushort)_mm_extract_epi16( sse1, 3 ); } /* 16-bit lane 3 */
     333           0 : FD_FN_CONST static inline ulong fd_frag_meta_sse1_tsorig( __m128i sse1 ) { return (ulong)(uint  )_mm_extract_epi32( sse1, 2 ); } /* 32-bit lane 2 */
     334           0 : FD_FN_CONST static inline ulong fd_frag_meta_sse1_tspub ( __m128i sse1 ) { return (ulong)(uint  )_mm_extract_epi32( sse1, 3 ); } /* 32-bit lane 3 */
     335             : 
     336             : #endif
     337             : #if FD_HAS_AVX
     338             : /* fd_frag_meta_avx packs all metadata fields into one AVX word.  Narrow inputs are ORed together unmasked, so each must fit its assumed width. */
     339             : FD_FN_CONST static inline __m256i
     340             : fd_frag_meta_avx( ulong seq,
     341             :                   ulong sig,
     342             :                   ulong chunk,    /* Assumed 32-bit */
     343             :                   ulong sz,       /* Assumed 16-bit */
     344             :                   ulong ctl,      /* Assumed 16-bit */
     345             :                   ulong tsorig,   /* Assumed 32-bit */
     346       65841 :                   ulong tspub ) { /* Assumed 32-bit */
     347       65841 :   return _mm256_set_epi64x( (long)(tsorig | (tspub<<32)), /* 64-bit lane 3: tsorig low half, tspub high half */
     348       65841 :                             (long)(chunk | (sz<<32) | (ctl<<48)), /* 64-bit lane 2: chunk in bits [0,32), sz in [32,48), ctl in [48,64) */
     349       65841 :                             (long)sig, /* 64-bit lane 1 */
     350       65841 :                             (long)seq ); /* 64-bit lane 0 (_mm256_set_epi64x takes lanes from most to least significant) */
     351       65841 : }
     352             : /* fd_frag_meta_avx_{seq,sig,chunk,sz,ctl,tsorig,tspub} unpack the fields of an AVX word packed as per fd_frag_meta_avx. */
     353           0 : FD_FN_CONST static inline ulong fd_frag_meta_avx_seq   ( __m256i avx ) { return (ulong)        _mm256_extract_epi64( avx,  0 ); } /* 64-bit lane 0 */
     354           0 : FD_FN_CONST static inline ulong fd_frag_meta_avx_sig   ( __m256i avx ) { return (ulong)        _mm256_extract_epi64( avx,  1 ); } /* 64-bit lane 1 */
     355           0 : FD_FN_CONST static inline ulong fd_frag_meta_avx_chunk ( __m256i avx ) { return (ulong)(uint  )_mm256_extract_epi32( avx,  4 ); } /* 32-bit lane 4 */
     356           0 : FD_FN_CONST static inline ulong fd_frag_meta_avx_sz    ( __m256i avx ) { return (ulong)(ushort)_mm256_extract_epi16( avx, 10 ); } /* 16-bit lane 10 */
     357           0 : FD_FN_CONST static inline ulong fd_frag_meta_avx_ctl   ( __m256i avx ) { return (ulong)(ushort)_mm256_extract_epi16( avx, 11 ); } /* 16-bit lane 11 */
     358           0 : FD_FN_CONST static inline ulong fd_frag_meta_avx_tsorig( __m256i avx ) { return (ulong)(uint  )_mm256_extract_epi32( avx,  6 ); } /* 32-bit lane 6 */
     359           0 : FD_FN_CONST static inline ulong fd_frag_meta_avx_tspub ( __m256i avx ) { return (ulong)(uint  )_mm256_extract_epi32( avx,  7 ); } /* 32-bit lane 7 */
     360             : 
     361             : #endif
     362             : 
     363             : /* fd_frag_meta_ts_{comp,decomp}:  Given the longs ts and tsref that
     364             :    are reasonably close to each other (|ts-tsref| < 2^31 ... about
     365             :    +/-2.1 seconds if ts and tsref are reasonably well synchronized
     366             :    fd_log_wallclock measurements), this pair of functions can quickly
     367             :    and losslessly compress / decompress ts by a factor of 2 exactly
     368             :    using tsref as the compressor / decompressor "state". */
     369             : 
     370             : FD_FN_CONST static inline ulong   /* In [0,UINT_MAX] */
     371   909334789 : fd_frag_meta_ts_comp( long ts ) {
     372   909334789 :   return (ulong)(uint)ts; /* keep only the low 32 bits of ts */
     373   909334789 : }
     374             : 
     375             : FD_FN_CONST static inline long /* Reconstructed timestamp, low 32 bits equal tscomp */
     376             : fd_frag_meta_ts_decomp( ulong tscomp,   /* In [0,UINT_MAX] */
     377           0 :                         long  tsref ) { /* Reference timestamp assumed close to the original ts (see above) */
     378           0 :   ulong msb = ((ulong)tsref) + fd_ulong_mask_lsb(31) - tscomp; /* NOTE(review): assumes fd_ulong_mask_lsb(n) has the n least significant bits set (defined outside this file) -- confirm */
     379           0 :   return (long)((msb & ~fd_ulong_mask_lsb(32)) | tscomp); /* upper 32 bits derived from tsref, lower 32 bits taken from tscomp */
     380           0 : }
     381             : 
     382             : FD_PROTOTYPES_END
     383             : 
     384             : #endif /* HEADER_fd_src_tango_fd_tango_base_h */
     385             : 

Generated by: LCOV version 1.14