LCOV - code coverage report
Current view: top level - util/bits - fd_bits.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 334 334 100.0 %
Date: 2024-11-13 11:58:15 Functions: 1267 193880 0.7 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_util_bits_fd_bits_h
       2             : #define HEADER_fd_src_util_bits_fd_bits_h
       3             : 
       4             : /* Bit manipulation APIs */
       5             : 
       6             : #include "../sanitize/fd_msan.h"
       7             : 
       8             : FD_PROTOTYPES_BEGIN
       9             : 
      10             : /* fd_ulong_is_pow2    ( x          ) returns 1 if x is a positive integral power of 2 and 0 otherwise.
      11             :    fd_ulong_pow2       ( n          ) returns 2^n mod 2^64.  U.B. if n is negative.
      12             : 
      13             :    fd_ulong_mask_bit   ( b          ) returns the ulong with bit b set and all other bits 0.  U.B. if b is not in [0,64).
      14             :    fd_ulong_clear_bit  ( x, b       ) returns x with bit b cleared.  U.B. if b is not in [0,64).
      15             :    fd_ulong_set_bit    ( x, b       ) returns x with bit b set. U.B. if b is not in [0,64).
      16             :    fd_ulong_extract_bit( x, b       ) returns bit b of x as an int in {0,1}.  U.B. if b is not in [0,64).
      17             :    fd_ulong_insert_bit ( x, b, y    ) returns x with bit b set to y.  U.B. if b is not in [0,64) and/or y is not in {0,1}.
      18             : 
      19             :    fd_ulong_mask_lsb   ( n          ) returns the ulong bits [0,n) set and all other bits 0.  U.B. if n is not in [0,64].
      20             :    fd_ulong_clear_lsb  ( x, n       ) returns x with bits [0,n) cleared.  U.B. if n is not in [0,64].
      21             :    fd_ulong_set_lsb    ( x, n       ) returns x with bits [0,n) set. U.B. if n is not in [0,64].
      22             :    fd_ulong_flip_lsb   ( x, n       ) returns x with bits [0,n) flipped. U.B. if n is not in [0,64].
      23             :    fd_ulong_extract_lsb( x, n       ) returns bits [0,n) of x.  U.B. if b is not in [0,64).
      24             :    fd_ulong_insert_lsb ( x, n, y    ) returns x with bits [0,n) set to y.  U.B. if b is not in [0,64) and/or y is not in [0,2^n).
      25             : 
      26             :    fd_ulong_mask       ( l, h       ) returns the ulong bits [l,h] set and all other bits 0.  U.B. if not 0<=l<=h<64.
      27             :    fd_ulong_clear      ( x, l, h    ) returns x with bits [l,h] cleared.  U.B. if not 0<=l<=h<64.
      28             :    fd_ulong_set        ( x, l, h    ) returns x with bits [l,h] set.  U.B. if not 0<=l<=h<64.
      29             :    fd_ulong_flip       ( x, l, h    ) returns x with bits [l,h] flipped.  U.B. if not 0<=l<=h<64.
      30             :    fd_ulong_extract    ( x, l, h    ) returns bits [l,h] of x.  U.B. if not 0<=l<=h<64.
      31             :    fd_ulong_insert     ( x, l, h, y ) returns x with bits [l,h] set to y.
      32             :                                       U.B. if not 0<=l<=h<64 and/or y is not in in [0,2^(h-l+1)).
      33             : 
      34             :    fd_ulong_pop_lsb    ( x          ) returns x with the least significant set bit cleared (0 returns 0).
      35             : 
      36             :    FIXME: CONSIDER HAVING (A,X) INSTEAD OF (X,A)?
      37             :    fd_ulong_is_aligned ( x, a       ) returns 1 if x is an integral multiple of a and 0 otherwise.  U.B. if !fd_ulong_is_pow2( a )
      38             :    fd_ulong_alignment  ( x, a       ) returns x mod a.  U.B. if !fd_ulong_is_pow2( a )
      39             :    fd_ulong_align_dn   ( x, a       ) returns x rounded down to the closest multiple of a <= x.  U.B. if !fd_ulong_is_pow2( a )
      40             :    fd_ulong_align_up   ( x, a       ) returns x rounded up to the closest multiple of a >= x mod 2^64.
      41             :                                       U.B. if !fd_ulong_is_pow2( a )
      42             : 
      43             :    fd_ulong_blend      ( m, t, f    ) returns forms a ulong by selecting bits from t where m is 1 and from f where m is 0.
      44             :    fd_ulong_if         ( c, t, f    ) returns t if c is 1 and f if c is 0.  U.B. if c is not in {0,1}
      45             :    fd_ulong_store_if   ( c, p, v    ) if c is non-zero, stores v to *p.  Otherwise does nothing.
      46             :    fd_ulong_abs        ( x          ) returns |x| as a ulong
      47             :    fd_ulong_min        ( x, y       ) returns min(x,y)
      48             :    fd_ulong_max        ( x, y       ) returns max(x,y)
      49             : 
      50             :    fd_ulong_shift_left  ( x, n ) returns x with its bits shifted left n times (n>63 shifts to zero), U.B. if n<0
      51             :    fd_ulong_shift_right ( x, n ) returns x with its bits shifted right n times (n>63 shifts to zero), U.B. if n<0
      52             :    fd_ulong_rotate_left ( x, n ) returns x with its bits rotated left n times (negative values rotate right)
      53             :    fd_ulong_rotate_right( x, n ) returns x with its bits rotated right n times (negative values rotate left)
      54             : 
      55             :    fd_ulong_popcnt            ( x    ) returns the number of bits set in x, in [0,64].
      56             :    fd_ulong_find_lsb          ( x    ) returns the index of the least significant bit set in x, in [0,64).  U.B. if x is zero.
      57             :    fd_ulong_find_lsb_w_default( x, d ) returns the index of the least significant bit set in x, in [0,64).  d if x is zero.
      58             :    fd_ulong_find_msb          ( x    ) returns the index of the most significant bit set in x, in [0,64).  U.B. if x is zero.
      59             :    fd_ulong_find_msb_w_default( x, d ) returns the index of the most significant bit set in x, in [0,64).  d if x is zero.
      60             :    fd_ulong_bswap             ( x    ) returns x with its bytes swapped
      61             :    fd_ulong_pow2_up           ( x    ) returns y mod 2^64 where y is the smallest integer power of 2 >= x.  U.B. if x is zero.
      62             :                                        (current implementation returns 0 if x==0).
      63             :    fd_ulong_pow2_dn           ( x    ) returns the largest integer power of 2 <= x.  U.B. if x is zero.
      64             :                                        (current implementation returns 1 if x==0).
      65             : 
      66             :    Similarly for uchar,ushort,uint,uint128.  Note that the signed
      67             :    versions of shift_left, rotate_left, rotate_right operate on the bit
      68             :    pattern of the underlying type directly.  Signed shift_right is sign
      69             :    extending while unsigned shift_right is zero padding (such that if x
      70             :    is negative/non-negative, a large magnitude shift will shift to
      71             :    -1/0).
      72             : 
      73             :    Support for zig-zag encoding is also provided.  E.g.
      74             : 
      75             :    fd_long_zz_enc( x ) returns the zig-zag encodes  long x and returns it as a ulong.
      76             :    fd_long_zz_dec( y ) returns the zig-zag decodes ulong y and returns it as a long.
      77             : 
      78             :    zig-zag encoding losslessly maps a signed integer to an unsigned
      79             :    integer such that, if the magnitude of the signed integer was small,
      80             :    the magnitude of the unsigned integer will be small too.
      81             : 
      82             :    Note that, though fd_ulong_if and friends look like a completely
      83             :    superfluous wrapper for the trinary operator, they have subtly
      84             :    different linguistic meanings.  This seemingly trivial difference can
      85             :    have profound effects on code generation quality, especially on
      86             :    modern architectures.  For example:
      87             : 
      88             :      c ? x[i] : x[j];
      89             : 
      90             :    linguistically means, if c is non-zero, load the i-th element of
      91             :    x.  Otherwise, load the j-th element of x.  But:
      92             : 
      93             :      fd_ulong_if( c, x[i], x[j] )
      94             : 
      95             :    means load the i-th element of x _and_ load the j-th element of x
      96             :    _and_ then select the first value if c is non-zero or the second
      97             :    value otherwise.  Further, it explicitly says that c and the loads
      98             :    can be evaluated in any order.
      99             : 
     100             :    As such, in the trinary case, the compiler will be loathe to do
     101             :    either load before computing c because the language explicitly says
     102             :    "don't do that".  In the unlikely case it overcomes this barrier,
     103             :    it then has the difficult job of proving that reordering c with the
     104             :    loads is safe (e.g. evaluating c might affect the value that would be
     105             :    loaded if c has side effects).  In the unlikely case it overcomes
     106             :    that barrier, it then has the difficult job of proving it is safe to
     107             :    do the loads before evaluating c (e.g. c might be protecting against
     108             :    a load that could seg-fault).
     109             : 
     110             :    In the fd_ulong_if case though, the compiler has been explicitly told
     111             :    up front that the loads are safe and c and the loads can be done in
     112             :    any order.  Now the optimizer finds it a lot easier to optimize
     113             :    because it isn't accidentally over-constrained and doesn't have to
     114             :    prove anything.  E.g. it can use otherwise unused instruction slots
     115             :    before the operation to hide load latency while computing c in
     116             :    parallel via ILP.  Further, it can then ideally can use a conditional
     117             :    move operation to eliminate unnecessary consumption of BTB resources.
     118             :    And, if c is known at compile time, the compiler can prune
     119             :    unnecessary code for the unselected option (e.g. the compiler knows
     120             :    that omitting an unused normal load has no observable effect in the
     121             :    machine model).
     122             : 
     123             :    Faster, more deterministic, less BTB resources consumed and good
     124             :    compile time behavior.  Everything the trinary operator should have
     125             :    been, rather than the giant pile of linguistic fail that it is.
     126             : 
     127             :    Overall, compilers traditionally are much much better at pruning
     128             :    unneeded operations than speculating execution (especially for code
     129             :    paths that a language says not to do and doubly so for code paths
     130             :    that are not obviously safe in general).  And most of this is because
     131             :    languages are not designed correctly to help developers express their
     132             :    intent and constraints to the compiler.
     133             : 
     134             :    This dynamic has had multi-billion dollar commercial impacts though
     135             :    the cause-and-effect has gone largely unrecognized.
     136             : 
     137             :    At one extreme, a major reason why Itanium failed was languages
     138             :    didn't have the right constructs and machine models for developers to
     139             :    "do the right thing".  Even given such, most devs wouldn't know to
     140             :    use them because they were erroneously taught to code to a machine
     141             :    abstraction that hasn't applied to the real world since the early
     142             :    1980s.  Compilers then were not able to utilize all the speculative
     143             :    execution / ILP / ... features that were key to performance on that
     144             :    architecture.  The developer community, not being able to see any
     145             :    benefits (much less large enough benefits to justify short term
     146             :    switching costs) and not wanting to write tons of hand-tuned
     147             :    non-portable ASM kernels, shrugged and Itanium withered away.
     148             : 
     149             :    At the other extreme, CUDA gave developers a good GPU abstraction and
     150             :    extended languages and compilers to make it possible for developers
     151             :    code to that abstraction (e.g. express locality explicitly instead of
     152             :    the constant lying-by-omission about the importance of locality that
     153             :    virtually everything else in tech does ... the speed of light isn't
     154             :    infinite or even all that fast relative to modern CPUs ... stop
     155             :    pretending that it is).  CUDA enabled GPUs have since thrived in
     156             :    gaming, high performance computing, machine learning, crypto, etc.
     157             :    Profits had by all (well, by Nvidia at least).
     158             : 
     159             :    TL;DR
     160             : 
     161             :    * It'd be laughable if it weren't so pathetic that CPU ISAs and
     162             :      programming languages usually forget to expose one of the most
     163             :      fundamental and important digital logic circuits to devs ... the
     164             :      2:1 mux.
     165             : 
     166             :    * Developers will usually do the right thing if languages let them.
     167             : 
     168             :    TODO: mask_msb, clear_msb, set_msb, flip_msb, extract_msb,
     169             :    insert_msb, bitrev, sign, copysign, flipsign, rounding right shift,
     170             :    ... */
     171             : 
     172             : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,w)                                                                                         \
     173   279920388 : FD_FN_CONST static inline int  fd_##T##_is_pow2     ( T x               ) { return (!!x) & (!(x & (x-(T)1)));                    } \
     174         759 : FD_FN_CONST static inline T    fd_##T##_pow2        ( int n             ) { return (T)(((T)(n<w))<<(n&(w-1)));                   } \
     175   440047902 : FD_FN_CONST static inline T    fd_##T##_mask_bit    ( int b             ) { return (T)(((T)1)<<b);                               } \
     176       51138 : FD_FN_CONST static inline T    fd_##T##_clear_bit   ( T x, int b        ) { return (T)(x & ~fd_##T##_mask_bit(b));               } \
     177   439551183 : FD_FN_CONST static inline T    fd_##T##_set_bit     ( T x, int b        ) { return (T)(x |  fd_##T##_mask_bit(b));               } \
     178        2976 : FD_FN_CONST static inline T    fd_##T##_flip_bit    ( T x, int b        ) { return (T)(x ^  fd_##T##_mask_bit(b));               } \
     179   709103743 : FD_FN_CONST static inline int  fd_##T##_extract_bit ( T x, int b        ) { return (int)((x>>b) & (T)1);                         } \
     180        5952 : FD_FN_CONST static inline T    fd_##T##_insert_bit  ( T x, int b, int y ) { return (T)((x & ~fd_##T##_mask_bit(b))|(((T)y)<<b)); } \
     181  1284119511 : FD_FN_CONST static inline T    fd_##T##_mask_lsb    ( int n             ) { return (T)((((T)(n<w))<<(n&(w-1)))-((T)1));          } \
     182  1281521550 : FD_FN_CONST static inline T    fd_##T##_clear_lsb   ( T x, int n        ) { return (T)(x & ~fd_##T##_mask_lsb(n));               } \
     183        2262 : FD_FN_CONST static inline T    fd_##T##_set_lsb     ( T x, int n        ) { return (T)(x |  fd_##T##_mask_lsb(n));               } \
     184        2262 : FD_FN_CONST static inline T    fd_##T##_flip_lsb    ( T x, int n        ) { return (T)(x ^  fd_##T##_mask_lsb(n));               } \
     185        2262 : FD_FN_CONST static inline T    fd_##T##_extract_lsb ( T x, int n        ) { return (T)(x &  fd_##T##_mask_lsb(n));               } \
     186  1281519288 : FD_FN_CONST static inline T    fd_##T##_insert_lsb  ( T x, int n, T y   ) { return (T)(fd_##T##_clear_lsb(x,n) | y);             } \
     187       91704 : FD_FN_CONST static inline T    fd_##T##_mask        ( int l, int h      ) { return (T)( fd_##T##_mask_lsb(h-l+1) << l );         } \
     188      124881 : FD_FN_CONST static inline T    fd_##T##_clear       ( T x, int l, int h ) { return (T)(x & ~fd_##T##_mask(l,h));                 } \
     189       42480 : FD_FN_CONST static inline T    fd_##T##_set         ( T x, int l, int h ) { return (T)(x |  fd_##T##_mask(l,h));                 } \
     190      108978 : FD_FN_CONST static inline T    fd_##T##_flip        ( T x, int l, int h ) { return (T)(x ^  fd_##T##_mask(l,h));                 } \
     191       82896 : FD_FN_CONST static inline T    fd_##T##_extract     ( T x, int l, int h ) { return (T)( (x>>l) & fd_##T##_mask_lsb(h-l+1) );     } \
     192      264864 : FD_FN_CONST static inline T    fd_##T##_insert      ( T x, int l, int h, T y ) { return (T)(fd_##T##_clear(x,l,h) | (y<<l));     } \
     193  3433658337 : FD_FN_CONST static inline T    fd_##T##_pop_lsb     ( T x               ) { return (T)(x & (x-(T)1));                            } \
     194  9638420611 : FD_FN_CONST static inline int  fd_##T##_is_aligned  ( T x, T a          ) { a--; return !(x & a);                                } \
     195       66960 : FD_FN_CONST static inline T    fd_##T##_alignment   ( T x, T a          ) { a--; return (T)( x    &  a);                         } \
     196   210475570 : FD_FN_CONST static inline T    fd_##T##_align_dn    ( T x, T a          ) { a--; return (T)( x    & ~a);                         } \
     197 11865002197 : FD_FN_CONST static inline T    fd_##T##_align_up    ( T x, T a          ) { a--; return (T)((x+a) & ~a);                         } \
     198   251658240 : FD_FN_CONST static inline T    fd_##T##_blend       ( T m, T t, T f     ) { return (T)((t & m) | (f & ~m));                      } \
     199 29449760936 : FD_FN_CONST static inline T    fd_##T##_if          ( int c, T t, T f   ) { return c ? t : f;     /* cmov */                     } \
     200   300787866 : /*       */ static inline void fd_##T##_store_if    ( int c, T * p, T v ) { T _[ 1 ]; *( c ? p : _ ) = v; /* cmov */             } \
     201   489555636 : FD_FN_CONST static inline T    fd_##T##_abs         ( T x               ) { return x;                                            } \
     202 20427256701 : FD_FN_CONST static inline T    fd_##T##_min         ( T x, T y          ) { return (x<y) ? x : y; /* cmov */                     } \
     203  3256547313 : FD_FN_CONST static inline T    fd_##T##_max         ( T x, T y          ) { return (x>y) ? x : y; /* cmov */                     } \
     204   754974720 : FD_FN_CONST static inline T    fd_##T##_shift_left  ( T x, int n        ) { return (T)(((n>(w-1)) ? ((T)0) : x) << (n&(w-1)));   } \
     205   503317968 : FD_FN_CONST static inline T    fd_##T##_shift_right ( T x, int n        ) { return (T)(((n>(w-1)) ? ((T)0) : x) >> (n&(w-1)));   } \
     206 >31685*10^7 : FD_FN_CONST static inline T    fd_##T##_rotate_left ( T x, int n        ) { return (T)((x << (n&(w-1))) | (x >> ((-n)&(w-1))));  } \
     207  4571123638 : FD_FN_CONST static inline T    fd_##T##_rotate_right( T x, int n        ) { return (T)((x >> (n&(w-1))) | (x << ((-n)&(w-1))));  }
     208             : 
     209             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uchar,  8)
     210             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ushort,16)
     211             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint,  32)
     212             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ulong, 64)
     213             : 
     214             : #if FD_HAS_INT128 /* FIXME: These probably could benefit from x86 specializations */
     215             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint128,128)
     216             : #endif
     217             : 
     218             : #undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
     219             : 
     220  1892748118 : FD_FN_CONST static inline int fd_uchar_popcnt ( uchar  x ) { return __builtin_popcount ( (uint)x ); }
     221         816 : FD_FN_CONST static inline int fd_ushort_popcnt( ushort x ) { return __builtin_popcount ( (uint)x ); }
     222      769734 : FD_FN_CONST static inline int fd_uint_popcnt  ( uint   x ) { return __builtin_popcount (       x ); }
     223 13140883578 : FD_FN_CONST static inline int fd_ulong_popcnt ( ulong  x ) { return __builtin_popcountl(       x ); }
     224             : 
     225             : #if FD_HAS_INT128
     226             : FD_FN_CONST static inline int
     227       49536 : fd_uint128_popcnt( uint128 x ) {
     228       49536 :   return  __builtin_popcountl( (ulong) x ) + __builtin_popcountl( (ulong)(x>>64) );
     229       49536 : }
     230             : #endif
     231             : 
     232             : #include "fd_bits_find_lsb.h" /* Provides find_lsb_w_default too */
     233             : #include "fd_bits_find_msb.h" /* Provides find_msb_w_default too */ /* Note that find_msb==floor( log2( x ) ) for non-zero x */
     234             : 
     235     3051666 : FD_FN_CONST static inline uchar  fd_uchar_bswap ( uchar  x ) { return x; }
     236   364816481 : FD_FN_CONST static inline ushort fd_ushort_bswap( ushort x ) { return __builtin_bswap16( x ); }
     237 12381054111 : FD_FN_CONST static inline uint   fd_uint_bswap  ( uint   x ) { return __builtin_bswap32( x ); }
     238  1388754890 : FD_FN_CONST static inline ulong  fd_ulong_bswap ( ulong  x ) { return __builtin_bswap64( x ); }
     239             : 
     240             : #if FD_HAS_INT128
     241             : FD_FN_CONST static inline uint128
     242         387 : fd_uint128_bswap( uint128 x ) {
     243         387 :   ulong xl = (ulong) x;
     244         387 :   ulong xh = (ulong)(x>>64);
     245         387 :   return (((uint128)fd_ulong_bswap( xl )) << 64) | ((uint128)fd_ulong_bswap( xh ));
     246         387 : }
     247             : #endif
     248             : 
     249             : /* FIXME: consider find_msb based solution (probably not as the combination
     250             :    of FD_FN_CONST and the use of inline asm for find_msb on some targets
     251             :    is probably less than ideal). */
     252             : 
     253             : FD_FN_CONST static inline uchar
     254         111 : fd_uchar_pow2_up( uchar _x ) {
     255         111 :   uint x = (uint)_x;
     256         111 :   x--;
     257         111 :   x |= (x>> 1);
     258         111 :   x |= (x>> 2);
     259         111 :   x |= (x>> 4);
     260         111 :   x++;
     261         111 :   return (uchar)x;
     262         111 : }
     263             : 
     264             : FD_FN_CONST static inline uchar
     265         111 : fd_uchar_pow2_dn( uchar _x ) {
     266         111 :   uint x = (uint)_x;
     267         111 :   x >>= 1;
     268         111 :   x |= (x>> 1);
     269         111 :   x |= (x>> 2);
     270         111 :   x |= (x>> 4);
     271         111 :   x++;
     272         111 :   return (uchar)x;
     273         111 : }
     274             : 
     275             : FD_FN_CONST static inline ushort
     276         411 : fd_ushort_pow2_up( ushort _x ) {
     277         411 :   uint x = (uint)_x;
     278         411 :   x--;
     279         411 :   x |= (x>> 1);
     280         411 :   x |= (x>> 2);
     281         411 :   x |= (x>> 4);
     282         411 :   x |= (x>> 8);
     283         411 :   x++;
     284         411 :   return (ushort)x;
     285         411 : }
     286             : 
     287             : FD_FN_CONST static inline ushort
     288         411 : fd_ushort_pow2_dn( ushort _x ) {
     289         411 :   uint x = (uint)_x;
     290         411 :   x >>= 1;
     291         411 :   x |= (x>> 1);
     292         411 :   x |= (x>> 2);
     293         411 :   x |= (x>> 4);
     294         411 :   x |= (x>> 8);
     295         411 :   x++;
     296         411 :   return (ushort)x;
     297         411 : }
     298             : 
     299             : FD_FN_CONST static inline uint
     300        1587 : fd_uint_pow2_up( uint x ) {
     301        1587 :   x--;
     302        1587 :   x |= (x>> 1);
     303        1587 :   x |= (x>> 2);
     304        1587 :   x |= (x>> 4);
     305        1587 :   x |= (x>> 8);
     306        1587 :   x |= (x>>16);
     307        1587 :   x++;
     308        1587 :   return x;
     309        1587 : }
     310             : 
     311             : FD_FN_CONST static inline uint
     312        1587 : fd_uint_pow2_dn( uint x ) {
     313        1587 :   x >>= 1;
     314        1587 :   x |= (x>> 1);
     315        1587 :   x |= (x>> 2);
     316        1587 :   x |= (x>> 4);
     317        1587 :   x |= (x>> 8);
     318        1587 :   x |= (x>>16);
     319        1587 :   x++;
     320        1587 :   return x;
     321        1587 : }
     322             : 
     323             : FD_FN_CONST static inline ulong
     324   107503164 : fd_ulong_pow2_up( ulong x ) {
     325   107503164 :   x--;
     326   107503164 :   x |= (x>> 1);
     327   107503164 :   x |= (x>> 2);
     328   107503164 :   x |= (x>> 4);
     329   107503164 :   x |= (x>> 8);
     330   107503164 :   x |= (x>>16);
     331   107503164 :   x |= (x>>32);
     332   107503164 :   x++;
     333   107503164 :   return x;
     334   107503164 : }
     335             : 
     336             : FD_FN_CONST static inline ulong
     337        6243 : fd_ulong_pow2_dn( ulong x ) {
     338        6243 :   x >>= 1;
     339        6243 :   x |= (x>> 1);
     340        6243 :   x |= (x>> 2);
     341        6243 :   x |= (x>> 4);
     342        6243 :   x |= (x>> 8);
     343        6243 :   x |= (x>>16);
     344        6243 :   x |= (x>>32);
     345        6243 :   x++;
     346        6243 :   return x;
     347        6243 : }
     348             : 
     349             : #if FD_HAS_INT128
     350             : FD_FN_CONST static inline uint128
     351       24771 : fd_uint128_pow2_up( uint128 x ) {
     352       24771 :   x--;
     353       24771 :   x |= (x>> 1);
     354       24771 :   x |= (x>> 2);
     355       24771 :   x |= (x>> 4);
     356       24771 :   x |= (x>> 8);
     357       24771 :   x |= (x>>16);
     358       24771 :   x |= (x>>32);
     359       24771 :   x |= (x>>64);
     360       24771 :   x++;
     361       24771 :   return x;
     362       24771 : }
     363             : 
     364             : FD_FN_CONST static inline uint128
     365       24771 : fd_uint128_pow2_dn( uint128 x ) {
     366       24771 :   x >>= 1;
     367       24771 :   x |= (x>> 1);
     368       24771 :   x |= (x>> 2);
     369       24771 :   x |= (x>> 4);
     370       24771 :   x |= (x>> 8);
     371       24771 :   x |= (x>>16);
     372       24771 :   x |= (x>>32);
     373       24771 :   x |= (x>>64);
     374       24771 :   x++;
     375       24771 :   return x;
     376       24771 : }
     377             : #endif
     378             : 
     379             : /* Brokenness of indeterminant char sign strikes again ... sigh.  We
     380             :    explicitly provide the unsigned variant of the token to these
     381             :    macros because the uchar token is not related to the schar token
     382             :    by simply prepending u to schar. */
     383             : 
     384             : /* Note: the implementations of abs, right_shift and zz_enc below do not
     385             :    exploit the sign extending right shift behavior specified by the
     386             :    machine model (and thus can be used safely in more general machine
     387             :    models) but are slightly more expensive.
     388             : 
     389             :    FD_FN_CONST static inline UT fd_##T##_abs( T x ) { UT u = (UT)x; UT m = (UT)-(u>>(w-1)); return (UT)((u+m)^m); }
     390             : 
     391             :    FD_FN_CONST static inline T
     392             :    fd_##T##_shift_right( T   x,
     393             :                          int n ) {
     394             :      UT u = (UT)x;
     395             :      UT m = (UT)-(u >> (w-1));
     396             :      return (T)(fd_##UT##_shift_right( u ^ m, n ) ^ m);
     397             :    }
     398             : 
     399             :    FD_FN_CONST static inline UT fd_##T##_zz_enc( T x ) { UT u = (UT)x; return (UT)((-(u>>(w-1))) ^ (u<<1)); }
     400             : */
     401             : 
     402             : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,UT,w)                                                                                    \
     403  1039398353 : FD_FN_CONST static inline T    fd_##T##_if          ( int c, T t, T f ) { return c ? t : f;      /* cmov */ }                    \
     404  4023217692 : /*       */ static inline void fd_##T##_store_if    ( int c, T * p, T v ) { T _[ 1 ]; *( c ? p : _ ) = v; /* cmov */           } \
     405  1704524866 : FD_FN_CONST static inline UT   fd_##T##_abs         ( T x             ) { UT m = (UT)(x >> (w-1)); return (UT)((((UT)x)+m)^m); } \
     406   473228942 : FD_FN_CONST static inline T    fd_##T##_min         ( T x, T y        ) { return (x<=y) ? x : y; /* cmov */ }                    \
     407   286507096 : FD_FN_CONST static inline T    fd_##T##_max         ( T x, T y        ) { return (x>=y) ? x : y; /* cmov */ }                    \
     408   251658240 : FD_FN_CONST static inline T    fd_##T##_shift_left  ( T x, int n      ) { return (T)fd_##UT##_shift_left  ( (UT)x, n ); }        \
     409   251658240 : FD_FN_CONST static inline T    fd_##T##_shift_right ( T x, int n      ) { return (T)(x >> ((n>(w-1)) ? (w-1) : n)); /* cmov */ } \
     410  1096648128 : FD_FN_CONST static inline T    fd_##T##_rotate_left ( T x, int n      ) { return (T)fd_##UT##_rotate_left ( (UT)x, n ); }        \
     411  1096648128 : FD_FN_CONST static inline T    fd_##T##_rotate_right( T x, int n      ) { return (T)fd_##UT##_rotate_right( (UT)x, n ); }        \
     412      196620 : FD_FN_CONST static inline UT   fd_##T##_zz_enc      ( T x             ) { return (UT)(((UT)(x>>(w-1))) ^ (((UT)x)<<1)); }        \
     413      149151 : FD_FN_CONST static inline T    fd_##T##_zz_dec      ( UT x            ) { return (T)((x>>1) ^ (-(x & (UT)1))); }
     414             : 
     415             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(schar, uchar,    8)
     416             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(short, ushort,  16)
     417             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(int,   uint,    32)
     418             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(long,  ulong,   64)
     419             : #if FD_HAS_INT128
     420             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(int128,uint128,128)
     421             : #endif
     422             : 
     423             : #undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
     424             : 
     425             : /* Brokenness of indeterminant char sign strikes again ... sigh.  We
     426             :    can't provide a char_min/char_max between platforms as they don't
     427             :    necessarily produce the same results.  Likewise, we don't provide a
     428             :    fd_char_abs because it will not produce equivalent results between
     429             :    platforms.  But it is useful to have a char_if and char_store_if to
     430             :    help with making branchless string operation implementations. */
     431             : 
     432    53724801 : FD_FN_CONST static inline char fd_char_if( int c, char t, char f ) { return c ? t : f; }
     433             : 
     434    50331648 : static inline void fd_char_store_if( int c, char * p, char v ) { char _[ 1 ]; *( c ? p : _ ) = v; /* cmov */ }
     435             : 
     436             : /* FIXME: ADD HASHING PAIRS FOR UCHAR AND USHORT? */
     437             : 
     438             : /* High quality (full avalanche) high speed integer to integer hashing.
     439             :    fd_uint_hash has the properties that [0,2^32) hashes to a random
     440             :    looking permutation of [0,2^32) and hash(0)==0.  Similarly for
     441             :    fd_ulong_hash.  Based on Google's Murmur3 hash finalizer (public
     442             :    domain).  Not cryptographically secure but passes various strict
     443             :    tests of randomness when used as a PRNG. */
     444             : 
     445             : FD_FN_CONST static inline uint
     446   119368026 : fd_uint_hash( uint x ) {
     447   119368026 :   x ^= x >> 16;
     448   119368026 :   x *= 0x85ebca6bU;
     449   119368026 :   x ^= x >> 13;
     450   119368026 :   x *= 0xc2b2ae35U;
     451   119368026 :   x ^= x >> 16;
     452   119368026 :   return x;
     453   119368026 : }
     454             : 
     455             : FD_FN_CONST static inline ulong
     456 >15905*10^7 : fd_ulong_hash( ulong x ) {
     457 >15905*10^7 :   x ^= x >> 33;
     458 >15905*10^7 :   x *= 0xff51afd7ed558ccdUL;
     459 >15905*10^7 :   x ^= x >> 33;
     460 >15905*10^7 :   x *= 0xc4ceb9fe1a85ec53UL;
     461 >15905*10^7 :   x ^= x >> 33;
     462 >15905*10^7 :   return x;
     463 >15905*10^7 : }
     464             : 
     465             : /* Inverses of the above.  E.g.:
     466             :      fd_uint_hash_inverse( fd_uint_hash( (uint)x ) )==(uint)x
     467             :    and:
     468             :      fd_uint_hash( fd_uint_hash_inverse( (uint)x ) )==(uint)x
     469             :    Similarly for fd_ulong_hash_inverse.  These by themselves are similar
     470             :    quality hashes to the above and having the inverses of the above can
     471             :    be useful.  The fact these have (nearly) identical operations /
     472             :    operation counts concretely demonstrates that none of these are
     473             :    standalone cryptographically secure. */
     474             : 
     475             : FD_FN_CONST static inline uint
     476   103809084 : fd_uint_hash_inverse( uint x ) {
     477   103809084 :   x ^= x >> 16;
     478   103809084 :   x *= 0x7ed1b41dU;
     479   103809084 :   x ^= (x >> 13) ^ (x >> 26);
     480   103809084 :   x *= 0xa5cb9243U;
     481   103809084 :   x ^= x >> 16;
     482   103809084 :   return x;
     483   103809084 : }
     484             : 
     485             : FD_FN_CONST static inline ulong
     486 13089422283 : fd_ulong_hash_inverse( ulong x ) {
     487 13089422283 :   x ^= x >> 33;
     488 13089422283 :   x *= 0x9cb4b2f8129337dbUL;
     489 13089422283 :   x ^= x >> 33;
     490 13089422283 :   x *= 0x4f74430c22a54005UL;
     491 13089422283 :   x ^= x >> 33;
     492 13089422283 :   return x;
     493 13089422283 : }
     494             : 
     495             : /* fd_ulong_base10_dig_cnt returns the number of digits in the base 10
     496             :    representation of x.  FIXME: USE BETTER ALGO. */
     497             : 
     498             : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,M) \
     499             : FD_FN_CONST static inline ulong            \
     500         237 : fd_##T##_base10_dig_cnt( T _x ) {          \
     501         237 :   ulong x      = (ulong)_x;                \
     502         237 :   ulong cnt    = 1UL;                      \
     503         237 :   ulong thresh = 10UL;                     \
     504        1521 :   do {                                     \
     505        1521 :     if( FD_LIKELY( x<thresh ) ) break;     \
     506        1521 :     cnt++;                                 \
     507        1284 :     thresh *= 10UL;                        \
     508        1284 :   } while( FD_LIKELY( cnt<M ) );           \
     509         237 :   return cnt;                              \
     510         237 : }
     511             : 
     512             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uchar,  3UL) /*                  255 ->  3 dig */
     513             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ushort, 5UL) /*                65535 ->  5 dig */
     514             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint,  10UL) /*           4294967295 -> 10 dig */
     515             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ulong, 20UL) /* 18446744073709551615 -> 20 dig */
     516             : 
     517             : #undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
     518             : 
     519             : /* fd_float_if, fd_float_store_if, fd_float_abs are described above.
     520             :    Ideally, the system will implement fd_float_abs by just clearing the
     521             :    sign bit.  fd_float_eq tests to floating point values for whether or
     522             :    not their bit representations are identical.  Useful when IEEE
     523             :    handling of equality with +/-0 or nan are not desired (e.g. can test
     524             :    if nans have different signs or syndromes). */
     525             : 
     526    50534199 : FD_FN_CONST static inline float fd_float_if      ( int c, float   t, float f ) { return c ? t : f; }
     527    50331648 : /*       */ static inline void  fd_float_store_if( int c, float * p, float v ) { float _[ 1 ]; *( c ? p : _ ) = v; }
     528    96095148 : FD_FN_CONST static inline float fd_float_abs     ( float x ) { return __builtin_fabsf( x ); }
     529             : FD_FN_CONST static inline int
     530             : fd_float_eq( float x,
     531   100663296 :              float y ) {
     532   100663296 :   union { float f; uint u; } tx, ty;
     533   100663296 :   tx.f = x;
     534   100663296 :   ty.f = y;
     535   100663296 :   return tx.u==ty.u;
     536   100663296 : }
     537             : 
     538             : /* fd_double_if, fd_double_store_if, fd_double_abs and fd_double_eq are
     539             :    double precision versions of the above. */
     540             : 
     541             : #if FD_HAS_DOUBLE
     542    54497061 : FD_FN_CONST static inline double fd_double_if      ( int c, double   t, double f ) { return c ? t : f; }
     543    50331648 : /*       */ static inline void   fd_double_store_if( int c, double * p, double v ) { double _[ 1 ]; *( c ? p : _ ) = v; }
     544    96161058 : FD_FN_CONST static inline double fd_double_abs     ( double x ) { return __builtin_fabs( x ); }
     545             : FD_FN_CONST static inline int
     546             : fd_double_eq( double x,
     547   100663296 :               double y ) {
     548   100663296 :   union { double f; ulong u; } tx, ty;
     549   100663296 :   tx.f = x;
     550   100663296 :   ty.f = y;
     551   100663296 :   return tx.u==ty.u;
     552   100663296 : }
     553             : #endif
     554             : 
     555             : /* fd_swap swaps the values in a and b.  Assumes a and b have the same
     556             :    plain-old-data type.  Best if a and b are primitive types (e.g.
     557             :    ulong).  This macro is robust (it evaluates a and b the minimal
     558             :    number of times).  Note that compilers are pretty good about identify
     559             :    swap operations and replacing them with more optimal assembly for
     560             :    types and architectures where alternative implementations (like xor
     561             :    tricks) might be faster. */
     562             : 
     563   654311424 : #define fd_swap(a,b) do { __typeof__((a)) _fd_swap_tmp = (a); (a) = (b); (b) = _fd_swap_tmp; } while(0)
     564             : 
     565             : /* fd_swap_if swaps the values in a and b if c is non-zero and leaves
     566             :    them unchanged otherwise.  Assumes a and b have the same
     567             :    plain-old-data type.  Best if a and b are primitive types (e.g.
     568             :    ulong) as the compiler will likely replace the trinaries below with
     569             :    cmovs, making this branchless.  This macro is robust (it evaluates a,
     570             :    b and c the minimal number of times). */
     571             : 
     572   661873917 : #define fd_swap_if(c,a,b) do {                                      \
     573   661873917 :     int             _fd_swap_if_c = (c);                            \
     574   661873917 :     __typeof__((a)) _fd_swap_if_a = (a);                            \
     575   661873917 :     __typeof__((b)) _fd_swap_if_b = (b);                            \
     576   661873917 :     (a) = _fd_swap_if_c ? _fd_swap_if_b : _fd_swap_if_a; /* cmov */ \
     577   661873917 :     (b) = _fd_swap_if_c ? _fd_swap_if_a : _fd_swap_if_b; /* cmov */ \
     578   661873917 :   } while(0)
     579             : 
     580             : /* fd_ptr_if is a generic version of the above for pointers.  This macro
     581             :    is robust.
     582             : 
     583             :    IMPORTANT SAFETY TIP!  The output type is the type of t.  Thus, if
     584             :    the condition is false and t and f have different pointer types
     585             :    (which is inherently gross and we might want to consider outright
     586             :    rejecting in the future via, unfortunately non-standard, compiler
     587             :    builtins), this would implicitly cast the pointer f to the type of t.
     588             :    As such, strict aliasing rules in the language also imply mixed usage
     589             :    cases need to be wrapped in a fd_type_pun.  In short, mixing pointer
     590             :    types between t and f is strongly discouraged. */
     591             : 
     592             : #if FD_HAS_UBSAN
     593             : #define fd_ptr_if(c,t,f) ((__typeof__((t)))( (c) ? (ulong)(t) : (ulong)(f) ))
     594             : #else
     595  6096428981 : #define fd_ptr_if(c,t,f) ((__typeof__((t)))fd_ulong_if( (c), (ulong)(t), (ulong)(f) ))
     596             : #endif
     597             : 
     598             : /* FD_ULONG_{MASK_LSB,MASK_MSB,ALIGN_UP} are the same as
     599             :    fd_ulong_{mask_lsb,mask_msb,align_up} but can be used at compile
     600             :    time.  The tradeoff is n/a must be safe against multiple evaluation
     601             :    at compile time.  x should be ulong compatible and n/a should be int
     602             :    compatible. */
     603             : 
     604    12116310 : #define FD_ULONG_MASK_LSB( n )    ((((ulong)((n)<=63)) << ((n) & 63)) - 1UL)
     605             : #define FD_ULONG_MASK_MSB( n )    (~FD_ULONG_MASK_LSB(64-(n)))
     606    19654206 : #define FD_ULONG_ALIGN_UP( x, a ) (((x)+((a)-1UL)) & (~((a)-1UL)))
     607             : 
     608             : /* Unaligned access annotations.
     609             : 
     610             :    FD_LOAD( T, src ) is equivalent to:
     611             :      return (*(T const *)(src))
     612             :    but src can have arbitrary alignment.
     613             : 
     614             :    FD_STORE( T, dst, val ) is equivalent to:
     615             :      T * ptr = (T *)(dst);
     616             :      *ptr = (val);
     617             :      return ptr
     618             :    but dst can have arbitrary alignment.
     619             : 
     620             :    Note: Ideally, we would infer the type T in FD_LOAD from src (e.g.
     621             :    use typeof(*(src)).  But there are some nasty linguistic and
     622             :    optimizer interactions when src is a constant pointer in a truly
     623             :    generic implementation.  Similarly for FD_STORE.
     624             : 
     625             :    fd_T_load_n( src ) where T is in [uchar,ushort,uint,ulong] loads n
     626             :    bytes into the least significant n bytes of a T, zeros any remaining
     627             :    bytes and returns the result.  fd_T_load_n_fast is the same but
     628             :    assumes it is safe to tail read a couple of bytes past the end of src
     629             :    if such is beneficial for higher performance.
     630             : 
     631             :    Accesses that would normally be atomic (e.g. an aligned access to a
     632             :    primitive type like a ulong) are not guaranteed to be atomic if done
     633             :    through these annotations. */
     634             : 
     635             : #ifndef FD_UNALIGNED_ACCESS_STYLE
     636             : #if FD_HAS_X86
     637             : #define FD_UNALIGNED_ACCESS_STYLE 0  /* 1 is broken ... */
     638             : #else
     639             : #define FD_UNALIGNED_ACCESS_STYLE 0
     640             : #endif
     641             : #endif
     642             : 
     643             : #if FD_UNALIGNED_ACCESS_STYLE==0 /* memcpy elision based */
     644             : 
     645             : /* This implementation does not assume it is safe to access unaligned
     646             :    memory directly (and thus can be used on platforms outside the
     647             :    development environment's machine model) but it does still assume
     648             :    little endian byte ordering.
     649             : 
     650             :    It is based on memcpy and, in principle, the compiler should elide
     651             :    the memcpy and replace this with optimized asm on platforms where
     652             :    this is safe (which is virtually all commercially viable platforms as
     653             :    packet processing deal heavily with unaligned accesses and virtually
     654             :    all platforms are near universally networked and networking needs to
     655             :    do packet processing efficiently).  But this fails often enough in
     656             :    practice that this should not be relied upon, especially if
     657             :    performance is important as performance is glacial when the compiler
     658             :    mucks up.  (fd_memcpy is an especially bad idea here because the
     659             :    risk of the compiler mucking it up is much greater.)
     660             : 
     661             :    It is also more than little bizarre that this is an encouraged
     662             :    practice nowadays.  That is, practically, we are using a low level
     663             :    language (C/C++) that have language constructs (dereference a
     664             :    pointer to a primitive type) that map directly onto low level
     665             :    hardware operations (asm load operation) that are actually supported
     666             :    by the target hardware here (fine if pointer is not aligned to
     667             :    width of the type).
     668             : 
     669             :    But instead of encouraging developers to write short, readable,
     670             :    library-independent code that generates fast and ultra compact asm,
     671             :    they are encouraged to write long, obtuse, library-dependent code
     672             :    that naively would generate slow bloated asm in hopes the compiler
     673             :    will realize can be turned into the simple implementation and turn it
     674             :    back into the developer's original intent and then generate good asm.
     675             :    Hmmm. */
     676             : 
     677             : #define FD_LOAD( T, src ) \
     678  4236438490 :   (__extension__({ T _fd_load_tmp; memcpy( &_fd_load_tmp, (T const *)(src), sizeof(T) ); _fd_load_tmp; }))
     679             : 
     680             : #define FD_STORE( T, dst, val ) \
     681 39016526304 :   (__extension__({ T _fd_store_tmp = (val); (T *)memcpy( (T *)(dst), &_fd_store_tmp, sizeof(T) ); }))
     682             : 
     683        6144 : FD_FN_PURE static inline uchar  fd_uchar_load_1      ( void const * p ) { return         *(uchar const *)p; }
     684             : 
     685        6141 : FD_FN_PURE static inline ushort fd_ushort_load_1     ( void const * p ) { return (ushort)*(uchar const *)p; }
     686        6141 : FD_FN_PURE static inline ushort fd_ushort_load_2     ( void const * p ) { ushort t;       memcpy( &t, p, 2UL ); return        t; }
     687             : 
     688        6135 : FD_FN_PURE static inline uint   fd_uint_load_1       ( void const * p ) { return (uint  )*(uchar const *)p; }
     689        6135 : FD_FN_PURE static inline uint   fd_uint_load_2       ( void const * p ) { ushort t;       memcpy( &t, p, 2UL ); return (uint )t; }
     690       30207 : FD_FN_PURE static inline uint   fd_uint_load_3       ( void const * p ) { uint   t = 0UL; memcpy( &t, p, 3UL ); return (uint )t; }
     691   236817204 : FD_FN_PURE static inline uint   fd_uint_load_4       ( void const * p ) { uint   t;       memcpy( &t, p, 4UL ); return        t; }
     692             : 
     693  2790120837 : FD_FN_PURE static inline ulong  fd_ulong_load_1      ( void const * p ) { return (ulong )*(uchar const *)p; }
     694  1383809949 : FD_FN_PURE static inline ulong  fd_ulong_load_2      ( void const * p ) { ushort t;       memcpy( &t, p, 2UL ); return (ulong)t; }
     695        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_3      ( void const * p ) { uint   t = 0UL; memcpy( &t, p, 3UL ); return (ulong)t; }
     696    66180738 : FD_FN_PURE static inline ulong  fd_ulong_load_4      ( void const * p ) { uint   t;       memcpy( &t, p, 4UL ); return (ulong)t; }
     697        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_5      ( void const * p ) { ulong  t = 0UL; memcpy( &t, p, 5UL ); return        t; }
     698        6141 : FD_FN_PURE static inline ulong  fd_ulong_load_6      ( void const * p ) { ulong  t = 0UL; memcpy( &t, p, 6UL ); return        t; }
     699        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_7      ( void const * p ) { ulong  t = 0UL; memcpy( &t, p, 7UL ); return        t; }
     700  2182432337 : FD_FN_PURE static inline ulong  fd_ulong_load_8      ( void const * p ) { ulong  t;       memcpy( &t, p, 8UL ); return        t; }
     701             : 
     702             : #define                         fd_uchar_load_1_fast                    fd_uchar_load_1
     703             : 
     704             : #define                         fd_ushort_load_1_fast                   fd_ushort_load_1
     705             : #define                         fd_ushort_load_2_fast                   fd_ushort_load_2
     706             : 
     707             : #define                         fd_uint_load_1_fast                     fd_uint_load_1
     708             : #define                         fd_uint_load_2_fast                     fd_uint_load_2
     709        6135 : FD_FN_PURE static inline uint   fd_uint_load_3_fast  ( void const * p ) { uint   t; memcpy( &t, p, 4UL ); return ((uint )t) & 0x00ffffffU;          }
     710   159144165 : #define                         fd_uint_load_4_fast                     fd_uint_load_4
     711             : 
     712             : #define                         fd_ulong_load_1_fast                    fd_ulong_load_1
     713           9 : #define                         fd_ulong_load_2_fast                    fd_ulong_load_2
     714        6132 : FD_FN_PURE static inline ulong  fd_ulong_load_3_fast ( void const * p ) { uint   t; memcpy( &t, p, 4UL ); return ((ulong)t) & 0x0000000000ffffffUL; }
     715           9 : #define                         fd_ulong_load_4_fast                    fd_ulong_load_4
     716        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_5_fast ( void const * p ) { ulong  t; memcpy( &t, p, 8UL ); return         t  & 0x000000ffffffffffUL; }
     717        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_6_fast ( void const * p ) { ulong  t; memcpy( &t, p, 8UL ); return         t  & 0x0000ffffffffffffUL; }
     718        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_7_fast ( void const * p ) { ulong  t; memcpy( &t, p, 8UL ); return         t  & 0x00ffffffffffffffUL; }
     719    91140904 : #define                         fd_ulong_load_8_fast                    fd_ulong_load_8
     720             : 
     721             : #elif FD_UNALIGNED_ACCESS_STYLE==1 /* direct access */
     722             : 
     723             : #define FD_LOAD( T, src ) (__extension__({      \
     724             :     T const * _fd_store_tmp = (T const *)(src); \
     725             :     FD_COMPILER_FORGET( _fd_store_tmp );        \
     726             :     *_fd_store_tmp;                             \
     727             :   }))
     728             : 
     729             : #define FD_STORE( T, dst, val ) (__extension__({           \
     730             :     T * _fd_store_tmp = (T *)fd_type_pun( (void *)(dst) ); \
     731             :     *_fd_store_tmp = (val);                                \
     732             :     FD_COMPILER_MFENCE();                                  \
     733             :     _fd_store_tmp;                                         \
     734             :   }))
     735             : 
     736             : FD_FN_PURE static inline uchar  fd_uchar_load_1      ( void const * p ) { FD_COMPILER_FORGET( p ) ; return (        *(uchar  const *)p); }
     737             : 
     738             : FD_FN_PURE static inline ushort fd_ushort_load_1     ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ushort)*(uchar  const *)p); }
     739             : FD_FN_PURE static inline ushort fd_ushort_load_2     ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ushort const *)p); }
     740             : 
     741             : FD_FN_PURE static inline uint   fd_uint_load_1       ( void const * p ) { FD_COMPILER_FORGET( p ); return ((uint  )*(uchar  const *)p); }
     742             : FD_FN_PURE static inline uint   fd_uint_load_2       ( void const * p ) { FD_COMPILER_FORGET( p ); return ((uint  )*(ushort const *)p); }
     743             : FD_FN_PURE static inline uint   fd_uint_load_3       ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_uint_load_2 (p) | (fd_uint_load_1 (((uchar const *)p)+2UL)<<16); }
     744             : FD_FN_PURE static inline uint   fd_uint_load_4       ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(uint   const *)p); }
     745             : 
     746             : FD_FN_PURE static inline ulong  fd_ulong_load_1      ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(uchar  const *)p); }
     747             : FD_FN_PURE static inline ulong  fd_ulong_load_2      ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(ushort const *)p); }
     748             : FD_FN_PURE static inline ulong  fd_ulong_load_3      ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_2(p) | (fd_ulong_load_1(((uchar const *)p)+2UL)<<16); }
     749             : FD_FN_PURE static inline ulong  fd_ulong_load_4      ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(uint   const *)p); }
     750             : FD_FN_PURE static inline ulong  fd_ulong_load_5      ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_4(p) | (fd_ulong_load_1(((uchar const *)p)+4UL)<<32); }
     751             : FD_FN_PURE static inline ulong  fd_ulong_load_6      ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_4(p) | (fd_ulong_load_2(((uchar const *)p)+4UL)<<32); }
     752             : FD_FN_PURE static inline ulong  fd_ulong_load_7      ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_6(p) | (fd_ulong_load_1(((uchar const *)p)+6UL)<<48); }
     753             : FD_FN_PURE static inline ulong  fd_ulong_load_8      ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong  const *)p); }
     754             : 
     755             : #define                         fd_uchar_load_1_fast                    fd_uchar_load_1
     756             : 
     757             : #define                         fd_ushort_load_1_fast                   fd_ushort_load_1
     758             : #define                         fd_ushort_load_2_fast                   fd_ushort_load_2
     759             : 
     760             : #define                         fd_uint_load_1_fast                     fd_uint_load_1
     761             : #define                         fd_uint_load_2_fast                     fd_uint_load_2
     762             : FD_FN_PURE static inline uint   fd_uint_load_3_fast  ( void const * p ) { FD_COMPILER_FORGET( p ); return (       *(uint   const *)p) & 0x00ffffffU;          } /* Tail read 1B */
     763             : #define                         fd_uint_load_4_fast                     fd_uint_load_4
     764             : 
     765             : #define                         fd_ulong_load_1_fast                    fd_ulong_load_1
     766             : #define                         fd_ulong_load_2_fast                    fd_ulong_load_2
     767             : FD_FN_PURE static inline ulong  fd_ulong_load_3_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong)*(uint   const *)p) & 0x0000000000ffffffUL; } /* Tail read 1B */
     768             : #define                         fd_ulong_load_4_fast                    fd_ulong_load_4
     769             : FD_FN_PURE static inline ulong  fd_ulong_load_5_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (       *(ulong  const *)p) & 0x000000ffffffffffUL; } /* Tail read 3B */
     770             : FD_FN_PURE static inline ulong  fd_ulong_load_6_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (       *(ulong  const *)p) & 0x0000ffffffffffffUL; } /* Tail read 2B */
     771             : FD_FN_PURE static inline ulong  fd_ulong_load_7_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (       *(ulong  const *)p) & 0x00ffffffffffffffUL; } /* Tail read 1B */
     772             : #define                         fd_ulong_load_8_fast                    fd_ulong_load_8
     773             : 
     774             : #else
     775             : #error "Unsupported FD_UNALIGNED_ACCESS_STYLE"
     776             : #endif
     777             : 
     778             : /* fd_ulong_svw_enc_sz returns the number of bytes needed to encode
     779             :    x as a symmetric variable width encoded integer.  This is at most
     780             :    FD_ULONG_SVW_ENC_MAX (9).  Result will be in {1,2,3,4,5,8,9}. */
     781             : 
     782             : #define FD_ULONG_SVW_ENC_MAX (9UL) /* For compile time use */
     783             : 
     784             : FD_FN_UNUSED FD_FN_CONST static ulong /* Work around -Winline */
     785   719282325 : fd_ulong_svw_enc_sz( ulong x ) {
     786             :   /* FIXME: CONSIDER FIND_MSB BASED TABLE LOOKUP? */
     787   719282325 :   if( FD_LIKELY( x<(1UL<< 6) ) ) return 1UL;
     788   288991281 :   if( FD_LIKELY( x<(1UL<<10) ) ) return 2UL;
     789    43497180 :   if( FD_LIKELY( x<(1UL<<18) ) ) return 3UL;
     790    35686491 :   if( FD_LIKELY( x<(1UL<<24) ) ) return 4UL;
     791    30959463 :   if( FD_LIKELY( x<(1UL<<32) ) ) return 5UL;
     792    24571452 :   if( FD_LIKELY( x<(1UL<<56) ) ) return 8UL;
     793     5695980 :   return                                9UL;
     794    24571452 : }
     795             : 
     796             : /* fd_ulong_svw_enc appends x to the byte stream b as a symmetric
     797             :    variable width encoded integer.  b should have room from
     798             :    fd_ulong_svw_env_sz(x) (note that 9 is sufficient for all possible
     799             :    x).  Returns the next location in the byte system. */
     800             : 
     801             : FD_FN_UNUSED static uchar * /* Work around -Winline */
     802             : fd_ulong_svw_enc( uchar * b,
     803    50704740 :                   ulong   x ) {
     804    50704740 :   if(      FD_LIKELY( x<(1UL<< 6) ) ) {                                                                 b[0] = (uchar)          (x<< 1);  b+=1; } /* 0    | x( 6) |    0 */
     805    45124017 :   else if( FD_LIKELY( x<(1UL<<10) ) ) { FD_STORE( ushort, b, (ushort)(            0x8001UL | (x<<3)) );                                   b+=2; } /* 100  | x(10) |  001 */
     806    41982501 :   else if( FD_LIKELY( x<(1UL<<18) ) ) { FD_STORE( ushort, b, (ushort)(               0x5UL | (x<<3)) ); b[2] = (uchar)(0xa0UL | (x>>13)); b+=3; } /* 101  | x(18) |  101 */
     807    35614710 :   else if( FD_LIKELY( x<(1UL<<24) ) ) { FD_STORE( uint,   b, (uint  )(        0xc0000003UL | (x<<4)) );                                   b+=4; } /* 1100 | x(24) | 0011 */
     808    30887754 :   else if( FD_LIKELY( x<(1UL<<32) ) ) { FD_STORE( uint,   b, (uint  )(               0xbUL | (x<<4)) ); b[4] = (uchar)(0xd0UL | (x>>28)); b+=5; } /* 1101 | x(32) | 1011 */
     809    24523458 :   else if( FD_LIKELY( x<(1UL<<56) ) ) { FD_STORE( ulong,  b,          0xe000000000000007UL | (x<<4)  );                                   b+=8; } /* 1110 | x(56) | 0111 */
     810     5648181 :   else                                { FD_STORE( ulong,  b,                         0xfUL | (x<<4)  ); b[8] = (uchar)(0xf0UL | (x>>60)); b+=9; } /* 1111 | x(64) | 1111 */
     811    50704740 :   return b;
     812    50704740 : }
     813             : 
     814             : /* fd_ulong_svw_enc_fixed appends x to the byte stream b as a symmetric
     815             :    csz width encoded integer.  csz is assumed to be in {1,2,3,4,5,8,9}.
     816             :    b should have room from csz bytes and x should be known apriori to be
     817             :    compatible with csz.  Useful for updating in place an existing
     818             :    encoded integer to a value that is <= the current value.  Returns
     819             :    b+csz. */
     820             : 
     821             : FD_FN_UNUSED static uchar * /* Work around -Winline */
     822             : fd_ulong_svw_enc_fixed( uchar * b,
     823             :                         ulong   csz,
     824  1099804029 :                         ulong   x ) {
     825  1099804029 :   if(      FD_LIKELY( csz==1UL ) ) {                                                                 b[0] = (uchar)          (x<< 1);  } /* 0    | x( 6) |    0 */
     826   408064524 :   else if( FD_LIKELY( csz==2UL ) ) { FD_STORE( ushort, b, (ushort)(            0x8001UL | (x<<3)) );                                   } /* 100  | x(10) |  001 */
     827    51099444 :   else if( FD_LIKELY( csz==3UL ) ) { FD_STORE( ushort, b, (ushort)(               0x5UL | (x<<3)) ); b[2] = (uchar)(0xa0UL | (x>>13)); } /* 101  | x(18) |  101 */
     828    35391699 :   else if( FD_LIKELY( csz==4UL ) ) { FD_STORE( uint,   b, (uint  )(        0xc0000003UL | (x<<4)) );                                   } /* 1100 | x(24) | 0011 */
     829    30665091 :   else if( FD_LIKELY( csz==5UL ) ) { FD_STORE( uint,   b, (uint  )(               0xbUL | (x<<4)) ); b[4] = (uchar)(0xd0UL | (x>>28)); } /* 1101 | x(32) | 1011 */
     830    24375081 :   else if( FD_LIKELY( csz==8UL ) ) { FD_STORE( ulong,  b,          0xe000000000000007UL | (x<<4)  );                                   } /* 1110 | x(56) | 0111 */
     831     5500425 :   else             /* csz==9UL */  { FD_STORE( ulong,  b,                         0xfUL | (x<<4)  ); b[8] = (uchar)(0xf0UL | (x>>60)); } /* 1111 | x(64) | 1111 */
     832  1099804029 :   return b+csz;
     833  1099804029 : }
     834             : 
     835             : /* fd_ulong_svw_dec_sz returns the number of bytes representing an svw
     836             :    encoded integer.  b points to the first byte of the encoded integer.
     837             :    Result will be in {1,2,3,4,5,8,9}. */
     838             : 
     839             : FD_FN_PURE static inline ulong
     840  4001378844 : fd_ulong_svw_dec_sz( uchar const * b ) {
     841             : 
     842             :   /* LSB:         Compressed size
     843             :      xxxx|xxx0 -> 1B
     844             :      xxxx|x001 -> 2B
     845             :      xxxx|x101 -> 3B
     846             :      xxxx|0011 -> 4B
     847             :      xxxx|1011 -> 5B
     848             :      xxxx|0111 -> 8B
     849             :      xxxx|1111 -> 9B
     850             : 
     851             :       15   14   13   12   11   10    9    8    7    6    5    4    3    2    1    0
     852             :      1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
     853             :        9    1    3    1    5    1    2    1    8    1    3    1    4    1    2    1 */
     854             : 
     855  4001378844 :   return (0x9131512181314121UL >> ((((ulong)b[0]) & 15UL) << 2)) & 15UL;
     856  4001378844 : }
     857             : 
     858             : /* fd_ulong_svw_dec_tail_sz returns the number of bytes representing an
     859             :    svw encoded integer.  b points to one after the last byte of the
     860             :    encoded integer.  Result will be in {1,2,3,4,5,8,9}. */
     861             : 
     862             : FD_FN_PURE static inline ulong
     863   228151521 : fd_ulong_svw_dec_tail_sz( uchar const * b ) {
     864             : 
     865             :   /* MSB:         Compressed size
     866             :      0xxx|xxxx -> 1B
     867             :      100x|xxxx -> 2B
     868             :      101x|xxxx -> 3B
     869             :      1100|xxxx -> 4B
     870             :      1101|xxxx -> 5B
     871             :      1110|xxxx -> 8B
     872             :      1111|xxxx -> 9B
     873             : 
     874             :       15   14   13   12   11   10    9    8    7    6    5    4    3    2    1    0
     875             :      1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
     876             :        9    8    5    4    3    3    2    2    1    1    1    1    1    1    1    1 */
     877             : 
     878   228151521 :   return (0x9854332211111111UL >> ((((ulong)b[-1]) >> 4) << 2)) & 15UL;
     879   228151521 : }
     880             : 
     881             : /* fd_ulong_svw_dec_fixed decodes a ulong encoded as a symmetric
     882             :    variable width encoded integer whose width is known.  b points to the
     883             :    first byte of the encoded integer and the encoded integer is csz
     884             :    byte.  csz is assumed to be in {1,2,3,4,5,8,9}. */
     885             : 
     886             : FD_FN_UNUSED static ulong /* Work around -Winline */
     887             : fd_ulong_svw_dec_fixed( uchar const * b,
     888  4386492123 :                         ulong         csz ) {
     889  4386492123 :   if( FD_LIKELY( csz==1UL ) ) return (fd_ulong_load_1( b ) >> 1);
     890  1596377409 :   if( FD_LIKELY( csz==2UL ) ) return (fd_ulong_load_2( b ) >> 3) &              1023UL;
     891   303212289 :   if( FD_LIKELY( csz==3UL ) ) return (fd_ulong_load_2( b ) >> 3) | ((((ulong)b[2]) & 0x1fUL) << 13);
     892   212573592 :   if( FD_LIKELY( csz==4UL ) ) return (fd_ulong_load_4( b ) >> 4) &          16777215UL;
     893   184213458 :   if( FD_LIKELY( csz==5UL ) ) return (fd_ulong_load_4( b ) >> 4) | ((((ulong)b[4]) & 0x0fUL) << 28);
     894   146398986 :   if( FD_LIKELY( csz==8UL ) ) return (fd_ulong_load_8( b ) >> 4) & 72057594037927935UL;
     895    33150429 :   return       /*csz==9UL*/          (fd_ulong_load_8( b ) >> 4) | ( ((ulong)b[8])           << 60);
     896   146398986 : }
     897             : 
     898             : /* fd_ulong_svw_dec decodes a ulong encoded as a symmetric variable
     899             :    width encoded integer.  b points to the first byte of the encoded
     900             :    integer.  Returns a pointer to the first byte after the symvarint and
     901             :    *_x will hold the uncompressed value on return.  If the byte stream
     902             :    might be corrupt, it should be safe to read up to 9 bytes starting a
     903             :    b. */
     904             : 
     905             : static inline uchar const *
     906             : fd_ulong_svw_dec( uchar const * b,
     907   101036253 :                   ulong *       _x ) {
     908   101036253 :   ulong csz = fd_ulong_svw_dec_sz( b );
     909   101036253 :   *_x = fd_ulong_svw_dec_fixed( b, csz ); b += csz;
     910   101036253 :   return b;
     911   101036253 : }
     912             : 
     913             : /* fd_ulong_svw_dec_tail decodes a ulong encoded as a symmetric variable
     914             :    width encoded integer.  b points to the first byte after the encoded
     915             :    integer.  Returns a pointer to the first byte of the encoded integer
     916             :    and *_x will have the hold the uncompressed value on return.  If the
     917             :    byte stream might be corrupt, it should be safe to read up to 9 bytes
     918             :    immediately before b. */
     919             : 
     920             : static inline uchar const *
     921             : fd_ulong_svw_dec_tail( uchar const * b,
     922   100663296 :                        ulong *       _x ) {
     923   100663296 :   ulong csz = fd_ulong_svw_dec_tail_sz( b );
     924   100663296 :   b -= csz; *_x = fd_ulong_svw_dec_fixed( b, csz );
     925   100663296 :   return b;
     926   100663296 : }
     927             : 
     928             : /* FD_LAYOUT_{INIT,APPEND,FINI} are useful for compile time
     929             :    determination of the required footprint of shared memory regions with
     930             :    dynamic sizes and complex alignment restrictions.
     931             : 
     932             :    FD_LAYOUT_INIT starts a layout.  Returns a handle to the layout.
     933             : 
     934             :    FD_LAYOUT_APPEND appends a s byte region of alignment a to a layout
     935             :    where l is an in progress layout.
     936             : 
     937             :    FD_LAYOUT_FINI returns the final layout footprint.  a is the
     938             :    alignment to be used for the overall layout.  It should be the
     939             :    alignment of all appends.  The final footprint will be a multiple of
     940             :    a.
     941             : 
     942             :    All arguments should be ulong compatible.  All alignment should be a
     943             :    positive integer power of 2 and safe against multiple evaluation.
     944             : 
     945             :    The caller further promises the layout is not unreasonably large that
     946             :    overflow might be an issue (i.e. will be at most
     947             :    fd_ulong_align_dn(ULONG_MAX,a) where is the a used for FINI in size).
     948             : 
     949             :    Example usage:
     950             : 
     951             :      FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT,
     952             :        align0, size0 ),
     953             :        align1, size1 ),
     954             :        page_sz )
     955             : 
     956             :    would return the number of pages as a page_sz multiple for a shared
     957             :    memory region that starts with a initial/final region of size0/size1
     958             :    bytes and alignment align0/align1.  Correct operation requires
     959             :    page_sz>=max(align0,align1),  page_sz, align0 and align1 be positive
     960             :    integer powers of 2, page_sz, size0, align0 and align1 should be
     961             :    ulong compatible, page_sz, align0 and align1 be safe against multiple
     962             :    evaluation, and the final size be at most ULONG_MAX-page_sz+1. */
     963             : 
     964      115293 : #define FD_LAYOUT_INIT              (0UL)
     965      235377 : #define FD_LAYOUT_APPEND( l, a, s ) (FD_ULONG_ALIGN_UP( (l), (a) ) + (s))
     966     6029268 : #define FD_LAYOUT_FINI( l, a )      FD_ULONG_ALIGN_UP( (l), (a) )
     967             : 
     968             : /* FD_SCRATCH_ALLOC_{INIT,APPEND,FINI} are utility macros for allocating
     969             :    sub-regions of memory out of one larger region of memory.  These are
     970             :    intentionally parallel to FD_LAYOUT, but for use at runtime, not
     971             :    compile time, and when you want to know the actual addresses, and not
     972             :    just the total footprint.
     973             : 
     974             :    FD_SCRATCH_ALLOC_INIT begins a scratch allocation operation called
     975             :    layout with starting base address of base.
     976             : 
     977             :    FD_SCRATCH_ALLOC_APPEND returns the next address in the allocation
     978             :    operation `layout` with the required alignment and advances the
     979             :    allocation operation by the provided size.
     980             : 
     981             :    FD_SCRATCH_ALLOC_FINI finalizes a scratch allocation operation with
     982             :    the name given by `layout` and returns the next address with the
     983             :    requested alignment.
     984             : 
     985             :    align must be a power of 2, and layout should be an identifier name.
     986             :    The macros are robust otherwise.
     987             : 
     988             :    Example usage:
     989             :       FD_SCRATCH_ALLOC_INIT( foo, scratch_base );
     990             :       int   * arr1 = FD_SCRATCH_ALLOC_APPEND( foo, alignof(int),   100*sizeof(int)   );
     991             :       ulong * arr2 = FD_SCRATCH_ALLOC_APPEND( foo, alignof(ulong),  25*sizeof(ulong) );
     992             :       FD_SCRATCH_ALLOC_FINI( foo, 32UL );
     993             :    */
     994      670299 : #define FD_SCRATCH_ALLOC_INIT(   layout, base )  ulong _##layout = (ulong)(base)
     995     3388566 : #define FD_SCRATCH_ALLOC_APPEND( layout, align, sz ) (__extension__({                               \
     996     3388566 :     ulong _align = (align);                                                                         \
     997     3388566 :     ulong _sz    = (sz);                                                                            \
     998     3388566 :     ulong _scratch_alloc = fd_ulong_align_up( _##layout, (align) );                                 \
     999     3388566 :     if( FD_UNLIKELY( _scratch_alloc+_sz<_scratch_alloc ) )                                          \
    1000     3388566 :       FD_LOG_ERR(( "FD_SCRATCH_ALLOC_APPEND( %s, %lu, %lu ) overflowed", #layout, _align, _sz ));   \
    1001     3388566 :     _##layout = _scratch_alloc + _sz;                                                               \
    1002     3388566 :     (void *)_scratch_alloc;                                                                         \
    1003     3388566 :     }))
    1004      352644 : #define FD_SCRATCH_ALLOC_FINI( layout, align ) (_##layout = FD_ULONG_ALIGN_UP( _##layout, (align) ) )
    1005             : 
    1006             : #define FD_SCRATCH_ALLOC_PUBLISH( layout ) (__extension__({            \
    1007             :     void * end = (void *)FD_SCRATCH_ALLOC_FINI( layout, 1UL );         \
    1008             :     int ok = fd_scratch_publish_is_safe( end );                        \
    1009             :     if( ok ) fd_scratch_publish( end );                                \
    1010             :     ok;                                                                \
    1011             :   }))
    1012             : 
    1013             : FD_PROTOTYPES_END
    1014             : 
    1015             : #endif /* HEADER_fd_src_util_bits_fd_bits_h */
    1016             : 

Generated by: LCOV version 1.14