LCOV - code coverage report
Current view: top level - util/bits - fd_bits.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 335 335 100.0 %
Date: 2025-03-20 12:08:36 Functions: 1285 221536 0.6 %

          Line data    Source code
       1             : #ifndef HEADER_fd_src_util_bits_fd_bits_h
       2             : #define HEADER_fd_src_util_bits_fd_bits_h
       3             : 
       4             : /* Bit manipulation APIs */
       5             : 
       6             : #include "../sanitize/fd_msan.h"
       7             : 
       8             : FD_PROTOTYPES_BEGIN
       9             : 
      10             : /* fd_ulong_is_pow2    ( x          ) returns 1 if x is a positive integral power of 2 and 0 otherwise.
      11             :    fd_ulong_pow2       ( n          ) returns 2^n mod 2^64.  U.B. if n is negative.
      12             : 
      13             :    fd_ulong_mask_bit   ( b          ) returns the ulong with bit b set and all other bits 0.  U.B. if b is not in [0,64).
      14             :    fd_ulong_clear_bit  ( x, b       ) returns x with bit b cleared.  U.B. if b is not in [0,64).
      15             :    fd_ulong_set_bit    ( x, b       ) returns x with bit b set. U.B. if b is not in [0,64).
      16             :    fd_ulong_extract_bit( x, b       ) returns bit b of x as an int in {0,1}.  U.B. if b is not in [0,64).
      17             :    fd_ulong_insert_bit ( x, b, y    ) returns x with bit b set to y.  U.B. if b is not in [0,64) and/or y is not in {0,1}.
      18             : 
      19             :    fd_ulong_mask_lsb   ( n          ) returns the ulong bits [0,n) set and all other bits 0.  U.B. if n is not in [0,64].
      20             :    fd_ulong_clear_lsb  ( x, n       ) returns x with bits [0,n) cleared.  U.B. if n is not in [0,64].
      21             :    fd_ulong_set_lsb    ( x, n       ) returns x with bits [0,n) set. U.B. if n is not in [0,64].
      22             :    fd_ulong_flip_lsb   ( x, n       ) returns x with bits [0,n) flipped. U.B. if n is not in [0,64].
      23             :    fd_ulong_extract_lsb( x, n       ) returns bits [0,n) of x.  U.B. if n is not in [0,64].
      24             :    fd_ulong_insert_lsb ( x, n, y    ) returns x with bits [0,n) set to y.  U.B. if n is not in [0,64] and/or y is not in [0,2^n).
      25             : 
      26             :    fd_ulong_mask       ( l, h       ) returns the ulong bits [l,h] set and all other bits 0.  U.B. if not 0<=l<=h<64.
      27             :    fd_ulong_clear      ( x, l, h    ) returns x with bits [l,h] cleared.  U.B. if not 0<=l<=h<64.
      28             :    fd_ulong_set        ( x, l, h    ) returns x with bits [l,h] set.  U.B. if not 0<=l<=h<64.
      29             :    fd_ulong_flip       ( x, l, h    ) returns x with bits [l,h] flipped.  U.B. if not 0<=l<=h<64.
      30             :    fd_ulong_extract    ( x, l, h    ) returns bits [l,h] of x.  U.B. if not 0<=l<=h<64.
      31             :    fd_ulong_insert     ( x, l, h, y ) returns x with bits [l,h] set to y.
      32             :                                       U.B. if not 0<=l<=h<64 and/or y is not in in [0,2^(h-l+1)).
      33             : 
      34             :    fd_ulong_lsb        ( x          ) returns 2^i where i is the index of x's least significant set bit (x = 0 returns 0)
      35             :    fd_ulong_pop_lsb    ( x          ) returns x with the least significant set bit cleared (0 returns 0).
      36             : 
      37             :    FIXME: CONSIDER HAVING (A,X) INSTEAD OF (X,A)?
      38             :    fd_ulong_is_aligned ( x, a       ) returns 1 if x is an integral multiple of a and 0 otherwise.  U.B. if !fd_ulong_is_pow2( a )
      39             :    fd_ulong_alignment  ( x, a       ) returns x mod a.  U.B. if !fd_ulong_is_pow2( a )
      40             :    fd_ulong_align_dn   ( x, a       ) returns x rounded down to the closest multiple of a <= x.  U.B. if !fd_ulong_is_pow2( a )
      41             :    fd_ulong_align_up   ( x, a       ) returns x rounded up to the closest multiple of a >= x mod 2^64.
      42             :                                       U.B. if !fd_ulong_is_pow2( a )
      43             : 
      44             :    fd_ulong_blend      ( m, t, f    ) returns forms a ulong by selecting bits from t where m is 1 and from f where m is 0.
      45             :    fd_ulong_if         ( c, t, f    ) returns t if c is 1 and f if c is 0.  U.B. if c is not in {0,1}
      46             :    fd_ulong_store_if   ( c, p, v    ) if c is non-zero, stores v to *p.  Otherwise does nothing.
      47             :    fd_ulong_abs        ( x          ) returns |x| as a ulong
      48             :    fd_ulong_min        ( x, y       ) returns min(x,y)
      49             :    fd_ulong_max        ( x, y       ) returns max(x,y)
      50             : 
      51             :    fd_ulong_shift_left  ( x, n ) returns x with its bits shifted left n times (n>63 shifts to zero), U.B. if n<0
      52             :    fd_ulong_shift_right ( x, n ) returns x with its bits shifted right n times (n>63 shifts to zero), U.B. if n<0
      53             :    fd_ulong_rotate_left ( x, n ) returns x with its bits rotated left n times (negative values rotate right)
      54             :    fd_ulong_rotate_right( x, n ) returns x with its bits rotated right n times (negative values rotate left)
      55             : 
      56             :    fd_ulong_popcnt            ( x    ) returns the number of bits set in x, in [0,64].
      57             :    fd_ulong_find_lsb          ( x    ) returns the index of the least significant bit set in x, in [0,64).  U.B. if x is zero.
      58             :    fd_ulong_find_lsb_w_default( x, d ) returns the index of the least significant bit set in x, in [0,64).  d if x is zero.
      59             :    fd_ulong_find_msb          ( x    ) returns the index of the most significant bit set in x, in [0,64).  U.B. if x is zero.
      60             :    fd_ulong_find_msb_w_default( x, d ) returns the index of the most significant bit set in x, in [0,64).  d if x is zero.
      61             :    fd_ulong_bswap             ( x    ) returns x with its bytes swapped
      62             :    fd_ulong_pow2_up           ( x    ) returns y mod 2^64 where y is the smallest integer power of 2 >= x.  U.B. if x is zero.
      63             :                                        (current implementation returns 0 if x==0).
      64             :    fd_ulong_pow2_dn           ( x    ) returns the largest integer power of 2 <= x.  U.B. if x is zero.
      65             :                                        (current implementation returns 1 if x==0).
      66             : 
      67             :    Similarly for uchar,ushort,uint,uint128.  Note that the signed
      68             :    versions of shift_left, rotate_left, rotate_right operate on the bit
      69             :    pattern of the underlying type directly.  Signed shift_right is sign
      70             :    extending while unsigned shift_right is zero padding (such that if x
      71             :    is negative/non-negative, a large magnitude shift will shift to
      72             :    -1/0).
      73             : 
      74             :    Support for zig-zag encoding is also provided.  E.g.
      75             : 
      76             :    fd_long_zz_enc( x ) returns the zig-zag encodes  long x and returns it as a ulong.
      77             :    fd_long_zz_dec( y ) returns the zig-zag decodes ulong y and returns it as a long.
      78             : 
      79             :    zig-zag encoding losslessly maps a signed integer to an unsigned
      80             :    integer such that, if the magnitude of the signed integer was small,
      81             :    the magnitude of the unsigned integer will be small too.
      82             : 
      83             :    Note that, though fd_ulong_if and friends look like a completely
      84             :    superfluous wrapper for the trinary operator, they have subtly
      85             :    different linguistic meanings.  This seemingly trivial difference can
      86             :    have profound effects on code generation quality, especially on
      87             :    modern architectures.  For example:
      88             : 
      89             :      c ? x[i] : x[j];
      90             : 
      91             :    linguistically means, if c is non-zero, load the i-th element of
      92             :    x.  Otherwise, load the j-th element of x.  But:
      93             : 
      94             :      fd_ulong_if( c, x[i], x[j] )
      95             : 
      96             :    means load the i-th element of x _and_ load the j-th element of x
      97             :    _and_ then select the first value if c is non-zero or the second
      98             :    value otherwise.  Further, it explicitly says that c and the loads
      99             :    can be evaluated in any order.
     100             : 
     101             :    As such, in the trinary case, the compiler will be loathe to do
     102             :    either load before computing c because the language explicitly says
     103             :    "don't do that".  In the unlikely case it overcomes this barrier,
     104             :    it then has the difficult job of proving that reordering c with the
     105             :    loads is safe (e.g. evaluating c might affect the value that would be
     106             :    loaded if c has side effects).  In the unlikely case it overcomes
     107             :    that barrier, it then has the difficult job of proving it is safe to
     108             :    do the loads before evaluating c (e.g. c might be protecting against
     109             :    a load that could seg-fault).
     110             : 
     111             :    In the fd_ulong_if case though, the compiler has been explicitly told
     112             :    up front that the loads are safe and c and the loads can be done in
     113             :    any order.  Now the optimizer finds it a lot easier to optimize
     114             :    because it isn't accidentally over-constrained and doesn't have to
     115             :    prove anything.  E.g. it can use otherwise unused instruction slots
     116             :    before the operation to hide load latency while computing c in
     117             :    parallel via ILP.  Further, it can then ideally can use a conditional
     118             :    move operation to eliminate unnecessary consumption of BTB resources.
     119             :    And, if c is known at compile time, the compiler can prune
     120             :    unnecessary code for the unselected option (e.g. the compiler knows
     121             :    that omitting an unused normal load has no observable effect in the
     122             :    machine model).
     123             : 
     124             :    Faster, more deterministic, less BTB resources consumed and good
     125             :    compile time behavior.  Everything the trinary operator should have
     126             :    been, rather than the giant pile of linguistic fail that it is.
     127             : 
     128             :    Overall, compilers traditionally are much much better at pruning
     129             :    unneeded operations than speculating execution (especially for code
     130             :    paths that a language says not to do and doubly so for code paths
     131             :    that are not obviously safe in general).  And most of this is because
     132             :    languages are not designed correctly to help developers express their
     133             :    intent and constraints to the compiler.
     134             : 
     135             :    This dynamic has had multi-billion dollar commercial impacts though
     136             :    the cause-and-effect has gone largely unrecognized.
     137             : 
     138             :    At one extreme, a major reason why Itanium failed was languages
     139             :    didn't have the right constructs and machine models for developers to
     140             :    "do the right thing".  Even given such, most devs wouldn't know to
     141             :    use them because they were erroneously taught to code to a machine
     142             :    abstraction that hasn't applied to the real world since the early
     143             :    1980s.  Compilers then were not able to utilize all the speculative
     144             :    execution / ILP / ... features that were key to performance on that
     145             :    architecture.  The developer community, not being able to see any
     146             :    benefits (much less large enough benefits to justify short term
     147             :    switching costs) and not wanting to write tons of hand-tuned
     148             :    non-portable ASM kernels, shrugged and Itanium withered away.
     149             : 
     150             :    At the other extreme, CUDA gave developers a good GPU abstraction and
     151             :    extended languages and compilers to make it possible for developers
     152             :    code to that abstraction (e.g. express locality explicitly instead of
     153             :    the constant lying-by-omission about the importance of locality that
     154             :    virtually everything else in tech does ... the speed of light isn't
     155             :    infinite or even all that fast relative to modern CPUs ... stop
     156             :    pretending that it is).  CUDA enabled GPUs have since thrived in
     157             :    gaming, high performance computing, machine learning, crypto, etc.
     158             :    Profits had by all (well, by Nvidia at least).
     159             : 
     160             :    TL;DR
     161             : 
     162             :    * It'd be laughable if it weren't so pathetic that CPU ISAs and
     163             :      programming languages usually forget to expose one of the most
     164             :      fundamental and important digital logic circuits to devs ... the
     165             :      2:1 mux.
     166             : 
     167             :    * Developers will usually do the right thing if languages let them.
     168             : 
     169             :    TODO: mask_msb, clear_msb, set_msb, flip_msb, extract_msb,
     170             :    insert_msb, bitrev, sign, copysign, flipsign, rounding right shift,
     171             :    ... */
     172             : 
     173             : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,w)                                                                                         \
     174   479513394 : FD_FN_CONST static inline int  fd_##T##_is_pow2     ( T x               ) { return (!!x) & (!(x & (x-(T)1)));                    } \
     175   201327351 : FD_FN_CONST static inline T    fd_##T##_pow2        ( int n             ) { return (T)(((T)(n<w))<<(n&(w-1)));                   } \
     176   568023111 : FD_FN_CONST static inline T    fd_##T##_mask_bit    ( int b             ) { return (T)(((T)1)<<b);                               } \
     177       51096 : FD_FN_CONST static inline T    fd_##T##_clear_bit   ( T x, int b        ) { return (T)(x & ~fd_##T##_mask_bit(b));               } \
     178   567962343 : FD_FN_CONST static inline T    fd_##T##_set_bit     ( T x, int b        ) { return (T)(x |  fd_##T##_mask_bit(b));               } \
     179        2976 : FD_FN_CONST static inline T    fd_##T##_flip_bit    ( T x, int b        ) { return (T)(x ^  fd_##T##_mask_bit(b));               } \
     180   833152510 : FD_FN_CONST static inline int  fd_##T##_extract_bit ( T x, int b        ) { return (int)((x>>b) & (T)1);                         } \
     181        5952 : FD_FN_CONST static inline T    fd_##T##_insert_bit  ( T x, int b, int y ) { return (T)((x & ~fd_##T##_mask_bit(b))|(((T)y)<<b)); } \
     182  1281997011 : FD_FN_CONST static inline T    fd_##T##_mask_lsb    ( int n             ) { return (T)((((T)(n<w))<<(n&(w-1)))-((T)1));          } \
     183  1281523638 : FD_FN_CONST static inline T    fd_##T##_clear_lsb   ( T x, int n        ) { return (T)(x & ~fd_##T##_mask_lsb(n));               } \
     184        2262 : FD_FN_CONST static inline T    fd_##T##_set_lsb     ( T x, int n        ) { return (T)(x |  fd_##T##_mask_lsb(n));               } \
     185        2262 : FD_FN_CONST static inline T    fd_##T##_flip_lsb    ( T x, int n        ) { return (T)(x ^  fd_##T##_mask_lsb(n));               } \
     186        2262 : FD_FN_CONST static inline T    fd_##T##_extract_lsb ( T x, int n        ) { return (T)(x &  fd_##T##_mask_lsb(n));               } \
     187  1281521376 : FD_FN_CONST static inline T    fd_##T##_insert_lsb  ( T x, int n, T y   ) { return (T)(fd_##T##_clear_lsb(x,n) | y);             } \
     188       91764 : FD_FN_CONST static inline T    fd_##T##_mask        ( int l, int h      ) { return (T)( fd_##T##_mask_lsb(h-l+1) << l );         } \
     189      124881 : FD_FN_CONST static inline T    fd_##T##_clear       ( T x, int l, int h ) { return (T)(x & ~fd_##T##_mask(l,h));                 } \
     190       42480 : FD_FN_CONST static inline T    fd_##T##_set         ( T x, int l, int h ) { return (T)(x |  fd_##T##_mask(l,h));                 } \
     191      108978 : FD_FN_CONST static inline T    fd_##T##_flip        ( T x, int l, int h ) { return (T)(x ^  fd_##T##_mask(l,h));                 } \
     192       82896 : FD_FN_CONST static inline T    fd_##T##_extract     ( T x, int l, int h ) { return (T)( (x>>l) & fd_##T##_mask_lsb(h-l+1) );     } \
     193      264864 : FD_FN_CONST static inline T    fd_##T##_insert      ( T x, int l, int h, T y ) { return (T)(fd_##T##_clear(x,l,h) | (y<<l));     } \
     194     2154732 : FD_FN_CONST static inline T    fd_##T##_lsb         ( T x               ) { return (T)(x ^ (x & (x-(T)1)));                      } \
     195  3433702758 : FD_FN_CONST static inline T    fd_##T##_pop_lsb     ( T x               ) { return (T)(x & (x-(T)1));                            } \
     196  9672237236 : FD_FN_CONST static inline int  fd_##T##_is_aligned  ( T x, T a          ) { a--; return !(x & a);                                } \
     197       66960 : FD_FN_CONST static inline T    fd_##T##_alignment   ( T x, T a          ) { a--; return (T)( x    &  a);                         } \
     198   366151876 : FD_FN_CONST static inline T    fd_##T##_align_dn    ( T x, T a          ) { a--; return (T)( x    & ~a);                         } \
     199 12876453174 : FD_FN_CONST static inline T    fd_##T##_align_up    ( T x, T a          ) { a--; return (T)((x+a) & ~a);                         } \
     200   251658240 : FD_FN_CONST static inline T    fd_##T##_blend       ( T m, T t, T f     ) { return (T)((t & m) | (f & ~m));                      } \
     201 29132391582 : FD_FN_CONST static inline T    fd_##T##_if          ( int c, T t, T f   ) { return c ? t : f;     /* cmov */                     } \
     202   314339793 : /*       */ static inline void fd_##T##_store_if    ( int c, T * p, T v ) { T _[ 1 ]; *( c ? p : _ ) = v; /* cmov */             } \
     203   489555636 : FD_FN_CONST static inline T    fd_##T##_abs         ( T x               ) { return x;                                            } \
     204 20272963034 : FD_FN_CONST static inline T    fd_##T##_min         ( T x, T y          ) { return (x<y) ? x : y; /* cmov */                     } \
     205  2980760088 : FD_FN_CONST static inline T    fd_##T##_max         ( T x, T y          ) { return (x>y) ? x : y; /* cmov */                     } \
     206   754974720 : FD_FN_CONST static inline T    fd_##T##_shift_left  ( T x, int n        ) { return (T)(((n>(w-1)) ? ((T)0) : x) << (n&(w-1)));   } \
     207   511844859 : FD_FN_CONST static inline T    fd_##T##_shift_right ( T x, int n        ) { return (T)(((n>(w-1)) ? ((T)0) : x) >> (n&(w-1)));   } \
     208 >31703*10^7 : FD_FN_CONST static inline T    fd_##T##_rotate_left ( T x, int n        ) { return (T)((x << (n&(w-1))) | (x >> ((-n)&(w-1))));  } \
     209  4570736620 : FD_FN_CONST static inline T    fd_##T##_rotate_right( T x, int n        ) { return (T)((x >> (n&(w-1))) | (x << ((-n)&(w-1))));  }
     210             : 
     211             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uchar,  8)
     212             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ushort,16)
     213             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint,  32)
     214             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ulong, 64)
     215             : 
     216             : #if FD_HAS_INT128 /* FIXME: These probably could benefit from x86 specializations */
     217             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint128,128)
     218             : #endif
     219             : 
     220             : #undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
     221             : 
     222  1892748618 : FD_FN_CONST static inline int fd_uchar_popcnt ( uchar  x ) { return __builtin_popcount ( (uint)x ); }
     223         816 : FD_FN_CONST static inline int fd_ushort_popcnt( ushort x ) { return __builtin_popcount ( (uint)x ); }
     224      781986 : FD_FN_CONST static inline int fd_uint_popcnt  ( uint   x ) { return __builtin_popcount (       x ); }
     225 13098953913 : FD_FN_CONST static inline int fd_ulong_popcnt ( ulong  x ) { return __builtin_popcountl(       x ); }
     226             : 
     227             : #if FD_HAS_INT128
     228             : FD_FN_CONST static inline int
     229       49536 : fd_uint128_popcnt( uint128 x ) {
     230       49536 :   return  __builtin_popcountl( (ulong) x ) + __builtin_popcountl( (ulong)(x>>64) );
     231       49536 : }
     232             : #endif
     233             : 
     234             : #include "fd_bits_find_lsb.h" /* Provides find_lsb_w_default too */
     235             : #include "fd_bits_find_msb.h" /* Provides find_msb_w_default too */ /* Note that find_msb==floor( log2( x ) ) for non-zero x */
     236             : 
     237     4203198 : FD_FN_CONST static inline uchar  fd_uchar_bswap ( uchar  x ) { return x; }
     238   255663865 : FD_FN_CONST static inline ushort fd_ushort_bswap( ushort x ) { return __builtin_bswap16( x ); }
     239 12350011300 : FD_FN_CONST static inline uint   fd_uint_bswap  ( uint   x ) { return __builtin_bswap32( x ); }
     240  1468722461 : FD_FN_CONST static inline ulong  fd_ulong_bswap ( ulong  x ) { return __builtin_bswap64( x ); }
     241             : 
     242             : #if FD_HAS_INT128
     243             : FD_FN_CONST static inline uint128
     244         387 : fd_uint128_bswap( uint128 x ) {
     245         387 :   ulong xl = (ulong) x;
     246         387 :   ulong xh = (ulong)(x>>64);
     247         387 :   return (((uint128)fd_ulong_bswap( xl )) << 64) | ((uint128)fd_ulong_bswap( xh ));
     248         387 : }
     249             : #endif
     250             : 
     251             : /* FIXME: consider find_msb based solution (probably not as the combination
     252             :    of FD_FN_CONST and the use of inline asm for find_msb on some targets
     253             :    is probably less than ideal). */
     254             : 
     255             : FD_FN_CONST static inline uchar
     256         111 : fd_uchar_pow2_up( uchar _x ) {
     257         111 :   uint x = (uint)_x;
     258         111 :   x--;
     259         111 :   x |= (x>> 1);
     260         111 :   x |= (x>> 2);
     261         111 :   x |= (x>> 4);
     262         111 :   x++;
     263         111 :   return (uchar)x;
     264         111 : }
     265             : 
     266             : FD_FN_CONST static inline uchar
     267         111 : fd_uchar_pow2_dn( uchar _x ) {
     268         111 :   uint x = (uint)_x;
     269         111 :   x >>= 1;
     270         111 :   x |= (x>> 1);
     271         111 :   x |= (x>> 2);
     272         111 :   x |= (x>> 4);
     273         111 :   x++;
     274         111 :   return (uchar)x;
     275         111 : }
     276             : 
     277             : FD_FN_CONST static inline ushort
     278         411 : fd_ushort_pow2_up( ushort _x ) {
     279         411 :   uint x = (uint)_x;
     280         411 :   x--;
     281         411 :   x |= (x>> 1);
     282         411 :   x |= (x>> 2);
     283         411 :   x |= (x>> 4);
     284         411 :   x |= (x>> 8);
     285         411 :   x++;
     286         411 :   return (ushort)x;
     287         411 : }
     288             : 
     289             : FD_FN_CONST static inline ushort
     290         411 : fd_ushort_pow2_dn( ushort _x ) {
     291         411 :   uint x = (uint)_x;
     292         411 :   x >>= 1;
     293         411 :   x |= (x>> 1);
     294         411 :   x |= (x>> 2);
     295         411 :   x |= (x>> 4);
     296         411 :   x |= (x>> 8);
     297         411 :   x++;
     298         411 :   return (ushort)x;
     299         411 : }
     300             : 
     301             : FD_FN_CONST static inline uint
     302        1587 : fd_uint_pow2_up( uint x ) {
     303        1587 :   x--;
     304        1587 :   x |= (x>> 1);
     305        1587 :   x |= (x>> 2);
     306        1587 :   x |= (x>> 4);
     307        1587 :   x |= (x>> 8);
     308        1587 :   x |= (x>>16);
     309        1587 :   x++;
     310        1587 :   return x;
     311        1587 : }
     312             : 
     313             : FD_FN_CONST static inline uint
     314        1587 : fd_uint_pow2_dn( uint x ) {
     315        1587 :   x >>= 1;
     316        1587 :   x |= (x>> 1);
     317        1587 :   x |= (x>> 2);
     318        1587 :   x |= (x>> 4);
     319        1587 :   x |= (x>> 8);
     320        1587 :   x |= (x>>16);
     321        1587 :   x++;
     322        1587 :   return x;
     323        1587 : }
     324             : 
     325             : FD_FN_CONST static inline ulong
     326    60505830 : fd_ulong_pow2_up( ulong x ) {
     327    60505830 :   x--;
     328    60505830 :   x |= (x>> 1);
     329    60505830 :   x |= (x>> 2);
     330    60505830 :   x |= (x>> 4);
     331    60505830 :   x |= (x>> 8);
     332    60505830 :   x |= (x>>16);
     333    60505830 :   x |= (x>>32);
     334    60505830 :   x++;
     335    60505830 :   return x;
     336    60505830 : }
     337             : 
     338             : FD_FN_CONST static inline ulong
     339        6243 : fd_ulong_pow2_dn( ulong x ) {
     340        6243 :   x >>= 1;
     341        6243 :   x |= (x>> 1);
     342        6243 :   x |= (x>> 2);
     343        6243 :   x |= (x>> 4);
     344        6243 :   x |= (x>> 8);
     345        6243 :   x |= (x>>16);
     346        6243 :   x |= (x>>32);
     347        6243 :   x++;
     348        6243 :   return x;
     349        6243 : }
     350             : 
     351             : #if FD_HAS_INT128
     352             : FD_FN_CONST static inline uint128
     353       24771 : fd_uint128_pow2_up( uint128 x ) {
     354       24771 :   x--;
     355       24771 :   x |= (x>> 1);
     356       24771 :   x |= (x>> 2);
     357       24771 :   x |= (x>> 4);
     358       24771 :   x |= (x>> 8);
     359       24771 :   x |= (x>>16);
     360       24771 :   x |= (x>>32);
     361       24771 :   x |= (x>>64);
     362       24771 :   x++;
     363       24771 :   return x;
     364       24771 : }
     365             : 
     366             : FD_FN_CONST static inline uint128
     367       24771 : fd_uint128_pow2_dn( uint128 x ) {
     368       24771 :   x >>= 1;
     369       24771 :   x |= (x>> 1);
     370       24771 :   x |= (x>> 2);
     371       24771 :   x |= (x>> 4);
     372       24771 :   x |= (x>> 8);
     373       24771 :   x |= (x>>16);
     374       24771 :   x |= (x>>32);
     375       24771 :   x |= (x>>64);
     376       24771 :   x++;
     377       24771 :   return x;
     378       24771 : }
     379             : #endif
     380             : 
     381             : /* Brokenness of indeterminant char sign strikes again ... sigh.  We
     382             :    explicitly provide the unsigned variant of the token to these
     383             :    macros because the uchar token is not related to the schar token
     384             :    by simply prepending u to schar. */
     385             : 
     386             : /* Note: the implementations of abs, right_shift and zz_enc below do not
     387             :    exploit the sign extending right shift behavior specified by the
     388             :    machine model (and thus can be used safely in more general machine
     389             :    models) but are slightly more expensive.
     390             : 
     391             :    FD_FN_CONST static inline UT fd_##T##_abs( T x ) { UT u = (UT)x; UT m = (UT)-(u>>(w-1)); return (UT)((u+m)^m); }
     392             : 
     393             :    FD_FN_CONST static inline T
     394             :    fd_##T##_shift_right( T   x,
     395             :                          int n ) {
     396             :      UT u = (UT)x;
     397             :      UT m = (UT)-(u >> (w-1));
     398             :      return (T)(fd_##UT##_shift_right( u ^ m, n ) ^ m);
     399             :    }
     400             : 
     401             :    FD_FN_CONST static inline UT fd_##T##_zz_enc( T x ) { UT u = (UT)x; return (UT)((-(u>>(w-1))) ^ (u<<1)); }
     402             : */
     403             : 
     404             : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,UT,w)                                                                                    \
     405  1667961756 : FD_FN_CONST static inline T    fd_##T##_if          ( int c, T t, T f ) { return c ? t : f;      /* cmov */ }                    \
     406  5534446710 : /*       */ static inline void fd_##T##_store_if    ( int c, T * p, T v ) { T _[ 1 ]; *( c ? p : _ ) = v; /* cmov */           } \
     407  1704372799 : FD_FN_CONST static inline UT   fd_##T##_abs         ( T x             ) { UT m = (UT)(x >> (w-1)); return (UT)((((UT)x)+m)^m); } \
     408   470596307 : FD_FN_CONST static inline T    fd_##T##_min         ( T x, T y        ) { return (x<=y) ? x : y; /* cmov */ }                    \
     409   286841151 : FD_FN_CONST static inline T    fd_##T##_max         ( T x, T y        ) { return (x>=y) ? x : y; /* cmov */ }                    \
     410   251658240 : FD_FN_CONST static inline T    fd_##T##_shift_left  ( T x, int n      ) { return (T)fd_##UT##_shift_left  ( (UT)x, n ); }        \
     411   251658240 : FD_FN_CONST static inline T    fd_##T##_shift_right ( T x, int n      ) { return (T)(x >> ((n>(w-1)) ? (w-1) : n)); /* cmov */ } \
     412  1096648128 : FD_FN_CONST static inline T    fd_##T##_rotate_left ( T x, int n      ) { return (T)fd_##UT##_rotate_left ( (UT)x, n ); }        \
     413  1096648128 : FD_FN_CONST static inline T    fd_##T##_rotate_right( T x, int n      ) { return (T)fd_##UT##_rotate_right( (UT)x, n ); }        \
     414      196920 : FD_FN_CONST static inline UT   fd_##T##_zz_enc      ( T x             ) { return (UT)(((UT)(x>>(w-1))) ^ (((UT)x)<<1)); }        \
     415      149538 : FD_FN_CONST static inline T    fd_##T##_zz_dec      ( UT x            ) { return (T)((x>>1) ^ (-(x & (UT)1))); }
     416             : 
     417             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(schar, uchar,    8)
     418             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(short, ushort,  16)
     419             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(int,   uint,    32)
     420             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(long,  ulong,   64)
     421             : #if FD_HAS_INT128
     422             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(int128,uint128,128)
     423             : #endif
     424             : 
     425             : #undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
     426             : 
     427             : /* Brokenness of indeterminant char sign strikes again ... sigh.  We
     428             :    can't provide a char_min/char_max between platforms as they don't
     429             :    necessarily produce the same results.  Likewise, we don't provide a
     430             :    fd_char_abs because it will not produce equivalent results between
     431             :    platforms.  But it is useful to have a char_if and char_store_if to
     432             :    help with making branchless string operation implementations. */
     433             : 
     434    53565981 : FD_FN_CONST static inline char fd_char_if( int c, char t, char f ) { return c ? t : f; }
     435             : 
     436    50331648 : static inline void fd_char_store_if( int c, char * p, char v ) { char _[ 1 ]; *( c ? p : _ ) = v; /* cmov */ }
     437             : 
     438             : /* FIXME: ADD HASHING PAIRS FOR UCHAR AND USHORT? */
     439             : 
     440             : /* High quality (full avalanche) high speed integer to integer hashing.
     441             :    fd_uint_hash has the properties that [0,2^32) hashes to a random
     442             :    looking permutation of [0,2^32) and hash(0)==0.  Similarly for
     443             :    fd_ulong_hash.  Based on Google's Murmur3 hash finalizer (public
     444             :    domain).  Not cryptographically secure but passes various strict
     445             :    tests of randomness when used as a PRNG. */
     446             : 
     447             : FD_FN_CONST static inline uint
     448   112493844 : fd_uint_hash( uint x ) {
     449   112493844 :   x ^= x >> 16;
     450   112493844 :   x *= 0x85ebca6bU;
     451   112493844 :   x ^= x >> 13;
     452   112493844 :   x *= 0xc2b2ae35U;
     453   112493844 :   x ^= x >> 16;
     454   112493844 :   return x;
     455   112493844 : }
     456             : 
     457             : FD_FN_CONST static inline ulong
     458 >16309*10^7 : fd_ulong_hash( ulong x ) {
     459 >16309*10^7 :   x ^= x >> 33;
     460 >16309*10^7 :   x *= 0xff51afd7ed558ccdUL;
     461 >16309*10^7 :   x ^= x >> 33;
     462 >16309*10^7 :   x *= 0xc4ceb9fe1a85ec53UL;
     463 >16309*10^7 :   x ^= x >> 33;
     464 >16309*10^7 :   return x;
     465 >16309*10^7 : }
     466             : 
     467             : /* Inverses of the above.  E.g.:
     468             :      fd_uint_hash_inverse( fd_uint_hash( (uint)x ) )==(uint)x
     469             :    and:
     470             :      fd_uint_hash( fd_uint_hash_inverse( (uint)x ) )==(uint)x
     471             :    Similarly for fd_ulong_hash_inverse.  These by themselves are similar
     472             :    quality hashes to the above and having the inverses of the above can
     473             :    be useful.  The fact these have (nearly) identical operations /
     474             :    operation counts concretely demonstrates that none of these are
     475             :    standalone cryptographically secure. */
     476             : 
     477             : FD_FN_CONST static inline uint
     478   103809084 : fd_uint_hash_inverse( uint x ) {
     479   103809084 :   x ^= x >> 16;
     480   103809084 :   x *= 0x7ed1b41dU;
     481   103809084 :   x ^= (x >> 13) ^ (x >> 26);
     482   103809084 :   x *= 0xa5cb9243U;
     483   103809084 :   x ^= x >> 16;
     484   103809084 :   return x;
     485   103809084 : }
     486             : 
     487             : FD_FN_CONST static inline ulong
     488 13089422283 : fd_ulong_hash_inverse( ulong x ) {
     489 13089422283 :   x ^= x >> 33;
     490 13089422283 :   x *= 0x9cb4b2f8129337dbUL;
     491 13089422283 :   x ^= x >> 33;
     492 13089422283 :   x *= 0x4f74430c22a54005UL;
     493 13089422283 :   x ^= x >> 33;
     494 13089422283 :   return x;
     495 13089422283 : }
     496             : 
     497             : /* fd_ulong_base10_dig_cnt returns the number of digits in the base 10
     498             :    representation of x.  FIXME: USE BETTER ALGO. */
     499             : 
     500             : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,M) \
     501             : FD_FN_CONST static inline ulong            \
     502         306 : fd_##T##_base10_dig_cnt( T _x ) {          \
     503         306 :   ulong x      = (ulong)_x;                \
     504         306 :   ulong cnt    = 1UL;                      \
     505         306 :   ulong thresh = 10UL;                     \
     506        1860 :   do {                                     \
     507        1860 :     if( FD_LIKELY( x<thresh ) ) break;     \
     508        1860 :     cnt++;                                 \
     509        1554 :     thresh *= 10UL;                        \
     510        1554 :   } while( FD_LIKELY( cnt<M ) );           \
     511         306 :   return cnt;                              \
     512         306 : }
     513             : 
     514             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uchar,  3UL) /*                  255 ->  3 dig */
     515             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ushort, 5UL) /*                65535 ->  5 dig */
     516             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint,  10UL) /*           4294967295 -> 10 dig */
     517             : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ulong, 20UL) /* 18446744073709551615 -> 20 dig */
     518             : 
     519             : #undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
     520             : 
     521             : /* fd_float_if, fd_float_store_if, fd_float_abs are described above.
     522             :    Ideally, the system will implement fd_float_abs by just clearing the
     523             :    sign bit.  fd_float_eq tests to floating point values for whether or
     524             :    not their bit representations are identical.  Useful when IEEE
     525             :    handling of equality with +/-0 or nan are not desired (e.g. can test
     526             :    if nans have different signs or syndromes). */
     527             : 
     528    54490935 : FD_FN_CONST static inline float fd_float_if      ( int c, float   t, float f ) { return c ? t : f; }
     529    50331648 : /*       */ static inline void  fd_float_store_if( int c, float * p, float v ) { float _[ 1 ]; *( c ? p : _ ) = v; }
     530    96095148 : FD_FN_CONST static inline float fd_float_abs     ( float x ) { return __builtin_fabsf( x ); }
     531             : FD_FN_CONST static inline int
     532             : fd_float_eq( float x,
     533   100663296 :              float y ) {
     534   100663296 :   union { float f; uint u; } tx, ty;
     535   100663296 :   tx.f = x;
     536   100663296 :   ty.f = y;
     537   100663296 :   return tx.u==ty.u;
     538   100663296 : }
     539             : 
     540             : /* fd_double_if, fd_double_store_if, fd_double_abs and fd_double_eq are
     541             :    double precision versions of the above. */
     542             : 
     543             : #if FD_HAS_DOUBLE
     544    54491892 : FD_FN_CONST static inline double fd_double_if      ( int c, double   t, double f ) { return c ? t : f; }
     545    50331648 : /*       */ static inline void   fd_double_store_if( int c, double * p, double v ) { double _[ 1 ]; *( c ? p : _ ) = v; }
     546    96161202 : FD_FN_CONST static inline double fd_double_abs     ( double x ) { return __builtin_fabs( x ); }
     547             : FD_FN_CONST static inline int
     548             : fd_double_eq( double x,
     549   100663296 :               double y ) {
     550   100663296 :   union { double f; ulong u; } tx, ty;
     551   100663296 :   tx.f = x;
     552   100663296 :   ty.f = y;
     553   100663296 :   return tx.u==ty.u;
     554   100663296 : }
     555             : #endif
     556             : 
     557             : /* fd_swap swaps the values in a and b.  Assumes a and b have the same
     558             :    plain-old-data type.  Best if a and b are primitive types (e.g.
     559             :    ulong).  This macro is robust (it evaluates a and b the minimal
     560             :    number of times).  Note that compilers are pretty good about identify
     561             :    swap operations and replacing them with more optimal assembly for
     562             :    types and architectures where alternative implementations (like xor
     563             :    tricks) might be faster. */
     564             : 
     565   658063524 : #define fd_swap(a,b) do { __typeof__((a)) _fd_swap_tmp = (a); (a) = (b); (b) = _fd_swap_tmp; } while(0)
     566             : 
     567             : /* fd_swap_if swaps the values in a and b if c is non-zero and leaves
     568             :    them unchanged otherwise.  Assumes a and b have the same
     569             :    plain-old-data type.  Best if a and b are primitive types (e.g.
     570             :    ulong) as the compiler will likely replace the trinaries below with
     571             :    cmovs, making this branchless.  This macro is robust (it evaluates a,
     572             :    b and c the minimal number of times). */
     573             : 
     574   662231538 : #define fd_swap_if(c,a,b) do {                                      \
     575   662231538 :     int             _fd_swap_if_c = (c);                            \
     576   662231538 :     __typeof__((a)) _fd_swap_if_a = (a);                            \
     577   662231538 :     __typeof__((b)) _fd_swap_if_b = (b);                            \
     578   662231538 :     (a) = _fd_swap_if_c ? _fd_swap_if_b : _fd_swap_if_a; /* cmov */ \
     579   662231538 :     (b) = _fd_swap_if_c ? _fd_swap_if_a : _fd_swap_if_b; /* cmov */ \
     580   662231538 :   } while(0)
     581             : 
     582             : /* fd_ptr_if is a generic version of the above for pointers.  This macro
     583             :    is robust.
     584             : 
     585             :    IMPORTANT SAFETY TIP!  The output type is the type of t.  Thus, if
     586             :    the condition is false and t and f have different pointer types
     587             :    (which is inherently gross and we might want to consider outright
     588             :    rejecting in the future via, unfortunately non-standard, compiler
     589             :    builtins), this would implicitly cast the pointer f to the type of t.
     590             :    As such, strict aliasing rules in the language also imply mixed usage
     591             :    cases need to be wrapped in a fd_type_pun.  In short, mixing pointer
     592             :    types between t and f is strongly discouraged. */
     593             : 
     594             : #if FD_HAS_UBSAN
     595             : #define fd_ptr_if(c,t,f) ((__typeof__((t)))( (c) ? (ulong)(t) : (ulong)(f) ))
     596             : #else
     597  6143191470 : #define fd_ptr_if(c,t,f) ((__typeof__((t)))fd_ulong_if( (c), (ulong)(t), (ulong)(f) ))
     598             : #endif
     599             : 
     600             : /* FD_ULONG_{MASK_LSB,MASK_MSB,ALIGN_UP} are the same as
     601             :    fd_ulong_{mask_lsb,mask_msb,align_up} but can be used at compile
     602             :    time.  The tradeoff is n/a must be safe against multiple evaluation
     603             :    at compile time.  x should be ulong compatible and n/a should be int
     604             :    compatible. */
     605             : 
     606    11951862 : #define FD_ULONG_MASK_LSB( n )    ((((ulong)((n)<=63)) << ((n) & 63)) - 1UL)
     607             : #define FD_ULONG_MASK_MSB( n )    (~FD_ULONG_MASK_LSB(64-(n)))
     608     9266082 : #define FD_ULONG_ALIGN_UP( x, a ) (((x)+((a)-1UL)) & (~((a)-1UL)))
     609             : 
     610             : /* Unaligned access annotations.
     611             : 
     612             :    FD_LOAD( T, src ) is equivalent to:
     613             :      return (*(T const *)(src))
     614             :    but src can have arbitrary alignment.
     615             : 
     616             :    FD_STORE( T, dst, val ) is equivalent to:
     617             :      T * ptr = (T *)(dst);
     618             :      *ptr = (val);
     619             :      return ptr
     620             :    but dst can have arbitrary alignment.
     621             : 
     622             :    Note: Ideally, we would infer the type T in FD_LOAD from src (e.g.
     623             :    use typeof(*(src)).  But there are some nasty linguistic and
     624             :    optimizer interactions when src is a constant pointer in a truly
     625             :    generic implementation.  Similarly for FD_STORE.
     626             : 
     627             :    fd_T_load_n( src ) where T is in [uchar,ushort,uint,ulong] loads n
     628             :    bytes into the least significant n bytes of a T, zeros any remaining
     629             :    bytes and returns the result.  fd_T_load_n_fast is the same but
     630             :    assumes it is safe to tail read a couple of bytes past the end of src
     631             :    if such is beneficial for higher performance.
     632             : 
     633             :    Accesses that would normally be atomic (e.g. an aligned access to a
     634             :    primitive type like a ulong) are not guaranteed to be atomic if done
     635             :    through these annotations. */
     636             : 
     637             : #ifndef FD_UNALIGNED_ACCESS_STYLE
     638             : #if FD_HAS_X86
     639             : #define FD_UNALIGNED_ACCESS_STYLE 0  /* 1 is broken ... */
     640             : #else
     641             : #define FD_UNALIGNED_ACCESS_STYLE 0
     642             : #endif
     643             : #endif
     644             : 
     645             : #if FD_UNALIGNED_ACCESS_STYLE==0 /* memcpy elision based */
     646             : 
     647             : /* This implementation does not assume it is safe to access unaligned
     648             :    memory directly (and thus can be used on platforms outside the
     649             :    development environment's machine model) but it does still assume
     650             :    little endian byte ordering.
     651             : 
     652             :    It is based on memcpy and, in principle, the compiler should elide
     653             :    the memcpy and replace this with optimized asm on platforms where
     654             :    this is safe (which is virtually all commercially viable platforms as
     655             :    packet processing deal heavily with unaligned accesses and virtually
     656             :    all platforms are near universally networked and networking needs to
     657             :    do packet processing efficiently).  But this fails often enough in
     658             :    practice that this should not be relied upon, especially if
     659             :    performance is important as performance is glacial when the compiler
     660             :    mucks up.  (fd_memcpy is an especially bad idea here because the
     661             :    risk of the compiler mucking it up is much greater.)
     662             : 
     663             :    It is also more than little bizarre that this is an encouraged
     664             :    practice nowadays.  That is, practically, we are using a low level
     665             :    language (C/C++) that have language constructs (dereference a
     666             :    pointer to a primitive type) that map directly onto low level
     667             :    hardware operations (asm load operation) that are actually supported
     668             :    by the target hardware here (fine if pointer is not aligned to
     669             :    width of the type).
     670             : 
     671             :    But instead of encouraging developers to write short, readable,
     672             :    library-independent code that generates fast and ultra compact asm,
     673             :    they are encouraged to write long, obtuse, library-dependent code
     674             :    that naively would generate slow bloated asm in hopes the compiler
     675             :    will realize can be turned into the simple implementation and turn it
     676             :    back into the developer's original intent and then generate good asm.
     677             :    Hmmm. */
     678             : 
     679             : #define FD_LOAD( T, src ) \
     680  4224508946 :   (__extension__({ T _fd_load_tmp; memcpy( &_fd_load_tmp, (T const *)(src), sizeof(T) ); _fd_load_tmp; }))
     681             : 
     682             : #define FD_STORE( T, dst, val ) \
     683 39099460399 :   (__extension__({ T _fd_store_tmp = (val); (T *)memcpy( (T *)(dst), &_fd_store_tmp, sizeof(T) ); }))
     684             : 
     685        6144 : FD_FN_PURE static inline uchar  fd_uchar_load_1      ( void const * p ) { return         *(uchar const *)p; }
     686             : 
     687        6141 : FD_FN_PURE static inline ushort fd_ushort_load_1     ( void const * p ) { return (ushort)*(uchar const *)p; }
     688        6141 : FD_FN_PURE static inline ushort fd_ushort_load_2     ( void const * p ) { ushort t;       memcpy( &t, p, 2UL ); return        t; }
     689             : 
     690        6135 : FD_FN_PURE static inline uint   fd_uint_load_1       ( void const * p ) { return (uint  )*(uchar const *)p; }
     691        6135 : FD_FN_PURE static inline uint   fd_uint_load_2       ( void const * p ) { ushort t;       memcpy( &t, p, 2UL ); return (uint )t; }
     692       30219 : FD_FN_PURE static inline uint   fd_uint_load_3       ( void const * p ) { uint   t = 0UL; memcpy( &t, p, 3UL ); return (uint )t; }
     693   168420639 : FD_FN_PURE static inline uint   fd_uint_load_4       ( void const * p ) { uint   t;       memcpy( &t, p, 4UL ); return        t; }
     694             : 
     695  2790348858 : FD_FN_PURE static inline ulong  fd_ulong_load_1      ( void const * p ) { return (ulong )*(uchar const *)p; }
     696  1383846795 : FD_FN_PURE static inline ulong  fd_ulong_load_2      ( void const * p ) { ushort t;       memcpy( &t, p, 2UL ); return (ulong)t; }
     697        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_3      ( void const * p ) { uint   t = 0UL; memcpy( &t, p, 3UL ); return (ulong)t; }
     698    66181359 : FD_FN_PURE static inline ulong  fd_ulong_load_4      ( void const * p ) { uint   t;       memcpy( &t, p, 4UL ); return (ulong)t; }
     699        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_5      ( void const * p ) { ulong  t = 0UL; memcpy( &t, p, 5UL ); return        t; }
     700        6141 : FD_FN_PURE static inline ulong  fd_ulong_load_6      ( void const * p ) { ulong  t = 0UL; memcpy( &t, p, 6UL ); return        t; }
     701        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_7      ( void const * p ) { ulong  t = 0UL; memcpy( &t, p, 7UL ); return        t; }
     702  2183546958 : FD_FN_PURE static inline ulong  fd_ulong_load_8      ( void const * p ) { ulong  t;       memcpy( &t, p, 8UL ); return        t; }
     703             : 
     704             : #define                         fd_uchar_load_1_fast                    fd_uchar_load_1
     705             : 
     706             : #define                         fd_ushort_load_1_fast                   fd_ushort_load_1
     707             : #define                         fd_ushort_load_2_fast                   fd_ushort_load_2
     708             : 
     709             : #define                         fd_uint_load_1_fast                     fd_uint_load_1
     710             : #define                         fd_uint_load_2_fast                     fd_uint_load_2
     711        6135 : FD_FN_PURE static inline uint   fd_uint_load_3_fast  ( void const * p ) { uint   t; memcpy( &t, p, 4UL ); return ((uint )t) & 0x00ffffffU;          }
     712   130374690 : #define                         fd_uint_load_4_fast                     fd_uint_load_4
     713             : 
     714             : #define                         fd_ulong_load_1_fast                    fd_ulong_load_1
     715           9 : #define                         fd_ulong_load_2_fast                    fd_ulong_load_2
     716        6132 : FD_FN_PURE static inline ulong  fd_ulong_load_3_fast ( void const * p ) { uint   t; memcpy( &t, p, 4UL ); return ((ulong)t) & 0x0000000000ffffffUL; }
     717           9 : #define                         fd_ulong_load_4_fast                    fd_ulong_load_4
     718        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_5_fast ( void const * p ) { ulong  t; memcpy( &t, p, 8UL ); return         t  & 0x000000ffffffffffUL; }
     719        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_6_fast ( void const * p ) { ulong  t; memcpy( &t, p, 8UL ); return         t  & 0x0000ffffffffffffUL; }
     720        6123 : FD_FN_PURE static inline ulong  fd_ulong_load_7_fast ( void const * p ) { ulong  t; memcpy( &t, p, 8UL ); return         t  & 0x00ffffffffffffffUL; }
     721    91067692 : #define                         fd_ulong_load_8_fast                    fd_ulong_load_8
     722             : 
     723             : #elif FD_UNALIGNED_ACCESS_STYLE==1 /* direct access */
     724             : 
     725             : #define FD_LOAD( T, src ) (__extension__({      \
     726             :     T const * _fd_store_tmp = (T const *)(src); \
     727             :     FD_COMPILER_FORGET( _fd_store_tmp );        \
     728             :     *_fd_store_tmp;                             \
     729             :   }))
     730             : 
     731             : #define FD_STORE( T, dst, val ) (__extension__({           \
     732             :     T * _fd_store_tmp = (T *)fd_type_pun( (void *)(dst) ); \
     733             :     *_fd_store_tmp = (val);                                \
     734             :     FD_COMPILER_MFENCE();                                  \
     735             :     _fd_store_tmp;                                         \
     736             :   }))
     737             : 
     738             : FD_FN_PURE static inline uchar  fd_uchar_load_1      ( void const * p ) { FD_COMPILER_FORGET( p ) ; return (        *(uchar  const *)p); }
     739             : 
     740             : FD_FN_PURE static inline ushort fd_ushort_load_1     ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ushort)*(uchar  const *)p); }
     741             : FD_FN_PURE static inline ushort fd_ushort_load_2     ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ushort const *)p); }
     742             : 
     743             : FD_FN_PURE static inline uint   fd_uint_load_1       ( void const * p ) { FD_COMPILER_FORGET( p ); return ((uint  )*(uchar  const *)p); }
     744             : FD_FN_PURE static inline uint   fd_uint_load_2       ( void const * p ) { FD_COMPILER_FORGET( p ); return ((uint  )*(ushort const *)p); }
     745             : FD_FN_PURE static inline uint   fd_uint_load_3       ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_uint_load_2 (p) | (fd_uint_load_1 (((uchar const *)p)+2UL)<<16); }
     746             : FD_FN_PURE static inline uint   fd_uint_load_4       ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(uint   const *)p); }
     747             : 
     748             : FD_FN_PURE static inline ulong  fd_ulong_load_1      ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(uchar  const *)p); }
     749             : FD_FN_PURE static inline ulong  fd_ulong_load_2      ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(ushort const *)p); }
     750             : FD_FN_PURE static inline ulong  fd_ulong_load_3      ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_2(p) | (fd_ulong_load_1(((uchar const *)p)+2UL)<<16); }
     751             : FD_FN_PURE static inline ulong  fd_ulong_load_4      ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(uint   const *)p); }
     752             : FD_FN_PURE static inline ulong  fd_ulong_load_5      ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_4(p) | (fd_ulong_load_1(((uchar const *)p)+4UL)<<32); }
     753             : FD_FN_PURE static inline ulong  fd_ulong_load_6      ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_4(p) | (fd_ulong_load_2(((uchar const *)p)+4UL)<<32); }
     754             : FD_FN_PURE static inline ulong  fd_ulong_load_7      ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_6(p) | (fd_ulong_load_1(((uchar const *)p)+6UL)<<48); }
     755             : FD_FN_PURE static inline ulong  fd_ulong_load_8      ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong  const *)p); }
     756             : 
     757             : #define                         fd_uchar_load_1_fast                    fd_uchar_load_1
     758             : 
     759             : #define                         fd_ushort_load_1_fast                   fd_ushort_load_1
     760             : #define                         fd_ushort_load_2_fast                   fd_ushort_load_2
     761             : 
     762             : #define                         fd_uint_load_1_fast                     fd_uint_load_1
     763             : #define                         fd_uint_load_2_fast                     fd_uint_load_2
     764             : FD_FN_PURE static inline uint   fd_uint_load_3_fast  ( void const * p ) { FD_COMPILER_FORGET( p ); return (       *(uint   const *)p) & 0x00ffffffU;          } /* Tail read 1B */
     765             : #define                         fd_uint_load_4_fast                     fd_uint_load_4
     766             : 
     767             : #define                         fd_ulong_load_1_fast                    fd_ulong_load_1
     768             : #define                         fd_ulong_load_2_fast                    fd_ulong_load_2
     769             : FD_FN_PURE static inline ulong  fd_ulong_load_3_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong)*(uint   const *)p) & 0x0000000000ffffffUL; } /* Tail read 1B */
     770             : #define                         fd_ulong_load_4_fast                    fd_ulong_load_4
     771             : FD_FN_PURE static inline ulong  fd_ulong_load_5_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (       *(ulong  const *)p) & 0x000000ffffffffffUL; } /* Tail read 3B */
     772             : FD_FN_PURE static inline ulong  fd_ulong_load_6_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (       *(ulong  const *)p) & 0x0000ffffffffffffUL; } /* Tail read 2B */
     773             : FD_FN_PURE static inline ulong  fd_ulong_load_7_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (       *(ulong  const *)p) & 0x00ffffffffffffffUL; } /* Tail read 1B */
     774             : #define                         fd_ulong_load_8_fast                    fd_ulong_load_8
     775             : 
     776             : #else
     777             : #error "Unsupported FD_UNALIGNED_ACCESS_STYLE"
     778             : #endif
     779             : 
     780             : /* fd_ulong_svw_enc_sz returns the number of bytes needed to encode
     781             :    x as a symmetric variable width encoded integer.  This is at most
     782             :    FD_ULONG_SVW_ENC_MAX (9).  Result will be in {1,2,3,4,5,8,9}. */
     783             : 
     784             : #define FD_ULONG_SVW_ENC_MAX (9UL) /* For compile time use */
     785             : 
     786             : FD_FN_UNUSED FD_FN_CONST static ulong /* Work around -Winline */
     787   719287029 : fd_ulong_svw_enc_sz( ulong x ) {
     788             :   /* FIXME: CONSIDER FIND_MSB BASED TABLE LOOKUP? */
     789   719287029 :   if( FD_LIKELY( x<(1UL<< 6) ) ) return 1UL;
     790   288992580 :   if( FD_LIKELY( x<(1UL<<10) ) ) return 2UL;
     791    43497816 :   if( FD_LIKELY( x<(1UL<<18) ) ) return 3UL;
     792    35686536 :   if( FD_LIKELY( x<(1UL<<24) ) ) return 4UL;
     793    30959490 :   if( FD_LIKELY( x<(1UL<<32) ) ) return 5UL;
     794    24571458 :   if( FD_LIKELY( x<(1UL<<56) ) ) return 8UL;
     795     5695983 :   return                                9UL;
     796    24571458 : }
     797             : 
     798             : /* fd_ulong_svw_enc appends x to the byte stream b as a symmetric
     799             :    variable width encoded integer.  b should have room from
     800             :    fd_ulong_svw_env_sz(x) (note that 9 is sufficient for all possible
     801             :    x).  Returns the next location in the byte system. */
     802             : 
     803             : FD_FN_UNUSED static uchar * /* Work around -Winline */
     804             : fd_ulong_svw_enc( uchar * b,
     805    50705259 :                   ulong   x ) {
     806    50705259 :   if(      FD_LIKELY( x<(1UL<< 6) ) ) {                                                                 b[0] = (uchar)          (x<< 1);  b+=1; } /* 0    | x( 6) |    0 */
     807    45124269 :   else if( FD_LIKELY( x<(1UL<<10) ) ) { FD_STORE( ushort, b, (ushort)(            0x8001UL | (x<<3)) );                                   b+=2; } /* 100  | x(10) |  001 */
     808    41982678 :   else if( FD_LIKELY( x<(1UL<<18) ) ) { FD_STORE( ushort, b, (ushort)(               0x5UL | (x<<3)) ); b[2] = (uchar)(0xa0UL | (x>>13)); b+=3; } /* 101  | x(18) |  101 */
     809    35614740 :   else if( FD_LIKELY( x<(1UL<<24) ) ) { FD_STORE( uint,   b, (uint  )(        0xc0000003UL | (x<<4)) );                                   b+=4; } /* 1100 | x(24) | 0011 */
     810    30887781 :   else if( FD_LIKELY( x<(1UL<<32) ) ) { FD_STORE( uint,   b, (uint  )(               0xbUL | (x<<4)) ); b[4] = (uchar)(0xd0UL | (x>>28)); b+=5; } /* 1101 | x(32) | 1011 */
     811    24523464 :   else if( FD_LIKELY( x<(1UL<<56) ) ) { FD_STORE( ulong,  b,          0xe000000000000007UL | (x<<4)  );                                   b+=8; } /* 1110 | x(56) | 0111 */
     812     5648184 :   else                                { FD_STORE( ulong,  b,                         0xfUL | (x<<4)  ); b[8] = (uchar)(0xf0UL | (x>>60)); b+=9; } /* 1111 | x(64) | 1111 */
     813    50705259 :   return b;
     814    50705259 : }
     815             : 
     816             : /* fd_ulong_svw_enc_fixed appends x to the byte stream b as a symmetric
     817             :    csz width encoded integer.  csz is assumed to be in {1,2,3,4,5,8,9}.
     818             :    b should have room from csz bytes and x should be known apriori to be
     819             :    compatible with csz.  Useful for updating in place an existing
     820             :    encoded integer to a value that is <= the current value.  Returns
     821             :    b+csz. */
     822             : 
     823             : FD_FN_UNUSED static uchar * /* Work around -Winline */
     824             : fd_ulong_svw_enc_fixed( uchar * b,
     825             :                         ulong   csz,
     826  1099817607 :                         ulong   x ) {
     827  1099817607 :   if(      FD_LIKELY( csz==1UL ) ) {                                                                 b[0] = (uchar)          (x<< 1);  } /* 0    | x( 6) |    0 */
     828   408071679 :   else if( FD_LIKELY( csz==2UL ) ) { FD_STORE( ushort, b, (ushort)(            0x8001UL | (x<<3)) );                                   } /* 100  | x(10) |  001 */
     829    51102777 :   else if( FD_LIKELY( csz==3UL ) ) { FD_STORE( ushort, b, (ushort)(               0x5UL | (x<<3)) ); b[2] = (uchar)(0xa0UL | (x>>13)); } /* 101  | x(18) |  101 */
     830    35391786 :   else if( FD_LIKELY( csz==4UL ) ) { FD_STORE( uint,   b, (uint  )(        0xc0000003UL | (x<<4)) );                                   } /* 1100 | x(24) | 0011 */
     831    30665178 :   else if( FD_LIKELY( csz==5UL ) ) { FD_STORE( uint,   b, (uint  )(               0xbUL | (x<<4)) ); b[4] = (uchar)(0xd0UL | (x>>28)); } /* 1101 | x(32) | 1011 */
     832    24375081 :   else if( FD_LIKELY( csz==8UL ) ) { FD_STORE( ulong,  b,          0xe000000000000007UL | (x<<4)  );                                   } /* 1110 | x(56) | 0111 */
     833     5500425 :   else             /* csz==9UL */  { FD_STORE( ulong,  b,                         0xfUL | (x<<4)  ); b[8] = (uchar)(0xf0UL | (x>>60)); } /* 1111 | x(64) | 1111 */
     834  1099817607 :   return b+csz;
     835  1099817607 : }
     836             : 
     837             : /* fd_ulong_svw_dec_sz returns the number of bytes representing an svw
     838             :    encoded integer.  b points to the first byte of the encoded integer.
     839             :    Result will be in {1,2,3,4,5,8,9}. */
     840             : 
     841             : FD_FN_PURE static inline ulong
     842  4001636079 : fd_ulong_svw_dec_sz( uchar const * b ) {
     843             : 
     844             :   /* LSB:         Compressed size
     845             :      xxxx|xxx0 -> 1B
     846             :      xxxx|x001 -> 2B
     847             :      xxxx|x101 -> 3B
     848             :      xxxx|0011 -> 4B
     849             :      xxxx|1011 -> 5B
     850             :      xxxx|0111 -> 8B
     851             :      xxxx|1111 -> 9B
     852             : 
     853             :       15   14   13   12   11   10    9    8    7    6    5    4    3    2    1    0
     854             :      1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
     855             :        9    1    3    1    5    1    2    1    8    1    3    1    4    1    2    1 */
     856             : 
     857  4001636079 :   return (0x9131512181314121UL >> ((((ulong)b[0]) & 15UL) << 2)) & 15UL;
     858  4001636079 : }
     859             : 
     860             : /* fd_ulong_svw_dec_tail_sz returns the number of bytes representing an
     861             :    svw encoded integer.  b points to one after the last byte of the
     862             :    encoded integer.  Result will be in {1,2,3,4,5,8,9}. */
     863             : 
     864             : FD_FN_PURE static inline ulong
     865   228152967 : fd_ulong_svw_dec_tail_sz( uchar const * b ) {
     866             : 
     867             :   /* MSB:         Compressed size
     868             :      0xxx|xxxx -> 1B
     869             :      100x|xxxx -> 2B
     870             :      101x|xxxx -> 3B
     871             :      1100|xxxx -> 4B
     872             :      1101|xxxx -> 5B
     873             :      1110|xxxx -> 8B
     874             :      1111|xxxx -> 9B
     875             : 
     876             :       15   14   13   12   11   10    9    8    7    6    5    4    3    2    1    0
     877             :      1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
     878             :        9    8    5    4    3    3    2    2    1    1    1    1    1    1    1    1 */
     879             : 
     880   228152967 :   return (0x9854332211111111UL >> ((((ulong)b[-1]) >> 4) << 2)) & 15UL;
     881   228152967 : }
     882             : 
     883             : /* fd_ulong_svw_dec_fixed decodes a ulong encoded as a symmetric
     884             :    variable width encoded integer whose width is known.  b points to the
     885             :    first byte of the encoded integer and the encoded integer is csz
     886             :    byte.  csz is assumed to be in {1,2,3,4,5,8,9}. */
     887             : 
     888             : FD_FN_UNUSED static ulong /* Work around -Winline */
     889             : fd_ulong_svw_dec_fixed( uchar const * b,
     890  4386757716 :                         ulong         csz ) {
     891  4386757716 :   if( FD_LIKELY( csz==1UL ) ) return (fd_ulong_load_1( b ) >> 1);
     892  1596414981 :   if( FD_LIKELY( csz==2UL ) ) return (fd_ulong_load_2( b ) >> 3) &              1023UL;
     893   303226452 :   if( FD_LIKELY( csz==3UL ) ) return (fd_ulong_load_2( b ) >> 3) | ((((ulong)b[2]) & 0x1fUL) << 13);
     894   212574318 :   if( FD_LIKELY( csz==4UL ) ) return (fd_ulong_load_4( b ) >> 4) &          16777215UL;
     895   184214163 :   if( FD_LIKELY( csz==5UL ) ) return (fd_ulong_load_4( b ) >> 4) | ((((ulong)b[4]) & 0x0fUL) << 28);
     896   146399091 :   if( FD_LIKELY( csz==8UL ) ) return (fd_ulong_load_8( b ) >> 4) & 72057594037927935UL;
     897    33150531 :   return       /*csz==9UL*/          (fd_ulong_load_8( b ) >> 4) | ( ((ulong)b[8])           << 60);
     898   146399091 : }
     899             : 
     900             : /* fd_ulong_svw_dec decodes a ulong encoded as a symmetric variable
     901             :    width encoded integer.  b points to the first byte of the encoded
     902             :    integer.  Returns a pointer to the first byte after the symvarint and
     903             :    *_x will hold the uncompressed value on return.  If the byte stream
     904             :    might be corrupt, it should be safe to read up to 9 bytes starting a
     905             :    b. */
     906             : 
     907             : static inline uchar const *
     908             : fd_ulong_svw_dec( uchar const * b,
     909   101037444 :                   ulong *       _x ) {
     910   101037444 :   ulong csz = fd_ulong_svw_dec_sz( b );
     911   101037444 :   *_x = fd_ulong_svw_dec_fixed( b, csz ); b += csz;
     912   101037444 :   return b;
     913   101037444 : }
     914             : 
     915             : /* fd_ulong_svw_dec_tail decodes a ulong encoded as a symmetric variable
     916             :    width encoded integer.  b points to the first byte after the encoded
     917             :    integer.  Returns a pointer to the first byte of the encoded integer
     918             :    and *_x will have the hold the uncompressed value on return.  If the
     919             :    byte stream might be corrupt, it should be safe to read up to 9 bytes
     920             :    immediately before b. */
     921             : 
     922             : static inline uchar const *
     923             : fd_ulong_svw_dec_tail( uchar const * b,
     924   100663296 :                        ulong *       _x ) {
     925   100663296 :   ulong csz = fd_ulong_svw_dec_tail_sz( b );
     926   100663296 :   b -= csz; *_x = fd_ulong_svw_dec_fixed( b, csz );
     927   100663296 :   return b;
     928   100663296 : }
     929             : 
     930             : /* FD_LAYOUT_{INIT,APPEND,FINI} are useful for compile time
     931             :    determination of the required footprint of shared memory regions with
     932             :    dynamic sizes and complex alignment restrictions.
     933             : 
     934             :    FD_LAYOUT_INIT starts a layout.  Returns a handle to the layout.
     935             : 
     936             :    FD_LAYOUT_APPEND appends a s byte region of alignment a to a layout
     937             :    where l is an in progress layout.
     938             : 
     939             :    FD_LAYOUT_FINI returns the final layout footprint.  a is the
     940             :    alignment to be used for the overall layout.  It should be the
     941             :    alignment of all appends.  The final footprint will be a multiple of
     942             :    a.
     943             : 
     944             :    All arguments should be ulong compatible.  All alignment should be a
     945             :    positive integer power of 2 and safe against multiple evaluation.
     946             : 
     947             :    The caller further promises the layout is not unreasonably large that
     948             :    overflow might be an issue (i.e. will be at most
     949             :    fd_ulong_align_dn(ULONG_MAX,a) where is the a used for FINI in size).
     950             : 
     951             :    Example usage:
     952             : 
     953             :      FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT,
     954             :        align0, size0 ),
     955             :        align1, size1 ),
     956             :        page_sz )
     957             : 
     958             :    would return the number of pages as a page_sz multiple for a shared
     959             :    memory region that starts with a initial/final region of size0/size1
     960             :    bytes and alignment align0/align1.  Correct operation requires
     961             :    page_sz>=max(align0,align1),  page_sz, align0 and align1 be positive
     962             :    integer powers of 2, page_sz, size0, align0 and align1 should be
     963             :    ulong compatible, page_sz, align0 and align1 be safe against multiple
     964             :    evaluation, and the final size be at most ULONG_MAX-page_sz+1. */
     965             : 
     966       13476 : #define FD_LAYOUT_INIT              (0UL)
     967       43827 : #define FD_LAYOUT_APPEND( l, a, s ) (FD_ULONG_ALIGN_UP( (l), (a) ) + (s))
     968     5598321 : #define FD_LAYOUT_FINI( l, a )      FD_ULONG_ALIGN_UP( (l), (a) )
     969             : 
     970             : /* FD_SCRATCH_ALLOC_{INIT,APPEND,FINI} are utility macros for allocating
     971             :    sub-regions of memory out of one larger region of memory.  These are
     972             :    intentionally parallel to FD_LAYOUT, but for use at runtime, not
     973             :    compile time, and when you want to know the actual addresses, and not
     974             :    just the total footprint.
     975             : 
     976             :    FD_SCRATCH_ALLOC_INIT begins a scratch allocation operation called
     977             :    layout with starting base address of base.
     978             : 
     979             :    FD_SCRATCH_ALLOC_APPEND returns the next address in the allocation
     980             :    operation `layout` with the required alignment and advances the
     981             :    allocation operation by the provided size.
     982             : 
     983             :    FD_SCRATCH_ALLOC_FINI finalizes a scratch allocation operation with
     984             :    the name given by `layout` and returns the next address with the
     985             :    requested alignment.
     986             : 
     987             :    align must be a power of 2, and layout should be an identifier name.
     988             :    The macros are robust otherwise.
     989             : 
     990             :    Example usage:
     991             :       FD_SCRATCH_ALLOC_INIT( foo, scratch_base );
     992             :       int   * arr1 = FD_SCRATCH_ALLOC_APPEND( foo, alignof(int),   100*sizeof(int)   );
     993             :       ulong * arr2 = FD_SCRATCH_ALLOC_APPEND( foo, alignof(ulong),  25*sizeof(ulong) );
     994             :       FD_SCRATCH_ALLOC_FINI( foo, 32UL );
     995             :    */
     996      316293 : #define FD_SCRATCH_ALLOC_INIT(   layout, base )  ulong _##layout = (ulong)(base)
     997     1585398 : #define FD_SCRATCH_ALLOC_APPEND( layout, align, sz ) (__extension__({                               \
     998     1585398 :     ulong _align = (align);                                                                         \
     999     1585398 :     ulong _sz    = (sz);                                                                            \
    1000     1585398 :     ulong _scratch_alloc = fd_ulong_align_up( _##layout, (align) );                                 \
    1001     1585398 :     if( FD_UNLIKELY( _scratch_alloc+_sz<_scratch_alloc ) )                                          \
    1002     1585398 :       FD_LOG_ERR(( "FD_SCRATCH_ALLOC_APPEND( %s, %lu, %lu ) overflowed", #layout, _align, _sz ));   \
    1003     1585398 :     _##layout = _scratch_alloc + _sz;                                                               \
    1004     1585398 :     (void *)_scratch_alloc;                                                                         \
    1005     1585398 :     }))
    1006        2223 : #define FD_SCRATCH_ALLOC_FINI( layout, align ) (_##layout = FD_ULONG_ALIGN_UP( _##layout, (align) ) )
    1007             : 
    1008             : #define FD_SCRATCH_ALLOC_PUBLISH( layout ) (__extension__({            \
    1009             :     void * end = (void *)FD_SCRATCH_ALLOC_FINI( layout, 1UL );         \
    1010             :     int ok = fd_scratch_publish_is_safe( end );                        \
    1011             :     if( ok ) fd_scratch_publish( end );                                \
    1012             :     ok;                                                                \
    1013             :   }))
    1014             : 
    1015             : FD_PROTOTYPES_END
    1016             : 
    1017             : #endif /* HEADER_fd_src_util_bits_fd_bits_h */

Generated by: LCOV version 1.14