Line data Source code
1 : #ifndef HEADER_fd_src_util_bits_fd_bits_h
2 : #define HEADER_fd_src_util_bits_fd_bits_h
3 :
4 : /* Bit manipulation APIs */
5 :
6 : #include "../sanitize/fd_msan.h"
7 :
8 : FD_PROTOTYPES_BEGIN
9 :
10 : /* fd_ulong_is_pow2 ( x ) returns 1 if x is a positive integral power of 2 and 0 otherwise.
11 : fd_ulong_pow2 ( n ) returns 2^n mod 2^64. U.B. if n is negative.
12 :
13 : fd_ulong_mask_bit ( b ) returns the ulong with bit b set and all other bits 0. U.B. if b is not in [0,64).
14 : fd_ulong_clear_bit ( x, b ) returns x with bit b cleared. U.B. if b is not in [0,64).
15 : fd_ulong_set_bit ( x, b ) returns x with bit b set. U.B. if b is not in [0,64).
16 : fd_ulong_extract_bit( x, b ) returns bit b of x as an int in {0,1}. U.B. if b is not in [0,64).
17 : fd_ulong_insert_bit ( x, b, y ) returns x with bit b set to y. U.B. if b is not in [0,64) and/or y is not in {0,1}.
18 :
19 : fd_ulong_mask_lsb ( n ) returns the ulong bits [0,n) set and all other bits 0. U.B. if n is not in [0,64].
20 : fd_ulong_clear_lsb ( x, n ) returns x with bits [0,n) cleared. U.B. if n is not in [0,64].
21 : fd_ulong_set_lsb ( x, n ) returns x with bits [0,n) set. U.B. if n is not in [0,64].
22 : fd_ulong_flip_lsb ( x, n ) returns x with bits [0,n) flipped. U.B. if n is not in [0,64].
23 : fd_ulong_extract_lsb( x, n ) returns bits [0,n) of x. U.B. if n is not in [0,64].
24 : fd_ulong_insert_lsb ( x, n, y ) returns x with bits [0,n) set to y. U.B. if n is not in [0,64] and/or y is not in [0,2^n).
25 :
26 : fd_ulong_mask ( l, h ) returns the ulong bits [l,h] set and all other bits 0. U.B. if not 0<=l<=h<64.
27 : fd_ulong_clear ( x, l, h ) returns x with bits [l,h] cleared. U.B. if not 0<=l<=h<64.
28 : fd_ulong_set ( x, l, h ) returns x with bits [l,h] set. U.B. if not 0<=l<=h<64.
29 : fd_ulong_flip ( x, l, h ) returns x with bits [l,h] flipped. U.B. if not 0<=l<=h<64.
30 : fd_ulong_extract ( x, l, h ) returns bits [l,h] of x. U.B. if not 0<=l<=h<64.
31 : fd_ulong_insert ( x, l, h, y ) returns x with bits [l,h] set to y.
32 : U.B. if not 0<=l<=h<64 and/or y is not in [0,2^(h-l+1)).
33 :
34 : fd_ulong_pop_lsb ( x ) returns x with the least significant set bit cleared (0 returns 0).
35 :
36 : FIXME: CONSIDER HAVING (A,X) INSTEAD OF (X,A)?
37 : fd_ulong_is_aligned ( x, a ) returns 1 if x is an integral multiple of a and 0 otherwise. U.B. if !fd_ulong_is_pow2( a )
38 : fd_ulong_alignment ( x, a ) returns x mod a. U.B. if !fd_ulong_is_pow2( a )
39 : fd_ulong_align_dn ( x, a ) returns x rounded down to the closest multiple of a <= x. U.B. if !fd_ulong_is_pow2( a )
40 : fd_ulong_align_up ( x, a ) returns x rounded up to the closest multiple of a >= x mod 2^64.
41 : U.B. if !fd_ulong_is_pow2( a )
42 :
43 : fd_ulong_blend ( m, t, f ) returns a ulong formed by selecting bits from t where m is 1 and from f where m is 0.
44 : fd_ulong_if ( c, t, f ) returns t if c is 1 and f if c is 0. U.B. if c is not in {0,1}
45 : fd_ulong_store_if ( c, p, v ) if c is non-zero, stores v to *p. Otherwise does nothing.
46 : fd_ulong_abs ( x ) returns |x| as a ulong
47 : fd_ulong_min ( x, y ) returns min(x,y)
48 : fd_ulong_max ( x, y ) returns max(x,y)
49 :
50 : fd_ulong_shift_left ( x, n ) returns x with its bits shifted left n times (n>63 shifts to zero), U.B. if n<0
51 : fd_ulong_shift_right ( x, n ) returns x with its bits shifted right n times (n>63 shifts to zero), U.B. if n<0
52 : fd_ulong_rotate_left ( x, n ) returns x with its bits rotated left n times (negative values rotate right)
53 : fd_ulong_rotate_right( x, n ) returns x with its bits rotated right n times (negative values rotate left)
54 :
55 : fd_ulong_popcnt ( x ) returns the number of bits set in x, in [0,64].
56 : fd_ulong_find_lsb ( x ) returns the index of the least significant bit set in x, in [0,64). U.B. if x is zero.
57 : fd_ulong_find_lsb_w_default( x, d ) returns the index of the least significant bit set in x, in [0,64). d if x is zero.
58 : fd_ulong_find_msb ( x ) returns the index of the most significant bit set in x, in [0,64). U.B. if x is zero.
59 : fd_ulong_find_msb_w_default( x, d ) returns the index of the most significant bit set in x, in [0,64). d if x is zero.
60 : fd_ulong_bswap ( x ) returns x with its bytes swapped
61 : fd_ulong_pow2_up ( x ) returns y mod 2^64 where y is the smallest integer power of 2 >= x. U.B. if x is zero.
62 : (current implementation returns 0 if x==0).
63 : fd_ulong_pow2_dn ( x ) returns the largest integer power of 2 <= x. U.B. if x is zero.
64 : (current implementation returns 1 if x==0).
65 :
66 : Similarly for uchar,ushort,uint,uint128. Note that the signed
67 : versions of shift_left, rotate_left, rotate_right operate on the bit
68 : pattern of the underlying type directly. Signed shift_right is sign
69 : extending while unsigned shift_right is zero padding (such that if x
70 : is negative/non-negative, a large magnitude shift will shift to
71 : -1/0).
72 :
73 : Support for zig-zag encoding is also provided. E.g.
74 :
75 : fd_long_zz_enc( x ) zig-zag encodes the long x and returns it as a ulong.
76 : fd_long_zz_dec( y ) zig-zag decodes the ulong y and returns it as a long.
77 :
78 : zig-zag encoding losslessly maps a signed integer to an unsigned
79 : integer such that, if the magnitude of the signed integer was small,
80 : the magnitude of the unsigned integer will be small too.
81 :
82 : Note that, though fd_ulong_if and friends look like a completely
83 : superfluous wrapper for the trinary operator, they have subtly
84 : different linguistic meanings. This seemingly trivial difference can
85 : have profound effects on code generation quality, especially on
86 : modern architectures. For example:
87 :
88 : c ? x[i] : x[j];
89 :
90 : linguistically means, if c is non-zero, load the i-th element of
91 : x. Otherwise, load the j-th element of x. But:
92 :
93 : fd_ulong_if( c, x[i], x[j] )
94 :
95 : means load the i-th element of x _and_ load the j-th element of x
96 : _and_ then select the first value if c is non-zero or the second
97 : value otherwise. Further, it explicitly says that c and the loads
98 : can be evaluated in any order.
99 :
100 : As such, in the trinary case, the compiler will be loathe to do
101 : either load before computing c because the language explicitly says
102 : "don't do that". In the unlikely case it overcomes this barrier,
103 : it then has the difficult job of proving that reordering c with the
104 : loads is safe (e.g. evaluating c might affect the value that would be
105 : loaded if c has side effects). In the unlikely case it overcomes
106 : that barrier, it then has the difficult job of proving it is safe to
107 : do the loads before evaluating c (e.g. c might be protecting against
108 : a load that could seg-fault).
109 :
110 : In the fd_ulong_if case though, the compiler has been explicitly told
111 : up front that the loads are safe and c and the loads can be done in
112 : any order. Now the optimizer finds it a lot easier to optimize
113 : because it isn't accidentally over-constrained and doesn't have to
114 : prove anything. E.g. it can use otherwise unused instruction slots
115 : before the operation to hide load latency while computing c in
116 : parallel via ILP. Further, it can then ideally use a conditional
117 : move operation to eliminate unnecessary consumption of BTB resources.
118 : And, if c is known at compile time, the compiler can prune
119 : unnecessary code for the unselected option (e.g. the compiler knows
120 : that omitting an unused normal load has no observable effect in the
121 : machine model).
122 :
123 : Faster, more deterministic, less BTB resources consumed and good
124 : compile time behavior. Everything the trinary operator should have
125 : been, rather than the giant pile of linguistic fail that it is.
126 :
127 : Overall, compilers traditionally are much much better at pruning
128 : unneeded operations than speculating execution (especially for code
129 : paths that a language says not to do and doubly so for code paths
130 : that are not obviously safe in general). And most of this is because
131 : languages are not designed correctly to help developers express their
132 : intent and constraints to the compiler.
133 :
134 : This dynamic has had multi-billion dollar commercial impacts though
135 : the cause-and-effect has gone largely unrecognized.
136 :
137 : At one extreme, a major reason why Itanium failed was languages
138 : didn't have the right constructs and machine models for developers to
139 : "do the right thing". Even given such, most devs wouldn't know to
140 : use them because they were erroneously taught to code to a machine
141 : abstraction that hasn't applied to the real world since the early
142 : 1980s. Compilers then were not able to utilize all the speculative
143 : execution / ILP / ... features that were key to performance on that
144 : architecture. The developer community, not being able to see any
145 : benefits (much less large enough benefits to justify short term
146 : switching costs) and not wanting to write tons of hand-tuned
147 : non-portable ASM kernels, shrugged and Itanium withered away.
148 :
149 : At the other extreme, CUDA gave developers a good GPU abstraction and
150 : extended languages and compilers to make it possible for developers
151 : to code to that abstraction (e.g. express locality explicitly instead of
152 : the constant lying-by-omission about the importance of locality that
153 : virtually everything else in tech does ... the speed of light isn't
154 : infinite or even all that fast relative to modern CPUs ... stop
155 : pretending that it is). CUDA enabled GPUs have since thrived in
156 : gaming, high performance computing, machine learning, crypto, etc.
157 : Profits had by all (well, by Nvidia at least).
158 :
159 : TL;DR
160 :
161 : * It'd be laughable if it weren't so pathetic that CPU ISAs and
162 : programming languages usually forget to expose one of the most
163 : fundamental and important digital logic circuits to devs ... the
164 : 2:1 mux.
165 :
166 : * Developers will usually do the right thing if languages let them.
167 :
168 : TODO: mask_msb, clear_msb, set_msb, flip_msb, extract_msb,
169 : insert_msb, bitrev, sign, copysign, flipsign, rounding right shift,
170 : ... */
171 :
172 : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,w) /* T: unsigned integer type token, w: bit width of T */ \
173 279920388 : FD_FN_CONST static inline int fd_##T##_is_pow2 ( T x ) { return (!!x) & (!(x & (x-(T)1))); } \
174 759 : FD_FN_CONST static inline T fd_##T##_pow2 ( int n ) { return (T)(((T)(n<w))<<(n&(w-1))); } \
175 440047902 : FD_FN_CONST static inline T fd_##T##_mask_bit ( int b ) { return (T)(((T)1)<<b); } \
176 51138 : FD_FN_CONST static inline T fd_##T##_clear_bit ( T x, int b ) { return (T)(x & ~fd_##T##_mask_bit(b)); } \
177 439551183 : FD_FN_CONST static inline T fd_##T##_set_bit ( T x, int b ) { return (T)(x | fd_##T##_mask_bit(b)); } \
178 2976 : FD_FN_CONST static inline T fd_##T##_flip_bit ( T x, int b ) { return (T)(x ^ fd_##T##_mask_bit(b)); } \
179 709103743 : FD_FN_CONST static inline int fd_##T##_extract_bit ( T x, int b ) { return (int)((x>>b) & (T)1); } \
180 5952 : FD_FN_CONST static inline T fd_##T##_insert_bit ( T x, int b, int y ) { return (T)((x & ~fd_##T##_mask_bit(b))|(((T)y)<<b)); } \
181 1284119511 : FD_FN_CONST static inline T fd_##T##_mask_lsb ( int n ) { return (T)((((T)(n<w))<<(n&(w-1)))-((T)1)); } \
182 1281521550 : FD_FN_CONST static inline T fd_##T##_clear_lsb ( T x, int n ) { return (T)(x & ~fd_##T##_mask_lsb(n)); } \
183 2262 : FD_FN_CONST static inline T fd_##T##_set_lsb ( T x, int n ) { return (T)(x | fd_##T##_mask_lsb(n)); } \
184 2262 : FD_FN_CONST static inline T fd_##T##_flip_lsb ( T x, int n ) { return (T)(x ^ fd_##T##_mask_lsb(n)); } \
185 2262 : FD_FN_CONST static inline T fd_##T##_extract_lsb ( T x, int n ) { return (T)(x & fd_##T##_mask_lsb(n)); } \
186 1281519288 : FD_FN_CONST static inline T fd_##T##_insert_lsb ( T x, int n, T y ) { return (T)(fd_##T##_clear_lsb(x,n) | y); } \
187 91704 : FD_FN_CONST static inline T fd_##T##_mask ( int l, int h ) { return (T)( fd_##T##_mask_lsb(h-l+1) << l ); } \
188 124881 : FD_FN_CONST static inline T fd_##T##_clear ( T x, int l, int h ) { return (T)(x & ~fd_##T##_mask(l,h)); } \
189 42480 : FD_FN_CONST static inline T fd_##T##_set ( T x, int l, int h ) { return (T)(x | fd_##T##_mask(l,h)); } \
190 108978 : FD_FN_CONST static inline T fd_##T##_flip ( T x, int l, int h ) { return (T)(x ^ fd_##T##_mask(l,h)); } \
191 82896 : FD_FN_CONST static inline T fd_##T##_extract ( T x, int l, int h ) { return (T)( (x>>l) & fd_##T##_mask_lsb(h-l+1) ); } \
192 264864 : FD_FN_CONST static inline T fd_##T##_insert ( T x, int l, int h, T y ) { return (T)(fd_##T##_clear(x,l,h) | (y<<l)); } \
193 3433658337 : FD_FN_CONST static inline T fd_##T##_pop_lsb ( T x ) { return (T)(x & (x-(T)1)); } \
194 9638420611 : FD_FN_CONST static inline int fd_##T##_is_aligned ( T x, T a ) { a--; return !(x & a); } \
195 66960 : FD_FN_CONST static inline T fd_##T##_alignment ( T x, T a ) { a--; return (T)( x & a); } \
196 210475570 : FD_FN_CONST static inline T fd_##T##_align_dn ( T x, T a ) { a--; return (T)( x & ~a); } \
197 11865002197 : FD_FN_CONST static inline T fd_##T##_align_up ( T x, T a ) { a--; return (T)((x+a) & ~a); } \
198 251658240 : FD_FN_CONST static inline T fd_##T##_blend ( T m, T t, T f ) { return (T)((t & m) | (f & ~m)); } \
199 29449760936 : FD_FN_CONST static inline T fd_##T##_if ( int c, T t, T f ) { return c ? t : f; /* cmov */ } \
200 300787866 : /* */ static inline void fd_##T##_store_if ( int c, T * p, T v ) { T _[ 1 ]; *( c ? p : _ ) = v; /* cmov */ } \
201 489555636 : FD_FN_CONST static inline T fd_##T##_abs ( T x ) { return x; } /* no-op: T is unsigned here */ \
202 20427256701 : FD_FN_CONST static inline T fd_##T##_min ( T x, T y ) { return (x<y) ? x : y; /* cmov */ } \
203 3256547313 : FD_FN_CONST static inline T fd_##T##_max ( T x, T y ) { return (x>y) ? x : y; /* cmov */ } \
204 754974720 : FD_FN_CONST static inline T fd_##T##_shift_left ( T x, int n ) { return (T)(((n>(w-1)) ? ((T)0) : x) << (n&(w-1))); } \
205 503317968 : FD_FN_CONST static inline T fd_##T##_shift_right ( T x, int n ) { return (T)(((n>(w-1)) ? ((T)0) : x) >> (n&(w-1))); } \
206 >31685*10^7 : FD_FN_CONST static inline T fd_##T##_rotate_left ( T x, int n ) { return (T)((x << (n&(w-1))) | (x >> ((-n)&(w-1)))); } \
207 4571123638 : FD_FN_CONST static inline T fd_##T##_rotate_right( T x, int n ) { return (T)((x >> (n&(w-1))) | (x << ((-n)&(w-1)))); }
208 :
209 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uchar, 8)
210 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ushort,16)
211 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint, 32)
212 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ulong, 64)
213 :
214 : #if FD_HAS_INT128 /* FIXME: These probably could benefit from x86 specializations */
215 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint128,128)
216 : #endif
217 :
218 : #undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
219 :
220 1892748118 : FD_FN_CONST static inline int fd_uchar_popcnt ( uchar x ) { return __builtin_popcount ( (uint)x ); }
221 816 : FD_FN_CONST static inline int fd_ushort_popcnt( ushort x ) { return __builtin_popcount ( (uint)x ); }
222 769734 : FD_FN_CONST static inline int fd_uint_popcnt ( uint x ) { return __builtin_popcount ( x ); }
223 13140883578 : FD_FN_CONST static inline int fd_ulong_popcnt ( ulong x ) { return __builtin_popcountl( x ); }
224 :
225 : #if FD_HAS_INT128
226 : FD_FN_CONST static inline int
227 49536 : fd_uint128_popcnt( uint128 x ) { /* sum the popcounts of the low and high 64-bit halves */
228 49536 : return __builtin_popcountl( (ulong) x ) + __builtin_popcountl( (ulong)(x>>64) );
229 49536 : }
230 : #endif
231 :
232 : #include "fd_bits_find_lsb.h" /* Provides find_lsb_w_default too */
233 : #include "fd_bits_find_msb.h" /* Provides find_msb_w_default too */ /* Note that find_msb==floor( log2( x ) ) for non-zero x */
234 :
235 3051666 : FD_FN_CONST static inline uchar fd_uchar_bswap ( uchar x ) { return x; } /* single byte: identity */
236 364816481 : FD_FN_CONST static inline ushort fd_ushort_bswap( ushort x ) { return __builtin_bswap16( x ); }
237 12381054111 : FD_FN_CONST static inline uint fd_uint_bswap ( uint x ) { return __builtin_bswap32( x ); }
238 1388754890 : FD_FN_CONST static inline ulong fd_ulong_bswap ( ulong x ) { return __builtin_bswap64( x ); }
239 :
240 : #if FD_HAS_INT128
241 : FD_FN_CONST static inline uint128
242 387 : fd_uint128_bswap( uint128 x ) { /* bswap each 64-bit half, then swap the halves */
243 387 : ulong xl = (ulong) x;
244 387 : ulong xh = (ulong)(x>>64);
245 387 : return (((uint128)fd_ulong_bswap( xl )) << 64) | ((uint128)fd_ulong_bswap( xh ));
246 387 : }
247 : #endif
248 :
249 : /* FIXME: consider find_msb based solution (probably not as the combination
250 : of FD_FN_CONST and the use of inline asm for find_msb on some targets
251 : is probably less than ideal). */
252 :
253 : FD_FN_CONST static inline uchar
254 111 : fd_uchar_pow2_up( uchar _x ) {
255 111 : uint x = (uint)_x;
256 111 : x--; /* so an exact power of 2 is not rounded up to the next one */
257 111 : x |= (x>> 1); /* smear the msb into all lower bit positions ... */
258 111 : x |= (x>> 2);
259 111 : x |= (x>> 4); /* ... shifts of 1,2,4 cover the 8-bit domain */
260 111 : x++; /* smeared value + 1 is the next power of 2 */
261 111 : return (uchar)x;
262 111 : }
263 :
264 : FD_FN_CONST static inline uchar
265 111 : fd_uchar_pow2_dn( uchar _x ) {
266 111 : uint x = (uint)_x;
267 111 : x >>= 1; /* halve first so smear+increment lands on the power of 2 at or below _x */
268 111 : x |= (x>> 1);
269 111 : x |= (x>> 2);
270 111 : x |= (x>> 4);
271 111 : x++;
272 111 : return (uchar)x;
273 111 : }
274 :
275 : FD_FN_CONST static inline ushort
276 411 : fd_ushort_pow2_up( ushort _x ) { /* same msb-smear technique as fd_uchar_pow2_up, over 16 bits */
277 411 : uint x = (uint)_x;
278 411 : x--;
279 411 : x |= (x>> 1);
280 411 : x |= (x>> 2);
281 411 : x |= (x>> 4);
282 411 : x |= (x>> 8);
283 411 : x++;
284 411 : return (ushort)x;
285 411 : }
286 :
287 : FD_FN_CONST static inline ushort
288 411 : fd_ushort_pow2_dn( ushort _x ) { /* same technique as fd_uchar_pow2_dn, over 16 bits */
289 411 : uint x = (uint)_x;
290 411 : x >>= 1;
291 411 : x |= (x>> 1);
292 411 : x |= (x>> 2);
293 411 : x |= (x>> 4);
294 411 : x |= (x>> 8);
295 411 : x++;
296 411 : return (ushort)x;
297 411 : }
298 :
299 : FD_FN_CONST static inline uint
300 1587 : fd_uint_pow2_up( uint x ) { /* same msb-smear technique, over 32 bits */
301 1587 : x--;
302 1587 : x |= (x>> 1);
303 1587 : x |= (x>> 2);
304 1587 : x |= (x>> 4);
305 1587 : x |= (x>> 8);
306 1587 : x |= (x>>16);
307 1587 : x++;
308 1587 : return x;
309 1587 : }
310 :
311 : FD_FN_CONST static inline uint
312 1587 : fd_uint_pow2_dn( uint x ) { /* same technique, over 32 bits */
313 1587 : x >>= 1;
314 1587 : x |= (x>> 1);
315 1587 : x |= (x>> 2);
316 1587 : x |= (x>> 4);
317 1587 : x |= (x>> 8);
318 1587 : x |= (x>>16);
319 1587 : x++;
320 1587 : return x;
321 1587 : }
322 :
323 : FD_FN_CONST static inline ulong
324 107503164 : fd_ulong_pow2_up( ulong x ) { /* same msb-smear technique, over 64 bits */
325 107503164 : x--;
326 107503164 : x |= (x>> 1);
327 107503164 : x |= (x>> 2);
328 107503164 : x |= (x>> 4);
329 107503164 : x |= (x>> 8);
330 107503164 : x |= (x>>16);
331 107503164 : x |= (x>>32);
332 107503164 : x++;
333 107503164 : return x;
334 107503164 : }
335 :
336 : FD_FN_CONST static inline ulong
337 6243 : fd_ulong_pow2_dn( ulong x ) { /* same technique, over 64 bits */
338 6243 : x >>= 1;
339 6243 : x |= (x>> 1);
340 6243 : x |= (x>> 2);
341 6243 : x |= (x>> 4);
342 6243 : x |= (x>> 8);
343 6243 : x |= (x>>16);
344 6243 : x |= (x>>32);
345 6243 : x++;
346 6243 : return x;
347 6243 : }
348 :
349 : #if FD_HAS_INT128
350 : FD_FN_CONST static inline uint128
351 24771 : fd_uint128_pow2_up( uint128 x ) { /* same msb-smear technique, over 128 bits */
352 24771 : x--;
353 24771 : x |= (x>> 1);
354 24771 : x |= (x>> 2);
355 24771 : x |= (x>> 4);
356 24771 : x |= (x>> 8);
357 24771 : x |= (x>>16);
358 24771 : x |= (x>>32);
359 24771 : x |= (x>>64);
360 24771 : x++;
361 24771 : return x;
362 24771 : }
363 :
364 : FD_FN_CONST static inline uint128
365 24771 : fd_uint128_pow2_dn( uint128 x ) { /* same technique, over 128 bits */
366 24771 : x >>= 1;
367 24771 : x |= (x>> 1);
368 24771 : x |= (x>> 2);
369 24771 : x |= (x>> 4);
370 24771 : x |= (x>> 8);
371 24771 : x |= (x>>16);
372 24771 : x |= (x>>32);
373 24771 : x |= (x>>64);
374 24771 : x++;
375 24771 : return x;
376 24771 : }
377 : #endif
378 :
379 : /* Brokenness of indeterminant char sign strikes again ... sigh. We
380 : explicitly provide the unsigned variant of the token to these
381 : macros because the uchar token is not related to the schar token
382 : by simply prepending u to schar. */
383 :
384 : /* Note: the implementations of abs, right_shift and zz_enc below do not
385 : exploit the sign extending right shift behavior specified by the
386 : machine model (and thus can be used safely in more general machine
387 : models) but are slightly more expensive.
388 :
389 : FD_FN_CONST static inline UT fd_##T##_abs( T x ) { UT u = (UT)x; UT m = (UT)-(u>>(w-1)); return (UT)((u+m)^m); }
390 :
391 : FD_FN_CONST static inline T
392 : fd_##T##_shift_right( T x,
393 : int n ) {
394 : UT u = (UT)x;
395 : UT m = (UT)-(u >> (w-1));
396 : return (T)(fd_##UT##_shift_right( u ^ m, n ) ^ m);
397 : }
398 :
399 : FD_FN_CONST static inline UT fd_##T##_zz_enc( T x ) { UT u = (UT)x; return (UT)((-(u>>(w-1))) ^ (u<<1)); }
400 : */
401 :
402 : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,UT,w) /* T: signed type, UT: matching unsigned type, w: bit width */ \
403 1039398353 : FD_FN_CONST static inline T fd_##T##_if ( int c, T t, T f ) { return c ? t : f; /* cmov */ } \
404 4023217692 : /* */ static inline void fd_##T##_store_if ( int c, T * p, T v ) { T _[ 1 ]; *( c ? p : _ ) = v; /* cmov */ } \
405 1704524866 : FD_FN_CONST static inline UT fd_##T##_abs ( T x ) { UT m = (UT)(x >> (w-1)); return (UT)((((UT)x)+m)^m); } /* branchless; m is the sign smear */ \
406 473228942 : FD_FN_CONST static inline T fd_##T##_min ( T x, T y ) { return (x<=y) ? x : y; /* cmov */ } \
407 286507096 : FD_FN_CONST static inline T fd_##T##_max ( T x, T y ) { return (x>=y) ? x : y; /* cmov */ } \
408 251658240 : FD_FN_CONST static inline T fd_##T##_shift_left ( T x, int n ) { return (T)fd_##UT##_shift_left ( (UT)x, n ); } \
409 251658240 : FD_FN_CONST static inline T fd_##T##_shift_right ( T x, int n ) { return (T)(x >> ((n>(w-1)) ? (w-1) : n)); /* cmov */ } \
410 1096648128 : FD_FN_CONST static inline T fd_##T##_rotate_left ( T x, int n ) { return (T)fd_##UT##_rotate_left ( (UT)x, n ); } \
411 1096648128 : FD_FN_CONST static inline T fd_##T##_rotate_right( T x, int n ) { return (T)fd_##UT##_rotate_right( (UT)x, n ); } \
412 196620 : FD_FN_CONST static inline UT fd_##T##_zz_enc ( T x ) { return (UT)(((UT)(x>>(w-1))) ^ (((UT)x)<<1)); } \
413 149151 : FD_FN_CONST static inline T fd_##T##_zz_dec ( UT x ) { return (T)((x>>1) ^ (-(x & (UT)1))); }
414 :
415 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(schar, uchar, 8)
416 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(short, ushort, 16)
417 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(int, uint, 32)
418 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(long, ulong, 64)
419 : #if FD_HAS_INT128
420 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(int128,uint128,128)
421 : #endif
422 :
423 : #undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
424 :
425 : /* Brokenness of indeterminant char sign strikes again ... sigh. We
426 : can't provide a char_min/char_max between platforms as they don't
427 : necessarily produce the same results. Likewise, we don't provide a
428 : fd_char_abs because it will not produce equivalent results between
429 : platforms. But it is useful to have a char_if and char_store_if to
430 : help with making branchless string operation implementations. */
431 :
432 53724801 : FD_FN_CONST static inline char fd_char_if( int c, char t, char f ) { return c ? t : f; } /* cmov */
433 :
434 50331648 : static inline void fd_char_store_if( int c, char * p, char v ) { char _[ 1 ]; *( c ? p : _ ) = v; /* cmov */ }
435 :
436 : /* FIXME: ADD HASHING PAIRS FOR UCHAR AND USHORT? */
437 :
438 : /* High quality (full avalanche) high speed integer to integer hashing.
439 : fd_uint_hash has the properties that [0,2^32) hashes to a random
440 : looking permutation of [0,2^32) and hash(0)==0. Similarly for
441 : fd_ulong_hash. Based on Google's Murmur3 hash finalizer (public
442 : domain). Not cryptographically secure but passes various strict
443 : tests of randomness when used as a PRNG. */
444 :
445 : FD_FN_CONST static inline uint
446 119368026 : fd_uint_hash( uint x ) { /* Murmur3 32-bit finalizer (fmix32) */
447 119368026 : x ^= x >> 16;
448 119368026 : x *= 0x85ebca6bU;
449 119368026 : x ^= x >> 13;
450 119368026 : x *= 0xc2b2ae35U;
451 119368026 : x ^= x >> 16;
452 119368026 : return x;
453 119368026 : }
454 :
455 : FD_FN_CONST static inline ulong
456 >15905*10^7 : fd_ulong_hash( ulong x ) { /* Murmur3 64-bit finalizer (fmix64) */
457 >15905*10^7 : x ^= x >> 33;
458 >15905*10^7 : x *= 0xff51afd7ed558ccdUL;
459 >15905*10^7 : x ^= x >> 33;
460 >15905*10^7 : x *= 0xc4ceb9fe1a85ec53UL;
461 >15905*10^7 : x ^= x >> 33;
462 >15905*10^7 : return x;
463 >15905*10^7 : }
464 :
465 : /* Inverses of the above. E.g.:
466 : fd_uint_hash_inverse( fd_uint_hash( (uint)x ) )==(uint)x
467 : and:
468 : fd_uint_hash( fd_uint_hash_inverse( (uint)x ) )==(uint)x
469 : Similarly for fd_ulong_hash_inverse. These by themselves are similar
470 : quality hashes to the above and having the inverses of the above can
471 : be useful. The fact these have (nearly) identical operations /
472 : operation counts concretely demonstrates that none of these are
473 : standalone cryptographically secure. */
474 :
475 : FD_FN_CONST static inline uint
476 103809084 : fd_uint_hash_inverse( uint x ) { /* undoes fd_uint_hash: multiplicative inverses mod 2^32, inverted xorshifts */
477 103809084 : x ^= x >> 16;
478 103809084 : x *= 0x7ed1b41dU;
479 103809084 : x ^= (x >> 13) ^ (x >> 26); /* inverting x ^= x>>13 needs the second term since 13*2 < 32 */
480 103809084 : x *= 0xa5cb9243U;
481 103809084 : x ^= x >> 16;
482 103809084 : return x;
483 103809084 : }
484 :
485 : FD_FN_CONST static inline ulong
486 13089422283 : fd_ulong_hash_inverse( ulong x ) { /* undoes fd_ulong_hash: multiplicative inverses mod 2^64, inverted xorshifts */
487 13089422283 : x ^= x >> 33;
488 13089422283 : x *= 0x9cb4b2f8129337dbUL;
489 13089422283 : x ^= x >> 33;
490 13089422283 : x *= 0x4f74430c22a54005UL;
491 13089422283 : x ^= x >> 33;
492 13089422283 : return x;
493 13089422283 : }
494 :
495 : /* fd_ulong_base10_dig_cnt returns the number of digits in the base 10
496 : representation of x. FIXME: USE BETTER ALGO. */
497 :
498 : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,M) /* T: unsigned type token, M: max possible digit count for T */ \
499 : FD_FN_CONST static inline ulong \
500 237 : fd_##T##_base10_dig_cnt( T _x ) { \
501 237 : ulong x = (ulong)_x; \
502 237 : ulong cnt = 1UL; \
503 237 : ulong thresh = 10UL; \
504 1521 : do { \
505 1521 : if( FD_LIKELY( x<thresh ) ) break; \
506 1521 : cnt++; \
507 1284 : thresh *= 10UL; \
508 1284 : } while( FD_LIKELY( cnt<M ) ); \
509 237 : return cnt; \
510 237 : }
511 :
512 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uchar, 3UL) /* 255 -> 3 dig */
513 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ushort, 5UL) /* 65535 -> 5 dig */
514 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint, 10UL) /* 4294967295 -> 10 dig */
515 : FD_SRC_UTIL_BITS_FD_BITS_IMPL(ulong, 20UL) /* 18446744073709551615 -> 20 dig */
516 :
517 : #undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
518 :
519 : /* fd_float_if, fd_float_store_if, fd_float_abs are described above.
520 : Ideally, the system will implement fd_float_abs by just clearing the
521 : sign bit. fd_float_eq tests to floating point values for whether or
522 : not their bit representations are identical. Useful when IEEE
523 : handling of equality with +/-0 or nan are not desired (e.g. can test
524 : if nans have different signs or syndromes). */
525 :
526 50534199 : FD_FN_CONST static inline float fd_float_if ( int c, float t, float f ) { return c ? t : f; }
527 50331648 : /* */ static inline void fd_float_store_if( int c, float * p, float v ) { float _[ 1 ]; *( c ? p : _ ) = v; }
528 96095148 : FD_FN_CONST static inline float fd_float_abs ( float x ) { return __builtin_fabsf( x ); }
529 : FD_FN_CONST static inline int
530 : fd_float_eq( float x,
531 100663296 : float y ) {
532 100663296 : union { float f; uint u; } tx, ty; /* union type pun (avoids strict-aliasing issues) */
533 100663296 : tx.f = x;
534 100663296 : ty.f = y;
535 100663296 : return tx.u==ty.u; /* bit-exact equality, not IEEE == (distinguishes +/-0, nan syndromes) */
536 100663296 : }
537 :
538 : /* fd_double_if, fd_double_store_if, fd_double_abs and fd_double_eq are
539 : double precision versions of the above. */
540 :
541 : #if FD_HAS_DOUBLE
542 54497061 : FD_FN_CONST static inline double fd_double_if ( int c, double t, double f ) { return c ? t : f; }
543 50331648 : /* */ static inline void fd_double_store_if( int c, double * p, double v ) { double _[ 1 ]; *( c ? p : _ ) = v; }
544 96161058 : FD_FN_CONST static inline double fd_double_abs ( double x ) { return __builtin_fabs( x ); }
545 : FD_FN_CONST static inline int
546 : fd_double_eq( double x,
547 100663296 : double y ) {
548 100663296 : union { double f; ulong u; } tx, ty; /* union type pun (avoids strict-aliasing issues) */
549 100663296 : tx.f = x;
550 100663296 : ty.f = y;
551 100663296 : return tx.u==ty.u; /* bit-exact equality, not IEEE == */
552 100663296 : }
553 : #endif
554 :
555 : /* fd_swap swaps the values in a and b. Assumes a and b have the same
556 : plain-old-data type. Best if a and b are primitive types (e.g.
557 : ulong). This macro is robust (it evaluates a and b the minimal
558 : number of times). Note that compilers are pretty good about identify
559 : swap operations and replacing them with more optimal assembly for
560 : types and architectures where alternative implementations (like xor
561 : tricks) might be faster. */
562 :
563 654311424 : #define fd_swap(a,b) do { __typeof__((a)) _fd_swap_tmp = (a); (a) = (b); (b) = _fd_swap_tmp; } while(0)
564 :
565 : /* fd_swap_if swaps the values in a and b if c is non-zero and leaves
566 : them unchanged otherwise. Assumes a and b have the same
567 : plain-old-data type. Best if a and b are primitive types (e.g.
568 : ulong) as the compiler will likely replace the trinaries below with
569 : cmovs, making this branchless. This macro is robust (it evaluates a,
570 : b and c the minimal number of times). */
571 :
572 661873917 : #define fd_swap_if(c,a,b) do { /* branchless conditional swap */ \
573 661873917 : int _fd_swap_if_c = (c); \
574 661873917 : __typeof__((a)) _fd_swap_if_a = (a); \
575 661873917 : __typeof__((b)) _fd_swap_if_b = (b); \
576 661873917 : (a) = _fd_swap_if_c ? _fd_swap_if_b : _fd_swap_if_a; /* cmov */ \
577 661873917 : (b) = _fd_swap_if_c ? _fd_swap_if_a : _fd_swap_if_b; /* cmov */ \
578 661873917 : } while(0)
579 :
580 : /* fd_ptr_if is a generic version of the above for pointers. This macro
581 : is robust.
582 :
583 : IMPORTANT SAFETY TIP! The output type is the type of t. Thus, if
584 : the condition is false and t and f have different pointer types
585 : (which is inherently gross and we might want to consider outright
586 : rejecting in the future via, unfortunately non-standard, compiler
587 : builtins), this would implicitly cast the pointer f to the type of t.
588 : As such, strict aliasing rules in the language also imply mixed usage
589 : cases need to be wrapped in a fd_type_pun. In short, mixing pointer
590 : types between t and f is strongly discouraged. */
591 :
592 : #if FD_HAS_UBSAN
593 : #define fd_ptr_if(c,t,f) ((__typeof__((t)))( (c) ? (ulong)(t) : (ulong)(f) )) /* NOTE(review): plain trinary variant, presumably to keep ubsan happy — confirm */
594 : #else
595 6096428981 : #define fd_ptr_if(c,t,f) ((__typeof__((t)))fd_ulong_if( (c), (ulong)(t), (ulong)(f) )) /* cmov via fd_ulong_if, cast back to t's type */
596 : #endif
597 :
598 : /* FD_ULONG_{MASK_LSB,MASK_MSB,ALIGN_UP} are the same as
599 : fd_ulong_{mask_lsb,mask_msb,align_up} but can be used at compile
600 : time. The tradeoff is n/a must be safe against multiple evaluation
601 : at compile time. x should be ulong compatible and n/a should be int
602 : compatible. */
603 :
604 12116310 : #define FD_ULONG_MASK_LSB( n ) ((((ulong)((n)<=63)) << ((n) & 63)) - 1UL) /* n==64 yields all ones (the (n)<=63 term makes 0UL-1UL) */
605 : #define FD_ULONG_MASK_MSB( n ) (~FD_ULONG_MASK_LSB(64-(n)))
606 19654206 : #define FD_ULONG_ALIGN_UP( x, a ) (((x)+((a)-1UL)) & (~((a)-1UL))) /* a must be a positive power of 2 (matches fd_ulong_align_up) */
607 :
608 : /* Unaligned access annotations.
609 :
610 : FD_LOAD( T, src ) is equivalent to:
611 : return (*(T const *)(src))
612 : but src can have arbitrary alignment.
613 :
614 : FD_STORE( T, dst, val ) is equivalent to:
615 : T * ptr = (T *)(dst);
616 : *ptr = (val);
617 : return ptr
618 : but dst can have arbitrary alignment.
619 :
620 : Note: Ideally, we would infer the type T in FD_LOAD from src (e.g.
621 : use typeof(*(src)). But there are some nasty linguistic and
622 : optimizer interactions when src is a constant pointer in a truly
623 : generic implementation. Similarly for FD_STORE.
624 :
625 : fd_T_load_n( src ) where T is in [uchar,ushort,uint,ulong] loads n
626 : bytes into the least significant n bytes of a T, zeros any remaining
627 : bytes and returns the result. fd_T_load_n_fast is the same but
628 : assumes it is safe to tail read a couple of bytes past the end of src
629 : if such is beneficial for higher performance.
630 :
631 : Accesses that would normally be atomic (e.g. an aligned access to a
632 : primitive type like a ulong) are not guaranteed to be atomic if done
633 : through these annotations. */
634 :
/* FD_UNALIGNED_ACCESS_STYLE selects the implementation of FD_LOAD /
   FD_STORE / fd_T_load_n below: 0 is memcpy-elision based (portable),
   1 is direct dereference.  Both branches currently pick 0. */
#ifndef FD_UNALIGNED_ACCESS_STYLE
#if FD_HAS_X86
#define FD_UNALIGNED_ACCESS_STYLE 0 /* 1 is broken ... */
#else
#define FD_UNALIGNED_ACCESS_STYLE 0
#endif
#endif
642 :
643 : #if FD_UNALIGNED_ACCESS_STYLE==0 /* memcpy elision based */
644 :
645 : /* This implementation does not assume it is safe to access unaligned
646 : memory directly (and thus can be used on platforms outside the
647 : development environment's machine model) but it does still assume
648 : little endian byte ordering.
649 :
650 : It is based on memcpy and, in principle, the compiler should elide
651 : the memcpy and replace this with optimized asm on platforms where
652 : this is safe (which is virtually all commercially viable platforms as
653 : packet processing deal heavily with unaligned accesses and virtually
654 : all platforms are near universally networked and networking needs to
655 : do packet processing efficiently). But this fails often enough in
656 : practice that this should not be relied upon, especially if
657 : performance is important as performance is glacial when the compiler
658 : mucks up. (fd_memcpy is an especially bad idea here because the
659 : risk of the compiler mucking it up is much greater.)
660 :
661 : It is also more than little bizarre that this is an encouraged
662 : practice nowadays. That is, practically, we are using a low level
663 : language (C/C++) that have language constructs (dereference a
664 : pointer to a primitive type) that map directly onto low level
665 : hardware operations (asm load operation) that are actually supported
666 : by the target hardware here (fine if pointer is not aligned to
667 : width of the type).
668 :
669 : But instead of encouraging developers to write short, readable,
670 : library-independent code that generates fast and ultra compact asm,
671 : they are encouraged to write long, obtuse, library-dependent code
672 : that naively would generate slow bloated asm in hopes the compiler
673 : will realize can be turned into the simple implementation and turn it
674 : back into the developer's original intent and then generate good asm.
675 : Hmmm. */
676 :
/* The local temporary plus memcpy makes the possibly-unaligned access
   well defined; the compiler is expected (but not guaranteed, per the
   note above) to elide the memcpy.  FD_STORE evaluates to a T* to dst. */

#define FD_LOAD( T, src ) \
  (__extension__({ T _fd_load_tmp; memcpy( &_fd_load_tmp, (T const *)(src), sizeof(T) ); _fd_load_tmp; }))

#define FD_STORE( T, dst, val ) \
  (__extension__({ T _fd_store_tmp = (val); (T *)memcpy( (T *)(dst), &_fd_store_tmp, sizeof(T) ); }))
682 :
683 6144 : FD_FN_PURE static inline uchar fd_uchar_load_1 ( void const * p ) { return *(uchar const *)p; }
684 :
685 6141 : FD_FN_PURE static inline ushort fd_ushort_load_1 ( void const * p ) { return (ushort)*(uchar const *)p; }
686 6141 : FD_FN_PURE static inline ushort fd_ushort_load_2 ( void const * p ) { ushort t; memcpy( &t, p, 2UL ); return t; }
687 :
688 6135 : FD_FN_PURE static inline uint fd_uint_load_1 ( void const * p ) { return (uint )*(uchar const *)p; }
689 6135 : FD_FN_PURE static inline uint fd_uint_load_2 ( void const * p ) { ushort t; memcpy( &t, p, 2UL ); return (uint )t; }
690 30207 : FD_FN_PURE static inline uint fd_uint_load_3 ( void const * p ) { uint t = 0UL; memcpy( &t, p, 3UL ); return (uint )t; }
691 236817204 : FD_FN_PURE static inline uint fd_uint_load_4 ( void const * p ) { uint t; memcpy( &t, p, 4UL ); return t; }
692 :
693 2790120837 : FD_FN_PURE static inline ulong fd_ulong_load_1 ( void const * p ) { return (ulong )*(uchar const *)p; }
694 1383809949 : FD_FN_PURE static inline ulong fd_ulong_load_2 ( void const * p ) { ushort t; memcpy( &t, p, 2UL ); return (ulong)t; }
695 6123 : FD_FN_PURE static inline ulong fd_ulong_load_3 ( void const * p ) { uint t = 0UL; memcpy( &t, p, 3UL ); return (ulong)t; }
696 66180738 : FD_FN_PURE static inline ulong fd_ulong_load_4 ( void const * p ) { uint t; memcpy( &t, p, 4UL ); return (ulong)t; }
697 6123 : FD_FN_PURE static inline ulong fd_ulong_load_5 ( void const * p ) { ulong t = 0UL; memcpy( &t, p, 5UL ); return t; }
698 6141 : FD_FN_PURE static inline ulong fd_ulong_load_6 ( void const * p ) { ulong t = 0UL; memcpy( &t, p, 6UL ); return t; }
699 6123 : FD_FN_PURE static inline ulong fd_ulong_load_7 ( void const * p ) { ulong t = 0UL; memcpy( &t, p, 7UL ); return t; }
700 2182432337 : FD_FN_PURE static inline ulong fd_ulong_load_8 ( void const * p ) { ulong t; memcpy( &t, p, 8UL ); return t; }
701 :
/* _fast variants: full-width loads already have no cheaper form, so
   they alias the plain loaders.  The 3/5/6/7 byte variants read a full
   4B / 8B word and mask, tail reading past the end of the region as
   annotated per line (caller must guarantee that is safe). */

#define fd_uchar_load_1_fast fd_uchar_load_1

#define fd_ushort_load_1_fast fd_ushort_load_1
#define fd_ushort_load_2_fast fd_ushort_load_2

#define fd_uint_load_1_fast fd_uint_load_1
#define fd_uint_load_2_fast fd_uint_load_2
FD_FN_PURE static inline uint fd_uint_load_3_fast ( void const * p ) { uint t; memcpy( &t, p, 4UL ); return ((uint )t) & 0x00ffffffU; } /* Tail read 1B */
#define fd_uint_load_4_fast fd_uint_load_4

#define fd_ulong_load_1_fast fd_ulong_load_1
#define fd_ulong_load_2_fast fd_ulong_load_2
FD_FN_PURE static inline ulong fd_ulong_load_3_fast ( void const * p ) { uint t; memcpy( &t, p, 4UL ); return ((ulong)t) & 0x0000000000ffffffUL; } /* Tail read 1B */
#define fd_ulong_load_4_fast fd_ulong_load_4
FD_FN_PURE static inline ulong fd_ulong_load_5_fast ( void const * p ) { ulong t; memcpy( &t, p, 8UL ); return t & 0x000000ffffffffffUL; } /* Tail read 3B */
FD_FN_PURE static inline ulong fd_ulong_load_6_fast ( void const * p ) { ulong t; memcpy( &t, p, 8UL ); return t & 0x0000ffffffffffffUL; } /* Tail read 2B */
FD_FN_PURE static inline ulong fd_ulong_load_7_fast ( void const * p ) { ulong t; memcpy( &t, p, 8UL ); return t & 0x00ffffffffffffffUL; } /* Tail read 1B */
#define fd_ulong_load_8_fast fd_ulong_load_8
720 :
721 : #elif FD_UNALIGNED_ACCESS_STYLE==1 /* direct access */
722 :
723 : #define FD_LOAD( T, src ) (__extension__({ \
724 : T const * _fd_store_tmp = (T const *)(src); \
725 : FD_COMPILER_FORGET( _fd_store_tmp ); \
726 : *_fd_store_tmp; \
727 : }))
728 :
729 : #define FD_STORE( T, dst, val ) (__extension__({ \
730 : T * _fd_store_tmp = (T *)fd_type_pun( (void *)(dst) ); \
731 : *_fd_store_tmp = (val); \
732 : FD_COMPILER_MFENCE(); \
733 : _fd_store_tmp; \
734 : }))
735 :
/* Direct unaligned loads (style 1, currently disabled).  Each loader
   dereferences p at the widest convenient type; FD_COMPILER_FORGET( p )
   presumably blocks alignment-based optimizations on p -- confirm
   against FD_COMPILER_FORGET's definition.  Odd widths are composed
   from two smaller loads; _fast variants tail read as annotated. */

FD_FN_PURE static inline uchar  fd_uchar_load_1 ( void const * p ) { FD_COMPILER_FORGET( p ) ; return (        *(uchar  const *)p); }

FD_FN_PURE static inline ushort fd_ushort_load_1 ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ushort)*(uchar  const *)p); }
FD_FN_PURE static inline ushort fd_ushort_load_2 ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ushort const *)p); }

FD_FN_PURE static inline uint   fd_uint_load_1   ( void const * p ) { FD_COMPILER_FORGET( p ); return ((uint  )*(uchar  const *)p); }
FD_FN_PURE static inline uint   fd_uint_load_2   ( void const * p ) { FD_COMPILER_FORGET( p ); return ((uint  )*(ushort const *)p); }
FD_FN_PURE static inline uint   fd_uint_load_3   ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_uint_load_2 (p) | (fd_uint_load_1 (((uchar const *)p)+2UL)<<16); }
FD_FN_PURE static inline uint   fd_uint_load_4   ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(uint   const *)p); }

FD_FN_PURE static inline ulong  fd_ulong_load_1  ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(uchar  const *)p); }
FD_FN_PURE static inline ulong  fd_ulong_load_2  ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(ushort const *)p); }
FD_FN_PURE static inline ulong  fd_ulong_load_3  ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_2(p) | (fd_ulong_load_1(((uchar const *)p)+2UL)<<16); }
FD_FN_PURE static inline ulong  fd_ulong_load_4  ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(uint   const *)p); }
FD_FN_PURE static inline ulong  fd_ulong_load_5  ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_4(p) | (fd_ulong_load_1(((uchar const *)p)+4UL)<<32); }
FD_FN_PURE static inline ulong  fd_ulong_load_6  ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_4(p) | (fd_ulong_load_2(((uchar const *)p)+4UL)<<32); }
FD_FN_PURE static inline ulong  fd_ulong_load_7  ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_6(p) | (fd_ulong_load_1(((uchar const *)p)+6UL)<<48); }
FD_FN_PURE static inline ulong  fd_ulong_load_8  ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong  const *)p); }

#define fd_uchar_load_1_fast fd_uchar_load_1

#define fd_ushort_load_1_fast fd_ushort_load_1
#define fd_ushort_load_2_fast fd_ushort_load_2

#define fd_uint_load_1_fast fd_uint_load_1
#define fd_uint_load_2_fast fd_uint_load_2
FD_FN_PURE static inline uint   fd_uint_load_3_fast  ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(uint   const *)p) & 0x00ffffffU;          } /* Tail read 1B */
#define fd_uint_load_4_fast fd_uint_load_4

#define fd_ulong_load_1_fast fd_ulong_load_1
#define fd_ulong_load_2_fast fd_ulong_load_2
FD_FN_PURE static inline ulong  fd_ulong_load_3_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong)*(uint   const *)p) & 0x0000000000ffffffUL; } /* Tail read 1B */
#define fd_ulong_load_4_fast fd_ulong_load_4
FD_FN_PURE static inline ulong  fd_ulong_load_5_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong  const *)p) & 0x000000ffffffffffUL; } /* Tail read 3B */
FD_FN_PURE static inline ulong  fd_ulong_load_6_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong  const *)p) & 0x0000ffffffffffffUL; } /* Tail read 2B */
FD_FN_PURE static inline ulong  fd_ulong_load_7_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong  const *)p) & 0x00ffffffffffffffUL; } /* Tail read 1B */
#define fd_ulong_load_8_fast fd_ulong_load_8
773 :
774 : #else
775 : #error "Unsupported FD_UNALIGNED_ACCESS_STYLE"
776 : #endif
777 :
778 : /* fd_ulong_svw_enc_sz returns the number of bytes needed to encode
779 : x as a symmetric variable width encoded integer. This is at most
780 : FD_ULONG_SVW_ENC_MAX (9). Result will be in {1,2,3,4,5,8,9}. */
781 :
#define FD_ULONG_SVW_ENC_MAX (9UL) /* For compile time use */

/* Branch ladder ordered so smaller values (the FD_LIKELY-hinted common
   case) exit earliest.  Thresholds are the payload widths of the
   encodings in fd_ulong_svw_enc below: 6, 10, 18, 24, 32, 56 and 64
   bit payloads (there are no 6 or 7 byte encodings, hence the result
   set {1,2,3,4,5,8,9}). */
FD_FN_UNUSED FD_FN_CONST static ulong /* Work around -Winline */
fd_ulong_svw_enc_sz( ulong x ) {
  /* FIXME: CONSIDER FIND_MSB BASED TABLE LOOKUP? */
  if( FD_LIKELY( x<(1UL<< 6) ) ) return 1UL;
  if( FD_LIKELY( x<(1UL<<10) ) ) return 2UL;
  if( FD_LIKELY( x<(1UL<<18) ) ) return 3UL;
  if( FD_LIKELY( x<(1UL<<24) ) ) return 4UL;
  if( FD_LIKELY( x<(1UL<<32) ) ) return 5UL;
  if( FD_LIKELY( x<(1UL<<56) ) ) return 8UL;
  return 9UL;
}
795 :
/* fd_ulong_svw_enc appends x to the byte stream b as a symmetric
   variable width encoded integer.  b should have room for
   fd_ulong_svw_enc_sz(x) bytes (note that 9 is sufficient for all
   possible x).  Returns the next location in the byte stream. */
800 :
FD_FN_UNUSED static uchar * /* Work around -Winline */
fd_ulong_svw_enc( uchar * b,
                  ulong   x ) {
  /* Each case writes a size prefix into the low bits of the first byte
     and a mirrored suffix into the high bits of the last byte (layouts
     at right); the mirroring is what lets fd_ulong_svw_dec_sz /
     fd_ulong_svw_dec_tail_sz recover the size from either end. */
  if( FD_LIKELY( x<(1UL<< 6) ) ) { b[0] = (uchar) (x<< 1); b+=1; } /* 0 | x( 6) | 0 */
  else if( FD_LIKELY( x<(1UL<<10) ) ) { FD_STORE( ushort, b, (ushort)( 0x8001UL | (x<<3)) ); b+=2; } /* 100 | x(10) | 001 */
  else if( FD_LIKELY( x<(1UL<<18) ) ) { FD_STORE( ushort, b, (ushort)( 0x5UL | (x<<3)) ); b[2] = (uchar)(0xa0UL | (x>>13)); b+=3; } /* 101 | x(18) | 101 */
  else if( FD_LIKELY( x<(1UL<<24) ) ) { FD_STORE( uint, b, (uint )( 0xc0000003UL | (x<<4)) ); b+=4; } /* 1100 | x(24) | 0011 */
  else if( FD_LIKELY( x<(1UL<<32) ) ) { FD_STORE( uint, b, (uint )( 0xbUL | (x<<4)) ); b[4] = (uchar)(0xd0UL | (x>>28)); b+=5; } /* 1101 | x(32) | 1011 */
  else if( FD_LIKELY( x<(1UL<<56) ) ) { FD_STORE( ulong, b, 0xe000000000000007UL | (x<<4) ); b+=8; } /* 1110 | x(56) | 0111 */
  else { FD_STORE( ulong, b, 0xfUL | (x<<4) ); b[8] = (uchar)(0xf0UL | (x>>60)); b+=9; } /* 1111 | x(64) | 1111 */
  return b;
}
813 :
/* fd_ulong_svw_enc_fixed appends x to the byte stream b as a symmetric
   csz width encoded integer.  csz is assumed to be in {1,2,3,4,5,8,9}.
   b should have room for csz bytes and x should be known apriori to be
   compatible with csz.  Useful for updating in place an existing
   encoded integer to a value that is <= the current value.  Returns
   b+csz. */
820 :
FD_FN_UNUSED static uchar * /* Work around -Winline */
fd_ulong_svw_enc_fixed( uchar * b,
                        ulong   csz,
                        ulong   x ) {
  /* Same mirrored prefix/suffix layouts as fd_ulong_svw_enc, but the
     width is dictated by csz rather than derived from x (caller
     promises x fits in the csz encoding). */
  if( FD_LIKELY( csz==1UL ) ) { b[0] = (uchar) (x<< 1); } /* 0 | x( 6) | 0 */
  else if( FD_LIKELY( csz==2UL ) ) { FD_STORE( ushort, b, (ushort)( 0x8001UL | (x<<3)) ); } /* 100 | x(10) | 001 */
  else if( FD_LIKELY( csz==3UL ) ) { FD_STORE( ushort, b, (ushort)( 0x5UL | (x<<3)) ); b[2] = (uchar)(0xa0UL | (x>>13)); } /* 101 | x(18) | 101 */
  else if( FD_LIKELY( csz==4UL ) ) { FD_STORE( uint, b, (uint )( 0xc0000003UL | (x<<4)) ); } /* 1100 | x(24) | 0011 */
  else if( FD_LIKELY( csz==5UL ) ) { FD_STORE( uint, b, (uint )( 0xbUL | (x<<4)) ); b[4] = (uchar)(0xd0UL | (x>>28)); } /* 1101 | x(32) | 1011 */
  else if( FD_LIKELY( csz==8UL ) ) { FD_STORE( ulong, b, 0xe000000000000007UL | (x<<4) ); } /* 1110 | x(56) | 0111 */
  else /* csz==9UL */ { FD_STORE( ulong, b, 0xfUL | (x<<4) ); b[8] = (uchar)(0xf0UL | (x>>60)); } /* 1111 | x(64) | 1111 */
  return b+csz;
}
834 :
835 : /* fd_ulong_svw_dec_sz returns the number of bytes representing an svw
836 : encoded integer. b points to the first byte of the encoded integer.
837 : Result will be in {1,2,3,4,5,8,9}. */
838 :
FD_FN_PURE static inline ulong
fd_ulong_svw_dec_sz( uchar const * b ) {

  /* LSB: Compressed size
     xxxx|xxx0 -> 1B
     xxxx|x001 -> 2B
     xxxx|x101 -> 3B
     xxxx|0011 -> 4B
     xxxx|1011 -> 5B
     xxxx|0111 -> 8B
     xxxx|1111 -> 9B

       15   14   13   12   11   10    9    8    7    6    5    4    3    2    1    0
     1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
        9    1    3    1    5    1    2    1    8    1    3    1    4    1    2    1 */

  /* The low nibble of the first byte indexes a 16-entry, 4-bit-wide
     lookup table packed into the ulong constant (table shown above). */
  return (0x9131512181314121UL >> ((((ulong)b[0]) & 15UL) << 2)) & 15UL;
}
857 :
858 : /* fd_ulong_svw_dec_tail_sz returns the number of bytes representing an
859 : svw encoded integer. b points to one after the last byte of the
860 : encoded integer. Result will be in {1,2,3,4,5,8,9}. */
861 :
FD_FN_PURE static inline ulong
fd_ulong_svw_dec_tail_sz( uchar const * b ) {

  /* MSB: Compressed size
     0xxx|xxxx -> 1B
     100x|xxxx -> 2B
     101x|xxxx -> 3B
     1100|xxxx -> 4B
     1101|xxxx -> 5B
     1110|xxxx -> 8B
     1111|xxxx -> 9B

       15   14   13   12   11   10    9    8    7    6    5    4    3    2    1    0
     1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
        9    8    5    4    3    3    2    2    1    1    1    1    1    1    1    1 */

  /* Mirror of fd_ulong_svw_dec_sz: the high nibble of the byte just
     before b indexes a packed 16-entry 4-bit lookup table. */
  return (0x9854332211111111UL >> ((((ulong)b[-1]) >> 4) << 2)) & 15UL;
}
880 :
881 : /* fd_ulong_svw_dec_fixed decodes a ulong encoded as a symmetric
882 : variable width encoded integer whose width is known. b points to the
883 : first byte of the encoded integer and the encoded integer is csz
884 : byte. csz is assumed to be in {1,2,3,4,5,8,9}. */
885 :
FD_FN_UNUSED static ulong /* Work around -Winline */
fd_ulong_svw_dec_fixed( uchar const * b,
                        ulong         csz ) {
  /* For each width: shift off the size-prefix bits in the first byte,
     then either mask off the size-suffix bits (2/4/8B forms) or OR in
     the payload bits held in the final byte (3/5/9B forms).  Layouts
     are documented in fd_ulong_svw_enc. */
  if( FD_LIKELY( csz==1UL ) ) return (fd_ulong_load_1( b ) >> 1);
  if( FD_LIKELY( csz==2UL ) ) return (fd_ulong_load_2( b ) >> 3) & 1023UL;
  if( FD_LIKELY( csz==3UL ) ) return (fd_ulong_load_2( b ) >> 3) | ((((ulong)b[2]) & 0x1fUL) << 13);
  if( FD_LIKELY( csz==4UL ) ) return (fd_ulong_load_4( b ) >> 4) & 16777215UL;
  if( FD_LIKELY( csz==5UL ) ) return (fd_ulong_load_4( b ) >> 4) | ((((ulong)b[4]) & 0x0fUL) << 28);
  if( FD_LIKELY( csz==8UL ) ) return (fd_ulong_load_8( b ) >> 4) & 72057594037927935UL;
  return /*csz==9UL*/         (fd_ulong_load_8( b ) >> 4) | (  ((ulong)b[8])          << 60);
}
897 :
/* fd_ulong_svw_dec decodes a ulong encoded as a symmetric variable
   width encoded integer.  b points to the first byte of the encoded
   integer.  Returns a pointer to the first byte after the symvarint and
   *_x will hold the uncompressed value on return.  If the byte stream
   might be corrupt, it should be safe to read up to 9 bytes starting
   at b. */
904 :
905 : static inline uchar const *
906 : fd_ulong_svw_dec( uchar const * b,
907 101036253 : ulong * _x ) {
908 101036253 : ulong csz = fd_ulong_svw_dec_sz( b );
909 101036253 : *_x = fd_ulong_svw_dec_fixed( b, csz ); b += csz;
910 101036253 : return b;
911 101036253 : }
912 :
/* fd_ulong_svw_dec_tail decodes a ulong encoded as a symmetric variable
   width encoded integer.  b points to the first byte after the encoded
   integer.  Returns a pointer to the first byte of the encoded integer
   and *_x will hold the uncompressed value on return.  If the byte
   stream might be corrupt, it should be safe to read up to 9 bytes
   immediately before b. */
919 :
920 : static inline uchar const *
921 : fd_ulong_svw_dec_tail( uchar const * b,
922 100663296 : ulong * _x ) {
923 100663296 : ulong csz = fd_ulong_svw_dec_tail_sz( b );
924 100663296 : b -= csz; *_x = fd_ulong_svw_dec_fixed( b, csz );
925 100663296 : return b;
926 100663296 : }
927 :
928 : /* FD_LAYOUT_{INIT,APPEND,FINI} are useful for compile time
929 : determination of the required footprint of shared memory regions with
930 : dynamic sizes and complex alignment restrictions.
931 :
932 : FD_LAYOUT_INIT starts a layout. Returns a handle to the layout.
933 :
934 : FD_LAYOUT_APPEND appends a s byte region of alignment a to a layout
935 : where l is an in progress layout.
936 :
937 : FD_LAYOUT_FINI returns the final layout footprint. a is the
938 : alignment to be used for the overall layout. It should be the
939 : alignment of all appends. The final footprint will be a multiple of
940 : a.
941 :
942 : All arguments should be ulong compatible. All alignment should be a
943 : positive integer power of 2 and safe against multiple evaluation.
944 :
   The caller further promises the layout is not unreasonably large
   such that overflow might be an issue (i.e. the final size will be at
   most fd_ulong_align_dn(ULONG_MAX,a) where a is the alignment used
   for FINI).
948 :
949 : Example usage:
950 :
951 : FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT,
952 : align0, size0 ),
953 : align1, size1 ),
954 : page_sz )
955 :
956 : would return the number of pages as a page_sz multiple for a shared
957 : memory region that starts with a initial/final region of size0/size1
958 : bytes and alignment align0/align1. Correct operation requires
959 : page_sz>=max(align0,align1), page_sz, align0 and align1 be positive
960 : integer powers of 2, page_sz, size0, align0 and align1 should be
961 : ulong compatible, page_sz, align0 and align1 be safe against multiple
962 : evaluation, and the final size be at most ULONG_MAX-page_sz+1. */
963 :
#define FD_LAYOUT_INIT (0UL)                                      /* empty layout */
#define FD_LAYOUT_APPEND( l, a, s ) (FD_ULONG_ALIGN_UP( (l), (a) ) + (s)) /* reserve s bytes at alignment a */
#define FD_LAYOUT_FINI( l, a ) FD_ULONG_ALIGN_UP( (l), (a) )      /* pad footprint to a multiple of a */
967 :
968 : /* FD_SCRATCH_ALLOC_{INIT,APPEND,FINI} are utility macros for allocating
969 : sub-regions of memory out of one larger region of memory. These are
970 : intentionally parallel to FD_LAYOUT, but for use at runtime, not
971 : compile time, and when you want to know the actual addresses, and not
972 : just the total footprint.
973 :
974 : FD_SCRATCH_ALLOC_INIT begins a scratch allocation operation called
975 : layout with starting base address of base.
976 :
977 : FD_SCRATCH_ALLOC_APPEND returns the next address in the allocation
978 : operation `layout` with the required alignment and advances the
979 : allocation operation by the provided size.
980 :
981 : FD_SCRATCH_ALLOC_FINI finalizes a scratch allocation operation with
982 : the name given by `layout` and returns the next address with the
983 : requested alignment.
984 :
985 : align must be a power of 2, and layout should be an identifier name.
986 : The macros are robust otherwise.
987 :
988 : Example usage:
989 : FD_SCRATCH_ALLOC_INIT( foo, scratch_base );
990 : int * arr1 = FD_SCRATCH_ALLOC_APPEND( foo, alignof(int), 100*sizeof(int) );
991 : ulong * arr2 = FD_SCRATCH_ALLOC_APPEND( foo, alignof(ulong), 25*sizeof(ulong) );
992 : FD_SCRATCH_ALLOC_FINI( foo, 32UL );
993 : */
/* FD_SCRATCH_ALLOC_APPEND captures align and sz into locals and then
   only uses the locals, so both arguments are evaluated exactly once
   (the documented contract above says "the macros are robust"
   otherwise; the previous revision re-evaluated align inside the
   align_up call).  Logs an error and aborts via FD_LOG_ERR if the
   allocation would wrap the address space. */
#define FD_SCRATCH_ALLOC_INIT( layout, base ) ulong _##layout = (ulong)(base)
#define FD_SCRATCH_ALLOC_APPEND( layout, align, sz ) (__extension__({                                \
    ulong _align = (align);                                                                          \
    ulong _sz    = (sz);                                                                             \
    ulong _scratch_alloc = fd_ulong_align_up( _##layout, _align );                                   \
    if( FD_UNLIKELY( _scratch_alloc+_sz<_scratch_alloc ) )                                           \
      FD_LOG_ERR(( "FD_SCRATCH_ALLOC_APPEND( %s, %lu, %lu ) overflowed", #layout, _align, _sz ));    \
    _##layout = _scratch_alloc + _sz;                                                                \
    (void *)_scratch_alloc;                                                                          \
  }))
#define FD_SCRATCH_ALLOC_FINI( layout, align ) (_##layout = FD_ULONG_ALIGN_UP( _##layout, (align) ) )
1005 :
/* FD_SCRATCH_ALLOC_PUBLISH finalizes the scratch allocation operation
   `layout` (with no extra alignment) and, if
   fd_scratch_publish_is_safe approves the resulting end pointer,
   publishes it via fd_scratch_publish (presumably committing the
   region to the thread's scratch frame -- confirm against
   fd_scratch's documentation).  Evaluates to 1 if published, 0
   otherwise. */
#define FD_SCRATCH_ALLOC_PUBLISH( layout ) (__extension__({  \
    void * end = (void *)FD_SCRATCH_ALLOC_FINI( layout, 1UL ); \
    int ok = fd_scratch_publish_is_safe( end );              \
    if( ok ) fd_scratch_publish( end );                      \
    ok;                                                      \
  }))
1012 :
1013 : FD_PROTOTYPES_END
1014 :
1015 : #endif /* HEADER_fd_src_util_bits_fd_bits_h */
1016 :
|