Line data Source code
1 : #ifndef HEADER_fd_src_util_bits_fd_bits_h
2 : #define HEADER_fd_src_util_bits_fd_bits_h
3 :
4 : /* Bit manipulation APIs */
5 :
6 : #include "../sanitize/fd_msan.h"
7 :
8 : FD_PROTOTYPES_BEGIN
9 :
10 : /* fd_ulong_is_pow2 ( x ) returns 1 if x is a positive integral power of 2 and 0 otherwise.
11 : fd_ulong_pow2 ( n ) returns 2^n mod 2^64. U.B. if n is negative.
12 :
13 : fd_ulong_mask_bit ( b ) returns the ulong with bit b set and all other bits 0. U.B. if b is not in [0,64).
14 : fd_ulong_clear_bit ( x, b ) returns x with bit b cleared. U.B. if b is not in [0,64).
15 : fd_ulong_set_bit ( x, b ) returns x with bit b set. U.B. if b is not in [0,64).
fd_ulong_flip_bit ( x, b ) returns x with bit b flipped. U.B. if b is not in [0,64).
16 : fd_ulong_extract_bit( x, b ) returns bit b of x as an int in {0,1}. U.B. if b is not in [0,64).
17 : fd_ulong_insert_bit ( x, b, y ) returns x with bit b set to y. U.B. if b is not in [0,64) and/or y is not in {0,1}.
18 :
19 : fd_ulong_mask_lsb ( n ) returns the ulong bits [0,n) set and all other bits 0. U.B. if n is not in [0,64].
20 : fd_ulong_clear_lsb ( x, n ) returns x with bits [0,n) cleared. U.B. if n is not in [0,64].
21 : fd_ulong_set_lsb ( x, n ) returns x with bits [0,n) set. U.B. if n is not in [0,64].
22 : fd_ulong_flip_lsb ( x, n ) returns x with bits [0,n) flipped. U.B. if n is not in [0,64].
23 : fd_ulong_extract_lsb( x, n ) returns bits [0,n) of x. U.B. if n is not in [0,64].
24 : fd_ulong_insert_lsb ( x, n, y ) returns x with bits [0,n) set to y. U.B. if n is not in [0,64] and/or y is not in [0,2^n).
25 :
26 : fd_ulong_mask ( l, h ) returns the ulong bits [l,h] set and all other bits 0. U.B. if not 0<=l<=h<64.
27 : fd_ulong_clear ( x, l, h ) returns x with bits [l,h] cleared. U.B. if not 0<=l<=h<64.
28 : fd_ulong_set ( x, l, h ) returns x with bits [l,h] set. U.B. if not 0<=l<=h<64.
29 : fd_ulong_flip ( x, l, h ) returns x with bits [l,h] flipped. U.B. if not 0<=l<=h<64.
30 : fd_ulong_extract ( x, l, h ) returns bits [l,h] of x. U.B. if not 0<=l<=h<64.
31 : fd_ulong_insert ( x, l, h, y ) returns x with bits [l,h] set to y.
32 : U.B. if not 0<=l<=h<64 and/or y is not in [0,2^(h-l+1)).
33 :
34 : fd_ulong_lsb ( x ) returns 2^i where i is the index of x's least significant set bit (x = 0 returns 0)
35 : fd_ulong_pop_lsb ( x ) returns x with the least significant set bit cleared (0 returns 0).
36 :
37 : FIXME: CONSIDER HAVING (A,X) INSTEAD OF (X,A)?
38 : fd_ulong_is_aligned ( x, a ) returns 1 if x is an integral multiple of a and 0 otherwise. U.B. if !fd_ulong_is_pow2( a )
39 : fd_ulong_alignment ( x, a ) returns x mod a. U.B. if !fd_ulong_is_pow2( a )
40 : fd_ulong_align_dn ( x, a ) returns x rounded down to the closest multiple of a <= x. U.B. if !fd_ulong_is_pow2( a )
41 : fd_ulong_align_up ( x, a ) returns x rounded up to the closest multiple of a >= x mod 2^64.
42 : U.B. if !fd_ulong_is_pow2( a )
43 :
44 : fd_ulong_blend ( m, t, f ) returns the ulong formed by selecting bits from t where m is 1 and from f where m is 0.
45 : fd_ulong_if ( c, t, f ) returns t if c is 1 and f if c is 0. U.B. if c is not in {0,1}
46 : fd_ulong_store_if ( c, p, v ) if c is non-zero, stores v to *p. Otherwise does nothing.
47 : fd_ulong_abs ( x ) returns |x| as a ulong
48 : fd_ulong_min ( x, y ) returns min(x,y)
49 : fd_ulong_max ( x, y ) returns max(x,y)
50 :
51 : fd_ulong_shift_left ( x, n ) returns x with its bits shifted left n times (n>63 shifts to zero), U.B. if n<0
52 : fd_ulong_shift_right ( x, n ) returns x with its bits shifted right n times (n>63 shifts to zero), U.B. if n<0
53 : fd_ulong_rotate_left ( x, n ) returns x with its bits rotated left n times (negative values rotate right)
54 : fd_ulong_rotate_right( x, n ) returns x with its bits rotated right n times (negative values rotate left)
55 :
56 : fd_ulong_popcnt ( x ) returns the number of bits set in x, in [0,64].
57 : fd_ulong_find_lsb ( x ) returns the index of the least significant bit set in x, in [0,64). U.B. if x is zero.
58 : fd_ulong_find_lsb_w_default( x, d ) returns the index of the least significant bit set in x, in [0,64). d if x is zero.
59 : fd_ulong_find_msb ( x ) returns the index of the most significant bit set in x, in [0,64). U.B. if x is zero.
60 : fd_ulong_find_msb_w_default( x, d ) returns the index of the most significant bit set in x, in [0,64). d if x is zero.
61 : fd_ulong_bswap ( x ) returns x with its bytes swapped
62 : fd_ulong_pow2_up ( x ) returns y mod 2^64 where y is the smallest integer power of 2 >= x. U.B. if x is zero.
63 : (current implementation returns 0 if x==0).
64 : fd_ulong_pow2_dn ( x ) returns the largest integer power of 2 <= x. U.B. if x is zero.
65 : (current implementation returns 1 if x==0).
66 :
67 : Similarly for uchar,ushort,uint,uint128. Note that the signed
68 : versions of shift_left, rotate_left, rotate_right operate on the bit
69 : pattern of the underlying type directly. Signed shift_right is sign
70 : extending while unsigned shift_right is zero padding (such that if x
71 : is negative/non-negative, a large magnitude shift will shift to
72 : -1/0).
73 :
74 : Support for zig-zag encoding is also provided. E.g.
75 :
76 : fd_long_zz_enc( x ) zig-zag encodes the long x and returns it as a ulong.
77 : fd_long_zz_dec( y ) zig-zag decodes the ulong y and returns it as a long.
78 :
79 : zig-zag encoding losslessly maps a signed integer to an unsigned
80 : integer such that, if the magnitude of the signed integer was small,
81 : the magnitude of the unsigned integer will be small too.
82 :
83 : Note that, though fd_ulong_if and friends look like a completely
84 : superfluous wrapper for the trinary operator, they have subtly
85 : different linguistic meanings. This seemingly trivial difference can
86 : have profound effects on code generation quality, especially on
87 : modern architectures. For example:
88 :
89 : c ? x[i] : x[j];
90 :
91 : linguistically means, if c is non-zero, load the i-th element of
92 : x. Otherwise, load the j-th element of x. But:
93 :
94 : fd_ulong_if( c, x[i], x[j] )
95 :
96 : means load the i-th element of x _and_ load the j-th element of x
97 : _and_ then select the first value if c is non-zero or the second
98 : value otherwise. Further, it explicitly says that c and the loads
99 : can be evaluated in any order.
100 :
101 : As such, in the trinary case, the compiler will be loathe to do
102 : either load before computing c because the language explicitly says
103 : "don't do that". In the unlikely case it overcomes this barrier,
104 : it then has the difficult job of proving that reordering c with the
105 : loads is safe (e.g. evaluating c might affect the value that would be
106 : loaded if c has side effects). In the unlikely case it overcomes
107 : that barrier, it then has the difficult job of proving it is safe to
108 : do the loads before evaluating c (e.g. c might be protecting against
109 : a load that could seg-fault).
110 :
111 : In the fd_ulong_if case though, the compiler has been explicitly told
112 : up front that the loads are safe and c and the loads can be done in
113 : any order. Now the optimizer finds it a lot easier to optimize
114 : because it isn't accidentally over-constrained and doesn't have to
115 : prove anything. E.g. it can use otherwise unused instruction slots
116 : before the operation to hide load latency while computing c in
117 : parallel via ILP. Further, it can then ideally use a conditional
118 : move operation to eliminate unnecessary consumption of BTB resources.
119 : And, if c is known at compile time, the compiler can prune
120 : unnecessary code for the unselected option (e.g. the compiler knows
121 : that omitting an unused normal load has no observable effect in the
122 : machine model).
123 :
124 : Faster, more deterministic, less BTB resources consumed and good
125 : compile time behavior. Everything the trinary operator should have
126 : been, rather than the giant pile of linguistic fail that it is.
127 :
128 : Overall, compilers traditionally are much much better at pruning
129 : unneeded operations than speculating execution (especially for code
130 : paths that a language says not to do and doubly so for code paths
131 : that are not obviously safe in general). And most of this is because
132 : languages are not designed correctly to help developers express their
133 : intent and constraints to the compiler.
134 :
135 : This dynamic has had multi-billion dollar commercial impacts though
136 : the cause-and-effect has gone largely unrecognized.
137 :
138 : At one extreme, a major reason why Itanium failed was languages
139 : didn't have the right constructs and machine models for developers to
140 : "do the right thing". Even given such, most devs wouldn't know to
141 : use them because they were erroneously taught to code to a machine
142 : abstraction that hasn't applied to the real world since the early
143 : 1980s. Compilers then were not able to utilize all the speculative
144 : execution / ILP / ... features that were key to performance on that
145 : architecture. The developer community, not being able to see any
146 : benefits (much less large enough benefits to justify short term
147 : switching costs) and not wanting to write tons of hand-tuned
148 : non-portable ASM kernels, shrugged and Itanium withered away.
149 :
150 : At the other extreme, CUDA gave developers a good GPU abstraction and
151 : extended languages and compilers to make it possible for developers
152 : to code to that abstraction (e.g. express locality explicitly instead of
153 : the constant lying-by-omission about the importance of locality that
154 : virtually everything else in tech does ... the speed of light isn't
155 : infinite or even all that fast relative to modern CPUs ... stop
156 : pretending that it is). CUDA enabled GPUs have since thrived in
157 : gaming, high performance computing, machine learning, crypto, etc.
158 : Profits had by all (well, by Nvidia at least).
159 :
160 : TL;DR
161 :
162 : * It'd be laughable if it weren't so pathetic that CPU ISAs and
163 : programming languages usually forget to expose one of the most
164 : fundamental and important digital logic circuits to devs ... the
165 : 2:1 mux.
166 :
167 : * Developers will usually do the right thing if languages let them.
168 :
169 : TODO: mask_msb, clear_msb, set_msb, flip_msb, extract_msb,
170 : insert_msb, bitrev, sign, copysign, flipsign, rounding right shift,
171 : ... */
172 :
173 : #define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,w) \
174 479513394 : FD_FN_CONST static inline int fd_##T##_is_pow2 ( T x ) { return (!!x) & (!(x & (x-(T)1))); } \
175 201327351 : FD_FN_CONST static inline T fd_##T##_pow2 ( int n ) { return (T)(((T)(n<w))<<(n&(w-1))); } \
176 568023111 : FD_FN_CONST static inline T fd_##T##_mask_bit ( int b ) { return (T)(((T)1)<<b); } \
177 51096 : FD_FN_CONST static inline T fd_##T##_clear_bit ( T x, int b ) { return (T)(x & ~fd_##T##_mask_bit(b)); } \
178 567962343 : FD_FN_CONST static inline T fd_##T##_set_bit ( T x, int b ) { return (T)(x | fd_##T##_mask_bit(b)); } \
179 2976 : FD_FN_CONST static inline T fd_##T##_flip_bit ( T x, int b ) { return (T)(x ^ fd_##T##_mask_bit(b)); } \
180 833152510 : FD_FN_CONST static inline int fd_##T##_extract_bit ( T x, int b ) { return (int)((x>>b) & (T)1); } \
181 5952 : FD_FN_CONST static inline T fd_##T##_insert_bit ( T x, int b, int y ) { return (T)((x & ~fd_##T##_mask_bit(b))|(((T)y)<<b)); } \
182 1281997011 : FD_FN_CONST static inline T fd_##T##_mask_lsb ( int n ) { return (T)((((T)(n<w))<<(n&(w-1)))-((T)1)); } \
183 1281523638 : FD_FN_CONST static inline T fd_##T##_clear_lsb ( T x, int n ) { return (T)(x & ~fd_##T##_mask_lsb(n)); } \
184 2262 : FD_FN_CONST static inline T fd_##T##_set_lsb ( T x, int n ) { return (T)(x | fd_##T##_mask_lsb(n)); } \
185 2262 : FD_FN_CONST static inline T fd_##T##_flip_lsb ( T x, int n ) { return (T)(x ^ fd_##T##_mask_lsb(n)); } \
186 2262 : FD_FN_CONST static inline T fd_##T##_extract_lsb ( T x, int n ) { return (T)(x & fd_##T##_mask_lsb(n)); } \
187 1281521376 : FD_FN_CONST static inline T fd_##T##_insert_lsb ( T x, int n, T y ) { return (T)(fd_##T##_clear_lsb(x,n) | y); } \
188 91764 : FD_FN_CONST static inline T fd_##T##_mask ( int l, int h ) { return (T)( fd_##T##_mask_lsb(h-l+1) << l ); } \
189 124881 : FD_FN_CONST static inline T fd_##T##_clear ( T x, int l, int h ) { return (T)(x & ~fd_##T##_mask(l,h)); } \
190 42480 : FD_FN_CONST static inline T fd_##T##_set ( T x, int l, int h ) { return (T)(x | fd_##T##_mask(l,h)); } \
191 108978 : FD_FN_CONST static inline T fd_##T##_flip ( T x, int l, int h ) { return (T)(x ^ fd_##T##_mask(l,h)); } \
192 82896 : FD_FN_CONST static inline T fd_##T##_extract ( T x, int l, int h ) { return (T)( (x>>l) & fd_##T##_mask_lsb(h-l+1) ); } \
193 264864 : FD_FN_CONST static inline T fd_##T##_insert ( T x, int l, int h, T y ) { return (T)(fd_##T##_clear(x,l,h) | (y<<l)); } \
194 2154732 : FD_FN_CONST static inline T fd_##T##_lsb ( T x ) { return (T)(x ^ (x & (x-(T)1))); } \
195 3433702758 : FD_FN_CONST static inline T fd_##T##_pop_lsb ( T x ) { return (T)(x & (x-(T)1)); } \
196 9672237236 : FD_FN_CONST static inline int fd_##T##_is_aligned ( T x, T a ) { a--; return !(x & a); } \
197 66960 : FD_FN_CONST static inline T fd_##T##_alignment ( T x, T a ) { a--; return (T)( x & a); } \
198 366151876 : FD_FN_CONST static inline T fd_##T##_align_dn ( T x, T a ) { a--; return (T)( x & ~a); } \
199 12876453174 : FD_FN_CONST static inline T fd_##T##_align_up ( T x, T a ) { a--; return (T)((x+a) & ~a); } \
200 251658240 : FD_FN_CONST static inline T fd_##T##_blend ( T m, T t, T f ) { return (T)((t & m) | (f & ~m)); } \
201 29132391582 : FD_FN_CONST static inline T fd_##T##_if ( int c, T t, T f ) { return c ? t : f; /* cmov */ } \
202 314339793 : /* */ static inline void fd_##T##_store_if ( int c, T * p, T v ) { T _[ 1 ]; *( c ? p : _ ) = v; /* cmov */ } \
203 489555636 : FD_FN_CONST static inline T fd_##T##_abs ( T x ) { return x; } \
204 20272963034 : FD_FN_CONST static inline T fd_##T##_min ( T x, T y ) { return (x<y) ? x : y; /* cmov */ } \
205 2980760088 : FD_FN_CONST static inline T fd_##T##_max ( T x, T y ) { return (x>y) ? x : y; /* cmov */ } \
206 754974720 : FD_FN_CONST static inline T fd_##T##_shift_left ( T x, int n ) { return (T)(((n>(w-1)) ? ((T)0) : x) << (n&(w-1))); } \
207 511844859 : FD_FN_CONST static inline T fd_##T##_shift_right ( T x, int n ) { return (T)(((n>(w-1)) ? ((T)0) : x) >> (n&(w-1))); } \
208 >31703*10^7 : FD_FN_CONST static inline T fd_##T##_rotate_left ( T x, int n ) { return (T)((x << (n&(w-1))) | (x >> ((-n)&(w-1)))); } \
209 4570736620 : FD_FN_CONST static inline T fd_##T##_rotate_right( T x, int n ) { return (T)((x >> (n&(w-1))) | (x << ((-n)&(w-1)))); }
210 :
/* Instantiate the unsigned bit manipulation API for each supported
   width (uchar/ushort/uint/ulong, plus uint128 when available). */

FD_SRC_UTIL_BITS_FD_BITS_IMPL(uchar,  8)
FD_SRC_UTIL_BITS_FD_BITS_IMPL(ushort,16)
FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint,  32)
FD_SRC_UTIL_BITS_FD_BITS_IMPL(ulong, 64)

#if FD_HAS_INT128 /* FIXME: These probably could benefit from x86 specializations */
FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint128,128)
#endif

#undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
221 :
222 1892748618 : FD_FN_CONST static inline int fd_uchar_popcnt ( uchar x ) { return __builtin_popcount ( (uint)x ); }
223 816 : FD_FN_CONST static inline int fd_ushort_popcnt( ushort x ) { return __builtin_popcount ( (uint)x ); }
224 781986 : FD_FN_CONST static inline int fd_uint_popcnt ( uint x ) { return __builtin_popcount ( x ); }
225 13098953913 : FD_FN_CONST static inline int fd_ulong_popcnt ( ulong x ) { return __builtin_popcountl( x ); }
226 :
#if FD_HAS_INT128
/* fd_uint128_popcnt: number of set bits in a 128-bit value, computed as
   the sum of the popcounts of the low and high 64-bit halves. */
FD_FN_CONST static inline int
fd_uint128_popcnt( uint128 x ) {
  ulong lo = (ulong) x;
  ulong hi = (ulong)(x>>64);
  return __builtin_popcountl( lo ) + __builtin_popcountl( hi );
}
#endif
233 :
234 : #include "fd_bits_find_lsb.h" /* Provides find_lsb_w_default too */
235 : #include "fd_bits_find_msb.h" /* Provides find_msb_w_default too */ /* Note that find_msb==floor( log2( x ) ) for non-zero x */
236 :
237 4203198 : FD_FN_CONST static inline uchar fd_uchar_bswap ( uchar x ) { return x; }
238 255663865 : FD_FN_CONST static inline ushort fd_ushort_bswap( ushort x ) { return __builtin_bswap16( x ); }
239 12350011300 : FD_FN_CONST static inline uint fd_uint_bswap ( uint x ) { return __builtin_bswap32( x ); }
240 1468722461 : FD_FN_CONST static inline ulong fd_ulong_bswap ( ulong x ) { return __builtin_bswap64( x ); }
241 :
#if FD_HAS_INT128
/* fd_uint128_bswap: byte swap a 128-bit value by byte swapping each
   64-bit half and exchanging the halves. */
FD_FN_CONST static inline uint128
fd_uint128_bswap( uint128 x ) {
  uint128 lo_swapped = (uint128)fd_ulong_bswap( (ulong) x      );
  uint128 hi_swapped = (uint128)fd_ulong_bswap( (ulong)(x>>64) );
  return (lo_swapped<<64) | hi_swapped;
}
#endif
250 :
251 : /* FIXME: consider find_msb based solution (probably not as the combination
252 : of FD_FN_CONST and the use of inline asm for find_msb on some targets
253 : is probably less than ideal). */
254 :
255 : FD_FN_CONST static inline uchar
256 111 : fd_uchar_pow2_up( uchar _x ) {
257 111 : uint x = (uint)_x;
258 111 : x--;
259 111 : x |= (x>> 1);
260 111 : x |= (x>> 2);
261 111 : x |= (x>> 4);
262 111 : x++;
263 111 : return (uchar)x;
264 111 : }
265 :
266 : FD_FN_CONST static inline uchar
267 111 : fd_uchar_pow2_dn( uchar _x ) {
268 111 : uint x = (uint)_x;
269 111 : x >>= 1;
270 111 : x |= (x>> 1);
271 111 : x |= (x>> 2);
272 111 : x |= (x>> 4);
273 111 : x++;
274 111 : return (uchar)x;
275 111 : }
276 :
277 : FD_FN_CONST static inline ushort
278 411 : fd_ushort_pow2_up( ushort _x ) {
279 411 : uint x = (uint)_x;
280 411 : x--;
281 411 : x |= (x>> 1);
282 411 : x |= (x>> 2);
283 411 : x |= (x>> 4);
284 411 : x |= (x>> 8);
285 411 : x++;
286 411 : return (ushort)x;
287 411 : }
288 :
289 : FD_FN_CONST static inline ushort
290 411 : fd_ushort_pow2_dn( ushort _x ) {
291 411 : uint x = (uint)_x;
292 411 : x >>= 1;
293 411 : x |= (x>> 1);
294 411 : x |= (x>> 2);
295 411 : x |= (x>> 4);
296 411 : x |= (x>> 8);
297 411 : x++;
298 411 : return (ushort)x;
299 411 : }
300 :
301 : FD_FN_CONST static inline uint
302 1587 : fd_uint_pow2_up( uint x ) {
303 1587 : x--;
304 1587 : x |= (x>> 1);
305 1587 : x |= (x>> 2);
306 1587 : x |= (x>> 4);
307 1587 : x |= (x>> 8);
308 1587 : x |= (x>>16);
309 1587 : x++;
310 1587 : return x;
311 1587 : }
312 :
313 : FD_FN_CONST static inline uint
314 1587 : fd_uint_pow2_dn( uint x ) {
315 1587 : x >>= 1;
316 1587 : x |= (x>> 1);
317 1587 : x |= (x>> 2);
318 1587 : x |= (x>> 4);
319 1587 : x |= (x>> 8);
320 1587 : x |= (x>>16);
321 1587 : x++;
322 1587 : return x;
323 1587 : }
324 :
325 : FD_FN_CONST static inline ulong
326 60505830 : fd_ulong_pow2_up( ulong x ) {
327 60505830 : x--;
328 60505830 : x |= (x>> 1);
329 60505830 : x |= (x>> 2);
330 60505830 : x |= (x>> 4);
331 60505830 : x |= (x>> 8);
332 60505830 : x |= (x>>16);
333 60505830 : x |= (x>>32);
334 60505830 : x++;
335 60505830 : return x;
336 60505830 : }
337 :
338 : FD_FN_CONST static inline ulong
339 6243 : fd_ulong_pow2_dn( ulong x ) {
340 6243 : x >>= 1;
341 6243 : x |= (x>> 1);
342 6243 : x |= (x>> 2);
343 6243 : x |= (x>> 4);
344 6243 : x |= (x>> 8);
345 6243 : x |= (x>>16);
346 6243 : x |= (x>>32);
347 6243 : x++;
348 6243 : return x;
349 6243 : }
350 :
#if FD_HAS_INT128
/* 128-bit versions of pow2_up / pow2_dn (same bit-smearing scheme as
   the narrower variants above, with shift-doubling through 64).
   pow2_up yields 0 for x==0; pow2_dn yields 1 for x==0. */

FD_FN_CONST static inline uint128
fd_uint128_pow2_up( uint128 x ) {
  x--;
  for( int s=1; s<128; s<<=1 ) x |= (x>>s);
  return x+1;
}

FD_FN_CONST static inline uint128
fd_uint128_pow2_dn( uint128 x ) {
  x >>= 1;
  for( int s=1; s<128; s<<=1 ) x |= (x>>s);
  return x+1;
}
#endif
380 :
381 : /* Brokenness of indeterminant char sign strikes again ... sigh. We
382 : explicitly provide the unsigned variant of the token to these
383 : macros because the uchar token is not related to the schar token
384 : by simply prepending u to schar. */
385 :
386 : /* Note: the implementations of abs, right_shift and zz_enc below do not
387 : exploit the sign extending right shift behavior specified by the
388 : machine model (and thus can be used safely in more general machine
389 : models) but are slightly more expensive.
390 :
391 : FD_FN_CONST static inline UT fd_##T##_abs( T x ) { UT u = (UT)x; UT m = (UT)-(u>>(w-1)); return (UT)((u+m)^m); }
392 :
393 : FD_FN_CONST static inline T
394 : fd_##T##_shift_right( T x,
395 : int n ) {
396 : UT u = (UT)x;
397 : UT m = (UT)-(u >> (w-1));
398 : return (T)(fd_##UT##_shift_right( u ^ m, n ) ^ m);
399 : }
400 :
401 : FD_FN_CONST static inline UT fd_##T##_zz_enc( T x ) { UT u = (UT)x; return (UT)((-(u>>(w-1))) ^ (u<<1)); }
402 : */
403 :
/* FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,UT,w) instantiates the signed API for
   type T whose unsigned counterpart is UT of width w bits.  Notes:
   - abs forms m = x>>(w-1), relying on the machine model's sign
     extending right shift (m is 0 or all ones), then (x+m)^m.
   - shift_right clamps n to w-1 so a large magnitude shift yields -1/0
     for negative/non-negative x (sign extending).
   - shift_left / rotates operate on the bit pattern via the UT variant.
   - zz_enc / zz_dec implement zig-zag encoding (see header comment);
     zz_enc also relies on the sign extending right shift. */
#define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,UT,w)                                                                                \
FD_FN_CONST static inline T  fd_##T##_if           ( int c, T t, T f ) { return c ? t : f; /* cmov */                      } \
/*       */ static inline void fd_##T##_store_if   ( int c, T * p, T v ) { T _[ 1 ]; *( c ? p : _ ) = v; /* cmov */        } \
FD_FN_CONST static inline UT fd_##T##_abs          ( T x ) { UT m = (UT)(x >> (w-1)); return (UT)((((UT)x)+m)^m);          } \
FD_FN_CONST static inline T  fd_##T##_min          ( T x, T y ) { return (x<=y) ? x : y; /* cmov */                        } \
FD_FN_CONST static inline T  fd_##T##_max          ( T x, T y ) { return (x>=y) ? x : y; /* cmov */                        } \
FD_FN_CONST static inline T  fd_##T##_shift_left   ( T x, int n ) { return (T)fd_##UT##_shift_left ( (UT)x, n );           } \
FD_FN_CONST static inline T  fd_##T##_shift_right  ( T x, int n ) { return (T)(x >> ((n>(w-1)) ? (w-1) : n)); /* cmov */   } \
FD_FN_CONST static inline T  fd_##T##_rotate_left  ( T x, int n ) { return (T)fd_##UT##_rotate_left ( (UT)x, n );          } \
FD_FN_CONST static inline T  fd_##T##_rotate_right ( T x, int n ) { return (T)fd_##UT##_rotate_right( (UT)x, n );          } \
FD_FN_CONST static inline UT fd_##T##_zz_enc       ( T x ) { return (UT)(((UT)(x>>(w-1))) ^ (((UT)x)<<1));                 } \
FD_FN_CONST static inline T  fd_##T##_zz_dec       ( UT x ) { return (T)((x>>1) ^ (-(x & (UT)1)));                         }
416 :
/* Instantiate the signed API for each width.  The unsigned token is
   passed explicitly (see note above re char signedness). */
FD_SRC_UTIL_BITS_FD_BITS_IMPL(schar, uchar,   8)
FD_SRC_UTIL_BITS_FD_BITS_IMPL(short, ushort, 16)
FD_SRC_UTIL_BITS_FD_BITS_IMPL(int,   uint,   32)
FD_SRC_UTIL_BITS_FD_BITS_IMPL(long,  ulong,  64)
#if FD_HAS_INT128
FD_SRC_UTIL_BITS_FD_BITS_IMPL(int128,uint128,128)
#endif

#undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
426 :
427 : /* Brokenness of indeterminant char sign strikes again ... sigh. We
428 : can't provide a char_min/char_max between platforms as they don't
429 : necessarily produce the same results. Likewise, we don't provide a
430 : fd_char_abs because it will not produce equivalent results between
431 : platforms. But it is useful to have a char_if and char_store_if to
432 : help with making branchless string operation implementations. */
433 :
/* fd_char_if: select t if c is non-zero, f otherwise (see fd_ulong_if
   discussion above for why this exists as a function). */
FD_FN_CONST static inline char fd_char_if( int c, char t, char f ) { return c ? t : f; }

/* fd_char_store_if: store v to *p iff c is non-zero.  The dummy stack
   slot makes the conditional a select on the store address so the
   compiler can emit a cmov rather than a branch. */
static inline void fd_char_store_if( int c, char * p, char v ) { char _[ 1 ]; *( c ? p : _ ) = v; /* cmov */ }
437 :
438 : /* FIXME: ADD HASHING PAIRS FOR UCHAR AND USHORT? */
439 :
440 : /* High quality (full avalanche) high speed integer to integer hashing.
441 : fd_uint_hash has the properties that [0,2^32) hashes to a random
442 : looking permutation of [0,2^32) and hash(0)==0. Similarly for
443 : fd_ulong_hash. Based on Google's Murmur3 hash finalizer (public
444 : domain). Not cryptographically secure but passes various strict
445 : tests of randomness when used as a PRNG. */
446 :
447 : FD_FN_CONST static inline uint
448 112493844 : fd_uint_hash( uint x ) {
449 112493844 : x ^= x >> 16;
450 112493844 : x *= 0x85ebca6bU;
451 112493844 : x ^= x >> 13;
452 112493844 : x *= 0xc2b2ae35U;
453 112493844 : x ^= x >> 16;
454 112493844 : return x;
455 112493844 : }
456 :
457 : FD_FN_CONST static inline ulong
458 >16309*10^7 : fd_ulong_hash( ulong x ) {
459 >16309*10^7 : x ^= x >> 33;
460 >16309*10^7 : x *= 0xff51afd7ed558ccdUL;
461 >16309*10^7 : x ^= x >> 33;
462 >16309*10^7 : x *= 0xc4ceb9fe1a85ec53UL;
463 >16309*10^7 : x ^= x >> 33;
464 >16309*10^7 : return x;
465 >16309*10^7 : }
466 :
467 : /* Inverses of the above. E.g.:
468 : fd_uint_hash_inverse( fd_uint_hash( (uint)x ) )==(uint)x
469 : and:
470 : fd_uint_hash( fd_uint_hash_inverse( (uint)x ) )==(uint)x
471 : Similarly for fd_ulong_hash_inverse. These by themselves are similar
472 : quality hashes to the above and having the inverses of the above can
473 : be useful. The fact these have (nearly) identical operations /
474 : operation counts concretely demonstrates that none of these are
475 : standalone cryptographically secure. */
476 :
477 : FD_FN_CONST static inline uint
478 103809084 : fd_uint_hash_inverse( uint x ) {
479 103809084 : x ^= x >> 16;
480 103809084 : x *= 0x7ed1b41dU;
481 103809084 : x ^= (x >> 13) ^ (x >> 26);
482 103809084 : x *= 0xa5cb9243U;
483 103809084 : x ^= x >> 16;
484 103809084 : return x;
485 103809084 : }
486 :
487 : FD_FN_CONST static inline ulong
488 13089422283 : fd_ulong_hash_inverse( ulong x ) {
489 13089422283 : x ^= x >> 33;
490 13089422283 : x *= 0x9cb4b2f8129337dbUL;
491 13089422283 : x ^= x >> 33;
492 13089422283 : x *= 0x4f74430c22a54005UL;
493 13089422283 : x ^= x >> 33;
494 13089422283 : return x;
495 13089422283 : }
496 :
497 : /* fd_ulong_base10_dig_cnt returns the number of digits in the base 10
498 : representation of x. FIXME: USE BETTER ALGO. */
499 :
/* FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,M) defines
   fd_T_base10_dig_cnt( x ), which returns the number of base 10 digits
   in x, in [1,M].  It repeatedly raises a threshold by 10x until x
   falls below it; M caps the loop at the maximum digit count for T
   (so thresh never needs to exceed the ulong range). */
#define FD_SRC_UTIL_BITS_FD_BITS_IMPL(T,M)   \
FD_FN_CONST static inline ulong              \
fd_##T##_base10_dig_cnt( T _x ) {            \
  ulong x      = (ulong)_x;                  \
  ulong cnt    = 1UL;                        \
  ulong thresh = 10UL;                       \
  do {                                       \
    if( FD_LIKELY( x<thresh ) ) break;       \
    cnt++;                                   \
    thresh *= 10UL;                          \
  } while( FD_LIKELY( cnt<M ) );             \
  return cnt;                                \
}

FD_SRC_UTIL_BITS_FD_BITS_IMPL(uchar,   3UL) /*                  255 ->  3 dig */
FD_SRC_UTIL_BITS_FD_BITS_IMPL(ushort,  5UL) /*                65535 ->  5 dig */
FD_SRC_UTIL_BITS_FD_BITS_IMPL(uint,   10UL) /*           4294967295 -> 10 dig */
FD_SRC_UTIL_BITS_FD_BITS_IMPL(ulong,  20UL) /* 18446744073709551615 -> 20 dig */

#undef FD_SRC_UTIL_BITS_FD_BITS_IMPL
520 :
521 : /* fd_float_if, fd_float_store_if, fd_float_abs are described above.
522 : Ideally, the system will implement fd_float_abs by just clearing the
523 : sign bit. fd_float_eq tests two floating point values for whether or
524 : not their bit representations are identical. Useful when IEEE
525 : handling of equality with +/-0 or nan are not desired (e.g. can test
526 : if nans have different signs or syndromes). */
527 :
528 54490935 : FD_FN_CONST static inline float fd_float_if ( int c, float t, float f ) { return c ? t : f; }
529 50331648 : /* */ static inline void fd_float_store_if( int c, float * p, float v ) { float _[ 1 ]; *( c ? p : _ ) = v; }
530 96095148 : FD_FN_CONST static inline float fd_float_abs ( float x ) { return __builtin_fabsf( x ); }
531 : FD_FN_CONST static inline int
532 : fd_float_eq( float x,
533 100663296 : float y ) {
534 100663296 : union { float f; uint u; } tx, ty;
535 100663296 : tx.f = x;
536 100663296 : ty.f = y;
537 100663296 : return tx.u==ty.u;
538 100663296 : }
539 :
540 : /* fd_double_if, fd_double_store_if, fd_double_abs and fd_double_eq are
541 : double precision versions of the above. */
542 :
#if FD_HAS_DOUBLE
/* Double precision versions of the float helpers above. */

FD_FN_CONST static inline double
fd_double_if( int c, double t, double f ) {
  return c ? t : f;
}

static inline void
fd_double_store_if( int c, double * p, double v ) {
  double _[ 1 ];
  *( c ? p : _ ) = v; /* cmov on the store address */
}

FD_FN_CONST static inline double
fd_double_abs( double x ) {
  return __builtin_fabs( x );
}

/* Bitwise equality: compares the bit representations, not IEEE
   equality (distinguishes +0/-0 and different nan encodings). */
FD_FN_CONST static inline int
fd_double_eq( double x,
              double y ) {
  union { double f; ulong u; } ux, uy;
  ux.f = x;
  uy.f = y;
  return ux.u==uy.u;
}
#endif
556 :
557 : /* fd_swap swaps the values in a and b. Assumes a and b have the same
558 : plain-old-data type. Best if a and b are primitive types (e.g.
559 : ulong). This macro is robust (it evaluates a and b the minimal
560 : number of times). Note that compilers are pretty good about identifying
561 : swap operations and replacing them with more optimal assembly for
562 : types and architectures where alternative implementations (like xor
563 : tricks) might be faster. */
564 :
/* Implementation: a and b are each expanded exactly once into a
   typeof(a) temporary, so side-effecting arguments are safe. */
#define fd_swap(a,b) do { __typeof__((a)) _fd_swap_tmp = (a); (a) = (b); (b) = _fd_swap_tmp; } while(0)
566 :
567 : /* fd_swap_if swaps the values in a and b if c is non-zero and leaves
568 : them unchanged otherwise. Assumes a and b have the same
569 : plain-old-data type. Best if a and b are primitive types (e.g.
570 : ulong) as the compiler will likely replace the trinaries below with
571 : cmovs, making this branchless. This macro is robust (it evaluates a,
572 : b and c the minimal number of times). */
573 :
/* Implementation: capture c, a and b exactly once, then write both
   outputs via trinaries the compiler can lower to cmovs (branchless). */
#define fd_swap_if(c,a,b) do {                                         \
    int             _fd_swap_if_c = (c);                               \
    __typeof__((a)) _fd_swap_if_a = (a);                               \
    __typeof__((b)) _fd_swap_if_b = (b);                               \
    (a) = _fd_swap_if_c ? _fd_swap_if_b : _fd_swap_if_a; /* cmov */    \
    (b) = _fd_swap_if_c ? _fd_swap_if_a : _fd_swap_if_b; /* cmov */    \
  } while(0)
581 :
582 : /* fd_ptr_if is a generic version of the above for pointers. This macro
583 : is robust.
584 :
585 : IMPORTANT SAFETY TIP! The output type is the type of t. Thus, if
586 : the condition is false and t and f have different pointer types
587 : (which is inherently gross and we might want to consider outright
588 : rejecting in the future via, unfortunately non-standard, compiler
589 : builtins), this would implicitly cast the pointer f to the type of t.
590 : As such, strict aliasing rules in the language also imply mixed usage
591 : cases need to be wrapped in a fd_type_pun. In short, mixing pointer
592 : types between t and f is strongly discouraged. */
593 :
/* Implementation: pointers are round-tripped through ulong so the select
   is done on integers.  Under UBSAN a plain trinary is used instead of
   fd_ulong_if — NOTE(review): presumably to keep the sanitizer's
   pointer checks happy; confirm against the build config. */
#if FD_HAS_UBSAN
#define fd_ptr_if(c,t,f) ((__typeof__((t)))( (c) ? (ulong)(t) : (ulong)(f) ))
#else
#define fd_ptr_if(c,t,f) ((__typeof__((t)))fd_ulong_if( (c), (ulong)(t), (ulong)(f) ))
#endif
599 :
600 : /* FD_ULONG_{MASK_LSB,MASK_MSB,ALIGN_UP} are the same as
601 : fd_ulong_{mask_lsb,mask_msb,align_up} but can be used at compile
602 : time. The tradeoff is n/a must be safe against multiple evaluation
603 : at compile time. x should be ulong compatible and n/a should be int
604 : compatible. */
605 :
/* Compile time constant versions (see note above: n/a/x must be safe
   against multiple evaluation).  MASK_LSB mirrors fd_ulong_mask_lsb:
   the (n)<=63 flag and (n)&63 reduction make n==64 yield all ones
   without a 64-bit shift. */
#define FD_ULONG_MASK_LSB( n )    ((((ulong)((n)<=63)) << ((n) & 63)) - 1UL) /* n in [0,64] */
#define FD_ULONG_MASK_MSB( n )    (~FD_ULONG_MASK_LSB(64-(n)))               /* n in [0,64] */
#define FD_ULONG_ALIGN_UP( x, a ) (((x)+((a)-1UL)) & (~((a)-1UL)))           /* a a positive integral power of 2 */
609 :
610 : /* Unaligned access annotations.
611 :
612 : FD_LOAD( T, src ) is equivalent to:
613 : return (*(T const *)(src))
614 : but src can have arbitrary alignment.
615 :
616 : FD_STORE( T, dst, val ) is equivalent to:
617 : T * ptr = (T *)(dst);
618 : *ptr = (val);
619 : return ptr
620 : but dst can have arbitrary alignment.
621 :
622 : Note: Ideally, we would infer the type T in FD_LOAD from src (e.g.
623 : use typeof(*(src)). But there are some nasty linguistic and
624 : optimizer interactions when src is a constant pointer in a truly
625 : generic implementation. Similarly for FD_STORE.
626 :
627 : fd_T_load_n( src ) where T is in [uchar,ushort,uint,ulong] loads n
628 : bytes into the least significant n bytes of a T, zeros any remaining
629 : bytes and returns the result. fd_T_load_n_fast is the same but
630 : assumes it is safe to tail read a couple of bytes past the end of src
631 : if such is beneficial for higher performance.
632 :
633 : Accesses that would normally be atomic (e.g. an aligned access to a
634 : primitive type like a ulong) are not guaranteed to be atomic if done
635 : through these annotations. */
636 :
/* Default to the memcpy elision style (0) on all targets; per the note
   below, the direct access style (1) is broken on x86. */
#ifndef FD_UNALIGNED_ACCESS_STYLE
#if FD_HAS_X86
#define FD_UNALIGNED_ACCESS_STYLE 0 /* 1 is broken ... */
#else
#define FD_UNALIGNED_ACCESS_STYLE 0
#endif
#endif
644 :
645 : #if FD_UNALIGNED_ACCESS_STYLE==0 /* memcpy elision based */
646 :
647 : /* This implementation does not assume it is safe to access unaligned
648 : memory directly (and thus can be used on platforms outside the
649 : development environment's machine model) but it does still assume
650 : little endian byte ordering.
651 :
652 : It is based on memcpy and, in principle, the compiler should elide
653 : the memcpy and replace this with optimized asm on platforms where
654 : this is safe (which is virtually all commercially viable platforms as
655 : packet processing deal heavily with unaligned accesses and virtually
656 : all platforms are near universally networked and networking needs to
657 : do packet processing efficiently). But this fails often enough in
658 : practice that this should not be relied upon, especially if
659 : performance is important as performance is glacial when the compiler
660 : mucks up. (fd_memcpy is an especially bad idea here because the
661 : risk of the compiler mucking it up is much greater.)
662 :
663 : It is also more than little bizarre that this is an encouraged
664 : practice nowadays. That is, practically, we are using a low level
665 : language (C/C++) that have language constructs (dereference a
666 : pointer to a primitive type) that map directly onto low level
667 : hardware operations (asm load operation) that are actually supported
668 : by the target hardware here (fine if pointer is not aligned to
669 : width of the type).
670 :
671 : But instead of encouraging developers to write short, readable,
672 : library-independent code that generates fast and ultra compact asm,
673 : they are encouraged to write long, obtuse, library-dependent code
674 : that naively would generate slow bloated asm in hopes the compiler
675 : will realize can be turned into the simple implementation and turn it
676 : back into the developer's original intent and then generate good asm.
677 : Hmmm. */
678 :
/* Bounce through a correctly typed temporary via memcpy; the compiler
   is expected (but, per the note above, not guaranteed) to elide the
   copy and emit a single unaligned access. */

#define FD_LOAD( T, src ) \
  (__extension__({ T _fd_load_tmp; memcpy( &_fd_load_tmp, (T const *)(src), sizeof(T) ); _fd_load_tmp; }))

#define FD_STORE( T, dst, val ) \
  (__extension__({ T _fd_store_tmp = (val); (T *)memcpy( (T *)(dst), &_fd_store_tmp, sizeof(T) ); }))
684 :
/* Little endian partial width loads.  The 3, 5, 6 and 7 byte variants
   zero initialize the temporary so the bytes memcpy does not write read
   back as zero. */

FD_FN_PURE static inline uchar  fd_uchar_load_1 ( void const * p ) { return *(uchar const *)p; }

FD_FN_PURE static inline ushort fd_ushort_load_1 ( void const * p ) { return (ushort)*(uchar const *)p; }
FD_FN_PURE static inline ushort fd_ushort_load_2 ( void const * p ) { ushort t;      memcpy( &t, p, 2UL ); return t; }

FD_FN_PURE static inline uint   fd_uint_load_1   ( void const * p ) { return (uint )*(uchar const *)p; }
FD_FN_PURE static inline uint   fd_uint_load_2   ( void const * p ) { ushort t;      memcpy( &t, p, 2UL ); return (uint )t; }
FD_FN_PURE static inline uint   fd_uint_load_3   ( void const * p ) { uint t = 0UL;  memcpy( &t, p, 3UL ); return (uint )t; }
FD_FN_PURE static inline uint   fd_uint_load_4   ( void const * p ) { uint t;        memcpy( &t, p, 4UL ); return t; }

FD_FN_PURE static inline ulong  fd_ulong_load_1  ( void const * p ) { return (ulong )*(uchar const *)p; }
FD_FN_PURE static inline ulong  fd_ulong_load_2  ( void const * p ) { ushort t;      memcpy( &t, p, 2UL ); return (ulong)t; }
FD_FN_PURE static inline ulong  fd_ulong_load_3  ( void const * p ) { uint t = 0UL;  memcpy( &t, p, 3UL ); return (ulong)t; }
FD_FN_PURE static inline ulong  fd_ulong_load_4  ( void const * p ) { uint t;        memcpy( &t, p, 4UL ); return (ulong)t; }
FD_FN_PURE static inline ulong  fd_ulong_load_5  ( void const * p ) { ulong t = 0UL; memcpy( &t, p, 5UL ); return t; }
FD_FN_PURE static inline ulong  fd_ulong_load_6  ( void const * p ) { ulong t = 0UL; memcpy( &t, p, 6UL ); return t; }
FD_FN_PURE static inline ulong  fd_ulong_load_7  ( void const * p ) { ulong t = 0UL; memcpy( &t, p, 7UL ); return t; }
FD_FN_PURE static inline ulong  fd_ulong_load_8  ( void const * p ) { ulong t;       memcpy( &t, p, 8UL ); return t; }
703 :
/* _fast variants: load a full word and mask, so they may tail read past
   the end of the n encoded bytes (as annotated).  Only use when such
   reads are known safe. */

#define fd_uchar_load_1_fast  fd_uchar_load_1

#define fd_ushort_load_1_fast fd_ushort_load_1
#define fd_ushort_load_2_fast fd_ushort_load_2

#define fd_uint_load_1_fast   fd_uint_load_1
#define fd_uint_load_2_fast   fd_uint_load_2
FD_FN_PURE static inline uint  fd_uint_load_3_fast  ( void const * p ) { uint t;  memcpy( &t, p, 4UL ); return ((uint )t) & 0x00ffffffU; } /* Tail read 1B */
#define fd_uint_load_4_fast   fd_uint_load_4

#define fd_ulong_load_1_fast  fd_ulong_load_1
#define fd_ulong_load_2_fast  fd_ulong_load_2
FD_FN_PURE static inline ulong fd_ulong_load_3_fast ( void const * p ) { uint t;  memcpy( &t, p, 4UL ); return ((ulong)t) & 0x0000000000ffffffUL; } /* Tail read 1B */
#define fd_ulong_load_4_fast  fd_ulong_load_4
FD_FN_PURE static inline ulong fd_ulong_load_5_fast ( void const * p ) { ulong t; memcpy( &t, p, 8UL ); return t & 0x000000ffffffffffUL; } /* Tail read 3B */
FD_FN_PURE static inline ulong fd_ulong_load_6_fast ( void const * p ) { ulong t; memcpy( &t, p, 8UL ); return t & 0x0000ffffffffffffUL; } /* Tail read 2B */
FD_FN_PURE static inline ulong fd_ulong_load_7_fast ( void const * p ) { ulong t; memcpy( &t, p, 8UL ); return t & 0x00ffffffffffffffUL; } /* Tail read 1B */
#define fd_ulong_load_8_fast  fd_ulong_load_8
722 :
723 : #elif FD_UNALIGNED_ACCESS_STYLE==1 /* direct access */
724 :
/* Load directly through a pointer the optimizer has "forgotten" the
   provenance of.  NOTE(review): FD_COMPILER_FORGET presumably keeps
   the compiler from exploiting apparent alignment of src -- confirm.
   (The temporary was previously misnamed _fd_store_tmp, a copy-paste
   from FD_STORE; renamed for clarity, no behavior change.) */

#define FD_LOAD( T, src ) (__extension__({          \
    T const * _fd_load_tmp = (T const *)(src);      \
    FD_COMPILER_FORGET( _fd_load_tmp );             \
    *_fd_load_tmp;                                  \
  }))

/* Store through a type punned pointer; the compiler fence keeps the
   store from being reordered/elided around the returned pointer. */

#define FD_STORE( T, dst, val ) (__extension__({            \
    T * _fd_store_tmp = (T *)fd_type_pun( (void *)(dst) );  \
    *_fd_store_tmp = (val);                                 \
    FD_COMPILER_MFENCE();                                   \
    _fd_store_tmp;                                          \
  }))
737 :
/* Direct unaligned accesses (target must tolerate misaligned loads of
   primitive types).  The 3, 5, 6 and 7 byte variants compose two
   smaller little endian loads.  NOTE(review): FD_COMPILER_FORGET( p )
   presumably blocks the optimizer from exploiting p's apparent
   alignment -- confirm. */

FD_FN_PURE static inline uchar  fd_uchar_load_1  ( void const * p ) { FD_COMPILER_FORGET( p ) ; return (        *(uchar  const *)p); }

FD_FN_PURE static inline ushort fd_ushort_load_1 ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ushort)*(uchar  const *)p); }
FD_FN_PURE static inline ushort fd_ushort_load_2 ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ushort const *)p); }

FD_FN_PURE static inline uint   fd_uint_load_1   ( void const * p ) { FD_COMPILER_FORGET( p ); return ((uint  )*(uchar  const *)p); }
FD_FN_PURE static inline uint   fd_uint_load_2   ( void const * p ) { FD_COMPILER_FORGET( p ); return ((uint  )*(ushort const *)p); }
FD_FN_PURE static inline uint   fd_uint_load_3   ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_uint_load_2 (p) | (fd_uint_load_1 (((uchar const *)p)+2UL)<<16); }
FD_FN_PURE static inline uint   fd_uint_load_4   ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(uint   const *)p); }

FD_FN_PURE static inline ulong  fd_ulong_load_1  ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(uchar  const *)p); }
FD_FN_PURE static inline ulong  fd_ulong_load_2  ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(ushort const *)p); }
FD_FN_PURE static inline ulong  fd_ulong_load_3  ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_2(p) | (fd_ulong_load_1(((uchar const *)p)+2UL)<<16); }
FD_FN_PURE static inline ulong  fd_ulong_load_4  ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong )*(uint   const *)p); }
FD_FN_PURE static inline ulong  fd_ulong_load_5  ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_4(p) | (fd_ulong_load_1(((uchar const *)p)+4UL)<<32); }
FD_FN_PURE static inline ulong  fd_ulong_load_6  ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_4(p) | (fd_ulong_load_2(((uchar const *)p)+4UL)<<32); }
FD_FN_PURE static inline ulong  fd_ulong_load_7  ( void const * p ) { FD_COMPILER_FORGET( p ); return fd_ulong_load_6(p) | (fd_ulong_load_1(((uchar const *)p)+6UL)<<48); }
FD_FN_PURE static inline ulong  fd_ulong_load_8  ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong  const *)p); }
756 :
/* _fast variants: full word direct load plus mask; they tail read past
   the end of the encoded bytes as annotated, so only use when such
   reads cannot fault. */

#define fd_uchar_load_1_fast  fd_uchar_load_1

#define fd_ushort_load_1_fast fd_ushort_load_1
#define fd_ushort_load_2_fast fd_ushort_load_2

#define fd_uint_load_1_fast   fd_uint_load_1
#define fd_uint_load_2_fast   fd_uint_load_2
FD_FN_PURE static inline uint  fd_uint_load_3_fast  ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(uint  const *)p) & 0x00ffffffU;           } /* Tail read 1B */
#define fd_uint_load_4_fast   fd_uint_load_4

#define fd_ulong_load_1_fast  fd_ulong_load_1
#define fd_ulong_load_2_fast  fd_ulong_load_2
FD_FN_PURE static inline ulong fd_ulong_load_3_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return ((ulong)*(uint  const *)p) & 0x0000000000ffffffUL;   } /* Tail read 1B */
#define fd_ulong_load_4_fast  fd_ulong_load_4
FD_FN_PURE static inline ulong fd_ulong_load_5_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong const *)p) & 0x000000ffffffffffUL;  } /* Tail read 3B */
FD_FN_PURE static inline ulong fd_ulong_load_6_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong const *)p) & 0x0000ffffffffffffUL;  } /* Tail read 2B */
FD_FN_PURE static inline ulong fd_ulong_load_7_fast ( void const * p ) { FD_COMPILER_FORGET( p ); return (        *(ulong const *)p) & 0x00ffffffffffffffUL;  } /* Tail read 1B */
#define fd_ulong_load_8_fast  fd_ulong_load_8
775 :
776 : #else
777 : #error "Unsupported FD_UNALIGNED_ACCESS_STYLE"
778 : #endif
779 :
780 : /* fd_ulong_svw_enc_sz returns the number of bytes needed to encode
781 : x as a symmetric variable width encoded integer. This is at most
782 : FD_ULONG_SVW_ENC_MAX (9). Result will be in {1,2,3,4,5,8,9}. */
783 :
784 : #define FD_ULONG_SVW_ENC_MAX (9UL) /* For compile time use */
785 :
786 : FD_FN_UNUSED FD_FN_CONST static ulong /* Work around -Winline */
787 719287029 : fd_ulong_svw_enc_sz( ulong x ) {
788 : /* FIXME: CONSIDER FIND_MSB BASED TABLE LOOKUP? */
789 719287029 : if( FD_LIKELY( x<(1UL<< 6) ) ) return 1UL;
790 288992580 : if( FD_LIKELY( x<(1UL<<10) ) ) return 2UL;
791 43497816 : if( FD_LIKELY( x<(1UL<<18) ) ) return 3UL;
792 35686536 : if( FD_LIKELY( x<(1UL<<24) ) ) return 4UL;
793 30959490 : if( FD_LIKELY( x<(1UL<<32) ) ) return 5UL;
794 24571458 : if( FD_LIKELY( x<(1UL<<56) ) ) return 8UL;
795 5695983 : return 9UL;
796 24571458 : }
797 :
/* fd_ulong_svw_enc appends x to the byte stream b as a symmetric
   variable width encoded integer.  b should have room for
   fd_ulong_svw_enc_sz(x) bytes (note that 9 is sufficient for all
   possible x).  Returns the next location in the byte stream. */
802 :
/* Each encoding places matching marker bits at both the least and most
   significant end (LSB marker | payload | MSB marker, per the trailing
   comments), which is what makes the stream decodable from either end
   (see fd_ulong_svw_dec_sz / fd_ulong_svw_dec_tail_sz below). */

FD_FN_UNUSED static uchar * /* Work around -Winline */
fd_ulong_svw_enc( uchar * b,
                  ulong   x ) {
  if(      FD_LIKELY( x<(1UL<< 6) ) ) { b[0] = (uchar) (x<< 1);                                                               b+=1; } /* 0    | x( 6) | 0    */
  else if( FD_LIKELY( x<(1UL<<10) ) ) { FD_STORE( ushort, b, (ushort)( 0x8001UL | (x<<3)) );                                  b+=2; } /* 100  | x(10) | 001  */
  else if( FD_LIKELY( x<(1UL<<18) ) ) { FD_STORE( ushort, b, (ushort)(    0x5UL | (x<<3)) ); b[2] = (uchar)(0xa0UL | (x>>13)); b+=3; } /* 101  | x(18) | 101  */
  else if( FD_LIKELY( x<(1UL<<24) ) ) { FD_STORE( uint,   b, (uint  )( 0xc0000003UL | (x<<4)) );                              b+=4; } /* 1100 | x(24) | 0011 */
  else if( FD_LIKELY( x<(1UL<<32) ) ) { FD_STORE( uint,   b, (uint  )(    0xbUL | (x<<4)) ); b[4] = (uchar)(0xd0UL | (x>>28)); b+=5; } /* 1101 | x(32) | 1011 */
  else if( FD_LIKELY( x<(1UL<<56) ) ) { FD_STORE( ulong,  b, 0xe000000000000007UL | (x<<4) );                                 b+=8; } /* 1110 | x(56) | 0111 */
  else                                { FD_STORE( ulong,  b,    0xfUL | (x<<4) );            b[8] = (uchar)(0xf0UL | (x>>60)); b+=9; } /* 1111 | x(64) | 1111 */
  return b;
}
815 :
816 : /* fd_ulong_svw_enc_fixed appends x to the byte stream b as a symmetric
817 : csz width encoded integer. csz is assumed to be in {1,2,3,4,5,8,9}.
   b should have room for csz bytes and x should be known a priori to be
819 : compatible with csz. Useful for updating in place an existing
820 : encoded integer to a value that is <= the current value. Returns
821 : b+csz. */
822 :
/* Same bit layouts as fd_ulong_svw_enc above but the encoding width is
   forced to csz rather than derived from x (caller promises x fits). */

FD_FN_UNUSED static uchar * /* Work around -Winline */
fd_ulong_svw_enc_fixed( uchar * b,
                        ulong   csz,
                        ulong   x ) {
  if(      FD_LIKELY( csz==1UL ) ) { b[0] = (uchar) (x<< 1);                                                               } /* 0    | x( 6) | 0    */
  else if( FD_LIKELY( csz==2UL ) ) { FD_STORE( ushort, b, (ushort)( 0x8001UL | (x<<3)) );                                  } /* 100  | x(10) | 001  */
  else if( FD_LIKELY( csz==3UL ) ) { FD_STORE( ushort, b, (ushort)(    0x5UL | (x<<3)) ); b[2] = (uchar)(0xa0UL | (x>>13)); } /* 101  | x(18) | 101  */
  else if( FD_LIKELY( csz==4UL ) ) { FD_STORE( uint,   b, (uint  )( 0xc0000003UL | (x<<4)) );                              } /* 1100 | x(24) | 0011 */
  else if( FD_LIKELY( csz==5UL ) ) { FD_STORE( uint,   b, (uint  )(    0xbUL | (x<<4)) ); b[4] = (uchar)(0xd0UL | (x>>28)); } /* 1101 | x(32) | 1011 */
  else if( FD_LIKELY( csz==8UL ) ) { FD_STORE( ulong,  b, 0xe000000000000007UL | (x<<4) );                                 } /* 1110 | x(56) | 0111 */
  else /* csz==9UL */              { FD_STORE( ulong,  b,    0xfUL | (x<<4) );            b[8] = (uchar)(0xf0UL | (x>>60)); } /* 1111 | x(64) | 1111 */
  return b+csz;
}
836 :
837 : /* fd_ulong_svw_dec_sz returns the number of bytes representing an svw
838 : encoded integer. b points to the first byte of the encoded integer.
839 : Result will be in {1,2,3,4,5,8,9}. */
840 :
FD_FN_PURE static inline ulong
fd_ulong_svw_dec_sz( uchar const * b ) {

  /* LSB: Compressed size
     xxxx|xxx0 -> 1B
     xxxx|x001 -> 2B
     xxxx|x101 -> 3B
     xxxx|0011 -> 4B
     xxxx|1011 -> 5B
     xxxx|0111 -> 8B
     xxxx|1111 -> 9B

       15   14   13   12   11   10    9    8    7    6    5    4    3    2    1    0
     1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
        9    1    3    1    5    1    2    1    8    1    3    1    4    1    2    1 */

  /* Branchless: the low nibble of the first byte indexes a 16 entry
     table of 4 bit sizes packed into a single ulong constant. */
  return (0x9131512181314121UL >> ((((ulong)b[0]) & 15UL) << 2)) & 15UL;
}
859 :
860 : /* fd_ulong_svw_dec_tail_sz returns the number of bytes representing an
861 : svw encoded integer. b points to one after the last byte of the
862 : encoded integer. Result will be in {1,2,3,4,5,8,9}. */
863 :
FD_FN_PURE static inline ulong
fd_ulong_svw_dec_tail_sz( uchar const * b ) {

  /* MSB: Compressed size
     0xxx|xxxx -> 1B
     100x|xxxx -> 2B
     101x|xxxx -> 3B
     1100|xxxx -> 4B
     1101|xxxx -> 5B
     1110|xxxx -> 8B
     1111|xxxx -> 9B

       15   14   13   12   11   10    9    8    7    6    5    4    3    2    1    0
     1111 1110 1101 1100 1011 1010 1001 1000 0111 0110 0101 0100 0011 0010 0001 0000
        9    8    5    4    3    3    2    2    1    1    1    1    1    1    1    1 */

  /* Branchless: the high nibble of the last byte indexes a 16 entry
     table of 4 bit sizes packed into a single ulong constant. */
  return (0x9854332211111111UL >> ((((ulong)b[-1]) >> 4) << 2)) & 15UL;
}
882 :
883 : /* fd_ulong_svw_dec_fixed decodes a ulong encoded as a symmetric
884 : variable width encoded integer whose width is known. b points to the
885 : first byte of the encoded integer and the encoded integer is csz
886 : byte. csz is assumed to be in {1,2,3,4,5,8,9}. */
887 :
FD_FN_UNUSED static ulong /* Work around -Winline */
fd_ulong_svw_dec_fixed( uchar const * b,
                        ulong         csz ) {
  /* Each case shifts off the LSB marker bits, masks to the payload
     width, and (for the 3, 5 and 9 byte encodings) ors in the payload
     bits carried by the trailing byte.  Inverse of
     fd_ulong_svw_enc_fixed above. */
  if( FD_LIKELY( csz==1UL ) ) return (fd_ulong_load_1( b ) >> 1);
  if( FD_LIKELY( csz==2UL ) ) return (fd_ulong_load_2( b ) >> 3) & 1023UL;
  if( FD_LIKELY( csz==3UL ) ) return (fd_ulong_load_2( b ) >> 3) | ((((ulong)b[2]) & 0x1fUL) << 13);
  if( FD_LIKELY( csz==4UL ) ) return (fd_ulong_load_4( b ) >> 4) & 16777215UL;
  if( FD_LIKELY( csz==5UL ) ) return (fd_ulong_load_4( b ) >> 4) | ((((ulong)b[4]) & 0x0fUL) << 28);
  if( FD_LIKELY( csz==8UL ) ) return (fd_ulong_load_8( b ) >> 4) & 72057594037927935UL;
  return /*csz==9UL*/         (fd_ulong_load_8( b ) >> 4) | (  ((ulong)b[8])          << 60);
}
899 :
/* fd_ulong_svw_dec decodes a ulong encoded as a symmetric variable
   width encoded integer.  b points to the first byte of the encoded
   integer.  Returns a pointer to the first byte after the symvarint and
   *_x will hold the uncompressed value on return.  If the byte stream
   might be corrupt, it should be safe to read up to 9 bytes starting at
   b. */
906 :
907 : static inline uchar const *
908 : fd_ulong_svw_dec( uchar const * b,
909 101037444 : ulong * _x ) {
910 101037444 : ulong csz = fd_ulong_svw_dec_sz( b );
911 101037444 : *_x = fd_ulong_svw_dec_fixed( b, csz ); b += csz;
912 101037444 : return b;
913 101037444 : }
914 :
/* fd_ulong_svw_dec_tail decodes a ulong encoded as a symmetric variable
   width encoded integer.  b points to the first byte after the encoded
   integer.  Returns a pointer to the first byte of the encoded integer
   and *_x will hold the uncompressed value on return.  If the byte
   stream might be corrupt, it should be safe to read up to 9 bytes
   immediately before b. */
921 :
922 : static inline uchar const *
923 : fd_ulong_svw_dec_tail( uchar const * b,
924 100663296 : ulong * _x ) {
925 100663296 : ulong csz = fd_ulong_svw_dec_tail_sz( b );
926 100663296 : b -= csz; *_x = fd_ulong_svw_dec_fixed( b, csz );
927 100663296 : return b;
928 100663296 : }
929 :
930 : /* FD_LAYOUT_{INIT,APPEND,FINI} are useful for compile time
931 : determination of the required footprint of shared memory regions with
932 : dynamic sizes and complex alignment restrictions.
933 :
934 : FD_LAYOUT_INIT starts a layout. Returns a handle to the layout.
935 :
936 : FD_LAYOUT_APPEND appends a s byte region of alignment a to a layout
937 : where l is an in progress layout.
938 :
939 : FD_LAYOUT_FINI returns the final layout footprint. a is the
940 : alignment to be used for the overall layout. It should be the
941 : alignment of all appends. The final footprint will be a multiple of
942 : a.
943 :
944 : All arguments should be ulong compatible. All alignment should be a
945 : positive integer power of 2 and safe against multiple evaluation.
946 :
   The caller further promises the layout is not so unreasonably large
   that overflow might be an issue (i.e. the final footprint will be at
   most fd_ulong_align_dn( ULONG_MAX, a ) where a is the alignment used
   for FINI).
950 :
951 : Example usage:
952 :
953 : FD_LAYOUT_FINI( FD_LAYOUT_APPEND( FD_LAYOUT_APPEND( FD_LAYOUT_INIT,
954 : align0, size0 ),
955 : align1, size1 ),
956 : page_sz )
957 :
958 : would return the number of pages as a page_sz multiple for a shared
959 : memory region that starts with a initial/final region of size0/size1
960 : bytes and alignment align0/align1. Correct operation requires
961 : page_sz>=max(align0,align1), page_sz, align0 and align1 be positive
962 : integer powers of 2, page_sz, size0, align0 and align1 should be
963 : ulong compatible, page_sz, align0 and align1 be safe against multiple
964 : evaluation, and the final size be at most ULONG_MAX-page_sz+1. */
965 :
#define FD_LAYOUT_INIT              (0UL)                                  /* empty layout, zero footprint               */
#define FD_LAYOUT_APPEND( l, a, s ) (FD_ULONG_ALIGN_UP( (l), (a) ) + (s))  /* pad l up to alignment a, reserve s bytes   */
#define FD_LAYOUT_FINI( l, a )      FD_ULONG_ALIGN_UP( (l), (a) )          /* pad total footprint to a multiple of a     */
969 :
970 : /* FD_SCRATCH_ALLOC_{INIT,APPEND,FINI} are utility macros for allocating
971 : sub-regions of memory out of one larger region of memory. These are
972 : intentionally parallel to FD_LAYOUT, but for use at runtime, not
973 : compile time, and when you want to know the actual addresses, and not
974 : just the total footprint.
975 :
976 : FD_SCRATCH_ALLOC_INIT begins a scratch allocation operation called
977 : layout with starting base address of base.
978 :
979 : FD_SCRATCH_ALLOC_APPEND returns the next address in the allocation
980 : operation `layout` with the required alignment and advances the
981 : allocation operation by the provided size.
982 :
983 : FD_SCRATCH_ALLOC_FINI finalizes a scratch allocation operation with
984 : the name given by `layout` and returns the next address with the
985 : requested alignment.
986 :
987 : align must be a power of 2, and layout should be an identifier name.
988 : The macros are robust otherwise.
989 :
990 : Example usage:
991 : FD_SCRATCH_ALLOC_INIT( foo, scratch_base );
992 : int * arr1 = FD_SCRATCH_ALLOC_APPEND( foo, alignof(int), 100*sizeof(int) );
993 : ulong * arr2 = FD_SCRATCH_ALLOC_APPEND( foo, alignof(ulong), 25*sizeof(ulong) );
994 : FD_SCRATCH_ALLOC_FINI( foo, 32UL );
995 : */
#define FD_SCRATCH_ALLOC_INIT( layout, base ) ulong _##layout = (ulong)(base)

/* Align up the in progress layout, verify the reservation does not wrap
   the address space, advance the layout and evaluate to the reserved
   region's address.  align and sz are captured exactly once (into
   _align / _sz) so the macro is robust against side effecting
   arguments; previously align was re-evaluated in the
   fd_ulong_align_up call. */
#define FD_SCRATCH_ALLOC_APPEND( layout, align, sz ) (__extension__({                                \
    ulong _align = (align);                                                                          \
    ulong _sz    = (sz);                                                                             \
    ulong _scratch_alloc = fd_ulong_align_up( _##layout, _align );                                   \
    if( FD_UNLIKELY( _scratch_alloc+_sz<_scratch_alloc ) )                                           \
      FD_LOG_ERR(( "FD_SCRATCH_ALLOC_APPEND( %s, %lu, %lu ) overflowed", #layout, _align, _sz ));    \
    _##layout = _scratch_alloc + _sz;                                                                \
    (void *)_scratch_alloc;                                                                          \
  }))

/* NOTE(review): align is still evaluated twice here (FD_ULONG_ALIGN_UP
   is a compile-time macro); keep align side effect free.  Not converted
   to a statement expression so existing usage contexts are unaffected. */
#define FD_SCRATCH_ALLOC_FINI( layout, align ) (_##layout = FD_ULONG_ALIGN_UP( _##layout, (align) ) )
1007 :
/* Finalize the layout and publish its end pointer to the scratch
   allocator when safe; evaluates to 1 on success and 0 otherwise.
   NOTE(review): fd_scratch_publish_is_safe presumably checks end is
   within the current scratch frame -- confirm against its definition. */
#define FD_SCRATCH_ALLOC_PUBLISH( layout ) (__extension__({         \
    void * end = (void *)FD_SCRATCH_ALLOC_FINI( layout, 1UL );      \
    int ok = fd_scratch_publish_is_safe( end );                     \
    if( ok ) fd_scratch_publish( end );                             \
    ok;                                                             \
  }))
1014 :
1015 : FD_PROTOTYPES_END
1016 :
1017 : #endif /* HEADER_fd_src_util_bits_fd_bits_h */
|