Line data Source code
1 : /* Included by fd_bits.h */
2 : /* DO NOT INCLUDE DIRECTLY */
3 :
4 : /* find_lsb */
5 :
6 : #if FD_HAS_X86 && __BMI__
7 :
8 : /* __builtin_ctz(l)( x ) has proven to be faster than
9 : __builtin_ffs{l}(x)-1, the only numerical difference being the return
10 : value when x = 0 (U.B.). For X86 targets, __builtin_ctz(l)
11 : translates into tzcnt, which may not be supported in very old ones.
12 : In this case it is only used if the compiler defines __BMI__ too. */
13 :
14 108 : FD_FN_CONST static inline int fd_uchar_find_lsb ( uchar x ) { return __builtin_ctz ( (uint)x ); }
15 408 : FD_FN_CONST static inline int fd_ushort_find_lsb( ushort x ) { return __builtin_ctz ( (uint)x ); }
16 1584 : FD_FN_CONST static inline int fd_uint_find_lsb ( uint x ) { return __builtin_ctz ( x ); }
17 3938351979 : FD_FN_CONST static inline int fd_ulong_find_lsb ( ulong x ) { return __builtin_ctzl( x ); }
18 :
19 : #else /* all other architectures */
20 :
21 : FD_FN_CONST static inline int fd_uchar_find_lsb ( uchar x ) { return __builtin_ffs ( (int )x )-1; }
22 : FD_FN_CONST static inline int fd_ushort_find_lsb( ushort x ) { return __builtin_ffs ( (int )x )-1; }
23 : FD_FN_CONST static inline int fd_uint_find_lsb ( uint x ) { return __builtin_ffs ( (int )x )-1; }
24 : FD_FN_CONST static inline int fd_ulong_find_lsb ( ulong x ) { return __builtin_ffsl( (long)x )-1; }
25 :
26 : #endif
27 :
28 : #if FD_HAS_INT128
29 :
30 : #if FD_HAS_X86 && !FD_HAS_MSAN
31 :
32 : FD_FN_CONST static inline int
33 24768 : fd_uint128_find_lsb( uint128 x ) {
34 24768 : ulong xl = (ulong) x;
35 24768 : ulong xh = (ulong)(x>>64);
36 24768 : int _64 = 64;
37 24768 : int c = 0;
38 24768 : __asm__( "testq %1, %1 # cc.zf = !xl;\n\t"
39 24768 : "cmovz %3, %0 # if( !xl ) c = 64;\n\t"
40 24768 : "cmovz %2, %1 # if( !xl ) xl = xh;"
41 24768 : : "+&r" (c), "+&r" (xl) : "r" (xh), "r" (_64) : "cc" );
42 24768 : return c + fd_ulong_find_lsb( xl );
43 24768 : }
44 :
45 : #else /* all other architectures */
46 :
47 : FD_FN_CONST static inline int
48 : fd_uint128_find_lsb( uint128 x ) {
49 : ulong xl = (ulong) x;
50 : ulong xh = (ulong)(x>>64);
51 : int c = !xl;
52 : return (((c)<<6)-1) + __builtin_ffsl( (long)fd_ulong_if( c, xh, xl ) );
53 : }
54 :
55 : #endif
56 :
57 : #endif
58 :
59 : /* find_lsb_w_default */
60 :
61 : #if FD_HAS_X86 && !FD_HAS_MSAN
62 :
63 : /* FIXME: WHY UINT -> INT CAST THROUGH UNION? */
64 :
65 : /* find_lsb_w_default has been optimized for both tzcnt and bsf. Note
66 : that in older Intel architectures (before Skylake) tzcnt has a false
67 : dependency on the destination register (this is true for bsf, but
68 : should not be the case for tzcnt). Instead of using the typical xor
69 : operation on the output register (to break the dependency), the code
70 : below reuses the input register to that end, without affecting the
71 : performance on newer architectures. */
72 :
73 : FD_FN_CONST static inline int
74 : fd_uchar_find_lsb_w_default( uchar x,
75 111 : int d ) {
76 111 : union { int i; uint u; } r, c;
77 111 : c.i = d;
78 111 : r.u = (uint)x;
79 111 : # if __BMI__
80 111 : __asm__( " tzcnt %0, %0 # cc.cf = !x;\n\t"
81 111 : " cmovb %1, %0 # move if cf is set;"
82 111 : : "+&r" (r.u) : "r" (c.u) : "cc" );
83 : # else
84 : __asm__( " bsf %0, %0 # cc.zf = !x;\n\t"
85 : " cmovz %1, %0 # move if zf is set;"
86 : : "+&r" (r.u) : "r" (c.u) : "cc" );
87 : # endif
88 111 : return r.i;
89 111 : }
90 :
91 : FD_FN_CONST static inline int
92 : fd_ushort_find_lsb_w_default( ushort x,
93 411 : int d ) {
94 411 : union { int i; uint u; } r, c;
95 411 : c.i = d;
96 411 : r.u = (uint)x;
97 411 : # if __BMI__
98 411 : __asm__( " tzcnt %0, %0 # cc.cf = !x;\n\t"
99 411 : " cmovb %1, %0 # move if cf is set;"
100 411 : : "+&r" (r.u) : "r" (c.u) : "cc" );
101 : # else
102 : __asm__( " bsf %0, %0 # cc.zf = !x;\n\t"
103 : " cmovz %1, %0 # move if zf is set;"
104 : : "+&r" (r.u) : "r" (c.u) : "cc" );
105 : # endif
106 411 : return r.i;
107 411 : }
108 :
109 : FD_FN_CONST static inline int
110 : fd_uint_find_lsb_w_default( uint x,
111 1587 : int d ) {
112 1587 : union { int i; uint u; } r, c;
113 1587 : c.i = d;
114 1587 : r.u = x;
115 1587 : # if __BMI__
116 1587 : __asm__( " tzcnt %0, %0 # cc.cf = !x;\n\t"
117 1587 : " cmovb %1, %0 # move if cf is set;"
118 1587 : : "+&r" (r.u) : "r" (c.u) : "cc" );
119 : # else
120 : __asm__( " bsf %0, %0 # cc.zf = !x;\n\t"
121 : " cmovz %1, %0 # move if zf is set;"
122 : : "+&r" (r.u) : "r" (c.u) : "cc" );
123 : # endif
124 1587 : return r.i;
125 1587 : }
126 :
127 : FD_FN_CONST static inline int
128 : fd_ulong_find_lsb_w_default( ulong x,
129 6327 : int d ) {
130 6327 : union { long l; ulong u; } r, c;
131 6327 : c.l = (long)d;
132 6327 : r.u = x;
133 6327 : # if __BMI__
134 6327 : __asm__( " tzcnt %0, %0 # cc.cf = !x;\n\t"
135 6327 : " cmovb %1, %0 # move if cf is set;"
136 6327 : : "+&r" (r.u) : "r" (c.u) : "cc" );
137 : # else
138 : __asm__( " bsf %0, %0 # cc.zf = !x;\n\t"
139 : " cmovz %1, %0 # move if zf is set;"
140 : : "+&r" (r.u) : "r" (c.u) : "cc" );
141 : # endif
142 6327 : return (int)r.l;
143 6327 : }
144 :
145 : #else /* other architectures */
146 :
147 : FD_FN_CONST static inline int fd_uchar_find_lsb_w_default ( uchar x, int d ) { return (!x) ? d : fd_uchar_find_lsb ( x ); }
148 : FD_FN_CONST static inline int fd_ushort_find_lsb_w_default( ushort x, int d ) { return (!x) ? d : fd_ushort_find_lsb( x ); }
149 : FD_FN_CONST static inline int fd_uint_find_lsb_w_default ( uint x, int d ) { return (!x) ? d : fd_uint_find_lsb ( x ); }
150 : FD_FN_CONST static inline int fd_ulong_find_lsb_w_default ( ulong x, int d ) { return (!x) ? d : fd_ulong_find_lsb ( x ); }
151 :
152 : #endif
153 :
154 : #if FD_HAS_INT128
155 :
156 : #if FD_HAS_X86 && !FD_HAS_MSAN
157 :
158 : FD_FN_CONST static inline int
159 : fd_uint128_find_lsb_w_default( uint128 x,
160 24771 : int d ) {
161 24771 : ulong xl = (ulong) x;
162 24771 : ulong xh = (ulong)(x>>64);
163 24771 : int c = 0;
164 24771 : int _64 = 64;
165 24771 : __asm__( "testq %2, %2 # cc.zf = !xl;\n\t"
166 24771 : "cmovz %3, %0 # if( !xl ) c = 64;\n\t"
167 24771 : "cmovnz %2, %1 # if(!!xl ) xh = xl;"
168 24771 : : "+&r" (c), "+&r" (xh) : "r" (xl), "r" (_64) : "cc" );
169 24771 : int r = c + fd_ulong_find_lsb( xh );
170 24771 : __asm__( "testq %1, %1 # cc.zf = !xh;\n\t"
171 24771 : "cmovz %2, %0 # if( !xl ) c = d;"
172 24771 : : "+&r" (r) : "r" (xh), "r" (d) : "cc" );
173 24771 : return r;
174 24771 : }
175 :
176 : #else /* other architectures */
177 :
178 : FD_FN_CONST static inline int fd_uint128_find_lsb_w_default( uint128 x, int d ) { return (!x) ? d : fd_uint128_find_lsb( x ); }
179 :
180 : #endif
181 :
182 : #endif
183 :
|