LCOV - code coverage report
Current view: top level - util/bits - fd_bits_find_lsb.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 70 70 100.0 %
Date: 2025-01-08 12:08:44 Functions: 28 6770 0.4 %

          Line data    Source code
       1             : /* Included by fd_bits.h */
       2             : /* DO NOT INCLUDE DIRECTLY */
       3             : 
       4             : /* find_lsb */
       5             : 
       6             : #if FD_HAS_X86 && __BMI__
       7             : 
       8             : /* __builtin_ctz(l)( x ) has proven to be faster than
       9             :    __builtin_ffs{l}(x)-1, the only numerical difference being the return
      10             :    value when x = 0 (U.B.).  For X86 targets, __builtin_ctz(l)
      11             :    translates into tzcnt, which may not be supported in very old ones.
      12             :    In this case it is only used if the compiler defines __BMI__ too. */
      13             : 
      14         108 : FD_FN_CONST static inline int fd_uchar_find_lsb ( uchar  x ) { return __builtin_ctz ( (uint)x ); }
      15         408 : FD_FN_CONST static inline int fd_ushort_find_lsb( ushort x ) { return __builtin_ctz ( (uint)x ); }
      16        1584 : FD_FN_CONST static inline int fd_uint_find_lsb  ( uint   x ) { return __builtin_ctz (       x ); }
      17  3938351979 : FD_FN_CONST static inline int fd_ulong_find_lsb ( ulong  x ) { return __builtin_ctzl(       x ); }
      18             : 
      19             : #else /* all other architectures */
      20             : 
      21             : FD_FN_CONST static inline int fd_uchar_find_lsb ( uchar  x ) { return __builtin_ffs ( (int )x )-1; }
      22             : FD_FN_CONST static inline int fd_ushort_find_lsb( ushort x ) { return __builtin_ffs ( (int )x )-1; }
      23             : FD_FN_CONST static inline int fd_uint_find_lsb  ( uint   x ) { return __builtin_ffs ( (int )x )-1; }
      24             : FD_FN_CONST static inline int fd_ulong_find_lsb ( ulong  x ) { return __builtin_ffsl( (long)x )-1; }
      25             : 
      26             : #endif
      27             : 
      28             : #if FD_HAS_INT128
      29             : 
      30             : #if FD_HAS_X86 && !FD_HAS_MSAN
      31             : 
      32             : FD_FN_CONST static inline int
      33       24768 : fd_uint128_find_lsb( uint128 x ) {
      34       24768 :   ulong xl  = (ulong) x;
      35       24768 :   ulong xh  = (ulong)(x>>64);
      36       24768 :   int   _64 = 64;
      37       24768 :   int   c   = 0;
      38       24768 :   __asm__( "testq %1, %1   # cc.zf = !xl;\n\t"
      39       24768 :            "cmovz %3, %0   # if( !xl ) c = 64;\n\t"
      40       24768 :            "cmovz %2, %1   # if( !xl ) xl = xh;"
      41       24768 :          : "+&r" (c), "+&r" (xl) : "r" (xh), "r" (_64) : "cc" );
      42       24768 :   return c + fd_ulong_find_lsb( xl );
      43       24768 : }
      44             : 
      45             : #else /* all other architectures */
      46             : 
      47             : FD_FN_CONST static inline int
      48             : fd_uint128_find_lsb( uint128 x ) {
      49             :   ulong xl = (ulong) x;
      50             :   ulong xh = (ulong)(x>>64);
      51             :   int   c  = !xl;
      52             :   return (((c)<<6)-1) + __builtin_ffsl( (long)fd_ulong_if( c, xh, xl ) );
      53             : }
      54             : 
      55             : #endif
      56             : 
      57             : #endif
      58             : 
      59             : /* find_lsb_w_default */
      60             : 
      61             : #if FD_HAS_X86 && !FD_HAS_MSAN
      62             : 
      63             : /* FIXME: WHY UINT -> INT CAST THROUGH UNION? */
      64             : 
      65             : /* find_lsb_w_default has been optimized for both tzcnt and bsf.  Note
      66             :    that in older Intel architectures (before Skylake) tzcnt has a false
      67             :    dependency on the destination register (this is true for bsf, but
      68             :    should not be the case for tzcnt).  Instead of using the typical xor
      69             :    operation on the output register (to break the dependency), the code
      70             :    below reuses the input register to that end, without affecting the
      71             :    performance on newer architectures. */
      72             : 
      73             : FD_FN_CONST static inline int
      74             : fd_uchar_find_lsb_w_default( uchar x,
      75         111 :                              int   d ) {
      76         111 :   union { int i; uint u; } r, c;
      77         111 :   c.i = d;
      78         111 :   r.u = (uint)x;
      79         111 : # if __BMI__
      80         111 :   __asm__( " tzcnt %0, %0 # cc.cf = !x;\n\t"
      81         111 :            " cmovb %1, %0 # move if cf is set;"
      82         111 :            : "+&r" (r.u) : "r" (c.u) : "cc" );
      83             : # else
      84             :   __asm__( " bsf   %0, %0 # cc.zf = !x;\n\t"
      85             :            " cmovz %1, %0 # move if zf is set;"
      86             :            : "+&r" (r.u) : "r" (c.u) : "cc" );
      87             : # endif
      88         111 :   return r.i;
      89         111 : }
      90             : 
      91             : FD_FN_CONST static inline int
      92             : fd_ushort_find_lsb_w_default( ushort x,
      93         411 :                               int    d ) {
      94         411 :   union { int i; uint u; } r, c;
      95         411 :   c.i = d;
      96         411 :   r.u = (uint)x;
      97         411 : # if __BMI__
      98         411 :   __asm__( " tzcnt %0, %0 # cc.cf = !x;\n\t"
      99         411 :            " cmovb %1, %0 # move if cf is set;"
     100         411 :            : "+&r" (r.u) : "r" (c.u) : "cc" );
     101             : # else
     102             :   __asm__( " bsf   %0, %0 # cc.zf = !x;\n\t"
     103             :            " cmovz %1, %0 # move if zf is set;"
     104             :            : "+&r" (r.u) : "r" (c.u) : "cc" );
     105             : # endif
     106         411 :   return r.i;
     107         411 : }
     108             : 
     109             : FD_FN_CONST static inline int
     110             : fd_uint_find_lsb_w_default( uint x,
     111        1587 :                             int  d ) {
     112        1587 :   union { int i; uint u; } r, c;
     113        1587 :   c.i = d;
     114        1587 :   r.u = x;
     115        1587 : # if __BMI__
     116        1587 :   __asm__(  " tzcnt %0, %0 # cc.cf = !x;\n\t"
     117        1587 :             " cmovb %1, %0 # move if cf is set;"
     118        1587 :           : "+&r" (r.u) : "r" (c.u) : "cc" );
     119             : # else
     120             :   __asm__(  " bsf   %0, %0 # cc.zf = !x;\n\t"
     121             :             " cmovz %1, %0 # move if zf is set;"
     122             :           : "+&r" (r.u) : "r" (c.u) : "cc" );
     123             : # endif
     124        1587 :   return r.i;
     125        1587 : }
     126             : 
     127             : FD_FN_CONST static inline int
     128             : fd_ulong_find_lsb_w_default( ulong x,
     129        6327 :                              int   d ) {
     130        6327 :   union { long l; ulong u; } r, c;
     131        6327 :   c.l = (long)d;
     132        6327 :   r.u = x;
     133        6327 : # if __BMI__
     134        6327 :   __asm__( " tzcnt %0, %0 # cc.cf = !x;\n\t"
     135        6327 :            " cmovb %1, %0 # move if cf is set;"
     136        6327 :            : "+&r" (r.u) : "r" (c.u) : "cc" );
     137             : # else
     138             :   __asm__( " bsf   %0, %0 # cc.zf = !x;\n\t"
     139             :            " cmovz %1, %0 # move if zf is set;"
     140             :            : "+&r" (r.u) : "r" (c.u) : "cc" );
     141             : # endif
     142        6327 :   return (int)r.l;
     143        6327 : }
     144             : 
     145             : #else /* other architectures */
     146             : 
     147             : FD_FN_CONST static inline int fd_uchar_find_lsb_w_default ( uchar   x, int d ) { return (!x) ? d : fd_uchar_find_lsb ( x ); }
     148             : FD_FN_CONST static inline int fd_ushort_find_lsb_w_default( ushort  x, int d ) { return (!x) ? d : fd_ushort_find_lsb( x ); }
     149             : FD_FN_CONST static inline int fd_uint_find_lsb_w_default  ( uint    x, int d ) { return (!x) ? d : fd_uint_find_lsb  ( x ); }
     150             : FD_FN_CONST static inline int fd_ulong_find_lsb_w_default ( ulong   x, int d ) { return (!x) ? d : fd_ulong_find_lsb ( x ); }
     151             : 
     152             : #endif
     153             : 
     154             : #if FD_HAS_INT128
     155             : 
     156             : #if FD_HAS_X86 && !FD_HAS_MSAN
     157             : 
     158             : FD_FN_CONST static inline int
     159             : fd_uint128_find_lsb_w_default( uint128 x,
     160       24771 :                                int     d ) {
     161       24771 :   ulong xl  = (ulong) x;
     162       24771 :   ulong xh  = (ulong)(x>>64);
     163       24771 :   int   c   = 0;
     164       24771 :   int   _64 = 64;
     165       24771 :   __asm__( "testq  %2, %2  # cc.zf = !xl;\n\t"
     166       24771 :            "cmovz  %3, %0  # if( !xl ) c = 64;\n\t"
     167       24771 :            "cmovnz %2, %1  # if(!!xl ) xh = xl;"
     168       24771 :            : "+&r" (c), "+&r" (xh) : "r" (xl), "r" (_64) : "cc" );
     169       24771 :   int r = c + fd_ulong_find_lsb( xh );
     170       24771 :   __asm__( "testq  %1, %1  # cc.zf = !xh;\n\t"
     171       24771 :            "cmovz  %2, %0  # if( !xl ) c = d;"
     172       24771 :            : "+&r" (r) : "r" (xh), "r" (d) : "cc" );
     173       24771 :   return r;
     174       24771 : }
     175             : 
     176             : #else /* other architectures */
     177             : 
     178             : FD_FN_CONST static inline int fd_uint128_find_lsb_w_default( uint128 x, int d ) { return (!x) ? d : fd_uint128_find_lsb( x ); }
     179             : 
     180             : #endif
     181             : 
     182             : #endif
     183             : 

Generated by: LCOV version 1.14