LCOV - code coverage report
Current view: top level - util/bits - fd_bits.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 99 99 100.0 %
Date: 2025-08-05 05:04:49 Functions: 8 8 100.0 %

          Line data    Source code
       1             : #include "fd_bits.h"
       2             : 
       3             : ulong
       4  1190663193 : fd_ulong_approx_sqrt( ulong x ) {
       5             : 
       6             :   /* Theory:
       7             : 
       8             :      Break a non-zero x exactly into:
       9             : 
      10             :        x = 2^e ( 1 + m/2^63 )
      11             : 
      12             :      where e is 6-bit and m is 63-bit.  Further, break e exactly into:
      13             : 
      14             :        e = 2 q + r
      15             : 
      16             :      where q is 5-bit and r in 1-bit .  Then, break m exactly into:
      17             : 
      18             :        m = u 2^61 + h 2^29 + l
      19             : 
      20             :      where u is 2-bit, h is 32-bit and l is 29-bit.  Then:
      21             : 
      22             :        y = sqrt( x )
      23             :          = 2^q 2^(r/2) sqrt( 1 + u/4 + h/2^34 + l/2^63 )
      24             : 
      25             :      We ignore the l term as very small.  For given r and u, y is
      26             :      roughly linearly with respect to h.  Let i be a 3-bit table index
      27             :      such that:
      28             : 
      29             :        i = 4 r + u
      30             : 
      31             :      and approximate:
      32             : 
      33             :        y ~ 2^q (alpha_i + h beta_i / 2^32)
      34             : 
      35             :      where:
      36             : 
      37             :        alpha_i = 2^(r/2) sqrt( 1 + u/4 )
      38             :        beta_i  = alpha_{i+1} - alpha_i
      39             : 
      40             :      The right hand term is in [1,2] for h in [0,2^32) after rounding.
      41             :      To compute this in 64-bit integer math then, we scale the right
      42             :      hand term by 2^62 yielding:
      43             : 
      44             :        y ~ round( 2^62 (alpha_i + beta_i h) / 2^(62-q) )
      45             :          ~ floor( (a_i + b_i h) / 2^(62-q) + 0.5 )
      46             :          ~ (a_i + b_i h + 2^(61-q)) / 2^(62-q)
      47             : 
      48             :      where:
      49             : 
      50             :        a_i = round( 2^62 alpha_i )
      51             :        b_i = round( 2^30  beta_i ) */
      52             : 
      53  1190663193 :   if( FD_UNLIKELY( !x ) ) return 0UL;
      54             : 
      55  1187550924 :   int   e = fd_ulong_find_msb( x );         /* In [0,63]      */
      56  1187550924 :   ulong m = x << (63-e);                    /* In [2^63,2^64) */
      57  1187550924 :   int   q = e >> 1;                         /* In [0,31]      */
      58  1187550924 :   int   r = e & 1;                          /* In [0,1]       */
      59  1187550924 :   int   i = (4*r) + (int)((m >> 61) & 3UL); /* In [0,8)       */
      60  1187550924 :   ulong h = (ulong)(uint)(m >> 29);         /* In [0,2^32)    */
      61             : 
      62             :   /* These tables are autogenerated using long double arithmetic */
      63             : 
      64  1187550924 :   static ulong const a[8] = { 4611686018427387904UL, 5156021714044493574UL, 5648138799537240564UL, 6100687164736251614UL,
      65  1187550924 :                               6521908912666391106UL, 7291715835891894856UL, 7987674492471257551UL, 8627674528165471361UL };
      66  1187550924 :   static ulong const b[8] = {           126738030UL,           114579938UL,           105367127UL,            98073331UL,
      67  1187550924 :                                         179234641UL,           162040502UL,           149011620UL,           138696634UL };
      68             : 
      69  1187550924 :   return (a[i] + b[i]*h + (1UL<<(61-q))) >> (62-q);
      70  1190663193 : }
      71             : 
      72             : ulong
      73   300000000 : fd_ulong_round_sqrt( ulong x ) {
      74             : 
      75             :   /* Theory:
      76             : 
      77             :      To find the integer y such that y = round( sqrt(x) ) with ties
      78             :      rounding toward zero, note:
      79             : 
      80             :           (y-0.5)^2 < x <= (y+0.5)^2
      81             :        -> y^2 - y + 0.25 < x <= y^2 + y + 0.25
      82             : 
      83             :      Since x and y are integral, ties can never occur.  Thus:
      84             : 
      85             :        -> y^2 - y + 0.25 < x < y^2 + y + 0.25
      86             : 
      87             :      Similarly, integral x and y means we can eliminate the 0.25 adds:
      88             : 
      89             :        -> y^2 - y + 1 <= x < y^2 + y + 1
      90             : 
      91             :      Let y^2 - y + 1 = x - r.
      92             : 
      93             :        -> x - r <= x < x - r + 2 y
      94             :        -> 0 <= r < 2 y
      95             : 
      96             :      That is, if y is round( sqrt(x) ) we must have:
      97             : 
      98             :         y^2 - y + 1 = x - r for some r in [0,2 y).
      99             : 
     100             :      Suppose we have a reasonable initial guess.  Applying
     101             :      Newton-Raphson to:
     102             : 
     103             :        f(y) = y^2 - x
     104             : 
     105             :      for real valued x and y yields:
     106             : 
     107             :        y1 = y0 + (x - y0^2)/(2 y0)
     108             : 
     109             :      Naively generalizing to integral x and y yields:
     110             : 
     111             :        y1 = y0 + floor( (x - y0^2)/(2 y0) )
     112             : 
     113             :      If this converges, at convergence we have:
     114             : 
     115             :           (x - y^2 - r) / (2 y) = 0 for some r in [0,2 y)
     116             :        -> y^2 = x - r for some r in [0,2 y)
     117             : 
     118             :      This the naive implementation doesn't converge to the desired y.
     119             :      But if we tweak the N-R iteration to:
     120             : 
     121             :        y1 = y0 + floor( (x - y0^2 + y0 - 1) / (2 y0) )
     122             : 
     123             :      At convergence we have:
     124             : 
     125             :           x - y^2 + y - 1 - r = 0 for some r in [0,2 y)
     126             :        -> y^2 - y + 1 = x - r     for some r in [0,2 y).
     127             : 
     128             :      which is the desired solution.
     129             : 
     130             :      N-R converges quadratically (i.e. the number of correct bits
     131             :      roughly doubles each iteration given a reasonable initial guess).
     132             :      The initial guess here is accurate to 4+ bits and the solution
     133             :      requires ~32 bits accuracy.  So this will complete with at most ~3
     134             :      divisions.  Though it is slightly faster for large x to use an
     135             :      unrolled implementation (e.g. iterate 3 quickly), we use a variable
     136             :      iteration count to be absolutely sure of convergence and to be
     137             :      faster when fed smaller inputs (as is done in typical usage ...
     138             :      where O(1) call is made with a moderate sized x).
     139             : 
     140             :      Since our initial guess is close, y^2 fits within 65 bits.  Thus,
     141             :      the subtraction to compute x - y^2 will "do the right thing" with
     142             :      no special handling needed for wrapping.
     143             : 
     144             :      Since modern C/C++ use round toward zero for integer division, we
     145             :      have to tweak the numerator when it is negative to convert to floor
     146             :      style division. */
     147             : 
     148   300000000 :   if( FD_UNLIKELY( !x ) ) return 0UL;
     149             : 
     150   296887731 :   ulong y = fd_ulong_approx_sqrt( x );
     151             : 
     152   588777390 :   for(;;) {
     153   588777390 :     long den = (long)(y << 1);
     154   588777390 :     long num = (long)(x - y*y + y - 1UL);
     155   588777390 :     if( ((0L<=num) & (num<den)) ) break; /* Branch prob depends on x's distribution */
     156   291889659 :     y += (ulong)((num - fd_long_if( num>=0L, 0L, den-1L )) / den);
     157   291889659 :   }
     158             : 
     159   296887731 :   return y;
     160   300000000 : }
     161             : 
     162             : ulong
     163   300000000 : fd_ulong_floor_sqrt( ulong x ) {
     164             : 
     165             :   /* Theory:
     166             : 
     167             :      To find the integer y such that y = floor( sqrt(x) ), note:
     168             : 
     169             :           y^2 <= x < (y+1)^2
     170             :        -> y^2 <= x < y^2 + 2 y + 1
     171             : 
     172             :      Let y^2 = x - r.
     173             : 
     174             :        -> x - r <= x < x - r + 2 y + 1
     175             :        -> 0 <= r < 2 y + 1
     176             : 
     177             :      That is, if y is floor( sqrt(x) ) we must have:
     178             : 
     179             :         y^2 = x - r for some r in [0,2 y + 1).
     180             : 
     181             :      Like the round implementation, naively generalizing N-R
     182             :      to integer doesn't converge to the correct y.  But if
     183             :      we tweak the N-R iteration to:
     184             : 
     185             :        y1 = y0 + floor( (x - y0^2) / (2 y0 + 1) )
     186             : 
     187             :      At convergence we have:
     188             : 
     189             :           x - y^2 - r = 0 for some r in [0,2 y+1)
     190             :        -> y^2 = x - r     for some r in [0,2 y).
     191             : 
     192             :      which is the desired solution.
     193             : 
     194             :      Similar considerations for convergence, wrapping and trunc vs floor
     195             :      division as the above. */
     196             : 
     197   300000000 :   if( FD_UNLIKELY( !x ) ) return 0UL;
     198             : 
     199   296887731 :   ulong y = fd_ulong_approx_sqrt( x );
     200             : 
     201   547129983 :   for(;;) {
     202   547129983 :     long den = (long)((y << 1) + 1UL);
     203   547129983 :     long num = (long)(x - y*y);
     204   547129983 :     if( ((0L<=num) & (num<den)) ) break; /* Branch prob depends on x's distribution */
     205   250242252 :     y += (ulong)((num - fd_long_if( num>=0L, 0L, den-1L )) / den);
     206   250242252 :   }
     207             : 
     208   296887731 :   return y;
     209   300000000 : }
     210             : 
     211             : ulong
     212   300000000 : fd_ulong_ceil_sqrt( ulong x ) {
     213             : 
     214             :   /* Theory:
     215             : 
     216             :      To find the integer y such that y = ceil( sqrt(x) ), note that
     217             :      positive x:
     218             : 
     219             :           (y-1)^2 < x <= y
     220             :        -> y^2 - 2 y + 1 < x <= y^2
     221             : 
     222             :      Let y = x + r.
     223             : 
     224             :        -> x + r - 2 y + 1 < x <= x + r
     225             :        -> -2 y + 1 < -r <= 0
     226             :        -> 0 <= r < 2 y - 1
     227             : 
     228             :      That is, if y is ceil( sqrt(x) ) we must have:
     229             : 
     230             :         y^2 = x + r for some r in [0,2 y - 1).
     231             : 
     232             :      Like the round implementation, naively generalizing N-R
     233             :      to integer doesn't converge to the correct y.  But if
     234             :      we tweak the N-R iteration to:
     235             : 
     236             :        y1 = y0 + ceil ( (x - y0^2           ) / (2 y0 - 1) )
     237             :           = y0 + floor( (x - y0^2 + 2 y0 - 2) / (2 y0 - 1) )
     238             : 
     239             :      At convergence we have:
     240             : 
     241             :           x - y^2 + r = 0 for some r in [0,2 y-1)
     242             :        -> y^2 = x + r     for some r in [0,2 y-1).
     243             : 
     244             :      which is the desired solution.
     245             : 
     246             :      Similar considerations for convergence, wrapping and trunc vs floor
     247             :      division as the above. */
     248             : 
     249   300000000 :   if( FD_UNLIKELY( !x ) ) return 0UL;
     250             : 
     251   296887731 :   ulong y = fd_ulong_approx_sqrt( x );
     252             : 
     253   625390167 :   for(;;) {
     254   625390167 :     ulong tmp = y << 1;
     255   625390167 :     long  den = (long)(tmp - 1UL);
     256   625390167 :     long  num = (long)(x - y*y + tmp - 2UL);
     257   625390167 :     if( ((0L<=num) & (num<den)) ) break; /* Branch prob depends on x's distribution */
     258   328502436 :     y += (ulong)((num - fd_long_if( num>=0L, 0L, den-1L )) / den);
     259   328502436 :   }
     260             : 
     261   296887731 :   return y;
     262   300000000 : }
     263             : 
     264             : ulong
     265  1190663193 : fd_ulong_approx_cbrt( ulong x ) {
     266             : 
     267             :   /* This is basically the same as the approx_sqrt but only uses 1
     268             :      significant bit for u. */
     269             : 
     270  1190663193 :   if( FD_UNLIKELY( !x ) ) return 0UL;
     271             : 
     272  1187550924 :   int   e = fd_ulong_find_msb( x );       /* In [0,63]      */
     273  1187550924 :   ulong m = x << (63-e);                  /* In [2^63,2^64) */
     274  1187550924 :   int   q = (int)(((uint)e) / 3U);        /* In [0,21]      */
     275  1187550924 :   int   r = (int)(((uint)e) % 3U);        /* In [0,2)       */
     276  1187550924 :   int   i = 2*r + (int)((m >> 62) & 1UL); /* In [0,6)       */
     277  1187550924 :   ulong h = (ulong)(uint)(m >> 30);       /* In [0,2^32)    */
     278             : 
     279  1187550924 :   static ulong const a[6] = {  4611686018427387904UL,  5279062667477898215UL,  5810360290122541960UL,
     280  1187550924 :                                6651202178469583219UL,  7320595236998672907UL,  8379989631760464848UL };
     281  1187550924 :   static ulong const b[6] = {            155385734UL,            123702367UL,            195773758UL,
     282  1187550924 :                                          155855216UL,            246659478UL,            196365268UL };
     283             : 
     284  1187550924 :   return (a[i] + b[i]*h + (1UL<<(61-q))) >> (62-q);
     285  1190663193 : }
     286             : 
     287             : ulong
     288   300000000 : fd_ulong_round_cbrt( ulong x ) {
     289             : 
     290             :   /* This works the same as the above with:
     291             :           (y-0.5)^3 <= x < (y+0.5)^3
     292             :        -> 4 y^3 - 6 y^2 + 3 y = 4 x - r for some r in [0,12 y^2 + 1)
     293             :      and Newton-Raphson generalized to integer arithmetic applied to:
     294             :         f(y) = y^3 - x
     295             :      yielding:
     296             :         y1 = y0 + floor( 4 x - 4 y0^3 + 6 y0^2 - 3 y0) / (12 y0^2 + 1) ) */
     297             : 
     298   300000000 :   if( FD_UNLIKELY( !x ) ) return 0UL;
     299             : 
     300   296887731 :   ulong y = fd_ulong_approx_cbrt( x );
     301             : 
     302   483466503 :   for(;;) {
     303   483466503 :     ulong ysq = y*y;
     304   483466503 :     long  num = (long)(4UL*(x - y*ysq) + 6UL*ysq - 3UL*y);
     305   483466503 :     long  den = (long)(12UL*ysq + 1UL);
     306   483466503 :     if( ((0L<=num) & (num<den)) ) break; /* Branch prob depends on x's distribution */
     307   186578772 :     y += (ulong)((num - fd_long_if( num>=0L, 0L, den-1L )) / den);
     308   186578772 :   }
     309             : 
     310   296887731 :   return y;
     311   300000000 : }
     312             : 
     313             : ulong
     314   300000000 : fd_ulong_floor_cbrt( ulong x ) {
     315             : 
     316             :   /* This works the same as the above with:
     317             :           y^3 <= x < (y+1)^3
     318             :        -> y^3 = x - r for some r in [0,3 y^2 + 3 y + 1)
     319             :      and Newton-Raphson generalized to integer arithmetic applied to:
     320             :         f(y) = y^3 - x
     321             :      yielding:
     322             :         y1 = y0 + floor( (x - y^3) / (3 y^2 + 3 y + 1) ) */
     323             : 
     324   300000000 :   if( FD_UNLIKELY( !x ) ) return 0UL;
     325             : 
     326   296887731 :   ulong y = fd_ulong_approx_cbrt( x );
     327             : 
     328   540024249 :   for(;;) {
     329   540024249 :     ulong ysq = y*y;
     330   540024249 :     long  num = (long)(x - y*ysq);
     331   540024249 :     long  den = (long)(3UL*(ysq+y) + 1UL);
     332   540024249 :     if( ((0L<=num) & (num<den)) ) break; /* Branch prob depends on x's distribution */
     333   243136518 :     y += (ulong)((num - fd_long_if( num>=0L, 0L, den-1L )) / den);
     334   243136518 :   }
     335             : 
     336   296887731 :   return y;
     337   300000000 : }
     338             : 
     339             : ulong
     340   300000000 : fd_ulong_ceil_cbrt( ulong x ) {
     341             : 
     342             :   /* This works the same as the above with:
     343             :           (y-1)^3 < x <= y^3
     344             :        -> y^3 = x + r for some r in [0,3 y^2 - 3 y + 1)
     345             :      and Newton-Raphson generalized to integer arithmetic applied to:
     346             :         f(y) = y^3 - x
     347             :      yielding:
     348             :         y1 = y0 + ceil ( (x - y^3              ) / (3 y^2 - 3 y + 1) )
     349             :            = y0 + floor( (x - y^3 + 3 y^2 - 3 y) / (3 y^2 - 3 y + 1) ) */
     350             : 
     351   300000000 :   if( FD_UNLIKELY( !x ) ) return 0UL;
     352             : 
     353   296887731 :   ulong y = fd_ulong_approx_cbrt( x );
     354             : 
     355   545419869 :   for(;;) {
     356   545419869 :     ulong ysq = y*y;
     357   545419869 :     ulong tmp = 3UL*(ysq-y);
     358   545419869 :     long  num = (long)(x - y*ysq + tmp);
     359   545419869 :     long  den = (long)(tmp + 1UL);
     360   545419869 :     if( ((0L<=num) & (num<den)) ) break; /* Branch prob depends on x's distribution */
     361   248532138 :     y += (ulong)((num - fd_long_if( num>=0L, 0L, den-1L )) / den);
     362   248532138 :   }
     363             : 
     364   296887731 :   return y;
     365   300000000 : }

Generated by: LCOV version 1.14