LCOV - code coverage report
Current view: top level - ballet/ed25519/avx512 - fd_r43x6.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 51 51 100.0 %
Date: 2025-01-08 12:08:44 Functions: 5 5 100.0 %

          Line data    Source code
       1             : #include "fd_r43x6.h"
       2             : 
       3             : /* fd_r43x6_repsqr_mul(x,y,n) returns z = [x^(2^n)] y of an unreduced
       4             :    fd_r43x6_t (in u44) with lanes 6 and 7 zero where x is an unreduced
       5             :    fd_r43x6_t (in u47).  Assumes lanes 6 and 7 of x are zero.  Computed
       6             :    via n repeated squarings, yielding a cost of n fd_r43x6_sqr. */
       7             : 
       8             : FD_FN_CONST static fd_r43x6_t
       9             : fd_r43x6_repsqr_mul( fd_r43x6_t x,
      10             :                      fd_r43x6_t y,
      11     7212326 :                      ulong      n ) {
      12             : 
      13             :  /* The below is R43X6_SQR1_INL wrapped in a loop to encourage inlining
      14             :     of the loop body and encourage the compiler to hoist various compile
      15             :     time constants out of the loop.  REPSQR is almost always paired with
      16             :     a MUL and this is almost always iterated.  So we incorporate the mul
      17             :     into this operation too to try to get the whole operation inlined
      18             :     but without too much instruction footprint for the below use cases. */
      19             : 
      20   171915567 :   for( ; n; n-- ) FD_R43X6_SQR1_INL( x, x );
      21     7212326 :   FD_R43X6_MUL1_INL( x, x, y );
      22     7212326 :   return x;
      23     7212326 : }
      24             : 
      25             : /* fd_r43x6_repsqr_mul2 does:
      26             :      *_za = fd_r43x6_repsqr_mul( xa, ya, n );
      27             :      *_zb = fd_r43x6_repsqr_mul( xb, yb, n );
      28             :    but faster. */
      29             : 
      30             : static void
      31             : fd_r43x6_repsqr_mul2( fd_r43x6_t * _za, fd_r43x6_t xa, fd_r43x6_t ya,
      32             :                       fd_r43x6_t * _zb, fd_r43x6_t xb, fd_r43x6_t yb,
      33     6192989 :                       ulong        n ) {
      34             : 
      35             :   /* Similar considerations as repsqr_mul */
      36             : 
      37   146942739 :   for( ; n; n-- ) FD_R43X6_SQR2_INL( xa, xa,  xb, xb );
      38     6192989 :   FD_R43X6_MUL2_INL( xa, xa, ya,  xb, xb, yb );
      39     6192989 :   *_za = xa; *_zb = xb;
      40     6192989 : }
      41             : 
      42             : fd_r43x6_t
      43      262247 : fd_r43x6_invert( fd_r43x6_t z ) {
      44             : 
      45             :   /* Theory:
      46             : 
      47             :           z^p       = z in GF(p)
      48             :        -> z^(p-1) z = z
      49             :        -> z^(p-1)   = 1
      50             :        -> z^(p-2) z = 1
      51             :        -> z^(p-2) is the multiplicative inverse of z in GF(p).
      52             : 
      53             :      Since p-2 is impractically large, we have to do this indirectly.
      54             :      This technique is adapted from the OpenSSL implementation.
      55             : 
      56             :        z^(p-2) = z^(2^255-21)
      57             :                = z^[(2^255)-(2^5)+11]
      58             :                = z^[(2^250)(2^5)-(2^5)+11]
      59             :                = z^[(2^250-1)(2^5) + 11]
      60             :                = [z^(2^250-1)]^(2^5) z^11
      61             : 
      62             :      z^11 is straightforward to compute directly.  [...]^(2^5) is
      63             :      straightforward to compute by repeated squaring.  z^(2^n-1) can be
      64             :      computed by a combination of repeated squaring and factorizations
      65             :      like:
      66             : 
      67             :        z^(2^n-1) = z^[(2^(n/2)+1)(2^(n/2)-1)]
      68             :                  = z^[2^(n/2) (2^(n/2)-1) + (2^(n/2)-1)]
      69             :                  = [z^(2^(n/2)-1)]^(2^(n/2)) z^(2^(n/2)-1)
      70             : 
      71             :      where the first term is the n/2 repeated squaring of z^(2^(n/2)-1)
      72             :      and the second term is the factor that initialized the repeated
      73             :      squaring. */
      74             : 
      75             :   /* Compute z^11 (and z^9 along the way) */
      76             : 
      77      262247 :   fd_r43x6_t z2       = fd_r43x6_sqr       ( z                         ); /* TODO: consider repsqr_mul(z,one,1) for more reuse? */
      78      262247 :   fd_r43x6_t z9       = fd_r43x6_repsqr_mul( z2,       z,          2UL );
      79      262247 :   fd_r43x6_t z11      = fd_r43x6_repsqr_mul( z9,       z2,         0UL );
      80             : 
      81             :   /* Compute z^(2^250-1) */
      82             : 
      83      262247 :   fd_r43x6_t z2e5m1   = fd_r43x6_repsqr_mul( z11,      z9,         1UL );
      84      262247 :   fd_r43x6_t z2e10m1  = fd_r43x6_repsqr_mul( z2e5m1,   z2e5m1,     5UL );
      85      262247 :   fd_r43x6_t z2e20m1  = fd_r43x6_repsqr_mul( z2e10m1,  z2e10m1,   10UL );
      86      262247 :   fd_r43x6_t z2e40m1  = fd_r43x6_repsqr_mul( z2e20m1,  z2e20m1,   20UL );
      87      262247 :   fd_r43x6_t z2e50m1  = fd_r43x6_repsqr_mul( z2e40m1,  z2e10m1,   10UL );
      88      262247 :   fd_r43x6_t z2e100m1 = fd_r43x6_repsqr_mul( z2e50m1,  z2e50m1,   50UL );
      89      262247 :   fd_r43x6_t z2e200m1 = fd_r43x6_repsqr_mul( z2e100m1, z2e100m1, 100UL );
      90      262247 :   fd_r43x6_t z2e250m1 = fd_r43x6_repsqr_mul( z2e200m1, z2e50m1,   50UL );
      91             : 
      92             :   /* Combine z^(2^250-1) and z^11 */
      93             : 
      94      262247 :   return fd_r43x6_repsqr_mul( z2e250m1, z11, 5UL );
      95      262247 : }
      96             : 
      97             : fd_r43x6_t
      98      393419 : fd_r43x6_pow22523( fd_r43x6_t z ) {
      99             : 
     100             :   /* This works nearly identical to invert.  The factorization is:
     101             : 
     102             :        z^(2^252-3) = z^[(2^252)-(2^2)+1]
     103             :                    = z^[(2^250-1)(2^2)+1]
     104             :                    = [z^(2^250-1)]^(2^2) z
     105             : 
     106             :      We compute z^(2^250-1) the same way as invert and then do a
     107             :      slightly different final combination. */
     108             : 
     109             :   /* Compute z^(2^250-1) */
     110             : 
     111      393419 :   fd_r43x6_t z2       = fd_r43x6_sqr       ( z                        ); /* TODO: consider repsqr_mul(z,one,1) for more reuse? */
     112      393419 :   fd_r43x6_t z9       = fd_r43x6_repsqr_mul( z2,      z,          2UL );
     113      393419 :   fd_r43x6_t z11      = fd_r43x6_repsqr_mul( z9,      z2,         0UL );
     114      393419 :   fd_r43x6_t z2e5m1   = fd_r43x6_repsqr_mul( z11,     z9,         1UL );
     115      393419 :   fd_r43x6_t z2e10m1  = fd_r43x6_repsqr_mul( z2e5m1,  z2e5m1,     5UL );
     116      393419 :   fd_r43x6_t z2e20m1  = fd_r43x6_repsqr_mul( z2e10m1, z2e10m1,   10UL );
     117      393419 :   fd_r43x6_t z2e40m1  = fd_r43x6_repsqr_mul( z2e20m1, z2e20m1,   20UL );
     118      393419 :   fd_r43x6_t z2e50m1  = fd_r43x6_repsqr_mul( z2e40m1, z2e10m1,   10UL );
     119      393419 :   fd_r43x6_t z2e100m1 = fd_r43x6_repsqr_mul( z2e50m1, z2e50m1,   50UL );
     120      393419 :   fd_r43x6_t z2e200m1 = fd_r43x6_repsqr_mul( z2e100m1,z2e100m1, 100UL );
     121      393419 :   fd_r43x6_t z2e250m1 = fd_r43x6_repsqr_mul( z2e200m1,z2e50m1,   50UL );
     122             : 
     123             :   /* Combine z^(2^250-1) and z */
     124             : 
     125      393419 :   return fd_r43x6_repsqr_mul( z2e250m1, z, 2UL );
     126      393419 : }
     127             : 
     128             : void
     129             : fd_r43x6_pow22523_2( fd_r43x6_t * _za, fd_r43x6_t za,
     130      562999 :                      fd_r43x6_t * _zb, fd_r43x6_t zb ) {
     131             : 
     132             :   /* This is identical to the above but runs two calculations at the
     133             :      same time for lots of ILP. */
     134             : 
     135      562999 :   fd_r43x6_t z2a,       z2b;       FD_R43X6_SQR2_INL   ( z2a,       za,                  z2b,       zb                         );
     136      562999 :   fd_r43x6_t z9a,       z9b;       fd_r43x6_repsqr_mul2( &z9a,      z2a,      za,        &z9b,      z2b,      zb,          2UL );
     137      562999 :   fd_r43x6_t z11a,      z11b;      fd_r43x6_repsqr_mul2( &z11a,     z9a,      z2a,       &z11b,     z9b,      z2b,         0UL );
     138      562999 :   fd_r43x6_t z2e5m1a,   z2e5m1b;   fd_r43x6_repsqr_mul2( &z2e5m1a,  z11a,     z9a,       &z2e5m1b,  z11b,     z9b,         1UL );
     139      562999 :   fd_r43x6_t z2e10m1a,  z2e10m1b;  fd_r43x6_repsqr_mul2( &z2e10m1a, z2e5m1a,  z2e5m1a,   &z2e10m1b, z2e5m1b,  z2e5m1b,     5UL );
     140      562999 :   fd_r43x6_t z2e20m1a,  z2e20m1b;  fd_r43x6_repsqr_mul2( &z2e20m1a, z2e10m1a, z2e10m1a,  &z2e20m1b, z2e10m1b, z2e10m1b,   10UL );
     141      562999 :   fd_r43x6_t z2e40m1a,  z2e40m1b;  fd_r43x6_repsqr_mul2( &z2e40m1a, z2e20m1a, z2e20m1a,  &z2e40m1b, z2e20m1b, z2e20m1b,   20UL );
     142      562999 :   fd_r43x6_t z2e50m1a,  z2e50m1b;  fd_r43x6_repsqr_mul2( &z2e50m1a, z2e40m1a, z2e10m1a,  &z2e50m1b, z2e40m1b, z2e10m1b,   10UL );
     143      562999 :   fd_r43x6_t z2e100m1a, z2e100m1b; fd_r43x6_repsqr_mul2( &z2e100m1a,z2e50m1a, z2e50m1a,  &z2e100m1b,z2e50m1b, z2e50m1b,   50UL );
     144      562999 :   fd_r43x6_t z2e200m1a, z2e200m1b; fd_r43x6_repsqr_mul2( &z2e200m1a,z2e100m1a,z2e100m1a, &z2e200m1b,z2e100m1b,z2e100m1b, 100UL );
     145      562999 :   fd_r43x6_t z2e250m1a, z2e250m1b; fd_r43x6_repsqr_mul2( &z2e250m1a,z2e200m1a,z2e50m1a,  &z2e250m1b,z2e200m1b,z2e50m1b,   50UL );
     146             : 
     147             :   /**/                             fd_r43x6_repsqr_mul2( _za,       z2e250m1a,za,        _zb,       z2e250m1b,zb,          2UL );
     148      562999 : }

Generated by: LCOV version 1.14