LCOV - cov.lcov - ballet/ed25519/avx512/fd_r43x6

LCOV - code coverage report

Current view:	top level - ballet/ed25519/avx512 - fd_r43x6_ge.c (source / functions)		Hit	Total	Coverage
Test:	cov.lcov	Lines:	57	114	50.0 %
Date:	2025-03-20 12:08:36	Functions:	1	3	33.3 %

          Line data    Source code

       1             : #include "fd_r43x6_ge.h"
       2             : 
       3             : /* Direct quotes from RFC 8032 are indicated with '//' style comments. */
       4             : 
       5             : wv_t
       6           0 : fd_r43x6_ge_encode( wwl_t P03, wwl_t P14, wwl_t P25 ) {
       7             : 
       8             :   // 5.1.2.  Encoding
       9             :   //
      10             :   // All values are coded as octet strings, and integers are coded using
      11             :   // little-endian convention, i.e., a 32-octet string h h[0],...h[31]
      12             :   // represents the integer h[0] + 2^8 * h[1] + ... + 2^248 * h[31].
      13             :   //
      14             :   // A curve point (x,y), with coordinates in the range 0 <= x,y < p, is
      15             :   // coded as follows.
      16             : 
      17             :   /* The below computes x=mod(X/Z) and y=mod(Y/Z) but doesn't repack
      18             :      the results into fd_r43x6_t's since we need to tweak y's limbs with
      19             :      values from x0 in the next part. */
      20             : 
      21           0 :   fd_r43x6_t X,Y,Z,T; FD_R43X6_QUAD_UNPACK( X,Y,Z,T, P ); /* in u47/u47/u47/u47 */
      22           0 :   (void)T;
      23             : 
      24           0 :   fd_r43x6_t one_Z = fd_r43x6_invert( Z ); /* in u44 */
      25             : 
      26             :   /* TODO: Consider a FD_R43X6_MUL2_FAST_INL? */
      27           0 :   fd_r43x6_t x = fd_r43x6_mul_fast( X, one_Z ); /* in u63 */     fd_r43x6_t y = fd_r43x6_mul_fast( Y, one_Z ); /* in u63 */
      28             : 
      29           0 :   long x0,x1,x2,x3,x4,x5; fd_r43x6_extract_limbs( x, x );        long y0,y1,y2,y3,y4,y5; fd_r43x6_extract_limbs( y, y );
      30             : 
      31           0 :   fd_r43x6_biased_carry_propagate_limbs( x, x, 0L ); /* in nr */ fd_r43x6_biased_carry_propagate_limbs( y, y, 0L ); /* in nr */
      32             : 
      33           0 :   fd_r43x6_mod_nearly_reduced_limbs( x, x ); /* in [0,p) */      fd_r43x6_mod_nearly_reduced_limbs( y, y ); /* in [0,p) */
      34             : 
      35             :   // First, encode the y-coordinate as a little-endian string of 32
      36             :   // octets.  The most significant bit of the final octet is always
      37             :   // zero.  To form the encoding of the point, copy the least
      38             :   // significant bit of the x-coordinate to the most significant bit of
      39             :   // the final octet.
      40             : 
      41             :   /* Since the limbs of y are already in a packed form, we copy the
      42             :      least significant bit before we pack up the uint256 to save some
      43             :      memory operations. */
      44             : 
      45           0 :   return fd_r43x6_pack( fd_r43x6( y0, y1, y2, y3, y4, y5 | ((x0 & 1L)<<40) ) );
      46           0 : }
      47             : 
      48             : int
      49             : fd_r43x6_ge_decode( wwl_t * _P03, wwl_t * _P14, wwl_t * _P25,
      50           0 :                     void const * _vs ) {
      51             : 
      52             :   // 5.1.3.  Decoding
      53             :   //
      54             :   // Decoding a point, given as a 32-octet string, is a little more
      55             :   // complicated.
      56             :   //
      57             :   // 1.  First, interpret the string as an integer in little-endian
      58             :   //     representation.
      59             : 
      60           0 :   ulong _s[4] __attribute__((aligned(32)));
      61           0 :   memcpy( _s, _vs, 32UL );
      62           0 :   ulong y0 = _s[0]; /* Bits   0- 63 */
      63           0 :   ulong y1 = _s[1]; /* Bits  64-127 */
      64           0 :   ulong y2 = _s[2]; /* Bits 128-191 */
      65           0 :   ulong y3 = _s[3]; /* Bits 192-255 */
      66             : 
      67             :   // Bit 255 of this number is the least significant bit of the
      68             :   // x-coordinate and denote this value x_0.
      69             : 
      70           0 :   int x_0 = (int)(y3>>63);
      71             : 
      72             :   // The y-coordinate is recovered simply by clearing this bit.
      73             : 
      74           0 :   y3 &= ~(1UL<<63);
      75             : 
      76           0 :   fd_r43x6_t y = fd_r43x6_unpack( wv( y0, y1, y2, y3 ) );
      77             : 
      78             :   // 2.  To recover the x-coordinate, the curve equation implies x^2 =
      79             :   //     (y^2 - 1) / (d y^2 + 1) (mod p).  The denominator is always
      80             :   //     non-zero mod p.  Let u = y^2 - 1 and v = d y^2 + 1.
      81             : 
      82           0 :   fd_r43x6_t const one = fd_r43x6_one();
      83           0 :   fd_r43x6_t const d   = fd_r43x6_d();
      84             : 
      85           0 :   fd_r43x6_t ysq = fd_r43x6_sqr( y );                                /*       y^2     in u44 */
      86           0 :   fd_r43x6_t u   = fd_r43x6_sub( ysq, one );                         /* u =   y^2 - 1 in u44 */
      87           0 :   fd_r43x6_t v   = fd_r43x6_add_fast( fd_r43x6_mul( d, ysq ), one ); /* v = d y^2 + 1 in u44 */
      88             : 
      89             :   // To compute the square root of (u/v), the first step is to compute
      90             :   // the candidate root x = (u/v)^((p+3)/8).  This can be done with the
      91             :   // following trick, using a single modular powering for both the
      92             :   // inversion of v and the square root:
      93             :   //
      94             :   //            (p+3)/8      3        (p-5)/8
      95             :   //   x = (u/v)        = u v  (u v^7)         (mod p)
      96             : 
      97           0 :   fd_r43x6_t v2  = fd_r43x6_sqr( v        ); /* v^2               in u44 */
      98           0 :   fd_r43x6_t v4  = fd_r43x6_sqr( v2       ); /* v^4               in u44 */
      99           0 :   fd_r43x6_t v3  = fd_r43x6_mul( v,   v2  ); /* v^3               in u44 */
     100           0 :   fd_r43x6_t uv3 = fd_r43x6_mul( u,   v3  ); /* u v^3             in u44 */
     101           0 :   fd_r43x6_t uv7 = fd_r43x6_mul( uv3, v4  ); /* u v^7             in u44 */
     102           0 :   fd_r43x6_t t0  = fd_r43x6_pow22523( uv7 ); /* (u v^7)^((p-5)/8) in u44 */
     103           0 :   fd_r43x6_t x   = fd_r43x6_mul( uv3, t0  ); /* x                 in u44 */
     104             : 
     105             :   // 3.  Again, there are three cases:
     106             :   //
     107             :   //     1.  If v x^2 = u (mod p), x is a square root.
     108             :   //
     109             :   //     2.  If v x^2 = -u (mod p), set x <-- x * 2^((p-1)/4), which is
     110             :   //         a square root.
     111             :   //
     112             :   //     3.  Otherwise, no square root exists for modulo p, and decoding
     113             :   //         fails
     114             : 
     115             :   /* The implementation below has flattened the above branches to make
     116             :      it more amenable to doing multiple decodes concurrently.  This also
     117             :      makes this essentially a constant time algorithm (but such isn't
     118             :      required here). */
     119             : 
     120           0 :   fd_r43x6_t x2  = fd_r43x6_sqr( x );           /* x^2       in u44 */
     121           0 :   fd_r43x6_t vx2 = fd_r43x6_mul( v, x2 );       /* v x^2     in u44 */
     122           0 :   fd_r43x6_t t1  = fd_r43x6_sub_fast( vx2, u ); /* v x^2 - u in s44 */
     123           0 :   fd_r43x6_t t2  = fd_r43x6_add_fast( vx2, u ); /* v x^2 + u in u45 */
     124           0 :   int t1nz = fd_r43x6_is_nonzero( t1 );
     125           0 :   int t2nz = fd_r43x6_is_nonzero( t2 );
     126           0 :   if( FD_UNLIKELY( t1nz & t2nz ) ) goto fail; /* case 3 */
     127           0 :   fd_r43x6_t t3  = fd_r43x6_if( t1nz, fd_r43x6_imag(), one ); /* in u43 */
     128           0 :   /**/       x   = fd_r43x6_mul( x, t3 );                     /* in u44 */
     129             : 
     130             :   // 4.  Finally, use the x_0 bit to select the right square root.  If x
     131             :   //     = 0, and x_0 = 1, decoding fails." */
     132             : 
     133             :   /* Note that we could merge this branch with the above for even
     134             :      more deterministic performance.
     135             : 
     136             :      WARNING!  This check seems to be missing from the OpenSSL
     137             :      implementation. */
     138             : 
     139           0 :   int x_mod_2 = fd_r43x6_diagnose( x );
     140           0 :   if( FD_UNLIKELY( (x_mod_2==-1) & (x_0==1) ) ) goto fail;
     141             : 
     142             :   // Otherwise, if x_0 != x mod 2, set x <-- p - x.
     143             : 
     144           0 :   x = fd_r43x6_if( x_0!=x_mod_2, fd_r43x6_neg( x ) /* in u44 */, x );
     145             : 
     146             :   // Return the decoded point (x,y).
     147             : 
     148           0 :   FD_R43X6_QUAD_PACK( *_P,
     149           0 :     x,                      /* in u44 */
     150           0 :     y,                      /* Reduced */
     151           0 :     one,                    /* Reduced */
     152           0 :     fd_r43x6_mul( x, y ) ); /* in u44 */
     153           0 :   FD_R43X6_QUAD_FOLD_UNSIGNED( *_P, *_P );
     154           0 :   return 0;
     155             : 
     156           0 : fail:
     157           0 :   *_P03 = fd_r43x6_zero();
     158           0 :   *_P14 = fd_r43x6_zero();
     159           0 :   *_P25 = fd_r43x6_zero();
     160           0 :   return -1;
     161           0 : }
     162             : 
     163             : int
     164             : fd_r43x6_ge_decode2( wwl_t * _Pa03, wwl_t * _Pa14, wwl_t * _Pa25,
     165             :                      void const * _vsa,
     166             :                      wwl_t * _Pb03, wwl_t * _Pb14, wwl_t * _Pb25,
     167      299150 :                      void const * _vsb ) {
     168             : 
     169             : # if 0 /* Reference implementation */
     170             : 
     171             :   if( FD_UNLIKELY( fd_r43x6_ge_decode( _Pa03, _Pa14, _Pa25, _vsa ) ) ) {
     172             :     *_Pb03 = wwl_zero(); *_Pb14 = wwl_zero(); *_Pb25 = wwl_zero();
     173             :     return -1;
     174             :   }
     175             : 
     176             :   if( FD_UNLIKELY( fd_r43x6_ge_decode( _Pb03, _Pb14, _Pb25, _vsb ) ) ) {
     177             :     *_Pa03 = wwl_zero(); *_Pa14 = wwl_zero(); *_Pa25 = wwl_zero();
     178             :     return -2;
     179             :   }
     180             : 
     181             :   return 0;
     182             : 
     183             : # else /* HPC implementation */
     184             : 
     185      299150 :   fd_r43x6_t const one     = fd_r43x6_one();
     186      299150 :   fd_r43x6_t const d       = fd_r43x6_d();
     187      299150 :   fd_r43x6_t const sqrt_m1 = fd_r43x6_imag();
     188             : 
     189      299150 :   ulong _sa[4] __attribute__((aligned(32)));                       ulong _sb[4] __attribute__((aligned(32)));
     190      299150 :   memcpy( _sa, _vsa, 32UL );                                       memcpy( _sb, _vsb, 32UL );
     191      299150 :   ulong y0a = _sa[0];                                              ulong y0b = _sb[0];
     192      299150 :   ulong y1a = _sa[1];                                              ulong y1b = _sb[1];
     193      299150 :   ulong y2a = _sa[2];                                              ulong y2b = _sb[2];
     194      299150 :   ulong y3a = _sa[3];                                              ulong y3b = _sb[3];
     195             : 
     196      299150 :   int x_0a = (int)(y3a>>63);                                       int x_0b = (int)(y3b>>63);
     197             : 
     198      299150 :   y3a &= ~(1UL<<63);                                               y3b &= ~(1UL<<63);
     199             : 
     200      299150 :   _sa[0] = y0a;                                                    _sb[0] = y0b;
     201      299150 :   _sa[1] = y1a;                                                    _sb[1] = y1b;
     202      299150 :   _sa[2] = y2a;                                                    _sb[2] = y2b;
     203      299150 :   _sa[3] = y3a;                                                    _sb[3] = y3b;
     204      299150 :   fd_r43x6_t ya = fd_r43x6_unpack( wv_ld( _sa ) );                 fd_r43x6_t yb = fd_r43x6_unpack( wv_ld( _sb ) );
     205             : 
     206      299150 :   fd_r43x6_t ysqa, ysqb; FD_R43X6_SQR2_INL   ( ysqa, ya,           ysqb, yb       );
     207      299150 :   fd_r43x6_t ua = fd_r43x6_sub( ysqa, one );                       fd_r43x6_t ub = fd_r43x6_sub( ysqb, one );
     208      299150 :   fd_r43x6_t va = fd_r43x6_add_fast( fd_r43x6_mul(d,ysqa), one );  fd_r43x6_t vb = fd_r43x6_add_fast( fd_r43x6_mul(d,ysqb), one );
     209             : 
     210      299150 :   fd_r43x6_t v2a,  v2b;  FD_R43X6_SQR2_INL      ( v2a,  va,        v2b,  vb       );
     211      299150 :   fd_r43x6_t v4a,  v4b;  FD_R43X6_SQR2_INL      ( v4a,  v2a,       v4b,  v2b      );
     212      299150 :   fd_r43x6_t v3a,  v3b;  FD_R43X6_MUL2_INL      ( v3a,  va,v2a,    v3b,  vb,v2b   );
     213      299150 :   fd_r43x6_t uv3a, uv3b; FD_R43X6_MUL2_INL      ( uv3a, ua,v3a,    uv3b, ub,v3b   );
     214      299150 :   fd_r43x6_t uv7a, uv7b; FD_R43X6_MUL2_INL      ( uv7a, uv3a,v4a,  uv7b, uv3b,v4b );
     215      299150 :   fd_r43x6_t t0a,  t0b;  FD_R43X6_POW22523_2_INL( t0a,  uv7a,      t0b,  uv7b     );
     216      299150 :   fd_r43x6_t xa,   xb;   FD_R43X6_MUL2_INL      ( xa,   uv3a,t0a,  xb,   uv3b,t0b );
     217             : 
     218      299150 :   fd_r43x6_t x2a,  x2b;  FD_R43X6_SQR2_INL      ( x2a,  xa,        x2b,  xb       );
     219      299150 :   fd_r43x6_t vx2a, vx2b; FD_R43X6_MUL2_INL      ( vx2a, va,x2a,    vx2b, vb,x2b   );
     220      299150 :   fd_r43x6_t t1a = fd_r43x6_sub_fast( vx2a, ua );                  fd_r43x6_t t1b = fd_r43x6_sub_fast( vx2b, ub );
     221      299150 :   fd_r43x6_t t2a = fd_r43x6_add_fast( vx2a, ua );                  fd_r43x6_t t2b = fd_r43x6_add_fast( vx2b, ub );
     222      299150 :   int t1nza = fd_r43x6_is_nonzero( t1a );                          int t1nzb = fd_r43x6_is_nonzero( t1b );
     223      299150 :   int t2nza = fd_r43x6_is_nonzero( t2a );                          int t2nzb = fd_r43x6_is_nonzero( t2b );
     224      299150 :   if( FD_UNLIKELY( t1nza & t2nza ) ) goto faila;
     225      258958 :   /**/                                                             if( FD_UNLIKELY( t1nzb & t2nzb ) ) goto failb;
     226             : 
     227      256385 :   fd_r43x6_t t3a = fd_r43x6_if( t1nza, sqrt_m1, one );             fd_r43x6_t t3b = fd_r43x6_if( t1nzb, sqrt_m1, one );
     228      256385 :   /**/                   FD_R43X6_MUL2_INL      ( xa,   xa,t3a,    xb,   xb,t3b   );
     229             : 
     230      256385 :   int x_mod_2a = fd_r43x6_diagnose( xa );                          int x_mod_2b = fd_r43x6_diagnose( xb );
     231      256385 :   if( FD_UNLIKELY( (x_mod_2a==-1) & (x_0a==1) ) ) goto faila;
     232      256215 :   /**/                                                             if( FD_UNLIKELY( (x_mod_2b==-1) & (x_0b==1) ) ) goto failb;
     233      255994 :   xa = fd_r43x6_if( x_0a!=x_mod_2a, fd_r43x6_neg( xa ), xa );      xb = fd_r43x6_if( x_0b!=x_mod_2b, fd_r43x6_neg( xb ), xb );
     234             : 
     235      255994 :   fd_r43x6_t xya,  xyb;  FD_R43X6_MUL2_INL      ( xya,  xa,ya,     xyb,  xb,yb    );
     236             : 
     237      255994 :   FD_R43X6_QUAD_PACK( *_Pa, xa,ya,one,xya );                       FD_R43X6_QUAD_PACK( *_Pb, xb,yb,one,xyb );
     238      255994 :   FD_R43X6_QUAD_FOLD_UNSIGNED( *_Pa, *_Pa );                       FD_R43X6_QUAD_FOLD_UNSIGNED( *_Pb, *_Pb );
     239      255994 :   return 0;
     240             : 
     241       40362 : faila:
     242       40362 :   *_Pa03 = wwl_zero();                                             *_Pb03 = wwl_zero();
     243       40362 :   *_Pa14 = wwl_zero();                                             *_Pb14 = wwl_zero();
     244       40362 :   *_Pa25 = wwl_zero();                                             *_Pb25 = wwl_zero();
     245       40362 :   return -1;
     246             : 
     247        2794 : failb:
     248        2794 :   *_Pa03 = wwl_zero();                                             *_Pb03 = wwl_zero();
     249        2794 :   *_Pa14 = wwl_zero();                                             *_Pb14 = wwl_zero();
     250        2794 :   *_Pa25 = wwl_zero();                                             *_Pb25 = wwl_zero();
     251        2794 :   return -2;
     252             : 
     253      256215 : # endif
     254      256215 : }

Generated by: LCOV version 1.14