LCOV - code coverage report
Current view: top level - ballet/ed25519 - fd_x25519.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 146 146 100.0 %
Date: 2024-11-13 11:58:15 Functions: 5 5 100.0 %

          Line data    Source code
       1             : #include "fd_x25519.h"
       2             : #include "fd_f25519.h"
       3             : 
       4             : /* FD_X25519_VECTORIZE calls mul4 instead of sqr2+mul2, and similar.
       5             :    Only useful if the underlying ops are actually vectorized and therefore
       6             :    the cost of 4 muls is <= the cost of 2 sqr + 2 mul. */
       7             : #define FD_X25519_VECTORIZE FD_HAS_AVX512
       8             : 
       9             : /* FD_X25519_ALIGN aligns variables. */
      10             : #if FD_HAS_AVX
      11             : #define FD_X25519_ALIGN __attribute__((aligned(32)))
      12             : #else
      13             : #define FD_X25519_ALIGN
      14             : #endif
      15             : 
      16             : /*
      17             :  * Constant time primitives
      18             :  */
      19             : 
      20             : static inline int FD_FN_SENSITIVE
      21      196941 : fd_x25519_is_zero_const_time( uchar const point[ 32 ] ) {
      22             :   //TODO: this is generally done by (x)or-ing the limbs, see also RFC 7748, page 13.
      23      196941 :   int is_zero = 1;
      24     6499053 :   for( ulong i=0UL; i<32UL; i++ ) {
      25     6302112 :     is_zero &= ( !point[ i ] );
      26     6302112 :   }
      27      196941 :   return is_zero;
      28      196941 : }
      29             : 
      30             : #if !FD_HAS_AVX512
      31             : 
      32             : static inline void FD_FN_SENSITIVE
      33             : fd_x25519_montgomery_ladder( fd_f25519_t *       x2,
      34             :                              fd_f25519_t *       z2,
      35             :                              fd_f25519_t const * x1,
      36      131294 :                              uchar const *       secret_scalar ) {
      37             :   /* memory areas that will contain (partial) secrets and will be cleared at the end */
      38      131294 :   fd_f25519_t secret_tmp_f[4];
      39      131294 :   int swap = 0;
      40      131294 :   int b = 0;
      41             : 
      42             :   /* human-readable variables */
      43      131294 :   fd_f25519_t * x3   = &secret_tmp_f[0];
      44      131294 :   fd_f25519_t * z3   = &secret_tmp_f[1];
      45      131294 :   fd_f25519_t * tmp0 = &secret_tmp_f[2];
      46      131294 :   fd_f25519_t * tmp1 = &secret_tmp_f[3];
      47             : 
      48      131294 :   fd_f25519_set( x2, fd_f25519_one );
      49      131294 :   fd_f25519_set( z2, fd_f25519_zero );
      50             : 
      51             :   /* use fd_f25519_add to reduce x1 mod p. it's prob unnecessary but not worth the risk. */
      52      131294 :   fd_f25519_add( x3, fd_f25519_zero, x1 );
      53      131294 :   fd_f25519_set( z3, fd_f25519_one );
      54             : 
      55    33611264 :   for( long pos=254UL; pos>=0; pos-- ) {
      56    33479970 :     b = (secret_scalar[ pos / 8L ] >> ( pos & 7L )) & 1;
      57    33479970 :     swap ^= b;
      58    33479970 :     fd_f25519_swap_if( x2, x3, swap );
      59    33479970 :     fd_f25519_swap_if( z2, z3, swap );
      60    33479970 :     swap = b;
      61             : 
      62    33479970 :     fd_f25519_sub_nr( tmp0, x3,   z3   );
      63    33479970 :     fd_f25519_sub_nr( tmp1, x2,   z2   );
      64    33479970 :     fd_f25519_add_nr( x2,   x2,   z2   );
      65    33479970 :     fd_f25519_add_nr( z2,   x3,   z3   );
      66             : 
      67             : #   if FD_X25519_VECTORIZE /* Note that okay to use less efficient squaring because we get it for free in unused vector lanes */
      68             :     fd_f25519_mul4( z3,   tmp0, x2,
      69             :                     z2,   z2,   tmp1,
      70             :                     tmp0, tmp1, tmp1,
      71             :                     tmp1, x2,   x2   );
      72             : #   else /* Use more efficient squaring if scalar implementation */
      73    33479970 :     fd_f25519_mul2( z3,   tmp0, x2,
      74    33479970 :                     z2,   z2,   tmp1 );
      75    33479970 :     fd_f25519_sqr2( tmp0, tmp1,
      76    33479970 :                     tmp1, x2         );
      77    33479970 : #   endif
      78    33479970 :     fd_f25519_add_nr( x3,   z3,   z2 );
      79    33479970 :     fd_f25519_sub_nr( z2,   z3,   z2 );
      80             : #   if FD_X25519_VECTORIZE /* See note above */
      81             :     fd_f25519_mul2( x2,   tmp1, tmp0,
      82             :                     z2,   z2,   z2   );
      83             : #   else
      84    33479970 :     fd_f25519_mul(  x2,   tmp1, tmp0 );
      85    33479970 :     fd_f25519_sqr(  z2,   z2         );
      86    33479970 : #   endif
      87    33479970 :     fd_f25519_sub_nr( tmp1, tmp1, tmp0 );
      88             : 
      89    33479970 :     fd_f25519_mul_121666( z3, tmp1 );
      90             : 
      91    33479970 :     fd_f25519_add_nr( tmp0, tmp0, z3   );
      92             : #   if FD_X25519_VECTORIZE /* See note above */
      93             :     fd_f25519_mul3( x3,   x3,   x3,
      94             :                     z3,   x1,   z2,
      95             :                     z2,   tmp0, tmp1 );
      96             : #   else
      97    33479970 :     fd_f25519_sqr ( x3,   x3         );
      98    33479970 :     fd_f25519_mul2( z3,   x1,   z2,
      99    33479970 :                     z2,   tmp1, tmp0 );
     100    33479970 : #   endif
     101    33479970 :   }
     102             : 
     103      131294 :   fd_f25519_swap_if( x2, x3, swap );
     104      131294 :   fd_f25519_swap_if( z2, z3, swap );
     105             : 
     106             :   /* Sanitize */
     107             : 
     108      131294 :   fd_memset_explicit( secret_tmp_f, 0, sizeof(secret_tmp_f) );
     109      131294 :   fd_memset_explicit( &b, 0, sizeof(int) );
     110      131294 :   fd_memset_explicit( &swap, 0, sizeof(int) );
     111      131294 : }
     112             : #else
     113             : 
     114             : /* This is the "transposed" version of the Montgomery ladder above.
     115             :    Experimentally, this is 15-20% faster on AVX-512. */
     116             : static inline void FD_FN_SENSITIVE
     117             : fd_x25519_montgomery_ladder( fd_f25519_t *       x2,
     118             :                              fd_f25519_t *       z2,
     119             :                              fd_f25519_t const * x1,
     120       65647 :                              uchar const *       secret_scalar ) {
     121       65647 :   FD_R43X6_QUAD_DECL( U );
     122       65647 :   FD_R43X6_QUAD_DECL( Q );
     123       65647 :   FD_R43X6_QUAD_DECL( P );
     124       65647 :   FD_R43X6_QUAD_PACK( U, fd_r43x6_zero(),
     125       65647 :                          fd_r43x6_zero(),
     126       65647 :                          fd_r43x6_zero(),
     127       65647 :                          x1->el );                      // x_1 = u, in u44
     128       65647 :   FD_R43X6_QUAD_PACK( Q, fd_r43x6_one(),                // x_2 = 1, in u44
     129       65647 :                          fd_r43x6_zero(),               // z_2 = 0, in u44
     130       65647 :                          x1->el,                        // x_3 = u, in u44
     131       65647 :                          fd_r43x6_one() );              // z_3 = 1, in u44
     132       65647 :   int swap = 0;
     133       65647 :   int k_t = 0;
     134       65647 :   wwl_t perm;
     135       65647 :   fd_r43x6_t AA, E, F, G, H, GG;
     136             : 
     137    16805632 :   for( int t=254UL; t>=0; t-- ) {                       // For t = bits-1 down to 0:
     138             : 
     139             :     /* At this point, Q and U in u44|u44|u44|u44 */
     140             : 
     141    16739985 :     k_t = (secret_scalar[ t / 8L ] >> ( t & 7L )) & 1;  //   k_t = (k >> t) & 1;
     142    16739985 :     swap ^= k_t;                                        //   swap ^= k_t
     143    16739985 :     perm = wwl_if( (-swap) & 0xff, wwl( 2L,3L,0L,1L, 6L,7L,4L,5L ), wwl( 0L,1L,2L,3L, 4L,5L,6L,7L ) );
     144    16739985 :     Q03 = wwl_permute( perm, Q03 );                     //   (x_2, x_3) = cswap(swap, x_2, x_3)
     145    16739985 :     Q14 = wwl_permute( perm, Q14 );                     //   (z_2, z_3) = cswap(swap, z_2, z_3)
     146    16739985 :     Q25 = wwl_permute( perm, Q25 );
     147    16739985 :     swap = k_t;                                         //   swap = k_t
     148             : 
     149             :     /* These operations are exactly from the RFC but have been reordered
     150             :        slightly to make it easier to extract ILP. */
     151             : 
     152    16739985 :     FD_R43X6_QUAD_PERMUTE      ( P, 0,0,2,2, Q );       // A = x_2 + z_2,            P  = x_2|x_2|x_3  |x_3,   in u44|u44|u44|u44
     153    16739985 :     FD_R43X6_QUAD_PERMUTE      ( Q, 1,1,3,3, Q );       // B = x_2 - z_2,            Q  = z_2|z_2|z_3  |z_3,   in u44|u44|u44|u44
     154    16739985 :     FD_R43X6_QUAD_LANE_ADD_FAST( P, P, 1,0,1,0, P, Q ); // C = x_3 + z_3,            P  = A  |x_2|C    |x_3,   in u45|u44|u45|u44
     155    16739985 :     FD_R43X6_QUAD_LANE_SUB_FAST( P, P, 0,1,0,1, P, Q ); // D = x_3 - z_3,            P  = A  |B  |C    |D,     in u45|s44|u45|s44
     156    16739985 :     FD_R43X6_QUAD_PERMUTE      ( Q, 0,1,1,0, P );       // BB = B^2,                 P  = A  |B  |B    |A,     in u44|u44|u44|u44
     157    16739985 :     FD_R43X6_QUAD_MUL_FAST     ( P, P, Q );             // DA = D * A,               P  = AA |BB |CB   |DA,    in u62|u62|u62|u62
     158    16739985 :     FD_R43X6_QUAD_FOLD_SIGNED  ( P, P );                // DA = D * A,               P  = AA |BB |CB   |DA,    in u44|u44|u44|u44
     159    16739985 :     FD_R43X6_QUAD_PERMUTE      ( Q, 1,0,3,2, P );       // CB = C * B,               Q  = BB |AA |DA   |CB,    in u62|u62|u62|u62
     160    16739985 :     FD_R43X6_QUAD_LANE_SUB_FAST( P, P, 0,1,0,1, Q, P ); // E = AA-BB,                P  = AA |E  |CB   |CB-DA, in u62|s62|u62|s62
     161    16739985 :     FD_R43X6_QUAD_LANE_ADD_FAST( P, P, 0,0,1,0, P, Q ); //                           P  = AA |E  |DA+CB|CB-DA, in u62|s62|u63|s62
     162    16739985 :     FD_R43X6_QUAD_LANE_IF      ( Q, 0,1,1,0, P, Q );    //                           Q  = BB |E  |DA+CB|CB,    in u62|u44|u44|u62
     163    16739985 :     FD_R43X6_QUAD_LANE_IF      ( Q, 0,0,0,1, U, Q );    // x_3 = (DA + CB)^2,        Q  = BB |E  |DA+CB|x_1,   in u62|u44|u44|u44
     164    16739985 :     FD_R43X6_QUAD_UNPACK       ( AA, E, F, G, P );
     165    16739985 :     H  = fd_r43x6_add_fast( AA, fd_r43x6_scale_fast( 121665L, E ) ); //              H  = AA + a24 * E,        in u60
     166    16739985 :     GG = fd_r43x6_sqr_fast( G );                        //                           GG = (DA - CB)^2,         in u61
     167    16739985 :     FD_R43X6_QUAD_PACK         ( P, AA, H, F, GG );     // z_2 = E * (AA + a24 * E), P  = AA |H  |DA+CB|GG,    in u44|u60|u44|u61
     168    16739985 :     FD_R43X6_QUAD_FOLD_UNSIGNED( P, P );                //                           P  = AA |H  |DA+CB|GG,    in u44|u44|u44|u44
     169    16739985 :     FD_R43X6_QUAD_MUL_FAST     ( P, P, Q );             // z_3 = x_1 * (DA - CB)^2,  Q  = x_2|z_2|x_3  |z_3,   in u62|u62|u62|u62
     170    16739985 :     FD_R43X6_QUAD_FOLD_UNSIGNED( Q, P    );             //                           Q  = x_2|z_2|x_3  |z_3,   in u44|u44|u44|u44
     171    16739985 :   }
     172             : 
     173             :   /* At this point, Q in u44|u44|u44|u44 */
     174       65647 :   perm = wwl_if( (-swap) & 0xff, wwl( 2L,3L,0L,1L, 6L,7L,4L,5L ), wwl( 0L,1L,2L,3L, 4L,5L,6L,7L ) );
     175       65647 :   Q03 = wwl_permute( perm, Q03 );                       // (x_2, x_3) = cswap(swap, x_2, x_3)
     176       65647 :   Q14 = wwl_permute( perm, Q14 );                       // (z_2, z_3) = cswap(swap, z_2, z_3)
     177       65647 :   Q25 = wwl_permute( perm, Q25 );
     178             : 
     179       65647 :   FD_R43X6_QUAD_UNPACK( x2->el, z2->el, E, F, Q );
     180             : 
     181             :   /* Sanitize */
     182             : 
     183       65647 :   fd_memset_explicit( &P03,  0, sizeof(wwl_t) );
     184       65647 :   fd_memset_explicit( &P14,  0, sizeof(wwl_t) );
     185       65647 :   fd_memset_explicit( &P25,  0, sizeof(wwl_t) );
     186       65647 :   fd_memset_explicit( &U03,  0, sizeof(wwl_t) );
     187       65647 :   fd_memset_explicit( &U14,  0, sizeof(wwl_t) );
     188       65647 :   fd_memset_explicit( &U25,  0, sizeof(wwl_t) );
     189       65647 :   fd_memset_explicit( &Q03,  0, sizeof(wwl_t) );
     190       65647 :   fd_memset_explicit( &Q14,  0, sizeof(wwl_t) );
     191       65647 :   fd_memset_explicit( &Q25,  0, sizeof(wwl_t) );
     192       65647 :   fd_memset_explicit( &AA,   0, sizeof(wwl_t) );
     193       65647 :   fd_memset_explicit( &E,    0, sizeof(wwl_t) );
     194       65647 :   fd_memset_explicit( &F,    0, sizeof(wwl_t) );
     195       65647 :   fd_memset_explicit( &G,    0, sizeof(wwl_t) );
     196       65647 :   fd_memset_explicit( &H,    0, sizeof(wwl_t) );
     197       65647 :   fd_memset_explicit( &GG,   0, sizeof(wwl_t) );
     198       65647 :   fd_memset_explicit( &perm, 0, sizeof(wwl_t) );
     199       65647 :   fd_memset_explicit( &swap, 0, sizeof(int) );
     200       65647 :   fd_memset_explicit( &k_t,  0, sizeof(int) );
     201             : 
     202       65647 : }
     203             : #endif
     204             : 
     205             : /*
     206             :  * X25519 Protocol
     207             :  */
     208             : 
     209             : static inline void FD_FN_SENSITIVE
     210             : fd_x25519_scalar_mul_const_time( uchar               out[ 32 ],
     211             :                                  uchar const *       secret_scalar,
     212      196941 :                                  fd_f25519_t const * point_x ) {
     213      196941 :   fd_f25519_t x2[1], z2[1];
     214             : 
     215      196941 :   fd_x25519_montgomery_ladder( x2, z2, point_x, secret_scalar );
     216             : 
     217      196941 :   fd_f25519_inv( z2, z2 );
     218      196941 :   fd_f25519_mul( x2, x2, z2 );
     219             : 
     220      196941 :   fd_f25519_tobytes( out, x2 );
     221      196941 : }
     222             : 
     223             : static const uchar fd_x25519_basepoint[ 32 ] FD_X25519_ALIGN = { 9 };
     224             : 
     225             : uchar * FD_FN_SENSITIVE
     226             : fd_x25519_public( uchar       self_public_key [ 32 ],
     227       93348 :                   uchar const self_private_key[ 32 ] ) {
     228             :   /* IETF RFC 7748 Section 4.1 (page 3) */
     229       93348 :   return fd_x25519_exchange( self_public_key, self_private_key, fd_x25519_basepoint );
     230       93348 : }
     231             : 
     232             : uchar * FD_FN_SENSITIVE
     233             : fd_x25519_exchange( uchar       shared_secret   [ 32 ],
     234             :                     uchar const self_private_key[ 32 ],
     235      196941 :                     uchar const peer_public_key [ 32 ] ) {
     236             : 
     237             :   /* Memory areas that will contain secrets */
     238      196941 :   uchar secret_scalar[ 32UL ] FD_X25519_ALIGN;
     239             : 
     240             :   /* Public local variables */
     241      196941 :   fd_f25519_t peer_public_key_point_u[1];
     242             : 
     243             :   //  RFC 7748 - Elliptic Curves for Security
     244             :   //
     245             :   //  5. The X25519 and X448 Functions
     246             :   //
     247             :   //  The "X25519" and "X448" functions perform scalar multiplication on
     248             :   //  the Montgomery form of the above curves.  (This is used when
     249             :   //  implementing Diffie-Hellman.)  The functions take a scalar and a
     250             :   //  u-coordinate as inputs and produce a u-coordinate as output.
     251             :   //  Although the functions work internally with integers, the inputs and
     252             :   //  outputs are 32-byte strings (for X25519) or 56-byte strings (for
     253             :   //  X448) and this specification defines their encoding.
     254             : 
     255             :   //  The u-coordinates are elements of the underlying field GF(2^255 - 19)
     256             :   //  or GF(2^448 - 2^224 - 1) and are encoded as an array of bytes, u, in
     257             :   //  little-endian order such that u[0] + 256*u[1] + 256^2*u[2] + ... +
     258             :   //  256^(n-1)*u[n-1] is congruent to the value modulo p and u[n-1] is
     259             :   //  minimal.  When receiving such an array, implementations of X25519
     260             :   //  (but not X448) MUST mask the most significant bit in the final byte.
     261             :   //  This is done to preserve compatibility with point formats that
     262             :   //  reserve the sign bit for use in other protocols and to increase
     263             :   //  resistance to implementation fingerprinting.
     264             : 
     265             :   //  Implementations MUST accept non-canonical values and process them as
     266             :   //  if they had been reduced modulo the field prime.  The non-canonical
     267             :   //  values are 2^255 - 19 through 2^255 - 1 for X25519 and 2^448 - 2^224
     268             :   //  - 1 through 2^448 - 1 for X448.
     269             : 
     270             :   /* From the text above:
     271             :      1. When receiving such an array, implementations of X25519 [...]
     272             :         MUST mask the most significant bit in the final byte
     273             :         >> this is done by fd_f25519_frombytes
     274             :      2. Implementations MUST accept non-canonical values
     275             :         >> no extra check needed */
     276      196941 :   fd_f25519_frombytes( peer_public_key_point_u, peer_public_key );
     277             : 
     278             :   //  Scalars are assumed to be randomly generated bytes.  For X25519, in
     279             :   //  order to decode 32 random bytes as an integer scalar, set the three
     280             :   //  least significant bits of the first byte and the most significant bit
     281             :   //  of the last to zero, set the second most significant bit of the last
     282             :   //  byte to 1 and, finally, decode as little-endian.  This means that the
     283             :   //  resulting integer is of the form 2^254 plus eight times a value
     284             :   //  between 0 and 2^251 - 1 (inclusive).  Likewise, for X448, set the two
     285             :   //  least significant bits of the first byte to 0, and the most
     286             :   //  significant bit of the last byte to 1.  This means that the resulting
     287             :   //  integer is of the form 2^447 plus four times a value between 0 and
     288             :   //  2^445 - 1 (inclusive).
     289             : 
     290             :   /* decodeScalar25519
     291             :      note: e need to copy the private key, because we need to sanitize it. */
     292      196941 :   memcpy( secret_scalar, self_private_key, 32UL );
     293      196941 :   secret_scalar[ 0] &= (uchar)0xF8;
     294      196941 :   secret_scalar[31] &= (uchar)0x7F;
     295      196941 :   secret_scalar[31] |= (uchar)0x40;
     296             : 
     297      196941 :   fd_x25519_scalar_mul_const_time( shared_secret, secret_scalar, peer_public_key_point_u );
     298             : 
     299             :   /* Sanitize */
     300      196941 :   fd_memset_explicit( secret_scalar, 0, 32UL );
     301             : 
     302             :   /* Reject low order points */
     303      196941 :   if( FD_UNLIKELY( fd_x25519_is_zero_const_time( shared_secret ) ) ) {
     304          93 :     return NULL;
     305          93 :   }
     306             : 
     307      196848 :   return shared_secret;
     308      196941 : }

Generated by: LCOV version 1.14