LCOV - cov.lcov - ballet/ed25519/avx512/fd_curve25519.c

LCOV - code coverage report

Current view:	top level - ballet/ed25519/avx512 - fd_curve25519.c (source / functions)		Hit	Total	Coverage
Test:	cov.lcov	Lines:	57	72	79.2 %
Date:	2024-11-13 11:58:15	Functions:	10	12	83.3 %

          Line data    Source code

       1             : #include "../fd_curve25519.h"
       2             : #include "./fd_r43x6_ge.h"
       3             : 
       4             : /*
       5             :  * Add
       6             :  */
       7             : 
       8             : /* fd_ed25519_point_add_with_opts computes r = a + b, and returns r.
       9             : 
      10             :    https://eprint.iacr.org/2008/522
      11             :    Sec 4.2, 4-Processor Montgomery addition and doubling.
      12             : 
      13             :    This implementation includes several optional optimizations
      14             :    that are used for speeding up scalar multiplication:
      15             : 
      16             :    - b_Z_is_one, if b->Z == 1 (affine, or decompressed), we can skip 1mul
      17             : 
      18             :    - b_is_precomputed, since the scalar mul loop typically accumulates
      19             :      points from a table, we can pre-compute kT into the table points and
      20             :      therefore skip 1mul during the loop.
      21             : 
      22             :    - skip_last_mul, since dbl can be computed with just (X, Y, Z)
      23             :      and doesn't need T, we can skip the last 4 mul and selectively
      24             :      compute (X, Y, Z) or (X, Y, Z, T) during the scalar mul loop.
      25             :  */
      26             : FD_25519_INLINE fd_ed25519_point_t *
      27             : fd_ed25519_point_add_with_opts( fd_ed25519_point_t *       r,
      28             :                                 fd_ed25519_point_t const * a,
      29             :                                 fd_ed25519_point_t const * b,
      30             :                                 FD_PARAM_UNUSED int const b_Z_is_one,
      31             :                                 int const b_is_precomputed,
      32    25076704 :                                 FD_PARAM_UNUSED int const skip_last_mul ) {
      33             : 
      34    25076704 :   if( b_is_precomputed ) {
      35    23064088 :     fd_ed25519_point_t tmp[2];
      36    23064088 :     FD_R43X6_GE_ADD_TABLE_ALT( r->P, a->P, b->P, tmp[0].P, tmp[1].P );
      37    23064088 :   } else {
      38     2012616 :     FD_R43X6_GE_ADD( r->P, a->P, b->P );
      39     2012616 :   }
      40    25076704 :   return r;
      41    25076704 : }
      42             : 
      43             : /* fd_ed25519_point_add computes r = a + b, and returns r. */
      44             : fd_ed25519_point_t *
      45             : fd_ed25519_point_add( fd_ed25519_point_t *       r,
      46             :                       fd_ed25519_point_t const * a,
      47     1012505 :                       fd_ed25519_point_t const * b ) {
      48     1012505 :   return fd_ed25519_point_add_with_opts( r, a, b, 0, 0, 0 );
      49     1012505 : }
      50             : 
      51             : /* fd_ed25519_point_add_final_mul computes just the final mul step in point add.
      52             :    See fd_ed25519_point_add_with_opts. */
      53             : FD_25519_INLINE fd_ed25519_point_t *
      54             : fd_ed25519_point_add_final_mul( fd_ed25519_point_t * restrict r,
      55    23332252 :                                 fd_ed25519_point_t const *    a ) {
      56    23332252 :   fd_ed25519_point_set( r, a );
      57    23332252 :   return r;
      58    23332252 : }
      59             : 
      60             : /* fd_ed25519_point_add_final_mul_projective computes just the final mul step
      61             :    in point add, assuming the result is projective (X, Y, Z), i.e. ignoring T.
      62             :    This is useful because dbl only needs (X, Y, Z) in input, so we can save 1mul.
      63             :    See fd_ed25519_point_add_with_opts. */
      64             : FD_25519_INLINE fd_ed25519_point_t *
      65             : fd_ed25519_point_add_final_mul_projective( fd_ed25519_point_t * restrict r,
      66    67145535 :                                            fd_ed25519_point_t const *    a ) {
      67    67145535 :   fd_ed25519_point_set( r, a );
      68    67145535 :   return r;
      69    67145535 : }
      70             : 
      71             : /*
      72             :  * Sub
      73             :  */
      74             : 
      75             : /* fd_ed25519_point_neg_precomputed sets r = -a. */
      76             : FD_25519_INLINE fd_ed25519_point_t *
      77             : fd_ed25519_point_neg_precomputed( fd_ed25519_point_t *       r,
      78    10115573 :                       fd_ed25519_point_t const * a ) {
      79             :   /* use p instead of zero to avoid mod reduction */
      80    10115573 :   FD_R43X6_QUAD_DECL( _p );
      81    10115573 :   _p03 = wwl( 8796093022189L, 8796093022189L, 8796093022189L, 8796093022189L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L );
      82    10115573 :   _p14 = wwl( 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L );
      83    10115573 :   _p25 = wwl( 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 1099511627775L, 1099511627775L, 1099511627775L, 1099511627775L );
      84    10115573 :   FD_R43X6_QUAD_LANE_SUB_FAST( r->P, a->P, 0,0,0,1, _p, a->P );
      85    10115573 :   FD_R43X6_QUAD_PERMUTE      ( r->P, 1,0,2,3, r->P );
      86    10115573 :   return r;
      87    10115573 : }
      88             : 
      89             : /* fd_ed25519_point_sub_with_opts computes r = a - b, and returns r.
      90             :    This is like fd_ed25519_point_add_with_opts, replacing:
      91             :    - b->X => -b->X
      92             :    - b->T => -b->T
      93             :    See fd_ed25519_point_add_with_opts for details.
      94             :  */
      95             : FD_25519_INLINE fd_ed25519_point_t *
      96             : fd_ed25519_point_sub_with_opts( fd_ed25519_point_t *       r,
      97             :                                 fd_ed25519_point_t const * a,
      98             :                                 fd_ed25519_point_t const * b,
      99             :                                 int const b_Z_is_one,
     100             :                                 int const b_is_precomputed,
     101    11115684 :                                 int const skip_last_mul ) {
     102             : 
     103    11115684 :   fd_ed25519_point_t neg[1];
     104    11115684 :   if (b_is_precomputed) {
     105    10115573 :     fd_ed25519_point_neg_precomputed( neg, b );
     106    10115573 :   } else {
     107     1000111 :     fd_ed25519_point_neg( neg, b );
     108     1000111 :   }
     109    11115684 :   return fd_ed25519_point_add_with_opts( r, a, neg, b_Z_is_one, b_is_precomputed, skip_last_mul );
     110    11115684 : }
     111             : 
     112             : /* fd_ed25519_point_sub computes r = a - b, and returns r. */
     113             : fd_ed25519_point_t *
     114             : fd_ed25519_point_sub( fd_ed25519_point_t *       r,
     115             :                       fd_ed25519_point_t const * a,
     116     1000111 :                       fd_ed25519_point_t const * b ) {
     117     1000111 :   return fd_ed25519_point_sub_with_opts( r, a, b, 0, 0, 0 );
     118     1000111 : }
     119             : 
     120             : /*
     121             :  * Dbl
     122             :  */
     123             : 
     124             : /* Dedicated dbl
     125             :    https://eprint.iacr.org/2008/522
     126             :    Sec 4.4.
     127             :    This uses sqr instead of mul.
     128             : 
     129             :    TODO: use the same iface with_opts?
     130             :   */
     131             : 
     132             : FD_25519_INLINE fd_ed25519_point_t *
     133             : fd_ed25519_partial_dbl( fd_ed25519_point_t *       r,
     134    67413699 :                         fd_ed25519_point_t const * a ) {
     135    67413699 :   FD_R43X6_GE_DBL( r->P, a->P );
     136    67413699 :   return r;
     137    67413699 : }
     138             : 
     139             : fd_ed25519_point_t *
     140             : fd_ed25519_point_dbl( fd_ed25519_point_t *       r,
     141           0 :                       fd_ed25519_point_t const * a ) {
     142           0 :   FD_R43X6_GE_DBL( r->P, a->P );
     143           0 :   return r;
     144           0 : }
     145             : 
     146             : /*
     147             :  * Ser/de
     148             :  */
     149             : 
     150             : int
     151             : fd_ed25519_point_frombytes_2x( fd_ed25519_point_t * r1,
     152             :                                uchar const          buf1[ 32 ],
     153             :                                fd_ed25519_point_t * r2,
     154      300754 :                                uchar const          buf2[ 32 ] ) {
     155             :   //TODO: consider unifying code with ref
     156      300754 :   return FD_R43X6_GE_DECODE2( r1->P, buf1, r2->P, buf2 );
     157      300754 : }
     158             : 
     159             : /*
     160             :   Affine (only for init(), can be slow)
     161             : */
     162             : fd_ed25519_point_t *
     163             : fd_curve25519_affine_frombytes( fd_ed25519_point_t * r,
     164             :                                 uchar const          _x[ 32 ],
     165           1 :                                 uchar const          _y[ 32 ] ) {
     166           1 :   fd_f25519_t x[1], y[1], z[1], t[1];
     167           1 :   fd_f25519_frombytes( x, _x );
     168           1 :   fd_f25519_frombytes( y, _y );
     169           1 :   fd_f25519_set( z, fd_f25519_one );
     170           1 :   fd_f25519_mul( t, x, y );
     171           1 :   FD_R43X6_QUAD_PACK( r->P, x->el, y->el, z->el, t->el );
     172           1 :   return r;
     173           1 : }
     174             : 
     175             : fd_ed25519_point_t *
     176           0 : fd_curve25519_into_affine( fd_ed25519_point_t * r ) {
     177           0 :   fd_f25519_t x[1], y[1], z[1], t[1];
     178           0 :   FD_R43X6_QUAD_UNPACK( x->el, y->el, z->el, t->el, r->P );
     179           0 :   fd_f25519_inv( z, z );
     180           0 :   fd_f25519_mul( x, x, z );
     181           0 :   fd_f25519_mul( y, y, z );
     182           0 :   fd_f25519_set( z, fd_f25519_one );
     183           0 :   fd_f25519_mul( t, x, y );
     184           0 :   FD_R43X6_QUAD_PACK( r->P, x->el, y->el, z->el, t->el );
     185           0 :   return r;
     186           0 : }

Generated by: LCOV version 1.14