LCOV - cov.lcov - ballet/ed25519/avx512/fd

LCOV - code coverage report

Current view:	top level - ballet/ed25519/avx512 - fd_f25519.h (source / functions)		Hit	Total	Coverage
Test:	cov.lcov	Lines:	58	92	63.0 %
Date:	2025-08-05 05:04:49	Functions:	51	520	9.8 %

          Line data    Source code

       1             : #ifndef HEADER_fd_src_ballet_ed25519_fd_f25519_h
       2             : #error "Do not include this directly; use fd_f25519.h"
       3             : #endif
       4             : 
       5             : #include "../../fd_ballet_base.h"
       6             : #include "fd_r43x6.h"
       7             : 
       8             : #define FD_F25519_ALIGN 64
       9             : 
      10             : /* A fd_f25519_t stores a curve25519 field element as a fd_r43x6_t, aligned to 64 bytes */
      11             : struct fd_f25519 {
      12             :   fd_r43x6_t el __attribute__((aligned(FD_F25519_ALIGN)));
      13             : };
      14             : typedef struct fd_f25519 fd_f25519_t;
      15             : 
      16             : #include "../table/fd_f25519_table_avx512.c"
      17             : 
      18             : FD_PROTOTYPES_BEGIN
      19             : 
      20             : /*
      21             :  * Implementation of inline functions
      22             :  */
      23             : 
      24             : /* fd_f25519_mul computes r = a * b, and returns r. */
      25             : FD_25519_INLINE fd_f25519_t *
      26             : fd_f25519_mul( fd_f25519_t * r,
      27             :                fd_f25519_t const * a,
      28    16064458 :                fd_f25519_t const * b ) {
      29    16064458 :   FD_R43X6_MUL1_INL( r->el, a->el, b->el );
      30    16064458 :   return r;
      31    16064458 : }
      32             : 
      33             : /* fd_f25519_sqr computes r = a^2, and returns r. */
      34             : FD_25519_INLINE fd_f25519_t *
      35             : fd_f25519_sqr( fd_f25519_t * r,
      36   249090118 :                fd_f25519_t const * a ) {
      37   249090118 :   FD_R43X6_SQR1_INL( r->el, a->el );
      38   249090118 :   return r;
      39   249090118 : }
      40             : 
      41             : /* fd_f25519_add computes r = a + b, and returns r. */
      42             : FD_25519_INLINE fd_f25519_t *
      43             : fd_f25519_add( fd_f25519_t * r,
      44             :                fd_f25519_t const * a,
      45     1320668 :                fd_f25519_t const * b ) {
      46     1320668 :   (r->el) = fd_r43x6_add( (a->el), (b->el) );
      47     1320668 :   return r;
      48     1320668 : }
      49             : 
      50             : /* fd_f25519_sub computes r = a - b, and returns r. */
      51             : FD_25519_INLINE fd_f25519_t *
      52             : fd_f25519_sub( fd_f25519_t * r,
      53             :                fd_f25519_t const * a,
      54     3236802 :                fd_f25519_t const * b ) {
      55     3236802 :   (r->el) = fd_r43x6_fold_signed( fd_r43x6_sub_fast( (a->el), (b->el) ) );
      56     3236802 :   return r;
      57     3236802 : }
      58             : 
      59             : /* fd_f25519_add_nr computes r = a + b, and returns r.
      60             :    Note: this does NOT reduce the result mod p.
      61             :    It can be used before mul, sqr. */
      62             : FD_25519_INLINE fd_f25519_t *
      63             : fd_f25519_add_nr( fd_f25519_t * r,
      64             :                   fd_f25519_t const * a,
      65           2 :                   fd_f25519_t const * b ) {
      66           2 :   (r->el) = fd_r43x6_add_fast( (a->el), (b->el) );
      67           2 :   return r;
      68           2 : }
      69             : 
      70             : /* fd_f25519_sub_nr computes r = a - b, and returns r.
      71             :    Note: this does NOT reduce the result mod p.
      72             :    It can be used before mul, sqr. */
      73             : FD_25519_INLINE fd_f25519_t *
      74             : fd_f25519_sub_nr( fd_f25519_t * r,
      75             :                   fd_f25519_t const * a,
      76           0 :                   fd_f25519_t const * b ) {
      77           0 :   (r->el) = fd_r43x6_sub_fast( (a->el), (b->el) );
      78           0 :   return r;
      79           0 : }
      80             : 
      81             : /* fd_f25519_neg computes r = -a, and returns r. */
      82             : FD_25519_INLINE fd_f25519_t *
      83             : fd_f25519_neg( fd_f25519_t * r,
      84      992497 :                fd_f25519_t const * a ) {
      85      992497 :   (r->el) = fd_r43x6_neg_fast( (a->el) );
      86      992497 :   return r;
      87      992497 : }
      88             : 
      89             : /* fd_f25519_mul_121666 computes r = a * k, k=121666, and returns r. */
      90             : FD_25519_INLINE fd_f25519_t *
      91             : fd_f25519_mul_121666( fd_f25519_t * r,
      92           0 :                       FD_FN_UNUSED fd_f25519_t const * a ) {
      93           0 :   (r->el) = fd_r43x6_fold_unsigned( fd_r43x6_scale_fast( 121666L, (a->el) ) );
      94           0 :   return r;
      95           0 : }
      96             : 
      97             : /* fd_f25519_frombytes deserializes a 32-byte buffer buf into a
      98             :    fd_f25519_t element r, and returns r.
      99             :    buf is in little endian form, according to RFC 8032. */
     100             : FD_25519_INLINE fd_f25519_t *
     101             : fd_f25519_frombytes( fd_f25519_t * r,
     102     1196369 :                      uchar const   buf[ 32 ] ) {
     103     1196369 :   ulong y0 = fd_ulong_load_8_fast( buf );                         /* Bits   0- 63 */
     104     1196369 :   ulong y1 = fd_ulong_load_8_fast( buf+8 );                       /* Bits  64-127 */
     105     1196369 :   ulong y2 = fd_ulong_load_8_fast( buf+16 );                      /* Bits 128-191 */
     106     1196369 :   ulong y3 = fd_ulong_load_8_fast( buf+24 ) & 0x7fffffffffffffff; /* Bits 192-254 */
     107     1196369 :   r->el = fd_r43x6_unpack( wv( y0, y1, y2, y3 ) );
     108     1196369 :   return r;
     109     1196369 : }
     110             : 
     111             : /* fd_f25519_tobytes serializes a fd_f25519_t element a into
     112             :    a 32-byte buffer out, and returns out.
     113             :    out is in little endian form, according to RFC 8032. */
     114             : FD_25519_INLINE uchar *
     115             : fd_f25519_tobytes( uchar               out[ 32 ],
     116     3167688 :                    fd_f25519_t const * a ) {
     117     3167688 :   wv_stu( out, fd_r43x6_pack( fd_r43x6_mod( a->el ) ) );
     118     3167688 :   return out;
     119     3167688 : }
     120             : 
     121             : /* fd_f25519_if sets r = a0 if cond, else r = a1, equivalent to:
     122             :    r = cond ? a0 : a1.
     123             :    Note: this is constant time. */
     124             : FD_25519_INLINE fd_f25519_t *
     125             : fd_f25519_if( fd_f25519_t *       r,
     126             :               int const           cond, /* 0, 1 */
     127             :               fd_f25519_t const * a0,
     128     1252057 :               fd_f25519_t const * a1 ) {
     129     1252057 :   r->el = fd_r43x6_if( -!!cond, a0->el, a1->el );
     130     1252057 :   return r;
     131     1252057 : }
     132             : 
     133             : /* fd_f25519_swap_if swaps r1, r2 if cond, else leaves them as is.
     134             :    Note: this is constant time. */
     135             : FD_25519_INLINE void
     136             : fd_f25519_swap_if( fd_f25519_t * restrict r1,
     137             :                    fd_f25519_t * restrict r2,
     138           0 :                    int const              cond /* 0, 1 */ ) {
     139           0 :   wwl_t zero = wwl_zero();
     140           0 :   wwl_t m = wwl_xor(r1->el, r2->el);
     141           0 :   m  = wwl_if( -!!cond, m, zero );
     142           0 :   r1->el = wwl_xor( r1->el, m );
     143           0 :   r2->el = wwl_xor( r2->el, m );
     144           0 : }
     145             : 
     146             : /* fd_f25519_set copies r = a, and returns r. */
     147             : FD_25519_INLINE fd_f25519_t *
     148             : fd_f25519_set( fd_f25519_t * r,
     149     1130538 :                fd_f25519_t const * a ) {
     150     1130538 :   r->el = a->el;
     151     1130538 :   return r;
     152     1130538 : }
     153             : 
     154             : /* fd_f25519_is_zero returns 1 if a == 0, 0 otherwise. */
     155             : FD_25519_INLINE int
     156     2939231 : fd_f25519_is_zero( fd_f25519_t const * a ) {
     157     2939231 :   return ( ( wwl_eq( a->el, fd_r43x6_zero() ) & 0xFF ) == 0xFF )
     158     2939231 :       || ( ( wwl_eq( a->el, fd_r43x6_p() )    & 0xFF ) == 0xFF );
     159     2939231 : }
     160             : 
     161             : /*
     162             :  * Vectorized
     163             :  */
     164             : 
     165             : /* fd_f25519_muln (n=2,3,4) computes r_i = a_i * b_i for i=1..n */
     166             : FD_25519_INLINE void
     167             : fd_f25519_mul2( fd_f25519_t * r1, fd_f25519_t const * a1, fd_f25519_t const * b1,
     168      599761 :                 fd_f25519_t * r2, fd_f25519_t const * a2, fd_f25519_t const * b2 ) {
     169      599761 :   FD_R43X6_MUL2_INL( r1->el, a1->el, b1->el,
     170      599761 :                      r2->el, a2->el, b2->el );
     171      599761 : }
     172             : 
     173             : FD_25519_INLINE void
     174             : fd_f25519_mul3( fd_f25519_t * r1, fd_f25519_t const * a1, fd_f25519_t const * b1,
     175             :                 fd_f25519_t * r2, fd_f25519_t const * a2, fd_f25519_t const * b2,
     176           0 :                 fd_f25519_t * r3, fd_f25519_t const * a3, fd_f25519_t const * b3 ) {
     177           0 :   FD_R43X6_MUL3_INL( r1->el, a1->el, b1->el,
     178           0 :                      r2->el, a2->el, b2->el,
     179           0 :                      r3->el, a3->el, b3->el );
     180           0 : }
     181             : 
     182             : FD_25519_INLINE void
     183             : fd_f25519_mul4( fd_f25519_t * r1, fd_f25519_t const * a1, fd_f25519_t const * b1,
     184             :                 fd_f25519_t * r2, fd_f25519_t const * a2, fd_f25519_t const * b2,
     185             :                 fd_f25519_t * r3, fd_f25519_t const * a3, fd_f25519_t const * b3,
     186       30002 :                 fd_f25519_t * r4, fd_f25519_t const * a4, fd_f25519_t const * b4 ) {
     187       30002 :   FD_R43X6_MUL4_INL( r1->el, a1->el, b1->el,
     188       30002 :                      r2->el, a2->el, b2->el,
     189       30002 :                      r3->el, a3->el, b3->el,
     190       30002 :                      r4->el, a4->el, b4->el );
     191       30002 : }
     192             : 
     193             : /* fd_f25519_sqrn (n=2,3,4) computes r_i = a_i^2 for i=1..n */
     194             : FD_25519_INLINE void
     195             : fd_f25519_sqr2( fd_f25519_t * r1, fd_f25519_t const * a1,
     196           0 :                 fd_f25519_t * r2, fd_f25519_t const * a2 ) {
     197           0 :   FD_R43X6_SQR2_INL( r1->el, a1->el,
     198           0 :                      r2->el, a2->el );
     199           0 : }
     200             : 
     201             : FD_25519_INLINE void
     202             : fd_f25519_sqr3( fd_f25519_t * r1, fd_f25519_t const * a1,
     203             :                 fd_f25519_t * r2, fd_f25519_t const * a2,
     204           0 :                 fd_f25519_t * r3, fd_f25519_t const * a3 ) {
     205           0 :   FD_R43X6_SQR3_INL( r1->el, a1->el,
     206           0 :                      r2->el, a2->el,
     207           0 :                      r3->el, a3->el );
     208           0 : }
     209             : 
     210             : FD_25519_INLINE void
     211             : fd_f25519_sqr4( fd_f25519_t * r1, fd_f25519_t const * a1,
     212             :                 fd_f25519_t * r2, fd_f25519_t const * a2,
     213             :                 fd_f25519_t * r3, fd_f25519_t const * a3,
     214           0 :                 fd_f25519_t * r4, fd_f25519_t const * a4 ) {
     215             :   FD_R43X6_SQR4_INL( r1->el, a1->el,
     216           0 :                      r2->el, a2->el,
     217           0 :                      r3->el, a3->el,
     218           0 :                      r4->el, a4->el );
     219           0 : }
     220             : 
     221             : FD_PROTOTYPES_END

Generated by: LCOV version 1.14