Line data Source code
1 : #include "../fd_curve25519.h"
2 : #include "./fd_r43x6_ge.h"
3 :
4 : /*
5 : * Add
6 : */
7 :
8 : /* fd_ed25519_point_add_with_opts computes r = a + b, and returns r.
9 :
10 : https://eprint.iacr.org/2008/522
11 : Sec 4.2, 4-Processor Montgomery addition and doubling.
12 :
13 : This implementation includes several optional optimizations
14 : that are used for speeding up scalar multiplication:
15 :
16 : - b_Z_is_one, if b->Z == 1 (affine, or decompressed), we can skip 1mul
17 :
18 : - b_is_precomputed, since the scalar mul loop typically accumulates
19 : points from a table, we can pre-compute kT into the table points and
20 : therefore skip 1mul in during the loop.
21 :
22 : - skip_last_mul, since dbl can be computed with just (X, Y, Z)
23 : and doesn't need T, we can skip the last 4 mul and selectively
24 : compute (X, Y, Z) or (X, Y, Z, T) during the scalar mul loop.
25 : */
26 : FD_25519_INLINE fd_ed25519_point_t *
27 : fd_ed25519_point_add_with_opts( fd_ed25519_point_t * r,
28 : fd_ed25519_point_t const * a,
29 : fd_ed25519_point_t const * b,
30 : FD_PARAM_UNUSED int const b_Z_is_one,
31 : int const b_is_precomputed,
32 25076562 : FD_PARAM_UNUSED int const skip_last_mul ) {
33 :
34 25076562 : if( b_is_precomputed ) {
35 23063947 : fd_ed25519_point_t tmp[2];
36 23063947 : FD_R43X6_GE_ADD_TABLE_ALT( r->P, a->P, b->P, tmp[0].P, tmp[1].P );
37 23063947 : } else {
38 2012615 : FD_R43X6_GE_ADD( r->P, a->P, b->P );
39 2012615 : }
40 25076562 : return r;
41 25076562 : }
42 :
43 : /* fd_ed25519_point_add computes r = a + b, and returns r. */
44 : fd_ed25519_point_t *
45 : fd_ed25519_point_add( fd_ed25519_point_t * r,
46 : fd_ed25519_point_t const * a,
47 1012504 : fd_ed25519_point_t const * b ) {
48 1012504 : return fd_ed25519_point_add_with_opts( r, a, b, 0, 0, 0 );
49 1012504 : }
50 :
51 : /* fd_ed25519_point_add_final_mul computes just the final mul step in point add.
52 : See fd_ed25519_point_add_with_opts. */
53 : FD_25519_INLINE fd_ed25519_point_t *
54 : fd_ed25519_point_add_final_mul( fd_ed25519_point_t * restrict r,
55 23332111 : fd_ed25519_point_t const * a ) {
56 23332111 : fd_ed25519_point_set( r, a );
57 23332111 : return r;
58 23332111 : }
59 :
60 : /* fd_ed25519_point_add_final_mul_projective computes just the final mul step
61 : in point add, assuming the result is projective (X, Y, Z), i.e. ignoring T.
62 : This is useful because dbl only needs (X, Y, Z) in input, so we can save 1mul.
63 : See fd_ed25519_point_add_with_opts. */
64 : FD_25519_INLINE fd_ed25519_point_t *
65 : fd_ed25519_point_add_final_mul_projective( fd_ed25519_point_t * restrict r,
66 67145782 : fd_ed25519_point_t const * a ) {
67 67145782 : fd_ed25519_point_set( r, a );
68 67145782 : return r;
69 67145782 : }
70 :
71 : /*
72 : * Sub
73 : */
74 :
75 : /* fd_ed25519_point_sub sets r = -a. */
76 : FD_25519_INLINE fd_ed25519_point_t *
77 : fd_ed25519_point_neg_precomputed( fd_ed25519_point_t * r,
78 10115483 : fd_ed25519_point_t const * a ) {
79 : /* use p instead of zero to avoid mod reduction */
80 10115483 : FD_R43X6_QUAD_DECL( _p );
81 10115483 : _p03 = wwl( 8796093022189L, 8796093022189L, 8796093022189L, 8796093022189L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L );
82 10115483 : _p14 = wwl( 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L );
83 10115483 : _p25 = wwl( 8796093022207L, 8796093022207L, 8796093022207L, 8796093022207L, 1099511627775L, 1099511627775L, 1099511627775L, 1099511627775L );
84 10115483 : FD_R43X6_QUAD_LANE_SUB_FAST( r->P, a->P, 0,0,0,1, _p, a->P );
85 10115483 : FD_R43X6_QUAD_PERMUTE ( r->P, 1,0,2,3, r->P );
86 10115483 : return r;
87 10115483 : }
88 :
89 : /* fd_ed25519_point_sub_with_opts computes r = a - b, and returns r.
90 : This is like fd_ed25519_point_add_with_opts, replacing:
91 : - b->X => -b->X
92 : - b->T => -b->T
93 : See fd_ed25519_point_add_with_opts for details.
94 : */
95 : FD_25519_INLINE fd_ed25519_point_t *
96 : fd_ed25519_point_sub_with_opts( fd_ed25519_point_t * r,
97 : fd_ed25519_point_t const * a,
98 : fd_ed25519_point_t const * b,
99 : int const b_Z_is_one,
100 : int const b_is_precomputed,
101 11115594 : int const skip_last_mul ) {
102 :
103 11115594 : fd_ed25519_point_t neg[1];
104 11115594 : if (b_is_precomputed) {
105 10115483 : fd_ed25519_point_neg_precomputed( neg, b );
106 10115483 : } else {
107 1000111 : fd_ed25519_point_neg( neg, b );
108 1000111 : }
109 11115594 : return fd_ed25519_point_add_with_opts( r, a, neg, b_Z_is_one, b_is_precomputed, skip_last_mul );
110 11115594 : }
111 :
112 : /* fd_ed25519_point_sub computes r = a - b, and returns r. */
113 : fd_ed25519_point_t *
114 : fd_ed25519_point_sub( fd_ed25519_point_t * r,
115 : fd_ed25519_point_t const * a,
116 1000111 : fd_ed25519_point_t const * b ) {
117 1000111 : return fd_ed25519_point_sub_with_opts( r, a, b, 0, 0, 0 );
118 1000111 : }
119 :
120 : /*
121 : * Dbl
122 : */
123 :
124 : /* Dedicated dbl
125 : https://eprint.iacr.org/2008/522
126 : Sec 4.4.
127 : This uses sqr instead of mul.
128 :
129 : TODO: use the same iface with_opts?
130 : */
131 :
132 : FD_25519_INLINE fd_ed25519_point_t *
133 : fd_ed25519_partial_dbl( fd_ed25519_point_t * r,
134 67413946 : fd_ed25519_point_t const * a ) {
135 67413946 : FD_R43X6_GE_DBL( r->P, a->P );
136 67413946 : return r;
137 67413946 : }
138 :
139 : fd_ed25519_point_t *
140 : fd_ed25519_point_dbl( fd_ed25519_point_t * r,
141 0 : fd_ed25519_point_t const * a ) {
142 0 : FD_R43X6_GE_DBL( r->P, a->P );
143 0 : return r;
144 0 : }
145 :
146 : /*
147 : * Ser/de
148 : */
149 :
150 : int
151 : fd_ed25519_point_frombytes_2x( fd_ed25519_point_t * r1,
152 : uchar const buf1[ 32 ],
153 : fd_ed25519_point_t * r2,
154 300755 : uchar const buf2[ 32 ] ) {
155 : //TODO: consider unifying code with ref
156 300755 : return FD_R43X6_GE_DECODE2( r1->P, buf1, r2->P, buf2 );
157 300755 : }
158 :
159 : /*
160 : Affine (only for init(), can be slow)
161 : */
162 : fd_ed25519_point_t *
163 : fd_curve25519_affine_frombytes( fd_ed25519_point_t * r,
164 : uchar const _x[ 32 ],
165 1 : uchar const _y[ 32 ] ) {
166 1 : fd_f25519_t x[1], y[1], z[1], t[1];
167 1 : fd_f25519_frombytes( x, _x );
168 1 : fd_f25519_frombytes( y, _y );
169 1 : fd_f25519_set( z, fd_f25519_one );
170 1 : fd_f25519_mul( t, x, y );
171 1 : FD_R43X6_QUAD_PACK( r->P, x->el, y->el, z->el, t->el );
172 1 : return r;
173 1 : }
174 :
175 : fd_ed25519_point_t *
176 0 : fd_curve25519_into_affine( fd_ed25519_point_t * r ) {
177 0 : fd_f25519_t x[1], y[1], z[1], t[1];
178 0 : FD_R43X6_QUAD_UNPACK( x->el, y->el, z->el, t->el, r->P );
179 0 : fd_f25519_inv( z, z );
180 0 : fd_f25519_mul( x, x, z );
181 0 : fd_f25519_mul( y, y, z );
182 0 : fd_f25519_set( z, fd_f25519_one );
183 0 : fd_f25519_mul( t, x, y );
184 0 : FD_R43X6_QUAD_PACK( r->P, x->el, y->el, z->el, t->el );
185 0 : return r;
186 0 : }
|