Line data Source code
1 : #include "fd_r43x6_ge.h"
2 :
3 : /* Direct quotes from RFC 8032 are indicated with '//' style comments. */
4 :
5 : wv_t
6 0 : fd_r43x6_ge_encode( wwl_t P03, wwl_t P14, wwl_t P25 ) {
7 :
8 : // 5.1.2. Encoding
9 : //
10 : // All values are coded as octet strings, and integers are coded using
11 : // little-endian convention, i.e., a 32-octet string h h[0],...h[31]
12 : // represents the integer h[0] + 2^8 * h[1] + ... + 2^248 * h[31].
13 : //
14 : // A curve point (x,y), with coordinates in the range 0 <= x,y < p, is
15 : // coded as follows.
16 :
17 : /* The below computes x=mod(X/Z) and y=mod(Y/Z) but doesn't repack
18 : the results into fd_r43x6_t's since we need to tweak y's limbs with
19 : values from x0 in the next part. */
20 :
21 0 : fd_r43x6_t X,Y,Z,T; FD_R43X6_QUAD_UNPACK( X,Y,Z,T, P ); /* in u47/u47/u47/u47 */
22 0 : (void)T;
23 :
24 0 : fd_r43x6_t one_Z = fd_r43x6_invert( Z ); /* in u44 */
25 :
26 : /* TODO: Consider a FD_R43X6_MUL2_FAST_INL? */
27 0 : fd_r43x6_t x = fd_r43x6_mul_fast( X, one_Z ); /* in u63 */ fd_r43x6_t y = fd_r43x6_mul_fast( Y, one_Z ); /* in u63 */
28 :
29 0 : long x0,x1,x2,x3,x4,x5; fd_r43x6_extract_limbs( x, x ); long y0,y1,y2,y3,y4,y5; fd_r43x6_extract_limbs( y, y );
30 :
31 0 : fd_r43x6_biased_carry_propagate_limbs( x, x, 0L ); /* in nr */ fd_r43x6_biased_carry_propagate_limbs( y, y, 0L ); /* in nr */
32 :
33 0 : fd_r43x6_mod_nearly_reduced_limbs( x, x ); /* in [0,p) */ fd_r43x6_mod_nearly_reduced_limbs( y, y ); /* in [0,p) */
34 :
35 : // First, encode the y-coordinate as a little-endian string of 32
36 : // octets. The most significant bit of the final octet is always
37 : // zero. To form the encoding of the point, copy the least
38 : // significant bit of the x-coordinate to the most significant bit of
39 : // the final octet.
40 :
41 : /* Since the limbs of y are already in a packed form, we copy the
42 : least significant bit before we pack up the uint256 to save some
43 : memory operations. */
44 :
45 0 : return fd_r43x6_pack( fd_r43x6( y0, y1, y2, y3, y4, y5 | ((x0 & 1L)<<40) ) );
46 0 : }
47 :
48 : int
49 : fd_r43x6_ge_decode( wwl_t * _P03, wwl_t * _P14, wwl_t * _P25,
50 0 : void const * _vs ) {
51 :
52 : // 5.1.3. Decoding
53 : //
54 : // Decoding a point, given as a 32-octet string, is a little more
55 : // complicated.
56 : //
57 : // 1. First, interpret the string as an integer in little-endian
58 : // representation.
59 :
60 0 : ulong _s[4] __attribute__((aligned(32)));
61 0 : memcpy( _s, _vs, 32UL );
62 0 : ulong y0 = _s[0]; /* Bits 0- 63 */
63 0 : ulong y1 = _s[1]; /* Bits 64-127 */
64 0 : ulong y2 = _s[2]; /* Bits 128-191 */
65 0 : ulong y3 = _s[3]; /* Bits 192-255 */
66 :
67 : // Bit 255 of this number is the least significant bit of the
68 : // x-coordinate and denote this value x_0.
69 :
70 0 : int x_0 = (int)(y3>>63);
71 :
72 : // The y-coordinate is recovered simply by clearing this bit.
73 :
74 0 : y3 &= ~(1UL<<63);
75 :
76 0 : fd_r43x6_t y = fd_r43x6_unpack( wv( y0, y1, y2, y3 ) );
77 :
78 : // 2. To recover the x-coordinate, the curve equation implies x^2 =
79 : // (y^2 - 1) / (d y^2 + 1) (mod p). The denominator is always
80 : // non-zero mod p. Let u = y^2 - 1 and v = d y^2 + 1.
81 :
82 0 : fd_r43x6_t const one = fd_r43x6_one();
83 0 : fd_r43x6_t const d = fd_r43x6_d();
84 :
85 0 : fd_r43x6_t ysq = fd_r43x6_sqr( y ); /* y^2 in u44 */
86 0 : fd_r43x6_t u = fd_r43x6_sub( ysq, one ); /* u = y^2 - 1 in u44 */
87 0 : fd_r43x6_t v = fd_r43x6_add_fast( fd_r43x6_mul( d, ysq ), one ); /* v = d y^2 + 1 in u44 */
88 :
89 : // To compute the square root of (u/v), the first step is to compute
90 : // the candidate root x = (u/v)^((p+3)/8). This can be done with the
91 : // following trick, using a single modular powering for both the
92 : // inversion of v and the square root:
93 : //
94 : // (p+3)/8 3 (p-5)/8
95 : // x = (u/v) = u v (u v^7) (mod p)
96 :
97 0 : fd_r43x6_t v2 = fd_r43x6_sqr( v ); /* v^2 in u44 */
98 0 : fd_r43x6_t v4 = fd_r43x6_sqr( v2 ); /* v^4 in u44 */
99 0 : fd_r43x6_t v3 = fd_r43x6_mul( v, v2 ); /* v^3 in u44 */
100 0 : fd_r43x6_t uv3 = fd_r43x6_mul( u, v3 ); /* u v^3 in u44 */
101 0 : fd_r43x6_t uv7 = fd_r43x6_mul( uv3, v4 ); /* u v^7 in u44 */
102 0 : fd_r43x6_t t0 = fd_r43x6_pow22523( uv7 ); /* (u v^7)^((p-5)/8) in u44 */
103 0 : fd_r43x6_t x = fd_r43x6_mul( uv3, t0 ); /* x in u44 */
104 :
105 : // 3. Again, there are three cases:
106 : //
107 : // 1. If v x^2 = u (mod p), x is a square root.
108 : //
109 : // 2. If v x^2 = -u (mod p), set x <-- x * 2^((p-1)/4), which is
110 : // a square root.
111 : //
112 : // 3. Otherwise, no square root exists for modulo p, and decoding
113 : // fails
114 :
115 : /* The implementation below has flattened the above branches to make
116 : it more amenable to doing multiple decodes concurrently. This also
117 : makes this essentially a constant time algorithm (but such isn't
118 : required here). */
119 :
120 0 : fd_r43x6_t x2 = fd_r43x6_sqr( x ); /* x^2 in u44 */
121 0 : fd_r43x6_t vx2 = fd_r43x6_mul( v, x2 ); /* v x^2 in u44 */
122 0 : fd_r43x6_t t1 = fd_r43x6_sub_fast( vx2, u ); /* v x^2 - u in s44 */
123 0 : fd_r43x6_t t2 = fd_r43x6_add_fast( vx2, u ); /* v x^2 + u in u45 */
124 0 : int t1nz = fd_r43x6_is_nonzero( t1 );
125 0 : int t2nz = fd_r43x6_is_nonzero( t2 );
126 0 : if( FD_UNLIKELY( t1nz & t2nz ) ) goto fail; /* case 3 */
127 0 : fd_r43x6_t t3 = fd_r43x6_if( t1nz, fd_r43x6_imag(), one ); /* in u43 */
128 0 : /**/ x = fd_r43x6_mul( x, t3 ); /* in u44 */
129 :
130 : // 4. Finally, use the x_0 bit to select the right square root. If x
131 : // = 0, and x_0 = 1, decoding fails." */
132 :
133 : /* Note that we could merge this branch with the above for even
134 : more deterministic performance.
135 :
136 : WARNING! This check seems to be missing from the OpenSSL
137 : implementation. */
138 :
139 0 : int x_mod_2 = fd_r43x6_diagnose( x );
140 0 : if( FD_UNLIKELY( (x_mod_2==-1) & (x_0==1) ) ) goto fail;
141 :
142 : // Otherwise, if x_0 != x mod 2, set x <-- p - x.
143 :
144 0 : x = fd_r43x6_if( x_0!=x_mod_2, fd_r43x6_neg( x ) /* in u44 */, x );
145 :
146 : // Return the decoded point (x,y).
147 :
148 0 : FD_R43X6_QUAD_PACK( *_P,
149 0 : x, /* in u44 */
150 0 : y, /* Reduced */
151 0 : one, /* Reduced */
152 0 : fd_r43x6_mul( x, y ) ); /* in u44 */
153 0 : FD_R43X6_QUAD_FOLD_UNSIGNED( *_P, *_P );
154 0 : return 0;
155 :
156 0 : fail:
157 0 : *_P03 = fd_r43x6_zero();
158 0 : *_P14 = fd_r43x6_zero();
159 0 : *_P25 = fd_r43x6_zero();
160 0 : return -1;
161 0 : }
162 :
163 : int
164 : fd_r43x6_ge_decode2( wwl_t * _Pa03, wwl_t * _Pa14, wwl_t * _Pa25,
165 : void const * _vsa,
166 : wwl_t * _Pb03, wwl_t * _Pb14, wwl_t * _Pb25,
167 300754 : void const * _vsb ) {
168 :
169 : # if 0 /* Reference implementation */
170 :
171 : if( FD_UNLIKELY( fd_r43x6_ge_decode( _Pa03, _Pa14, _Pa25, _vsa ) ) ) {
172 : *_Pb03 = wwl_zero(); *_Pb14 = wwl_zero(); *_Pb25 = wwl_zero();
173 : return -1;
174 : }
175 :
176 : if( FD_UNLIKELY( fd_r43x6_ge_decode( _Pb03, _Pb14, _Pb25, _vsb ) ) ) {
177 : *_Pa03 = wwl_zero(); *_Pa14 = wwl_zero(); *_Pa25 = wwl_zero();
178 : return -2;
179 : }
180 :
181 : return 0;
182 :
183 : # else /* HPC implementation */
184 :
185 300754 : fd_r43x6_t const one = fd_r43x6_one();
186 300754 : fd_r43x6_t const d = fd_r43x6_d();
187 300754 : fd_r43x6_t const sqrt_m1 = fd_r43x6_imag();
188 :
189 300754 : ulong _sa[4] __attribute__((aligned(32))); ulong _sb[4] __attribute__((aligned(32)));
190 300754 : memcpy( _sa, _vsa, 32UL ); memcpy( _sb, _vsb, 32UL );
191 300754 : ulong y0a = _sa[0]; ulong y0b = _sb[0];
192 300754 : ulong y1a = _sa[1]; ulong y1b = _sb[1];
193 300754 : ulong y2a = _sa[2]; ulong y2b = _sb[2];
194 300754 : ulong y3a = _sa[3]; ulong y3b = _sb[3];
195 :
196 300754 : int x_0a = (int)(y3a>>63); int x_0b = (int)(y3b>>63);
197 :
198 300754 : y3a &= ~(1UL<<63); y3b &= ~(1UL<<63);
199 :
200 300754 : _sa[0] = y0a; _sb[0] = y0b;
201 300754 : _sa[1] = y1a; _sb[1] = y1b;
202 300754 : _sa[2] = y2a; _sb[2] = y2b;
203 300754 : _sa[3] = y3a; _sb[3] = y3b;
204 300754 : fd_r43x6_t ya = fd_r43x6_unpack( wv_ld( _sa ) ); fd_r43x6_t yb = fd_r43x6_unpack( wv_ld( _sb ) );
205 :
206 300754 : fd_r43x6_t ysqa, ysqb; FD_R43X6_SQR2_INL ( ysqa, ya, ysqb, yb );
207 300754 : fd_r43x6_t ua = fd_r43x6_sub( ysqa, one ); fd_r43x6_t ub = fd_r43x6_sub( ysqb, one );
208 300754 : fd_r43x6_t va = fd_r43x6_add_fast( fd_r43x6_mul(d,ysqa), one ); fd_r43x6_t vb = fd_r43x6_add_fast( fd_r43x6_mul(d,ysqb), one );
209 :
210 300754 : fd_r43x6_t v2a, v2b; FD_R43X6_SQR2_INL ( v2a, va, v2b, vb );
211 300754 : fd_r43x6_t v4a, v4b; FD_R43X6_SQR2_INL ( v4a, v2a, v4b, v2b );
212 300754 : fd_r43x6_t v3a, v3b; FD_R43X6_MUL2_INL ( v3a, va,v2a, v3b, vb,v2b );
213 300754 : fd_r43x6_t uv3a, uv3b; FD_R43X6_MUL2_INL ( uv3a, ua,v3a, uv3b, ub,v3b );
214 300754 : fd_r43x6_t uv7a, uv7b; FD_R43X6_MUL2_INL ( uv7a, uv3a,v4a, uv7b, uv3b,v4b );
215 300754 : fd_r43x6_t t0a, t0b; FD_R43X6_POW22523_2_INL( t0a, uv7a, t0b, uv7b );
216 300754 : fd_r43x6_t xa, xb; FD_R43X6_MUL2_INL ( xa, uv3a,t0a, xb, uv3b,t0b );
217 :
218 300754 : fd_r43x6_t x2a, x2b; FD_R43X6_SQR2_INL ( x2a, xa, x2b, xb );
219 300754 : fd_r43x6_t vx2a, vx2b; FD_R43X6_MUL2_INL ( vx2a, va,x2a, vx2b, vb,x2b );
220 300754 : fd_r43x6_t t1a = fd_r43x6_sub_fast( vx2a, ua ); fd_r43x6_t t1b = fd_r43x6_sub_fast( vx2b, ub );
221 300754 : fd_r43x6_t t2a = fd_r43x6_add_fast( vx2a, ua ); fd_r43x6_t t2b = fd_r43x6_add_fast( vx2b, ub );
222 300754 : int t1nza = fd_r43x6_is_nonzero( t1a ); int t1nzb = fd_r43x6_is_nonzero( t1b );
223 300754 : int t2nza = fd_r43x6_is_nonzero( t2a ); int t2nzb = fd_r43x6_is_nonzero( t2b );
224 300754 : if( FD_UNLIKELY( t1nza & t2nza ) ) goto faila;
225 260559 : /**/ if( FD_UNLIKELY( t1nzb & t2nzb ) ) goto failb;
226 :
227 257974 : fd_r43x6_t t3a = fd_r43x6_if( t1nza, sqrt_m1, one ); fd_r43x6_t t3b = fd_r43x6_if( t1nzb, sqrt_m1, one );
228 257974 : /**/ FD_R43X6_MUL2_INL ( xa, xa,t3a, xb, xb,t3b );
229 :
230 257974 : int x_mod_2a = fd_r43x6_diagnose( xa ); int x_mod_2b = fd_r43x6_diagnose( xb );
231 257974 : if( FD_UNLIKELY( (x_mod_2a==-1) & (x_0a==1) ) ) goto faila;
232 257704 : /**/ if( FD_UNLIKELY( (x_mod_2b==-1) & (x_0b==1) ) ) goto failb;
233 257333 : xa = fd_r43x6_if( x_0a!=x_mod_2a, fd_r43x6_neg( xa ), xa ); xb = fd_r43x6_if( x_0b!=x_mod_2b, fd_r43x6_neg( xb ), xb );
234 :
235 257333 : fd_r43x6_t xya, xyb; FD_R43X6_MUL2_INL ( xya, xa,ya, xyb, xb,yb );
236 :
237 257333 : FD_R43X6_QUAD_PACK( *_Pa, xa,ya,one,xya ); FD_R43X6_QUAD_PACK( *_Pb, xb,yb,one,xyb );
238 257333 : FD_R43X6_QUAD_FOLD_UNSIGNED( *_Pa, *_Pa ); FD_R43X6_QUAD_FOLD_UNSIGNED( *_Pb, *_Pb );
239 257333 : return 0;
240 :
241 40465 : faila:
242 40465 : *_Pa03 = wwl_zero(); *_Pb03 = wwl_zero();
243 40465 : *_Pa14 = wwl_zero(); *_Pb14 = wwl_zero();
244 40465 : *_Pa25 = wwl_zero(); *_Pb25 = wwl_zero();
245 40465 : return -1;
246 :
247 2956 : failb:
248 2956 : *_Pa03 = wwl_zero(); *_Pb03 = wwl_zero();
249 2956 : *_Pa14 = wwl_zero(); *_Pb14 = wwl_zero();
250 2956 : *_Pa25 = wwl_zero(); *_Pb25 = wwl_zero();
251 2956 : return -2;
252 :
253 257704 : # endif
254 257704 : }
|