Line data Source code
1 : #include "fd_curve25519_scalar.h" 2 : 3 : uchar * 4 : fd_curve25519_scalar_reduce( uchar out[ 32 ], 5 7228827 : uchar const in [ 64 ] ) { 6 : 7 : /* Load the 512 bits to reduce */ 8 : 9 7228827 : ulong in0 = fd_ulong_load_8_fast( in ); 10 7228827 : ulong in1 = fd_ulong_load_8_fast( in + 8 ); 11 7228827 : ulong in2 = fd_ulong_load_8_fast( in + 16 ); 12 7228827 : ulong in3 = fd_ulong_load_8_fast( in + 24 ); 13 7228827 : ulong in4 = fd_ulong_load_8_fast( in + 32 ); 14 7228827 : ulong in5 = fd_ulong_load_8_fast( in + 40 ); 15 7228827 : ulong in6 = fd_ulong_load_8_fast( in + 48 ); 16 7228827 : ulong in7 = fd_ulong_load_8_fast( in + 56 ); 17 : 18 : /* Unpack into 23 21-bit integers and a 29-bit straggler */ 19 : 20 7228827 : ulong mask = FD_ULONG_MASK_LSB( 21 ); 21 : 22 7228827 : long s0 = (long)( in0 & mask); long s1 = (long)((in0>>21) & mask); long s2 = (long)((in0>>42) & mask); 23 7228827 : long s3 = (long)(((in0>>63) | (in1<<1)) & mask); long s4 = (long)((in1>>20) & mask); long s5 = (long)((in1>>41) & mask); 24 7228827 : long s6 = (long)(((in1>>62) | (in2<<2)) & mask); long s7 = (long)((in2>>19) & mask); long s8 = (long)((in2>>40) & mask); 25 7228827 : long s9 = (long)(((in2>>61) | (in3<<3)) & mask); long s10 = (long)((in3>>18) & mask); long s11 = (long)((in3>>39) & mask); 26 7228827 : long s12 = (long)(((in3>>60) | (in4<<4)) & mask); long s13 = (long)((in4>>17) & mask); long s14 = (long)((in4>>38) & mask); 27 7228827 : long s15 = (long)(((in4>>59) | (in5<<5)) & mask); long s16 = (long)((in5>>16) & mask); long s17 = (long)((in5>>37) & mask); 28 7228827 : long s18 = (long)(((in5>>58) | (in6<<6)) & mask); long s19 = (long)((in6>>15) & mask); long s20 = (long)((in6>>36) & mask); 29 7228827 : long s21 = (long)(((in6>>57) | (in7<<7)) & mask); long s22 = (long)((in7>>14) & mask); long s23 = (long)( in7>>35 ); 30 : 31 : /* Do the reduction */ 32 : 33 7228827 : s11 += s23*666643L; s12 += s23*470296L; s13 += s23*654183L; s14 -= s23*997805L; s15 += s23*136657L; s16 -= s23*683901L; s23 = 0L; 34 7228827 : s10 += s22*666643L; s11 += s22*470296L; s12 += s22*654183L; s13 -= s22*997805L; s14 += s22*136657L; s15 -= s22*683901L; s22 = 0L; 35 7228827 : s9 += s21*666643L; s10 += s21*470296L; s11 += s21*654183L; s12 -= s21*997805L; s13 += s21*136657L; s14 -= s21*683901L; s21 = 0L; 36 7228827 : s8 += s20*666643L; s9 += s20*470296L; s10 += s20*654183L; s11 -= s20*997805L; s12 += s20*136657L; s13 -= s20*683901L; s20 = 0L; 37 7228827 : s7 += s19*666643L; s8 += s19*470296L; s9 += s19*654183L; s10 -= s19*997805L; s11 += s19*136657L; s12 -= s19*683901L; s19 = 0L; 38 7228827 : s6 += s18*666643L; s7 += s18*470296L; s8 += s18*654183L; s9 -= s18*997805L; s10 += s18*136657L; s11 -= s18*683901L; s18 = 0L; 39 : 40 7228827 : long carry6 = (s6 + (1L << 20)) >> 21; s7 += carry6; s6 -= (long)((ulong)carry6 << 21); 41 7228827 : long carry8 = (s8 + (1L << 20)) >> 21; s9 += carry8; s8 -= (long)((ulong)carry8 << 21); 42 7228827 : long carry10 = (s10 + (1L << 20)) >> 21; s11 += carry10; s10 -= (long)((ulong)carry10 << 21); 43 7228827 : long carry12 = (s12 + (1L << 20)) >> 21; s13 += carry12; s12 -= (long)((ulong)carry12 << 21); 44 7228827 : long carry14 = (s14 + (1L << 20)) >> 21; s15 += carry14; s14 -= (long)((ulong)carry14 << 21); 45 7228827 : long carry16 = (s16 + (1L << 20)) >> 21; s17 += carry16; s16 -= (long)((ulong)carry16 << 21); 46 : 47 7228827 : long carry7 = (s7 + (1L << 20)) >> 21; s8 += carry7; s7 -= (long)((ulong)carry7 << 21); 48 7228827 : long carry9 = (s9 + (1L << 20)) >> 21; s10 += carry9; s9 -= (long)((ulong)carry9 << 21); 49 7228827 : long carry11 = (s11 + (1L << 20)) >> 21; s12 += carry11; s11 -= (long)((ulong)carry11 << 21); 50 7228827 : long carry13 = (s13 + (1L << 20)) >> 21; s14 += carry13; s13 -= (long)((ulong)carry13 << 21); 51 7228827 : long carry15 = (s15 + (1L << 20)) >> 21; s16 += carry15; s15 -= (long)((ulong)carry15 << 21); 52 : 53 7228827 : s5 += s17*666643L; s6 += s17*470296L; s7 += s17*654183L; s8 -= s17*997805L; s9 += s17*136657L; s10 -= s17*683901L; s17 = 0L; 54 7228827 : s4 += s16*666643L; s5 += s16*470296L; s6 += s16*654183L; s7 -= s16*997805L; s8 += s16*136657L; s9 -= s16*683901L; s16 = 0L; 55 7228827 : s3 += s15*666643L; s4 += s15*470296L; s5 += s15*654183L; s6 -= s15*997805L; s7 += s15*136657L; s8 -= s15*683901L; s15 = 0L; 56 7228827 : s2 += s14*666643L; s3 += s14*470296L; s4 += s14*654183L; s5 -= s14*997805L; s6 += s14*136657L; s7 -= s14*683901L; s14 = 0L; 57 7228827 : s1 += s13*666643L; s2 += s13*470296L; s3 += s13*654183L; s4 -= s13*997805L; s5 += s13*136657L; s6 -= s13*683901L; s13 = 0L; 58 7228827 : s0 += s12*666643L; s1 += s12*470296L; s2 += s12*654183L; s3 -= s12*997805L; s4 += s12*136657L; s5 -= s12*683901L; s12 = 0L; 59 : 60 7228827 : long carry0 = (s0 + (1L << 20)) >> 21; s1 += carry0; s0 -= (long)((ulong)carry0 << 21); 61 7228827 : long carry2 = (s2 + (1L << 20)) >> 21; s3 += carry2; s2 -= (long)((ulong)carry2 << 21); 62 7228827 : long carry4 = (s4 + (1L << 20)) >> 21; s5 += carry4; s4 -= (long)((ulong)carry4 << 21); 63 7228827 : /**/ carry6 = (s6 + (1L << 20)) >> 21; s7 += carry6; s6 -= (long)((ulong)carry6 << 21); 64 7228827 : /**/ carry8 = (s8 + (1L << 20)) >> 21; s9 += carry8; s8 -= (long)((ulong)carry8 << 21); 65 7228827 : /**/ carry10 = (s10 + (1L << 20)) >> 21; s11 += carry10; s10 -= (long)((ulong)carry10 << 21); 66 : 67 7228827 : long carry1 = (s1 + (1L << 20)) >> 21; s2 += carry1; s1 -= (long)((ulong)carry1 << 21); 68 7228827 : long carry3 = (s3 + (1L << 20)) >> 21; s4 += carry3; s3 -= (long)((ulong)carry3 << 21); 69 7228827 : long carry5 = (s5 + (1L << 20)) >> 21; s6 += carry5; s5 -= (long)((ulong)carry5 << 21); 70 7228827 : /**/ carry7 = (s7 + (1L << 20)) >> 21; s8 += carry7; s7 -= (long)((ulong)carry7 << 21); 71 7228827 : /**/ carry9 = (s9 + (1L << 20)) >> 21; s10 += carry9; s9 -= (long)((ulong)carry9 << 21); 72 7228827 : /**/ carry11 = (s11 + (1L << 20)) >> 21; s12 += carry11; s11 -= (long)((ulong)carry11 << 21); 73 : 74 7228827 : s0 += s12*666643L; s1 += s12*470296L; s2 += s12*654183L; s3 -= s12*997805L; s4 += s12*136657L; s5 -= s12*683901L; s12 = 0L; 75 : 76 7228827 : carry0 = s0 >> 21; s1 += carry0; s0 -= (long)((ulong)carry0 << 21); 77 7228827 : carry1 = s1 >> 21; s2 += carry1; s1 -= (long)((ulong)carry1 << 21); 78 7228827 : carry2 = s2 >> 21; s3 += carry2; s2 -= (long)((ulong)carry2 << 21); 79 7228827 : carry3 = s3 >> 21; s4 += carry3; s3 -= (long)((ulong)carry3 << 21); 80 7228827 : carry4 = s4 >> 21; s5 += carry4; s4 -= (long)((ulong)carry4 << 21); 81 7228827 : carry5 = s5 >> 21; s6 += carry5; s5 -= (long)((ulong)carry5 << 21); 82 7228827 : carry6 = s6 >> 21; s7 += carry6; s6 -= (long)((ulong)carry6 << 21); 83 7228827 : carry7 = s7 >> 21; s8 += carry7; s7 -= (long)((ulong)carry7 << 21); 84 7228827 : carry8 = s8 >> 21; s9 += carry8; s8 -= (long)((ulong)carry8 << 21); 85 7228827 : carry9 = s9 >> 21; s10 += carry9; s9 -= (long)((ulong)carry9 << 21); 86 7228827 : carry10 = s10 >> 21; s11 += carry10; s10 -= (long)((ulong)carry10 << 21); 87 7228827 : carry11 = s11 >> 21; s12 += carry11; s11 -= (long)((ulong)carry11 << 21); 88 : 89 7228827 : s0 += s12*666643L; s1 += s12*470296L; s2 += s12*654183L; s3 -= s12*997805L; s4 += s12*136657L; s5 -= s12*683901L; s12 = 0L; 90 : 91 7228827 : carry0 = s0 >> 21; s1 += carry0; s0 -= (long)((ulong)carry0 << 21); 92 7228827 : carry1 = s1 >> 21; s2 += carry1; s1 -= (long)((ulong)carry1 << 21); 93 7228827 : carry2 = s2 >> 21; s3 += carry2; s2 -= (long)((ulong)carry2 << 21); 94 7228827 : carry3 = s3 >> 21; s4 += carry3; s3 -= (long)((ulong)carry3 << 21); 95 7228827 : carry4 = s4 >> 21; s5 += carry4; s4 -= (long)((ulong)carry4 << 21); 96 7228827 : carry5 = s5 >> 21; s6 += carry5; s5 -= (long)((ulong)carry5 << 21); 97 7228827 : carry6 = s6 >> 21; s7 += carry6; s6 -= (long)((ulong)carry6 << 21); 98 7228827 : carry7 = s7 >> 21; s8 += carry7; s7 -= (long)((ulong)carry7 << 21); 99 7228827 : carry8 = s8 >> 21; s9 += carry8; s8 -= (long)((ulong)carry8 << 21); 100 7228827 : carry9 = s9 >> 21; s10 += carry9; s9 -= (long)((ulong)carry9 << 21); 101 7228827 : carry10 = s10 >> 21; s11 += carry10; s10 -= (long)((ulong)carry10 << 21); 102 : 103 : /* Pack the results into out */ 104 : 105 7228827 : *(ulong *) out = (((ulong)s0 ) ) | (((ulong)s1 )<<21) | (((ulong)s2 )<<42) | (((ulong)s3 )<<63); 106 7228827 : *(ulong *)(out+ 8) = (((ulong)s3 )>>1) | (((ulong)s4 )<<20) | (((ulong)s5 )<<41) | (((ulong)s6 )<<62); 107 7228827 : *(ulong *)(out+16) = (((ulong)s6 )>>2) | (((ulong)s7 )<<19) | (((ulong)s8 )<<40) | (((ulong)s9 )<<61); 108 7228827 : *(ulong *)(out+24) = (((ulong)s9 )>>3) | (((ulong)s10)<<18) | (((ulong)s11)<<39); 109 7228827 : return out; 110 7228827 : } 111 : 112 : uchar * 113 : fd_curve25519_scalar_muladd( uchar s[ 32 ], 114 : uchar const * a, 115 : uchar const b[ 32 ], 116 4887483 : uchar const c[ 32 ] ) { 117 : 118 : /* Load a, b and c */ 119 : 120 4887483 : ulong ia0 = fd_ulong_load_8( a ); ulong ib0 = fd_ulong_load_8( b ); ulong ic0 = fd_ulong_load_8( c ); 121 4887483 : ulong ia1 = fd_ulong_load_8( a + 8 ); ulong ib1 = fd_ulong_load_8( b + 8 ); ulong ic1 = fd_ulong_load_8( c + 8 ); 122 4887483 : ulong ia2 = fd_ulong_load_8( a + 16 ); ulong ib2 = fd_ulong_load_8( b + 16 ); ulong ic2 = fd_ulong_load_8( c + 16 ); 123 4887483 : ulong ia3 = fd_ulong_load_8( a + 24 ); ulong ib3 = fd_ulong_load_8( b + 24 ); ulong ic3 = fd_ulong_load_8( c + 24 ); 124 : 125 : /* Unpack each into 11 21-bit integers and a 25-bit straggler */ 126 : 127 4887483 : ulong mask = FD_ULONG_MASK_LSB( 21 ); 128 : 129 4887483 : long a0 = (long)( ia0 & mask); long a1 = (long)((ia0>>21) & mask); long a2 = (long)((ia0>>42) & mask); 130 4887483 : long a3 = (long)(((ia0>>63) | (ia1<<1)) & mask); long a4 = (long)((ia1>>20) & mask); long a5 = (long)((ia1>>41) & mask); 131 4887483 : long a6 = (long)(((ia1>>62) | (ia2<<2)) & mask); long a7 = (long)((ia2>>19) & mask); long a8 = (long)((ia2>>40) & mask); 132 4887483 : long a9 = (long)(((ia2>>61) | (ia3<<3)) & mask); long a10 = (long)((ia3>>18) & mask); long a11 = (long)( ia3>>39 ); 133 : 134 4887483 : long b0 = (long)( ib0 & mask); long b1 = (long)((ib0>>21) & mask); long b2 = (long)((ib0>>42) & mask); 135 4887483 : long b3 = (long)(((ib0>>63) | (ib1<<1)) & mask); long b4 = (long)((ib1>>20) & mask); long b5 = (long)((ib1>>41) & mask); 136 4887483 : long b6 = (long)(((ib1>>62) | (ib2<<2)) & mask); long b7 = (long)((ib2>>19) & mask); long b8 = (long)((ib2>>40) & mask); 137 4887483 : long b9 = (long)(((ib2>>61) | (ib3<<3)) & mask); long b10 = (long)((ib3>>18) & mask); long b11 = (long)( ib3>>39 ); 138 : 139 4887483 : long c0 = (long)( ic0 & mask); long c1 = (long)((ic0>>21) & mask); long c2 = (long)((ic0>>42) & mask); 140 4887483 : long c3 = (long)(((ic0>>63) | (ic1<<1)) & mask); long c4 = (long)((ic1>>20) & mask); long c5 = (long)((ic1>>41) & mask); 141 4887483 : long c6 = (long)(((ic1>>62) | (ic2<<2)) & mask); long c7 = (long)((ic2>>19) & mask); long c8 = (long)((ic2>>40) & mask); 142 4887483 : long c9 = (long)(((ic2>>61) | (ic3<<3)) & mask); long c10 = (long)((ic3>>18) & mask); long c11 = (long)( ic3>>39 ); 143 : 144 : /* Do the muladd */ 145 : 146 4887483 : long s0 = c0 + a0*b0; 147 4887483 : long s1 = c1 + a0*b1 + a1*b0; 148 4887483 : long s2 = c2 + a0*b2 + a1*b1 + a2*b0; 149 4887483 : long s3 = c3 + a0*b3 + a1*b2 + a2*b1 + a3*b0; 150 4887483 : long s4 = c4 + a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0; 151 4887483 : long s5 = c5 + a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0; 152 4887483 : long s6 = c6 + a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0; 153 4887483 : long s7 = c7 + a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0; 154 4887483 : long s8 = c8 + a0*b8 + a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1 + a8*b0; 155 4887483 : long s9 = c9 + a0*b9 + a1*b8 + a2*b7 + a3*b6 + a4*b5 + a5*b4 + a6*b3 + a7*b2 + a8*b1 + a9*b0; 156 4887483 : long s10 = c10 + a0*b10 + a1*b9 + a2*b8 + a3*b7 + a4*b6 + a5*b5 + a6*b4 + a7*b3 + a8*b2 + a9*b1 + a10*b0; 157 4887483 : long s11 = c11 + a0*b11 + a1*b10 + a2*b9 + a3*b8 + a4*b7 + a5*b6 + a6*b5 + a7*b4 + a8*b3 + a9*b2 + a10*b1 + a11*b0; 158 4887483 : long s12 = a1*b11 + a2*b10 + a3*b9 + a4*b8 + a5*b7 + a6*b6 + a7*b5 + a8*b4 + a9*b3 + a10*b2 + a11*b1; 159 4887483 : long s13 = a2*b11 + a3*b10 + a4*b9 + a5*b8 + a6*b7 + a7*b6 + a8*b5 + a9*b4 + a10*b3 + a11*b2; 160 4887483 : long s14 = a3*b11 + a4*b10 + a5*b9 + a6*b8 + a7*b7 + a8*b6 + a9*b5 + a10*b4 + a11*b3; 161 4887483 : long s15 = a4*b11 + a5*b10 + a6*b9 + a7*b8 + a8*b7 + a9*b6 + a10*b5 + a11*b4; 162 4887483 : long s16 = a5*b11 + a6*b10 + a7*b9 + a8*b8 + a9*b7 + a10*b6 + a11*b5; 163 4887483 : long s17 = a6*b11 + a7*b10 + a8*b9 + a9*b8 + a10*b7 + a11*b6; 164 4887483 : long s18 = a7*b11 + a8*b10 + a9*b9 + a10*b8 + a11*b7; 165 4887483 : long s19 = a8*b11 + a9*b10 + a10*b9 + a11*b8; 166 4887483 : long s20 = a9*b11 + a10*b10 + a11*b9; 167 4887483 : long s21 = a10*b11 + a11*b10; 168 4887483 : long s22 = a11*b11; 169 4887483 : long s23 = 0L; 170 : 171 : /* Reduce the result */ 172 : 173 4887483 : long carry0 = (s0 + (1L << 20)) >> 21; s1 += carry0; s0 -= (long)((ulong)carry0 << 21); 174 4887483 : long carry2 = (s2 + (1L << 20)) >> 21; s3 += carry2; s2 -= (long)((ulong)carry2 << 21); 175 4887483 : long carry4 = (s4 + (1L << 20)) >> 21; s5 += carry4; s4 -= (long)((ulong)carry4 << 21); 176 4887483 : long carry6 = (s6 + (1L << 20)) >> 21; s7 += carry6; s6 -= (long)((ulong)carry6 << 21); 177 4887483 : long carry8 = (s8 + (1L << 20)) >> 21; s9 += carry8; s8 -= (long)((ulong)carry8 << 21); 178 4887483 : long carry10 = (s10 + (1L << 20)) >> 21; s11 += carry10; s10 -= (long)((ulong)carry10 << 21); 179 4887483 : long carry12 = (s12 + (1L << 20)) >> 21; s13 += carry12; s12 -= (long)((ulong)carry12 << 21); 180 4887483 : long carry14 = (s14 + (1L << 20)) >> 21; s15 += carry14; s14 -= (long)((ulong)carry14 << 21); 181 4887483 : long carry16 = (s16 + (1L << 20)) >> 21; s17 += carry16; s16 -= (long)((ulong)carry16 << 21); 182 4887483 : long carry18 = (s18 + (1L << 20)) >> 21; s19 += carry18; s18 -= (long)((ulong)carry18 << 21); 183 4887483 : long carry20 = (s20 + (1L << 20)) >> 21; s21 += carry20; s20 -= (long)((ulong)carry20 << 21); 184 4887483 : long carry22 = (s22 + (1L << 20)) >> 21; s23 += carry22; s22 -= (long)((ulong)carry22 << 21); 185 : 186 4887483 : long carry1 = (s1 + (1L << 20)) >> 21; s2 += carry1; s1 -= (long)((ulong)carry1 << 21); 187 4887483 : long carry3 = (s3 + (1L << 20)) >> 21; s4 += carry3; s3 -= (long)((ulong)carry3 << 21); 188 4887483 : long carry5 = (s5 + (1L << 20)) >> 21; s6 += carry5; s5 -= (long)((ulong)carry5 << 21); 189 4887483 : long carry7 = (s7 + (1L << 20)) >> 21; s8 += carry7; s7 -= (long)((ulong)carry7 << 21); 190 4887483 : long carry9 = (s9 + (1L << 20)) >> 21; s10 += carry9; s9 -= (long)((ulong)carry9 << 21); 191 4887483 : long carry11 = (s11 + (1L << 20)) >> 21; s12 += carry11; s11 -= (long)((ulong)carry11 << 21); 192 4887483 : long carry13 = (s13 + (1L << 20)) >> 21; s14 += carry13; s13 -= (long)((ulong)carry13 << 21); 193 4887483 : long carry15 = (s15 + (1L << 20)) >> 21; s16 += carry15; s15 -= (long)((ulong)carry15 << 21); 194 4887483 : long carry17 = (s17 + (1L << 20)) >> 21; s18 += carry17; s17 -= (long)((ulong)carry17 << 21); 195 4887483 : long carry19 = (s19 + (1L << 20)) >> 21; s20 += carry19; s19 -= (long)((ulong)carry19 << 21); 196 4887483 : long carry21 = (s21 + (1L << 20)) >> 21; s22 += carry21; s21 -= (long)((ulong)carry21 << 21); 197 : 198 4887483 : s11 += s23*666643L; s12 += s23*470296L; s13 += s23*654183L; s14 -= s23*997805L; s15 += s23*136657L; s16 -= s23*683901L; s23 = 0L; 199 4887483 : s10 += s22*666643L; s11 += s22*470296L; s12 += s22*654183L; s13 -= s22*997805L; s14 += s22*136657L; s15 -= s22*683901L; s22 = 0L; 200 4887483 : s9 += s21*666643L; s10 += s21*470296L; s11 += s21*654183L; s12 -= s21*997805L; s13 += s21*136657L; s14 -= s21*683901L; s21 = 0L; 201 4887483 : s8 += s20*666643L; s9 += s20*470296L; s10 += s20*654183L; s11 -= s20*997805L; s12 += s20*136657L; s13 -= s20*683901L; s20 = 0L; 202 4887483 : s7 += s19*666643L; s8 += s19*470296L; s9 += s19*654183L; s10 -= s19*997805L; s11 += s19*136657L; s12 -= s19*683901L; s19 = 0L; 203 4887483 : s6 += s18*666643L; s7 += s18*470296L; s8 += s18*654183L; s9 -= s18*997805L; s10 += s18*136657L; s11 -= s18*683901L; s18 = 0L; 204 : 205 4887483 : carry6 = (s6 + (1L << 20)) >> 21; s7 += carry6; s6 -= (long)((ulong)carry6 << 21); 206 4887483 : carry8 = (s8 + (1L << 20)) >> 21; s9 += carry8; s8 -= (long)((ulong)carry8 << 21); 207 4887483 : carry10 = (s10 + (1L << 20)) >> 21; s11 += carry10; s10 -= (long)((ulong)carry10 << 21); 208 4887483 : carry12 = (s12 + (1L << 20)) >> 21; s13 += carry12; s12 -= (long)((ulong)carry12 << 21); 209 4887483 : carry14 = (s14 + (1L << 20)) >> 21; s15 += carry14; s14 -= (long)((ulong)carry14 << 21); 210 4887483 : carry16 = (s16 + (1L << 20)) >> 21; s17 += carry16; s16 -= (long)((ulong)carry16 << 21); 211 : 212 4887483 : carry7 = (s7 + (1L << 20)) >> 21; s8 += carry7; s7 -= (long)((ulong)carry7 << 21); 213 4887483 : carry9 = (s9 + (1L << 20)) >> 21; s10 += carry9; s9 -= (long)((ulong)carry9 << 21); 214 4887483 : carry11 = (s11 + (1L << 20)) >> 21; s12 += carry11; s11 -= (long)((ulong)carry11 << 21); 215 4887483 : carry13 = (s13 + (1L << 20)) >> 21; s14 += carry13; s13 -= (long)((ulong)carry13 << 21); 216 4887483 : carry15 = (s15 + (1L << 20)) >> 21; s16 += carry15; s15 -= (long)((ulong)carry15 << 21); 217 : 218 4887483 : s5 += s17*666643L; s6 += s17*470296L; s7 += s17*654183L; s8 -= s17*997805L; s9 += s17*136657L; s10 -= s17*683901L; s17 = 0L; 219 4887483 : s4 += s16*666643L; s5 += s16*470296L; s6 += s16*654183L; s7 -= s16*997805L; s8 += s16*136657L; s9 -= s16*683901L; s16 = 0L; 220 4887483 : s3 += s15*666643L; s4 += s15*470296L; s5 += s15*654183L; s6 -= s15*997805L; s7 += s15*136657L; s8 -= s15*683901L; s15 = 0L; 221 4887483 : s2 += s14*666643L; s3 += s14*470296L; s4 += s14*654183L; s5 -= s14*997805L; s6 += s14*136657L; s7 -= s14*683901L; s14 = 0L; 222 4887483 : s1 += s13*666643L; s2 += s13*470296L; s3 += s13*654183L; s4 -= s13*997805L; s5 += s13*136657L; s6 -= s13*683901L; s13 = 0L; 223 4887483 : s0 += s12*666643L; s1 += s12*470296L; s2 += s12*654183L; s3 -= s12*997805L; s4 += s12*136657L; s5 -= s12*683901L; s12 = 0L; 224 : 225 4887483 : carry0 = (s0 + (1L << 20)) >> 21; s1 += carry0; s0 -= (long)((ulong)carry0 << 21); 226 4887483 : carry2 = (s2 + (1L << 20)) >> 21; s3 += carry2; s2 -= (long)((ulong)carry2 << 21); 227 4887483 : carry4 = (s4 + (1L << 20)) >> 21; s5 += carry4; s4 -= (long)((ulong)carry4 << 21); 228 4887483 : carry6 = (s6 + (1L << 20)) >> 21; s7 += carry6; s6 -= (long)((ulong)carry6 << 21); 229 4887483 : carry8 = (s8 + (1L << 20)) >> 21; s9 += carry8; s8 -= (long)((ulong)carry8 << 21); 230 4887483 : carry10 = (s10 + (1L << 20)) >> 21; s11 += carry10; s10 -= (long)((ulong)carry10 << 21); 231 : 232 4887483 : carry1 = (s1 + (1L << 20)) >> 21; s2 += carry1; s1 -= (long)((ulong)carry1 << 21); 233 4887483 : carry3 = (s3 + (1L << 20)) >> 21; s4 += carry3; s3 -= (long)((ulong)carry3 << 21); 234 4887483 : carry5 = (s5 + (1L << 20)) >> 21; s6 += carry5; s5 -= (long)((ulong)carry5 << 21); 235 4887483 : carry7 = (s7 + (1L << 20)) >> 21; s8 += carry7; s7 -= (long)((ulong)carry7 << 21); 236 4887483 : carry9 = (s9 + (1L << 20)) >> 21; s10 += carry9; s9 -= (long)((ulong)carry9 << 21); 237 4887483 : carry11 = (s11 + (1L << 20)) >> 21; s12 += carry11; s11 -= (long)((ulong)carry11 << 21); 238 : 239 4887483 : s0 += s12*666643L; s1 += s12*470296L; s2 += s12*654183L; s3 -= s12*997805L; s4 += s12*136657L; s5 -= s12*683901L; s12 = 0L; 240 : 241 4887483 : carry0 = s0 >> 21; s1 += carry0; s0 -= (long)((ulong)carry0 << 21); 242 4887483 : carry1 = s1 >> 21; s2 += carry1; s1 -= (long)((ulong)carry1 << 21); 243 4887483 : carry2 = s2 >> 21; s3 += carry2; s2 -= (long)((ulong)carry2 << 21); 244 4887483 : carry3 = s3 >> 21; s4 += carry3; s3 -= (long)((ulong)carry3 << 21); 245 4887483 : carry4 = s4 >> 21; s5 += carry4; s4 -= (long)((ulong)carry4 << 21); 246 4887483 : carry5 = s5 >> 21; s6 += carry5; s5 -= (long)((ulong)carry5 << 21); 247 4887483 : carry6 = s6 >> 21; s7 += carry6; s6 -= (long)((ulong)carry6 << 21); 248 4887483 : carry7 = s7 >> 21; s8 += carry7; s7 -= (long)((ulong)carry7 << 21); 249 4887483 : carry8 = s8 >> 21; s9 += carry8; s8 -= (long)((ulong)carry8 << 21); 250 4887483 : carry9 = s9 >> 21; s10 += carry9; s9 -= (long)((ulong)carry9 << 21); 251 4887483 : carry10 = s10 >> 21; s11 += carry10; s10 -= (long)((ulong)carry10 << 21); 252 4887483 : carry11 = s11 >> 21; s12 += carry11; s11 -= (long)((ulong)carry11 << 21); 253 : 254 4887483 : s0 += s12*666643L; s1 += s12*470296L; s2 += s12*654183L; s3 -= s12*997805L; s4 += s12*136657L; s5 -= s12*683901L; s12 = 0L; 255 : 256 4887483 : carry0 = s0 >> 21; s1 += carry0; s0 -= (long)((ulong)carry0 << 21); 257 4887483 : carry1 = s1 >> 21; s2 += carry1; s1 -= (long)((ulong)carry1 << 21); 258 4887483 : carry2 = s2 >> 21; s3 += carry2; s2 -= (long)((ulong)carry2 << 21); 259 4887483 : carry3 = s3 >> 21; s4 += carry3; s3 -= (long)((ulong)carry3 << 21); 260 4887483 : carry4 = s4 >> 21; s5 += carry4; s4 -= (long)((ulong)carry4 << 21); 261 4887483 : carry5 = s5 >> 21; s6 += carry5; s5 -= (long)((ulong)carry5 << 21); 262 4887483 : carry6 = s6 >> 21; s7 += carry6; s6 -= (long)((ulong)carry6 << 21); 263 4887483 : carry7 = s7 >> 21; s8 += carry7; s7 -= (long)((ulong)carry7 << 21); 264 4887483 : carry8 = s8 >> 21; s9 += carry8; s8 -= (long)((ulong)carry8 << 21); 265 4887483 : carry9 = s9 >> 21; s10 += carry9; s9 -= (long)((ulong)carry9 << 21); 266 4887483 : carry10 = s10 >> 21; s11 += carry10; s10 -= (long)((ulong)carry10 << 21); 267 : 268 : /* Pack the results into s */ 269 : 270 4887483 : *(ulong *) s = (((ulong)s0 ) ) | (((ulong)s1 )<<21) | (((ulong)s2 )<<42) | (((ulong)s3 )<<63); 271 4887483 : *(ulong *)(s+ 8) = (((ulong)s3 )>>1) | (((ulong)s4 )<<20) | (((ulong)s5 )<<41) | (((ulong)s6 )<<62); 272 4887483 : *(ulong *)(s+16) = (((ulong)s6 )>>2) | (((ulong)s7 )<<19) | (((ulong)s8 )<<40) | (((ulong)s9 )<<61); 273 4887483 : *(ulong *)(s+24) = (((ulong)s9 )>>3) | (((ulong)s10)<<18) | (((ulong)s11)<<39); 274 4887483 : return s; 275 4887483 : } 276 : 277 : void FD_FN_NO_ASAN 278 : fd_curve25519_scalar_wnaf( short _t[ 256 ], /* 256-entry */ 279 : uchar const _vs[ 32 ], /* 32-byte, assumes valid scalar */ 280 1790538 : int bits ) { /* range: [1:12], 1 = NAF */ 281 1790538 : short max = (short)((1 << bits) - 1); 282 1790538 : uchar const * _s = (uchar const *)_vs; 283 : 284 : /* Unpack s bits into _t */ 285 : 286 458377728 : for( int i=0; i<255; i++ ) _t[i] = ((short)_s[i>>3] >> (i&7)) & 1; 287 1790538 : _t[255] = 0; /* Guarantee 0 termination even if bad data passed */ 288 : 289 : /* At this point _t[*] in [0,1] */ 290 : 291 1790538 : int i; 292 : 293 3671201 : for( i=0; i<256; i++ ) if( _t[i] ) break; /* Find first non-zero t */ 294 : 295 63942052 : while( i<256 ) { 296 : 297 : /* At this point [0,i) have been made sparse and t[i] is 1. 298 : Absorb as many tj for j in (i,256) into ti as possible */ 299 : 300 62151514 : short ti = 1; 301 : 302 62151514 : int j; 303 395327450 : for( j=i+1; j<256; j++ ) { 304 393536930 : short tj = _t[j]; 305 393536930 : if( !tj ) continue; 306 : 307 : /* At this point, we've zeroed out (i,j) and we know tj is 308 : 1. We also know that ti is odd and in [-max,max]. Thus, if 309 : 2^shift>(2*max), ti +/- 2^shift*tj is _not_ in [-max,max] and 310 : we can't merge this j and any following into i. */ 311 : 312 191278760 : short delta = (short)(1 << fd_int_min( j-i, 14 )); /* Note: delta is even, and delta <= 2^14 */ 313 191278760 : if( delta>(2*max) ) break; 314 : 315 : /* See if we can add tj to ti. If so, this implies we are 316 : subtracting 1 from tj, making it 0. */ 317 : 318 161092948 : short tip = (short)(ti + delta); /* odd + even -> odd */ 319 161092948 : if( tip<=max ) { /* Yep ... add it to ti and zero it out */ 320 130917766 : ti = tip; /* odd */ 321 130917766 : _t[j] = 0; 322 130917766 : continue; 323 130917766 : } 324 : 325 : /* See if we can instead subtract tj from ti. This implies we are 326 : adding 1 to tj, making it 2. We carry-propagate this into tk 327 : for k>j, turning tj and possibly later tk to 0. We note that 328 : delta for the next iteration will be so large that we can't 329 : possibly absorb it into ti so we can abort this inner loop. 330 : 331 : Note that if this carry propagates to _t[255] (which is 332 : strictly zero initially), we have _t[k]==0 for k in [j,254) and 333 : _t[255]==1. The outer loop iteration will resume at i==255 and 334 : detect it is done when it can't scan further for additional j 335 : to absorb. Hence we will never carry propagate off the end and 336 : the loop below is guaranteed to terminate. */ 337 : 338 30175182 : short tim = (short)(ti - delta); /* odd + even -> odd */ 339 30175182 : if( tim>=-max ) { /* Yep ... sub it from ti and carry propagate */ 340 30175182 : ti = tim; /* odd */ 341 30175182 : _t[j] = 0; 342 61169615 : for(;;) { 343 61169615 : j++; 344 61169615 : if( !_t[j] ) { _t[j] = 1; break; } 345 30994433 : _t[j] = 0; 346 30994433 : } 347 30175182 : break; 348 30175182 : } 349 : 350 : /* We can't absorb tj into ti */ 351 : 352 0 : break; 353 30175182 : } 354 : 355 : /* Finalize ti and advance */ 356 : 357 62151514 : _t[i] = ti; 358 62151514 : i = j; 359 62151514 : } 360 1790538 : }