Line data Source code
1 : #include "fd_blake3_private.h"
2 :
3 : static inline void
4 : g( uint * state,
5 : ulong a,
6 : ulong b,
7 : ulong c,
8 : ulong d,
9 : uint x,
10 269068800 : uint y ) {
11 :
12 269068800 : state[a] = state[a] + state[b] + x;
13 269068800 : state[d] = fd_uint_rotate_right( state[d] ^ state[a], 16 );
14 269068800 : state[c] = state[c] + state[d];
15 269068800 : state[b] = fd_uint_rotate_right( state[b] ^ state[c], 12 );
16 269068800 : state[a] = state[a] + state[b] + y;
17 269068800 : state[d] = fd_uint_rotate_right( state[d] ^ state[a], 8 );
18 269068800 : state[c] = state[c] + state[d];
19 269068800 : state[b] = fd_uint_rotate_right( state[b] ^ state[c], 7 );
20 :
21 269068800 : }
22 :
23 : static inline void
24 : round_fn( uint state[ static 16 ],
25 : uint const * msg,
26 33633600 : ulong round ) {
27 : /* Select the message schedule based on the round */
28 33633600 : uchar const * schedule = FD_BLAKE3_MSG_SCHEDULE[round];
29 :
30 : /* Mix the columns */
31 33633600 : g( state, 0, 4, 8, 12, msg[ schedule[ 0] ], msg[ schedule[ 1] ] );
32 33633600 : g( state, 1, 5, 9, 13, msg[ schedule[ 2] ], msg[ schedule[ 3] ] );
33 33633600 : g( state, 2, 6, 10, 14, msg[ schedule[ 4] ], msg[ schedule[ 5] ] );
34 33633600 : g( state, 3, 7, 11, 15, msg[ schedule[ 6] ], msg[ schedule[ 7] ] );
35 :
36 : /* Mix the rows */
37 33633600 : g( state, 0, 5, 10, 15, msg[ schedule[ 8] ], msg[ schedule[ 9] ] );
38 33633600 : g( state, 1, 6, 11, 12, msg[ schedule[10] ], msg[ schedule[11] ] );
39 33633600 : g( state, 2, 7, 8, 13, msg[ schedule[12] ], msg[ schedule[13] ] );
40 33633600 : g( state, 3, 4, 9, 14, msg[ schedule[14] ], msg[ schedule[15] ] );
41 33633600 : }
42 :
43 : static inline void
44 : compress_pre( uint state[ static 16 ],
45 : uint const cv [ static 8 ],
46 : uchar const block[ FD_BLAKE3_BLOCK_SZ ],
47 : uint block_len,
48 : ulong counter,
49 4804800 : uint flags ) {
50 :
51 4804800 : uint block_words[16];
52 4804800 : memcpy( block_words, block, 64 );
53 :
54 4804800 : uint ctr_lo = (uint)(counter&UINT_MAX);
55 4804800 : uint ctr_hi = (uint)(counter>>32);
56 :
57 4804800 : state[ 0] = cv[0]; state[ 1] = cv[1];
58 4804800 : state[ 2] = cv[2]; state[ 3] = cv[3];
59 4804800 : state[ 4] = cv[4]; state[ 5] = cv[5];
60 4804800 : state[ 6] = cv[6]; state[ 7] = cv[7];
61 4804800 : state[ 8] = FD_BLAKE3_IV[0]; state[ 9] = FD_BLAKE3_IV[1];
62 4804800 : state[10] = FD_BLAKE3_IV[2]; state[11] = FD_BLAKE3_IV[3];
63 4804800 : state[12] = ctr_lo; state[13] = ctr_hi;
64 4804800 : state[14] = block_len; state[15] = flags;
65 :
66 4804800 : round_fn( state, &block_words[0], 0 );
67 4804800 : round_fn( state, &block_words[0], 1 );
68 4804800 : round_fn( state, &block_words[0], 2 );
69 4804800 : round_fn( state, &block_words[0], 3 );
70 4804800 : round_fn( state, &block_words[0], 4 );
71 4804800 : round_fn( state, &block_words[0], 5 );
72 4804800 : round_fn( state, &block_words[0], 6 );
73 4804800 : }
74 :
75 : static inline void
76 : compress_block( uint cv[8],
77 : uchar const block[ FD_BLAKE3_BLOCK_SZ ],
78 : uint block_len,
79 : ulong counter,
80 : uint flags,
81 4804800 : uint cv_hi[8] ) {
82 4804800 : if( flags & FD_BLAKE3_FLAG_ROOT ) FD_BLAKE3_TRACE(( "fd_blake3_ref_compress_block(counter=%lu,flags=%x)", counter, flags ));
83 4804800 : uint state[16];
84 4804800 : if( FD_UNLIKELY( cv_hi ) ) memcpy( cv_hi, cv, 32 );
85 4804800 : compress_pre( state, cv, block, block_len, counter, flags );
86 4804800 : cv[0] = state[0] ^ state[ 8];
87 4804800 : cv[1] = state[1] ^ state[ 9];
88 4804800 : cv[2] = state[2] ^ state[10];
89 4804800 : cv[3] = state[3] ^ state[11];
90 4804800 : cv[4] = state[4] ^ state[12];
91 4804800 : cv[5] = state[5] ^ state[13];
92 4804800 : cv[6] = state[6] ^ state[14];
93 4804800 : cv[7] = state[7] ^ state[15];
94 4804800 : if( FD_UNLIKELY( cv_hi ) ) {
95 0 : cv_hi[0] ^= state[ 8];
96 0 : cv_hi[1] ^= state[ 9];
97 0 : cv_hi[2] ^= state[10];
98 0 : cv_hi[3] ^= state[11];
99 0 : cv_hi[4] ^= state[12];
100 0 : cv_hi[5] ^= state[13];
101 0 : cv_hi[6] ^= state[14];
102 0 : cv_hi[7] ^= state[15];
103 0 : }
104 4804800 : }
105 :
106 : void
107 : fd_blake3_ref_compress1( uchar * restrict out,
108 : uchar const * restrict msg,
109 : uint msg_sz,
110 : ulong counter,
111 : uint flags,
112 : uchar * restrict out_chain,
113 300300 : uchar const * restrict in_chain ) {
114 300300 : FD_BLAKE3_TRACE(( "fd_blake3_ref_compress1(out=%p,msg=%p,sz=%u,counter=%lu,flags=%02x)",
115 300300 : (void *)out, (void *)msg, msg_sz, counter, flags ));
116 :
117 300300 : uint cv[8] = { FD_BLAKE3_IV[0], FD_BLAKE3_IV[1], FD_BLAKE3_IV[2], FD_BLAKE3_IV[3],
118 300300 : FD_BLAKE3_IV[4], FD_BLAKE3_IV[5], FD_BLAKE3_IV[6], FD_BLAKE3_IV[7] };
119 300300 : uint * cv_hi = NULL;
120 300300 : if( FD_UNLIKELY( in_chain ) ) {
121 0 : memcpy( cv, in_chain, FD_BLAKE3_OUTCHAIN_SZ );
122 0 : if( FD_UNLIKELY( in_chain ) ) cv_hi = (uint *)( out+32 );
123 0 : }
124 :
125 300300 : uint block_flags = flags | fd_uint_if( flags&FD_BLAKE3_FLAG_PARENT, 0, FD_BLAKE3_FLAG_CHUNK_START );
126 300300 : if( FD_UNLIKELY( in_chain && !(flags&FD_BLAKE3_FLAG_CHUNK_START) ) ) {
127 0 : block_flags &= ~FD_BLAKE3_FLAG_CHUNK_START;
128 0 : }
129 4804800 : while( FD_LIKELY( msg_sz>FD_BLAKE3_BLOCK_SZ ) ) {
130 4504500 : compress_block( cv, msg, FD_BLAKE3_BLOCK_SZ, counter, block_flags&(~FD_BLAKE3_FLAG_ROOT), cv_hi );
131 4504500 : block_flags = flags;
132 4504500 : msg += FD_BLAKE3_BLOCK_SZ;
133 4504500 : msg_sz -= (uint)FD_BLAKE3_BLOCK_SZ;
134 4504500 : }
135 :
136 300300 : uchar block[ FD_BLAKE3_BLOCK_SZ ] = {0};
137 300300 : fd_memcpy( block, msg, msg_sz );
138 :
139 300300 : block_flags = block_flags | fd_uint_if( flags&FD_BLAKE3_FLAG_PARENT, 0, FD_BLAKE3_FLAG_CHUNK_END );
140 300300 : if( FD_UNLIKELY( out_chain ) ) {
141 : /* If requested, capture the output chaining value before processing
142 : the last block. This is useful for XOF mode, which repeats the
143 : hash operation of the last block with increasing counter values.
144 : We don't need to perform the final compression here (which
145 : computes the first 32 bytes of hash output) in the XOF case,
146 : since the fast/parallel XOF implementation that calls this
147 : function repeats compression for XOF slot 0 (first 64 bytes).
148 :
149 : FIXME better document and polish the transition from the compress
150 : part to the expand part. */
151 0 : memcpy( out, block, FD_BLAKE3_BLOCK_SZ ); /* FIXME DOCUMENT OVERLOADING OF OUT ARGUMENT */
152 0 : memcpy( out_chain, cv, FD_BLAKE3_OUTCHAIN_SZ );
153 0 : FD_BLAKE3_TRACE(( "fd_blake3_ref_compress1: done (XOF mode)" ));
154 0 : return;
155 0 : }
156 300300 : compress_block( cv, block, msg_sz, counter, block_flags, cv_hi );
157 300300 : memcpy( out, cv, 32 );
158 :
159 300300 : FD_BLAKE3_TRACE(( "fd_blake3_ref_compress1: done" ));
160 300300 : }
|