Line data Source code
1 : // Source originally from https://github.com/BLAKE3-team/BLAKE3
2 : // From commit: 64747d48ffe9d1fbf4b71e94cabeb8a211461081
3 :
4 : #include "blake3_impl.h"
5 : #include <string.h>
6 :
7 0 : INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
8 0 : return (w >> c) | (w << (32 - c));
9 0 : }
10 :
11 : INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
12 0 : uint32_t x, uint32_t y) {
13 0 : state[a] = state[a] + state[b] + x;
14 0 : state[d] = rotr32(state[d] ^ state[a], 16);
15 0 : state[c] = state[c] + state[d];
16 0 : state[b] = rotr32(state[b] ^ state[c], 12);
17 0 : state[a] = state[a] + state[b] + y;
18 0 : state[d] = rotr32(state[d] ^ state[a], 8);
19 0 : state[c] = state[c] + state[d];
20 0 : state[b] = rotr32(state[b] ^ state[c], 7);
21 0 : }
22 :
23 0 : INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
24 : // Select the message schedule based on the round.
25 0 : const uint8_t *schedule = MSG_SCHEDULE[round];
26 :
27 : // Mix the columns.
28 0 : g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
29 0 : g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
30 0 : g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
31 0 : g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
32 :
33 : // Mix the rows.
34 0 : g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
35 0 : g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
36 0 : g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
37 0 : g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
38 0 : }
39 :
40 : INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
41 : const uint8_t block[BLAKE3_BLOCK_LEN],
42 0 : uint8_t block_len, uint64_t counter, uint8_t flags) {
43 0 : uint32_t block_words[16];
44 0 : block_words[0] = load32(block + 4 * 0);
45 0 : block_words[1] = load32(block + 4 * 1);
46 0 : block_words[2] = load32(block + 4 * 2);
47 0 : block_words[3] = load32(block + 4 * 3);
48 0 : block_words[4] = load32(block + 4 * 4);
49 0 : block_words[5] = load32(block + 4 * 5);
50 0 : block_words[6] = load32(block + 4 * 6);
51 0 : block_words[7] = load32(block + 4 * 7);
52 0 : block_words[8] = load32(block + 4 * 8);
53 0 : block_words[9] = load32(block + 4 * 9);
54 0 : block_words[10] = load32(block + 4 * 10);
55 0 : block_words[11] = load32(block + 4 * 11);
56 0 : block_words[12] = load32(block + 4 * 12);
57 0 : block_words[13] = load32(block + 4 * 13);
58 0 : block_words[14] = load32(block + 4 * 14);
59 0 : block_words[15] = load32(block + 4 * 15);
60 :
61 0 : state[0] = cv[0];
62 0 : state[1] = cv[1];
63 0 : state[2] = cv[2];
64 0 : state[3] = cv[3];
65 0 : state[4] = cv[4];
66 0 : state[5] = cv[5];
67 0 : state[6] = cv[6];
68 0 : state[7] = cv[7];
69 0 : state[8] = IV[0];
70 0 : state[9] = IV[1];
71 0 : state[10] = IV[2];
72 0 : state[11] = IV[3];
73 0 : state[12] = counter_low(counter);
74 0 : state[13] = counter_high(counter);
75 0 : state[14] = (uint32_t)block_len;
76 0 : state[15] = (uint32_t)flags;
77 :
78 0 : round_fn(state, &block_words[0], 0);
79 0 : round_fn(state, &block_words[0], 1);
80 0 : round_fn(state, &block_words[0], 2);
81 0 : round_fn(state, &block_words[0], 3);
82 0 : round_fn(state, &block_words[0], 4);
83 0 : round_fn(state, &block_words[0], 5);
84 0 : round_fn(state, &block_words[0], 6);
85 0 : }
86 :
87 : void fd_blake3_compress_in_place_portable(uint32_t cv[8],
88 : const uint8_t block[BLAKE3_BLOCK_LEN],
89 : uint8_t block_len, uint64_t counter,
90 0 : uint8_t flags) {
91 0 : uint32_t state[16];
92 0 : compress_pre(state, cv, block, block_len, counter, flags);
93 0 : cv[0] = state[0] ^ state[8];
94 0 : cv[1] = state[1] ^ state[9];
95 0 : cv[2] = state[2] ^ state[10];
96 0 : cv[3] = state[3] ^ state[11];
97 0 : cv[4] = state[4] ^ state[12];
98 0 : cv[5] = state[5] ^ state[13];
99 0 : cv[6] = state[6] ^ state[14];
100 0 : cv[7] = state[7] ^ state[15];
101 0 : }
102 :
103 : void fd_blake3_compress_xof_portable(const uint32_t cv[8],
104 : const uint8_t block[BLAKE3_BLOCK_LEN],
105 : uint8_t block_len, uint64_t counter,
106 0 : uint8_t flags, uint8_t out[64]) {
107 0 : uint32_t state[16];
108 0 : compress_pre(state, cv, block, block_len, counter, flags);
109 :
110 0 : store32(&out[0 * 4], state[0] ^ state[8]);
111 0 : store32(&out[1 * 4], state[1] ^ state[9]);
112 0 : store32(&out[2 * 4], state[2] ^ state[10]);
113 0 : store32(&out[3 * 4], state[3] ^ state[11]);
114 0 : store32(&out[4 * 4], state[4] ^ state[12]);
115 0 : store32(&out[5 * 4], state[5] ^ state[13]);
116 0 : store32(&out[6 * 4], state[6] ^ state[14]);
117 0 : store32(&out[7 * 4], state[7] ^ state[15]);
118 0 : store32(&out[8 * 4], state[8] ^ cv[0]);
119 0 : store32(&out[9 * 4], state[9] ^ cv[1]);
120 0 : store32(&out[10 * 4], state[10] ^ cv[2]);
121 0 : store32(&out[11 * 4], state[11] ^ cv[3]);
122 0 : store32(&out[12 * 4], state[12] ^ cv[4]);
123 0 : store32(&out[13 * 4], state[13] ^ cv[5]);
124 0 : store32(&out[14 * 4], state[14] ^ cv[6]);
125 0 : store32(&out[15 * 4], state[15] ^ cv[7]);
126 0 : }
127 :
128 : INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
129 : const uint32_t key[8], uint64_t counter,
130 : uint8_t flags, uint8_t flags_start,
131 0 : uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
132 0 : uint32_t cv[8];
133 0 : memcpy(cv, key, BLAKE3_KEY_LEN);
134 0 : uint8_t block_flags = flags | flags_start;
135 0 : while (blocks > 0) {
136 0 : if (blocks == 1) {
137 0 : block_flags |= flags_end;
138 0 : }
139 0 : fd_blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
140 0 : block_flags);
141 0 : input = &input[BLAKE3_BLOCK_LEN];
142 0 : blocks -= 1;
143 0 : block_flags = flags;
144 0 : }
145 0 : store_cv_words(out, cv);
146 0 : }
147 :
148 : void fd_blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
149 : size_t blocks, const uint32_t key[8],
150 : uint64_t counter, bool increment_counter,
151 : uint8_t flags, uint8_t flags_start,
152 0 : uint8_t flags_end, uint8_t *out) {
153 0 : while (num_inputs > 0) {
154 0 : hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,
155 0 : flags_end, out);
156 0 : if (increment_counter) {
157 0 : counter += 1;
158 0 : }
159 0 : inputs += 1;
160 0 : num_inputs -= 1;
161 0 : out = &out[BLAKE3_OUT_LEN];
162 0 : }
163 0 : }
|