Line data Source code
1 : // Source originally from https://github.com/BLAKE3-team/BLAKE3
2 : // From commit: 34d293eb2aa75005406d8a7d78687896f714e89a
3 :
4 : #include <stdbool.h>
5 : #include <stddef.h>
6 : #include <stdint.h>
7 :
8 : #include "blake3_impl.h"
9 :
10 : #include <immintrin.h>
11 :
12 : #define MAYBE_UNUSED(x) (void)((x))
13 :
14 : void blake3_compress_in_place(uint32_t cv[8],
15 : const uint8_t block[BLAKE3_BLOCK_LEN],
16 : uint8_t block_len, uint64_t counter,
17 38983116 : uint8_t flags) {
18 12994372 : #if FD_HAS_AVX512
19 12994372 : blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
20 : #elif FD_HAS_AVX
21 : blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
22 : #elif FD_HAS_SSE
23 : blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
24 : #else
25 : blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
26 : #endif
27 38983116 : }
28 :
29 : void blake3_compress_xof(const uint32_t cv[8],
30 : const uint8_t block[BLAKE3_BLOCK_LEN],
31 : uint8_t block_len, uint64_t counter, uint8_t flags,
32 25437008 : uint8_t out[64]) {
33 8472496 : #if FD_HAS_AVX512
34 8472496 : blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
35 : #elif FD_HAS_AVX
36 : blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
37 : #elif FD_HAS_SSE
38 : blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
39 : #else
40 : blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
41 : #endif
42 25437008 : }
43 :
44 :
45 : void blake3_xof_many(const uint32_t cv[8],
46 : const uint8_t block[BLAKE3_BLOCK_LEN],
47 : uint8_t block_len, uint64_t counter, uint8_t flags,
48 915 : uint8_t out[64], size_t outblocks) {
49 915 : if (outblocks == 0) {
50 : // The current assembly implementation always outputs at least 1 block.
51 0 : return;
52 0 : }
53 305 : #if FD_HAS_AVX512
54 305 : blake3_xof_many_avx512(cv, block, block_len, counter, flags, out, outblocks);
55 : #else
56 20130 : for(size_t i = 0; i < outblocks; ++i) {
57 19520 : blake3_compress_xof(cv, block, block_len, counter + i, flags, out + 64*i);
58 19520 : }
59 610 : #endif
60 915 : }
61 :
// Compile-time dispatcher for the multi-input hashing routine.
//
// Exactly one backend is selected by the preprocessor, in priority order:
//   FD_HAS_AVX512 -> blake3_hash_many_avx512
//   FD_HAS_AVX    -> blake3_hash_many_avx2
//   FD_HAS_SSE    -> blake3_hash_many_sse2
//   otherwise     -> blake3_hash_many_portable
// All arguments are forwarded to the chosen backend unchanged.
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
                      size_t blocks, const uint32_t key[8], uint64_t counter,
                      bool increment_counter, uint8_t flags,
                      uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
#if FD_HAS_AVX512
#define BLAKE3_HASH_MANY_BACKEND_ blake3_hash_many_avx512
#elif FD_HAS_AVX
#define BLAKE3_HASH_MANY_BACKEND_ blake3_hash_many_avx2
#elif FD_HAS_SSE
  /* TODO use sse4.1 here? */
#define BLAKE3_HASH_MANY_BACKEND_ blake3_hash_many_sse2
#else
#define BLAKE3_HASH_MANY_BACKEND_ blake3_hash_many_portable
#endif
  BLAKE3_HASH_MANY_BACKEND_(inputs, num_inputs, blocks, key, counter,
                            increment_counter, flags, flags_start, flags_end,
                            out);
  // The alias is private to this function; do not leak it to the rest of the
  // translation unit.
#undef BLAKE3_HASH_MANY_BACKEND_
}
85 :
// The SIMD degree of the backend chosen for this build.
//
// The value is fixed at compile time by the FD_HAS_* feature macros (there is
// no runtime CPU detection here): 16 with FD_HAS_AVX512, 8 with FD_HAS_AVX,
// 4 with FD_HAS_SSE, and 1 when none of them is set.
size_t blake3_simd_degree(void) {
  size_t degree;
#if FD_HAS_AVX512
  degree = 16;
#elif FD_HAS_AVX
  degree = 8;
#elif FD_HAS_SSE
  degree = 4;
#else
  degree = 1;
#endif
  return degree;
}
|