LCOV - code coverage report
Current view: top level - ballet/blake3 - blake3_impl.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 8 49 16.3 %
Date: 2025-01-08 12:08:44 Functions: 5 54 9.3 %

          Line data    Source code
       1             : // Source originally from https://github.com/BLAKE3-team/BLAKE3
       2             : // From commit: 64747d48ffe9d1fbf4b71e94cabeb8a211461081
       3             : 
       4             : #ifndef BLAKE3_IMPL_H
       5             : #define BLAKE3_IMPL_H
       6             : 
       7             : #include <assert.h>
       8             : #include <stdbool.h>
       9             : #include <stddef.h>
      10             : #include <stdint.h>
      11             : #include <string.h>
      12             : 
      13             : #include "blake3.h"
      14             : 
      15             : // internal flags
      16             : enum blake3_flags {
      17             :   CHUNK_START         = 1 << 0,
      18             :   CHUNK_END           = 1 << 1,
      19             :   PARENT              = 1 << 2,
      20             :   ROOT                = 1 << 3,
      21             :   KEYED_HASH          = 1 << 4,
      22             :   DERIVE_KEY_CONTEXT  = 1 << 5,
      23             :   DERIVE_KEY_MATERIAL = 1 << 6,
      24             : };
      25             : 
      26             : #define INLINE static inline __attribute__((always_inline))
      27             : 
      28             : #define BLAKE3_USE_NEON 0
      29             : 
      30             : #if FD_HAS_X86
      31           0 : #define MAX_SIMD_DEGREE 16
      32             : #elif BLAKE3_USE_NEON == 1
      33             : #define MAX_SIMD_DEGREE 4
      34             : #else
      35             : #define MAX_SIMD_DEGREE 1
      36             : #endif
      37             : 
      38             : // There are some places where we want a static size that's equal to the
      39             : // MAX_SIMD_DEGREE, but also at least 2.
      40           0 : #define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
      41             : 
      42             : static const uint32_t IV[8] = {0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL,
      43             :                                0xA54FF53AUL, 0x510E527FUL, 0x9B05688CUL,
      44             :                                0x1F83D9ABUL, 0x5BE0CD19UL};
      45             : 
      46             : static const uint8_t MSG_SCHEDULE[7][16] = {
      47             :     {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
      48             :     {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
      49             :     {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
      50             :     {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
      51             :     {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
      52             :     {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
      53             :     {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
      54             : };
      55             : 
      56             : /* Find index of the highest set bit */
      57             : /* x is assumed to be nonzero.       */
      58           0 : static unsigned int highest_one(uint64_t x) {
      59           0 : #if defined(__GNUC__) || defined(__clang__)
      60           0 :   return 63 ^ (unsigned int)__builtin_clzll(x);
      61             : #elif defined(_MSC_VER) && defined(IS_X86_64)
      62             :   unsigned long index;
      63             :   _BitScanReverse64(&index, x);
      64             :   return index;
      65             : #elif defined(_MSC_VER) && defined(IS_X86_32)
      66             :   if(x >> 32) {
      67             :     unsigned long index;
      68             :     _BitScanReverse(&index, (unsigned long)(x >> 32));
      69             :     return 32 + index;
      70             :   } else {
      71             :     unsigned long index;
      72             :     _BitScanReverse(&index, (unsigned long)x);
      73             :     return index;
      74             :   }
      75             : #else
      76             :   unsigned int c = 0;
      77             :   if(x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
      78             :   if(x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
      79             :   if(x & 0x000000000000ff00ULL) { x >>=  8; c +=  8; }
      80             :   if(x & 0x00000000000000f0ULL) { x >>=  4; c +=  4; }
      81             :   if(x & 0x000000000000000cULL) { x >>=  2; c +=  2; }
      82             :   if(x & 0x0000000000000002ULL) {           c +=  1; }
      83             :   return c;
      84             : #endif
      85           0 : }
      86             : 
      87             : // Count the number of 1 bits.
      88          15 : INLINE unsigned int popcnt(uint64_t x) {
      89          15 : #if defined(__GNUC__) || defined(__clang__)
      90          15 :   return (unsigned int)__builtin_popcountll(x);
      91             : #else
      92             :   unsigned int count = 0;
      93             :   while (x != 0) {
      94             :     count += 1;
      95             :     x &= x - 1;
      96             :   }
      97             :   return count;
      98             : #endif
      99          15 : }
     100             : 
     101             : // Largest power of two less than or equal to x. As a special case, returns 1
     102             : // when x is 0.
     103           0 : INLINE uint64_t round_down_to_power_of_2(uint64_t x) {
     104           0 :   return 1ULL << highest_one(x | 1);
     105           0 : }
     106             : 
     107          15 : INLINE uint32_t counter_low(uint64_t counter) { return (uint32_t)counter; }
     108             : 
     109          15 : INLINE uint32_t counter_high(uint64_t counter) {
     110          15 :   return (uint32_t)(counter >> 32);
     111          15 : }
     112             : 
     113           0 : INLINE uint32_t load32(const void *src) {
     114           0 :   const uint8_t *p = (const uint8_t *)src;
     115           0 :   return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
     116           0 :          ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
     117           0 : }
     118             : 
     119             : INLINE void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
     120           0 :                            uint32_t key_words[8]) {
     121           0 :   key_words[0] = load32(&key[0 * 4]);
     122           0 :   key_words[1] = load32(&key[1 * 4]);
     123           0 :   key_words[2] = load32(&key[2 * 4]);
     124           0 :   key_words[3] = load32(&key[3 * 4]);
     125           0 :   key_words[4] = load32(&key[4 * 4]);
     126           0 :   key_words[5] = load32(&key[5 * 4]);
     127           0 :   key_words[6] = load32(&key[6 * 4]);
     128           0 :   key_words[7] = load32(&key[7 * 4]);
     129           0 : }
     130             : 
     131           0 : INLINE void store32(void *dst, uint32_t w) {
     132           0 :   uint8_t *p = (uint8_t *)dst;
     133           0 :   p[0] = (uint8_t)(w >> 0);
     134           0 :   p[1] = (uint8_t)(w >> 8);
     135           0 :   p[2] = (uint8_t)(w >> 16);
     136           0 :   p[3] = (uint8_t)(w >> 24);
     137           0 : }
     138             : 
     139           0 : INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
     140           0 :   store32(&bytes_out[0 * 4], cv_words[0]);
     141           0 :   store32(&bytes_out[1 * 4], cv_words[1]);
     142           0 :   store32(&bytes_out[2 * 4], cv_words[2]);
     143           0 :   store32(&bytes_out[3 * 4], cv_words[3]);
     144           0 :   store32(&bytes_out[4 * 4], cv_words[4]);
     145           0 :   store32(&bytes_out[5 * 4], cv_words[5]);
     146           0 :   store32(&bytes_out[6 * 4], cv_words[6]);
     147           0 :   store32(&bytes_out[7 * 4], cv_words[7]);
     148           0 : }
     149             : 
     150             : void fd_blake3_compress_in_place(uint32_t cv[8],
     151             :                                  const uint8_t block[BLAKE3_BLOCK_LEN],
     152             :                                  uint8_t block_len, uint64_t counter,
     153             :                                  uint8_t flags);
     154             : 
     155             : void fd_blake3_compress_xof(const uint32_t cv[8],
     156             :                             const uint8_t block[BLAKE3_BLOCK_LEN],
     157             :                             uint8_t block_len, uint64_t counter, uint8_t flags,
     158             :                             uint8_t out[64]);
     159             : 
     160             : void fd_blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
     161             :                          size_t blocks, const uint32_t key[8], uint64_t counter,
     162             :                          bool increment_counter, uint8_t flags,
     163             :                          uint8_t flags_start, uint8_t flags_end, uint8_t *out);
     164             : 
     165             : size_t fd_blake3_simd_degree(void);
     166             : 
     167             : 
     168             : // Declarations for implementation-specific functions.
     169             : void fd_blake3_compress_in_place_portable(uint32_t cv[8],
     170             :                                           const uint8_t block[BLAKE3_BLOCK_LEN],
     171             :                                           uint8_t block_len, uint64_t counter,
     172             :                                           uint8_t flags);
     173             : 
     174             : void fd_blake3_compress_xof_portable(const uint32_t cv[8],
     175             :                                      const uint8_t block[BLAKE3_BLOCK_LEN],
     176             :                                      uint8_t block_len, uint64_t counter,
     177             :                                      uint8_t flags, uint8_t out[64]);
     178             : 
     179             : void fd_blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
     180             :                                   size_t blocks, const uint32_t key[8],
     181             :                                   uint64_t counter, bool increment_counter,
     182             :                                   uint8_t flags, uint8_t flags_start,
     183             :                                   uint8_t flags_end, uint8_t *out);
     184             : 
     185             : #if FD_HAS_X86
     186             : #if FD_HAS_SSE
     187             : void fd_blake3_compress_in_place_sse2(uint32_t cv[8],
     188             :                                       const uint8_t block[BLAKE3_BLOCK_LEN],
     189             :                                       uint8_t block_len, uint64_t counter,
     190             :                                       uint8_t flags);
     191             : void fd_blake3_compress_xof_sse2(const uint32_t cv[8],
     192             :                                  const uint8_t block[BLAKE3_BLOCK_LEN],
     193             :                                  uint8_t block_len, uint64_t counter,
     194             :                                  uint8_t flags, uint8_t out[64]);
     195             : void fd_blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
     196             :                               size_t blocks, const uint32_t key[8],
     197             :                               uint64_t counter, bool increment_counter,
     198             :                               uint8_t flags, uint8_t flags_start,
     199             :                               uint8_t flags_end, uint8_t *out);
     200             : #endif /* FD_HAS_SSE */
     201             : #if FD_HAS_AVX
     202             : void fd_blake3_compress_in_place_sse41(uint32_t cv[8],
     203             :                                        const uint8_t block[BLAKE3_BLOCK_LEN],
     204             :                                        uint8_t block_len, uint64_t counter,
     205             :                                        uint8_t flags);
     206             : void fd_blake3_compress_xof_sse41(const uint32_t cv[8],
     207             :                                   const uint8_t block[BLAKE3_BLOCK_LEN],
     208             :                                   uint8_t block_len, uint64_t counter,
     209             :                                   uint8_t flags, uint8_t out[64]);
     210             : void fd_blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
     211             :                                size_t blocks, const uint32_t key[8],
     212             :                                uint64_t counter, bool increment_counter,
     213             :                                uint8_t flags, uint8_t flags_start,
     214             :                                uint8_t flags_end, uint8_t *out);
     215             : void fd_blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
     216             :                               size_t blocks, const uint32_t key[8],
     217             :                               uint64_t counter, bool increment_counter,
     218             :                               uint8_t flags, uint8_t flags_start,
     219             :                               uint8_t flags_end, uint8_t *out);
     220             : #endif /* FD_HAS_AVX */
     221             : #if FD_HAS_AVX512
     222             : void fd_blake3_compress_in_place_avx512(uint32_t cv[8],
     223             :                                         const uint8_t block[BLAKE3_BLOCK_LEN],
     224             :                                         uint8_t block_len, uint64_t counter,
     225             :                                         uint8_t flags);
     226             : 
     227             : void fd_blake3_compress_xof_avx512(const uint32_t cv[8],
     228             :                                    const uint8_t block[BLAKE3_BLOCK_LEN],
     229             :                                    uint8_t block_len, uint64_t counter,
     230             :                                    uint8_t flags, uint8_t out[64]);
     231             : 
     232             : void fd_blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
     233             :                                 size_t blocks, const uint32_t key[8],
     234             :                                 uint64_t counter, bool increment_counter,
     235             :                                 uint8_t flags, uint8_t flags_start,
     236             :                                 uint8_t flags_end, uint8_t *out);
     237             : #endif /* FD_HAS_AVX512 */
     238             : #endif /* FD_HAS_X86 */
     239             : 
     240             : #if BLAKE3_USE_NEON == 1
     241             : void fd_blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
     242             :                               size_t blocks, const uint32_t key[8],
     243             :                               uint64_t counter, bool increment_counter,
     244             :                               uint8_t flags, uint8_t flags_start,
     245             :                               uint8_t flags_end, uint8_t *out);
     246             : #endif
     247             : 
     248             : 
     249             : #endif /* BLAKE3_IMPL_H */

Generated by: LCOV version 1.14