LCOV - code coverage report
Current view: top level - ballet/blake3 - fd_blake3_private.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 5 5 100.0 %
Date: 2025-10-13 04:42:14 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_blake3_fd_blake3_private_h
       2             : #define HEADER_fd_src_ballet_blake3_fd_blake3_private_h
       3             : 
       4             : #include "fd_blake3.h"
       5             : 
       6             : /* Set FD_BLAKE3_TRACING to 1 to dump out a high-level trace of BLAKE3
       7             :    operations to the debug log.  This is useful during debugging or
       8             :    development. */
       9             : #define FD_BLAKE3_TRACING 0
      10             : 
      11             : #if FD_BLAKE3_TRACING
      12             : #define FD_BLAKE3_TRACE( ... ) FD_LOG_DEBUG( __VA_ARGS__ ) /* tracing on: forward all arguments to FD_LOG_DEBUG */
      13             : #else
      14   840982671 : #define FD_BLAKE3_TRACE( ... ) (void)0 /* tracing off: arguments are discarded, expands to a no-op expression */
      15             : #endif
      16             : 
      17             : /* Protocol constants *************************************************/
      18             : 
      19             : static const uchar FD_BLAKE3_MSG_SCHEDULE[7][16] = { /* 7 rows of 16 indices; every row is a permutation of 0..15 (presumably one row of message word order per round -- TODO confirm against the compress cores) */
      20             :   {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 }, /* identity order */
      21             :   {  2,  6,  3, 10,  7,  0,  4, 13,  1, 11, 12,  5,  9, 14, 15,  8 },
      22             :   {  3,  4, 10, 12, 13,  2,  7, 14,  6,  5,  9,  0, 11, 15,  8,  1 },
      23             :   { 10,  7, 12,  9, 14,  3, 13, 15,  4,  0, 11,  2,  5,  8,  1,  6 },
      24             :   { 12, 13,  9, 11, 15, 10, 14,  8,  7,  2,  5,  3,  0,  1,  6,  4 },
      25             :   {  9, 14, 11,  5,  8, 12, 15,  1, 13,  3,  0, 10,  2,  6,  4,  7 },
      26             :   { 11, 15,  5,  0,  1,  9,  8,  6, 14, 10,  2, 12,  3,  4,  7, 13 },
      27             : };
      28             : 
      29             : static const uint FD_BLAKE3_IV[8] = { /* eight words; presumably the "default IV" used when no input chaining value is supplied -- TODO confirm */
      30             :   0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
      31             :   0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
      32             : }; /* NOTE(review): literals carry a UL suffix although the element type is uint; every value fits in 32 bits */
      33             : 
      34   204854549 : #define FD_BLAKE3_FLAG_CHUNK_START (1u<<0) /* 1: first block of a chunk */
      35   626610327 : #define FD_BLAKE3_FLAG_CHUNK_END   (1u<<1) /* 2: last block of a chunk */
      36   204521773 : #define FD_BLAKE3_FLAG_PARENT      (1u<<2) /* 4: parent node */
      37   496116480 : #define FD_BLAKE3_FLAG_ROOT        (1u<<3) /* 8: root of the tree (only chunk, only block, or root parent) */
      38             : 
      39             : /* Possible flag combinations:
      40             :    0x1:  first block of a chunk with at least 2 blocks
      41             :    0x2:  last block of a chunk, tree that has at least 1 parent
      42             :    0x3:  last chunk (<=64 bytes), input >1024 bytes
      43             :    0x4:  non-root parent node
      44             :    0xa:  last block of the only chunk, input_sz>64 input_sz<=1024
      45             :    0xb:  only block, input_sz<=64
      46             :    0xc:  root parent node */
      47             : 
      48             : /* Scheduler **********************************************************/
      49             : 
      50             : union __attribute__((aligned(32))) fd_blake3_op { /* 32-byte aligned; the union wraps a single anonymous struct */
      51             : 
      52             :   struct {
      53             :     uchar const * msg;     /* read-only byte pointer; presumably the input of this op -- TODO confirm */
      54             :     uchar *       out;     /* writable byte pointer; presumably where the result of this op is stored -- TODO confirm */
      55             : 
      56             :     ulong         counter; /* presumably the 'counter' value handed to the compress functions -- TODO confirm */
      57             :     union {
      58             :       struct {
      59             :         ushort    off;
      60             :         ushort    sz;
      61             :       };
      62             :       uint        off_sz;  /* overlays off and sz so that both can be accessed together as one uint */
      63             :     };
      64             :     uchar         flags;   /* presumably a combination of FD_BLAKE3_FLAG_* bits -- TODO confirm */
      65             :   };
      66             : 
      67             : };
      68             : 
      69             : typedef union fd_blake3_op fd_blake3_op_t;
      70             : 
      71             : /* Compression function ***********************************************/
      72             : 
      73             : FD_PROTOTYPES_BEGIN
      74             : 
      75             : void
      76             : fd_blake3_fini_xof_compress( fd_blake3_t * sha,           /* NOTE(review): named `sha` although the type is fd_blake3_t */
      77             :                              uchar *       root_msg,      /* NOTE(review): writable buffer, size not documented here -- confirm against the definition */
      78             :                              uchar *       root_cv_pre ); /* NOTE(review): writable buffer, size not documented here -- confirm against the definition */
      79             : 
      80             : void
      81             : fd_blake3_ref_compress1( uchar * restrict       out, /* align==1 len==32 */
      82             :                          uchar const * restrict msg, /* align==1 len==64 */
      83             :                          uint                   msg_sz,  /* NOTE(review): relation to the len==64 stated for msg is not documented here -- confirm */
      84             :                          ulong                  counter,
      85             :                          uint                   flags,   /* presumably a combination of FD_BLAKE3_FLAG_* bits -- TODO confirm */
      86             :                          uchar * restrict       out_chain,  /* optional, 16 byte output chaining value of last block (NOTE(review): a BLAKE3 chaining value is 32 bytes -- confirm the size stated here) */
      87             :                          uchar const * restrict in_chain ); /* optional, 16 byte input chaining value of first block (default IV) (NOTE(review): FD_BLAKE3_IV is 8 uint words -- confirm the size stated here) */
      88             : 
      89             : #if FD_HAS_SSE
      90             : 
      91             : void
      92             : fd_blake3_sse_compress1( uchar * restrict       out, /* align==1 len==32 */
      93             :                          uchar const * restrict msg, /* align==1 len==64 */
      94             :                          uint                   msg_sz,
      95             :                          ulong                  counter,
      96             :                          uint                   flags,
      97             :                          uchar * restrict       out_chain,  /* NOTE(review): undocumented here; presumably as in fd_blake3_ref_compress1 -- confirm */
      98             :                          uchar const * restrict in_chain ); /* NOTE(review): undocumented here; presumably as in fd_blake3_ref_compress1 -- confirm */
      99             : 
     100             : #endif /* FD_HAS_SSE */
     101             : 
     102             : #if FD_HAS_AVX
     103             : 
     104             : /* BLAKE3 AVX cores
     105             : 
     106             :    compress8 compresses one to eight tree nodes.  batch_cnt is the
     107             :    number of nodes to process.  For each node in the batch with index i,
     108             :    - _batch_data[i] points to the input data of the node (message bytes
     109             :      for leaf nodes, a pair of output chaining values for branch nodes)
     110             :    - batch_sz[i] is the input byte count of the node, from which the
     111             :      'len' value of each of the node's blocks is derived
     112             :    - ctr_vec[i] is the 'counter' value of the node
     113             :    - batch_flags[i] is the 'flag' value of the node
     114             :    - batch_cv is optional.  If set, batch_cv[i] is the 'chaining value' of
     115             :      the first block of the node.  This is useful for XOF.
     116             : 
     117             :    compress8 supports the following output modes:
     118             :    - "LtHash in-place": If lthash is set, each node's output is expanded
     119             :      (XOF) to 2048 bytes and interpreted as an 'LtHash' value (i.e.
     120             :      a vector of 1024 uint16).  These vectors are then added together
     121             :      and the result is written to lthash.  The root flag MUST be set for
     122             :      all batch_flags inputs, otherwise this function will read OOB.
     123             :    - "Simple": Otherwise, _batch_hash[i] is populated with the 32-byte
     124             :       output chaining value.  (If node i is a root node, this is 'the
     125             :       BLAKE3 hash', i.e. the first 32 bytes of the XOF stream).
     126             : 
     127             :    These modes are all packed into the same function because the
     128             :    alternatives are worse (either worse code footprint due to duplicated
     129             :    core, or worse throughput due to high penalty passing vector regs
     130             :    between functions in SysV ABI).
     131             : 
     132             :    compress8_fast does a subset of what compress8 can, but is ~10-20%
     133             :    faster. */
     134             : 
     135             : void
     136             : fd_blake3_avx_compress8( ulong                   batch_cnt,      /* number of nodes to process, one to eight */
     137             :                          void   const * restrict _batch_data,   /* align==32 len in [1,8] */
     138             :                          uint   const * restrict batch_sz,      /* len in [1,8] */
     139             :                          ulong  const * restrict ctr_vec,       /* len==8 */
     140             :                          uint   const * restrict batch_flags,   /* align==32 len==8 */
     141             :                          void * const * restrict _batch_hash,   /* align==32 len in [1,8] */
     142             :                          ushort *       restrict lthash,        /* align==32 byte_sz=2048 */
     143             :                          uint                    out_sz,        /* 32 or 64 */
     144             :                          void const *   restrict batch_cv );    /* align==8 len==8 ele_align==32 optional */
     145             : 
     146             : void
     147             : fd_blake3_avx_compress8_fast( uchar const * restrict batch_data,  /* align==32 len==8*64 */
     148             :                               uchar       * restrict batch_hash,  /* align==32 len==8*32 */
     149             :                               ulong                  counter,     /* NOTE(review): one counter for the whole batch; how it maps to the eight inputs is not documented here -- confirm */
     150             :                               uchar                  flags );     /* NOTE(review): one flags value for the whole batch (compress8 takes one per node) -- confirm */
     151             : 
     152             : #endif /* FD_HAS_AVX */
     153             : 
     154             : #if FD_HAS_AVX512
     155             : 
     156             : /* fd_blake3_avx512_compress16{,_fast} are analogous to the avx APIs
     157             :    above.  The only difference is larger alignment assumptions and that
     158             :    these process up to sixteen elements. */
     159             : 
     160             : void
     161             : fd_blake3_avx512_compress16( ulong                   batch_cnt,      /* number of nodes to process, up to sixteen */
     162             :                              void const   * restrict _batch_data,   /* align=64 len=16 ele_align=1  */
     163             :                              uint const   * restrict batch_sz,      /* align= 4 len=16 */
     164             :                              ulong const  * restrict ctr_vec,       /* align= 8 len=16 */
     165             :                              uint const   * restrict batch_flags,   /* align= 4 len=16 */
     166             :                              void * const * restrict _batch_hash,   /* align=64 len=16 */
     167             :                              ushort *       restrict lthash,        /* align=32 byte_sz=2048 (NOTE(review): 32, not 64, unlike the other vector arguments -- confirm) */
     168             :                              uint                    out_sz,        /* 32 or 64 */
     169             :                              void const *   restrict batch_cv );    /* align= 8 len=16 ele_align=16 optional */
     170             : 
     171             : void
     172             : fd_blake3_avx512_compress16_fast( uchar const * restrict batch_data,  /* align==32 len==16*64 */
     173             :                                   uchar       * restrict batch_hash,  /* align==32 len==16*32 */
     174             :                                   ulong                  counter,     /* NOTE(review): align==32 is stated above, whereas fd_blake3_avx512_compress16 documents align=64 -- confirm */
     175             :                                   uchar                  flags );
     176             : 
     177             : #endif /* FD_HAS_AVX512 */
     178             : 
     179             : FD_PROTOTYPES_END
     180             : 
     181             : #endif /* HEADER_fd_src_ballet_blake3_fd_blake3_private_h */

Generated by: LCOV version 1.14