LCOV - code coverage report
Current view: top level - ballet/sha256 - fd_sha256.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 349 355 98.3 %
Date: 2026-01-21 05:06:28 Functions: 13 13 100.0 %

          Line data    Source code
       1             : #include "fd_sha256.h"
       2             : #include "fd_sha256_constants.h"
       3             : 
       4             : #if FD_HAS_SHANI
       5             : /* For the optimized repeated hash */
       6             : #include "../../util/simd/fd_sse.h"
       7             : #endif
       8             : 
       9             : ulong
      10      929106 : fd_sha256_align( void ) {
      11      929106 :   return FD_SHA256_ALIGN;
      12      929106 : }
      13             : 
      14             : ulong
      15      464526 : fd_sha256_footprint( void ) {
      16      464526 :   return FD_SHA256_FOOTPRINT;
      17      464526 : }
      18             : 
      19             : void *
      20      464529 : fd_sha256_new( void * shmem ) {
      21      464529 :   fd_sha256_t * sha = (fd_sha256_t *)shmem;
      22             : 
      23      464529 :   if( FD_UNLIKELY( !shmem ) ) {
      24           3 :     FD_LOG_WARNING(( "NULL shmem" ));
      25           3 :     return NULL;
      26           3 :   }
      27             : 
      28      464526 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_sha256_align() ) ) ) {
      29           3 :     FD_LOG_WARNING(( "misaligned shmem" ));
      30           3 :     return NULL;
      31           3 :   }
      32             : 
      33      464523 :   ulong footprint = fd_sha256_footprint();
      34             : 
      35      464523 :   fd_memset( sha, 0, footprint );
      36             : 
      37      464523 :   FD_COMPILER_MFENCE();
      38      464523 :   FD_VOLATILE( sha->magic ) = FD_SHA256_MAGIC;
      39      464523 :   FD_COMPILER_MFENCE();
      40             : 
      41      464523 :   return (void *)sha;
      42      464526 : }
      43             : 
      44             : fd_sha256_t *
      45      464529 : fd_sha256_join( void * shsha ) {
      46             : 
      47      464529 :   if( FD_UNLIKELY( !shsha ) ) {
      48           3 :     FD_LOG_WARNING(( "NULL shsha" ));
      49           3 :     return NULL;
      50           3 :   }
      51             : 
      52      464526 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
      53           3 :     FD_LOG_WARNING(( "misaligned shsha" ));
      54           3 :     return NULL;
      55           3 :   }
      56             : 
      57      464523 :   fd_sha256_t * sha = (fd_sha256_t *)shsha;
      58             : 
      59      464523 :   if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
      60           0 :     FD_LOG_WARNING(( "bad magic" ));
      61           0 :     return NULL;
      62           0 :   }
      63             : 
      64      464523 :   return sha;
      65      464523 : }
      66             : 
      67             : void *
      68          51 : fd_sha256_leave( fd_sha256_t * sha ) {
      69             : 
      70          51 :   if( FD_UNLIKELY( !sha ) ) {
      71           3 :     FD_LOG_WARNING(( "NULL sha" ));
      72           3 :     return NULL;
      73           3 :   }
      74             : 
      75          48 :   return (void *)sha;
      76          51 : }
      77             : 
      78             : void *
      79          54 : fd_sha256_delete( void * shsha ) {
      80             : 
      81          54 :   if( FD_UNLIKELY( !shsha ) ) {
      82           3 :     FD_LOG_WARNING(( "NULL shsha" ));
      83           3 :     return NULL;
      84           3 :   }
      85             : 
      86          51 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
      87           3 :     FD_LOG_WARNING(( "misaligned shsha" ));
      88           3 :     return NULL;
      89           3 :   }
      90             : 
      91          48 :   fd_sha256_t * sha = (fd_sha256_t *)shsha;
      92             : 
      93          48 :   if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
      94           0 :     FD_LOG_WARNING(( "bad magic" ));
      95           0 :     return NULL;
      96           0 :   }
      97             : 
      98          48 :   FD_COMPILER_MFENCE();
      99          48 :   FD_VOLATILE( sha->magic ) = 0UL;
     100          48 :   FD_COMPILER_MFENCE();
     101             : 
     102          48 :   return (void *)sha;
     103          48 : }
     104             : 
     105             : #ifndef FD_SHA256_CORE_IMPL
     106             : #if FD_HAS_SHANI
     107             : #define FD_SHA256_CORE_IMPL 1
     108             : #else
     109             : #define FD_SHA256_CORE_IMPL 0
     110             : #endif
     111             : #endif
     112             : 
     113             : #if FD_SHA256_CORE_IMPL==0
     114             : 
     115             : /* The implementation below was derived from OpenSSL's SHA-256
     116             :    implementation (Apache-2.0 licensed).  See in particular:
     117             : 
     118             :     https://github.com/openssl/openssl/blob/master/crypto/sha/sha256.c
     119             : 
     120             :    (link valid circa 2022-Dec).  It has been made more strict with more
     121             :    extensive implementation documentation, has been simplified and has
     122             :    been streamlined specifically for use inside Firedancer base machine
     123             :    model (no machine specific capabilities required).
     124             : 
     125             :    In particular, fd_sha256_core_ref is based on OpenSSL's
     126             :    OPENSSL_SMALL_FOOTPRINT SHA-256 implementation (Apache licensed).
     127             :    This should work anywhere but it is not the highest performance
     128             :    implementation possible.
     129             : 
     130             :    It is also straightforward to replace these implementations with HPC
     131             :    implementations that target specific machine capabilities without
     132             :    requiring any changes to caller code. */
     133             : 
     134             : static void
     135             : fd_sha256_core_ref( uint *        state,
     136             :                     uchar const * block,
     137   453019068 :                     ulong         block_cnt ) {
     138             : 
     139             : 
     140 >34540*10^7 : # define ROTATE     fd_uint_rotate_left
     141 38378518912 : # define Sigma0(x)  (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
     142 38378518912 : # define Sigma1(x)  (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
     143 28783889184 : # define sigma0(x)  (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
     144 28783889184 : # define sigma1(x)  (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
     145 38378518912 : # define Ch(x,y,z)  (((x) & (y)) ^ ((~(x)) & (z)))
     146 38378518912 : # define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
     147             : 
     148   453019068 :   uint const * W = (uint const *)block;
     149   599664358 :   do {
     150   599664358 :     uint a = state[0];
     151   599664358 :     uint b = state[1];
     152   599664358 :     uint c = state[2];
     153   599664358 :     uint d = state[3];
     154   599664358 :     uint e = state[4];
     155   599664358 :     uint f = state[5];
     156   599664358 :     uint g = state[6];
     157   599664358 :     uint h = state[7];
     158             : 
     159   599664358 :     uint X[16];
     160             : 
     161   599664358 :     ulong i;
     162 10194294086 :     for( i=0UL; i<16UL; i++ ) {
     163  9594629728 :       X[i] = fd_uint_bswap( W[i] );
     164  9594629728 :       uint T1 = X[i] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
     165  9594629728 :       uint T2 = Sigma0(a) + Maj(a, b, c);
     166  9594629728 :       h = g;
     167  9594629728 :       g = f;
     168  9594629728 :       f = e;
     169  9594629728 :       e = d + T1;
     170  9594629728 :       d = c;
     171  9594629728 :       c = b;
     172  9594629728 :       b = a;
     173  9594629728 :       a = T1 + T2;
     174  9594629728 :     }
     175 29383553542 :     for( ; i<64UL; i++ ) {
     176 28783889184 :       uint s0 = X[(i +  1UL) & 0x0fUL];
     177 28783889184 :       uint s1 = X[(i + 14UL) & 0x0fUL];
     178 28783889184 :       s0 = sigma0(s0);
     179 28783889184 :       s1 = sigma1(s1);
     180 28783889184 :       X[i & 0xfUL] += s0 + s1 + X[(i + 9UL) & 0xfUL];
     181 28783889184 :       uint T1 = X[i & 0xfUL ] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
     182 28783889184 :       uint T2 = Sigma0(a) + Maj(a, b, c);
     183 28783889184 :       h = g;
     184 28783889184 :       g = f;
     185 28783889184 :       f = e;
     186 28783889184 :       e = d + T1;
     187 28783889184 :       d = c;
     188 28783889184 :       c = b;
     189 28783889184 :       b = a;
     190 28783889184 :       a = T1 + T2;
     191 28783889184 :     }
     192             : 
     193   599664358 :     state[0] += a;
     194   599664358 :     state[1] += b;
     195   599664358 :     state[2] += c;
     196   599664358 :     state[3] += d;
     197   599664358 :     state[4] += e;
     198   599664358 :     state[5] += f;
     199   599664358 :     state[6] += g;
     200   599664358 :     state[7] += h;
     201             : 
     202   599664358 :     W += 16UL;
     203   599664358 :   } while( --block_cnt );
     204             : 
     205   453019068 : # undef ROTATE
     206   453019068 : # undef Sigma0
     207   453019068 : # undef Sigma1
     208   453019068 : # undef sigma0
     209   453019068 : # undef sigma1
     210   453019068 : # undef Ch
     211   453019068 : # undef Maj
     212             : 
     213   453019068 : }
     214             : 
     215   453019068 : #define fd_sha256_core fd_sha256_core_ref
     216             : 
     217             : #elif FD_SHA256_CORE_IMPL==1
     218             : 
     219             : /* _mm_sha256rnds2_epu32 does two rounds, one from the first uint in
     220             :    wk and one from the second.  Since wk stores four rounds worth of
     221             :    message schedule values, it makes sense for the macro to do four
     222             :    rounds at a time.  We need to permute wk in between so that the
     223             :    second call to the intrinsic will use the other values. */
     224  4954064256 : #define FOUR_ROUNDS( wk ) do {                                                               \
     225  4954064256 :       vu_t __wk = (wk);                                                                      \
     226  4954064256 :       vu_t temp_state = stateFEBA;                                                           \
     227  4954064256 :       stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, __wk );                       \
     228  4954064256 :       stateHGDC = temp_state;                                                                \
     229  4954064256 :                                                                                              \
     230  4954064256 :       temp_state = stateFEBA;                                                                \
     231  4954064256 :       stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, vu_permute( __wk, 2,3,0,1 ) );\
     232  4954064256 :       stateHGDC = temp_state;                                                                \
     233  4954064256 :     } while( 0 )
     234             : 
     235             : 
     236             : /* For completeness, here's the documentation for _mm_sha256msg1_epu32
     237             :    and _mm_sha256msg2_epu32 in a slightly reformatted way, where all
     238             :    values are uints, and "-" indicates a don't-care value:
     239             : 
     240             :        _mm_sha256msg1_epu32( (w[j  ], w[j+1], w[j+2], w[j+3]),
     241             :                              (w[j+4], -,      -,      -     ) )
     242             :          = ( w[j  ]+s0( w[j+1] ),  w[j+1]+s0( w[j+2] ),
     243             :              w[j+2]+s0( w[j+3] ),  w[j+3]+s0( w[j+4] ) ).
     244             : 
     245             : 
     246             :        _mm_sha256msg2_epu32( (v[j  ], v[j+1], v[j+2], v[j+3]),
     247             :                              (-,      -,      w[j-2], w[j-1]) )
     248             :          sets w[j  ] = v[j  ] + s1( w[j-2] ) and
     249             :               w[j+1] = v[j+1] + s1( w[j-1] ), and then returns
     250             : 
     251             :            ( v[j  ]+s1( w[j-2] ), v[j+1]+s1( w[j-1] ),
     252             :              v[j+2]+s1( w[j  ] ), v[j+3]+s1( w[j+1] ) )   */
     253             : 
     254             : 
     255             : /* w[i] for i>= 16 is w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2])
     256             :    Since our vector size is 4 uints, it's only s1 that is a little
     257             :    problematic, because it references items in the same vector.
     258             :    Thankfully, the msg2 intrinsic takes care of the complexity, but we
     259             :    need to execute it last.
     260             : 
     261             :    We get w[i-16] + s0(w[i-15]) using the msg1 intrinsic, setting j =
     262             :    i-16.  For example, to compute w1013, we pass in w0003 and w0407.
     263             :    Then we can get w[i-7] by using the alignr instruction on
     264             :    (w[i-8], w[i-7], w[i-6], w[i-5]) and (w[i-4], w[i-3], w[i-2], w[i-1])
     265             :    to concatenate them and shift by one uint.  Continuing with the
     266             :    example of w1013, we need w080b and w0c0f.  We then put
     267             :              v[i] = w[i-16] + s0(w[i-15]) + w[i-7],
     268             :    and invoke the msg2 intrinsic with j=i, which gives w[i], as desired.
     269             :    Each invocation of NEXT_W computes 4 values of w. */
     270             : 
     271  3715548192 : #define NEXT_W( w_minus_16, w_minus_12, w_minus_8, w_minus_4 ) (__extension__({      \
     272  3715548192 :     vu_t __w_i_16_s0_i_15 = _mm_sha256msg1_epu32( w_minus_16, w_minus_12 );          \
     273  3715548192 :     vu_t __w_i_7          = _mm_alignr_epi8( w_minus_4, w_minus_8, 4 );              \
     274  3715548192 :     _mm_sha256msg2_epu32( vu_add( __w_i_7, __w_i_16_s0_i_15 ), w_minus_4 );          \
     275  3715548192 :     }))
     276             : 
     277             : /* Zen 5's sha256rnds2 has an RTP of 2, while Zen 4's has an RTP of 1. We can
     278             :    win some performance by moving the schedule updates earlier in the loop,
     279             :    which improves the speed by around 1M hashes/s for the repeated hashing. */
     280             : #ifdef __znver5__
     281             : #define FULL_ROUNDS() do {                                                                                        \
     282             :     vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) ); \
     283             :     vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) ); \
     284             :     vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) ); \
     285             :     vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) ); \
     286             :     vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) ); \
     287             :     vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) ); \
     288             :     vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) ); \
     289             :     vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) ); \
     290             :     vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) ); \
     291             :     vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) ); \
     292             :     vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) ); \
     293             :     vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) ); \
     294             :     /*                                              */ FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) ); \
     295             :     /*                                              */ FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) ); \
     296             :     /*                                              */ FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) ); \
     297             :     /*                                              */ FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) ); \
     298             :     } while ( 0 )
     299             : #else
     300   309629016 : #define FULL_ROUNDS() do {                                                                                        \
     301   309629016 :     /*                                              */ FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) ); \
     302   309629016 :     /*                                              */ FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) ); \
     303   309629016 :     /*                                              */ FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) ); \
     304   309629016 :     /*                                              */ FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) ); \
     305   309629016 :     vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) ); \
     306   309629016 :     vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) ); \
     307   309629016 :     vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) ); \
     308   309629016 :     vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) ); \
     309   309629016 :     vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) ); \
     310   309629016 :     vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) ); \
     311   309629016 :     vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) ); \
     312   309629016 :     vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) ); \
     313   309629016 :     vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) ); \
     314   309629016 :     vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) ); \
     315   309629016 :     vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) ); \
     316   309629016 :     vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) ); \
     317   309629016 :     } while ( 0 )
     318             : #endif
     319             : 
     320             : 
     321             : void
     322             : fd_sha256_core_shaext( uint *        state,       /* 64-byte aligned, 8 entries */
     323             :                        uchar const * block,       /* ideally 128-byte aligned (but not required), 64*block_cnt in size */
     324   105062896 :                        ulong         block_cnt ) {/* positive */
     325   105062896 :   vu_t stateABCD = vu_ld( state     );
     326   105062896 :   vu_t stateEFGH = vu_ld( state+4UL );
     327             : 
     328   105062896 :   vu_t baseFEBA = vu_permute2( stateEFGH, stateABCD, 1, 0, 1, 0 );
     329   105062896 :   vu_t baseHGDC = vu_permute2( stateEFGH, stateABCD, 3, 2, 3, 2 );
     330             : 
     331   291890713 :   for( ulong b=0UL; b<block_cnt; b++ ) {
     332   186827817 :     vu_t stateFEBA = baseFEBA;
     333   186827817 :     vu_t stateHGDC = baseHGDC;
     334             : 
     335   186827817 :     vu_t w0003 = vu_bswap( vu_ldu( block+64UL*b      ) );
     336   186827817 :     vu_t w0407 = vu_bswap( vu_ldu( block+64UL*b+16UL ) );
     337   186827817 :     vu_t w080b = vu_bswap( vu_ldu( block+64UL*b+32UL ) );
     338   186827817 :     vu_t w0c0f = vu_bswap( vu_ldu( block+64UL*b+48UL ) );
     339             : 
     340   186827817 :     FULL_ROUNDS();
     341             : 
     342   186827817 :     baseFEBA = vu_add( baseFEBA, stateFEBA );
     343   186827817 :     baseHGDC = vu_add( baseHGDC, stateHGDC );
     344             : 
     345   186827817 :   }
     346             : 
     347   105062896 :   stateABCD = vu_permute2( baseFEBA, baseHGDC, 3, 2, 3, 2 );
     348   105062896 :   stateEFGH = vu_permute2( baseFEBA, baseHGDC, 1, 0, 1, 0 );
     349   105062896 :   vu_st( state,     stateABCD );
     350   105062896 :   vu_st( state+4UL, stateEFGH );
     351   105062896 : }
     352             : 
     353   105062896 : #define fd_sha256_core fd_sha256_core_shaext
     354             : 
     355             : #else
     356             : #error "Unsupported FD_SHA256_CORE_IMPL"
     357             : #endif
     358             : 
     359             : fd_sha256_t *
     360     1566987 : fd_sha256_init( fd_sha256_t * sha ) {
     361     1566987 :   sha->state[0] = FD_SHA256_INITIAL_A;
     362     1566987 :   sha->state[1] = FD_SHA256_INITIAL_B;
     363     1566987 :   sha->state[2] = FD_SHA256_INITIAL_C;
     364     1566987 :   sha->state[3] = FD_SHA256_INITIAL_D;
     365     1566987 :   sha->state[4] = FD_SHA256_INITIAL_E;
     366     1566987 :   sha->state[5] = FD_SHA256_INITIAL_F;
     367     1566987 :   sha->state[6] = FD_SHA256_INITIAL_G;
     368     1566987 :   sha->state[7] = FD_SHA256_INITIAL_H;
     369     1566987 :   sha->buf_used = 0UL;
     370     1566987 :   sha->bit_cnt  = 0UL;
     371     1566987 :   return sha;
     372     1566987 : }
     373             : 
     374             : fd_sha256_t *
     375             : fd_sha256_append( fd_sha256_t * sha,
     376             :                   void const *  _data,
     377     5721051 :                   ulong         sz ) {
     378             : 
     379             :   /* If no data to append, we are done */
     380             : 
     381     5721051 :   if( FD_UNLIKELY( !sz ) ) return sha; /* optimize for non-trivial append */
     382             : 
     383             :   /* Unpack inputs */
     384             : 
     385     5720793 :   uint *  state    = sha->state;
     386     5720793 :   uchar * buf      = sha->buf;
     387     5720793 :   ulong   buf_used = sha->buf_used;
     388     5720793 :   ulong   bit_cnt  = sha->bit_cnt;
     389             : 
     390     5720793 :   uchar const * data = (uchar const *)_data;
     391             : 
     392             :   /* Update bit_cnt */
     393             :   /* FIXME: could accumulate bytes here and do bit conversion in fini */
     394             :   /* FIXME: Overflow handling if more than 2^64 bits (unlikely) */
     395             : 
     396     5720793 :   sha->bit_cnt = bit_cnt + (sz<<3);
     397             : 
     398             :   /* Handle buffered bytes from previous appends */
     399             : 
     400     5720793 :   if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */
     401             : 
     402             :     /* If the append isn't large enough to complete the current block,
     403             :        buffer these bytes too and return */
     404             : 
     405       79566 :     ulong buf_rem = FD_SHA256_PRIVATE_BUF_MAX - buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
     406       79566 :     if( FD_UNLIKELY( sz < buf_rem ) ) { /* optimize for large append */
     407         576 :       fd_memcpy( buf + buf_used, data, sz );
     408         576 :       sha->buf_used = buf_used + sz;
     409         576 :       return sha;
     410         576 :     }
     411             : 
     412             :     /* Otherwise, buffer enough leading bytes of data to complete the
     413             :        block, update the hash and then continue processing any remaining
     414             :        bytes of data. */
     415             : 
     416       78990 :     fd_memcpy( buf + buf_used, data, buf_rem );
     417       78990 :     data += buf_rem;
     418       78990 :     sz   -= buf_rem;
     419             : 
     420       78990 :     fd_sha256_core( state, buf, 1UL );
     421       78990 :     sha->buf_used = 0UL;
     422       78990 :   }
     423             : 
     424             :   /* Append the bulk of the data */
     425             : 
     426     5720217 :   ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
     427     5720217 :   if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt ); /* optimized for large append */
     428             : 
     429             :   /* Buffer any leftover bytes */
     430             : 
     431     5720217 :   buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL); /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
     432     5720217 :   if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */
     433     1345872 :     fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
     434     1345872 :     sha->buf_used = buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
     435     1345872 :   }
     436             : 
     437     5720217 :   return sha;
     438     5720793 : }
     439             : 
     440             : void *
     441             : fd_sha256_fini( fd_sha256_t * sha,
     442     1615596 :                 void *        _hash ) {
     443             : 
     444             :   /* Unpack inputs */
     445             : 
     446     1615596 :   uint *  state    = sha->state;
     447     1615596 :   uchar * buf      = sha->buf;
     448     1615596 :   ulong   buf_used = sha->buf_used; /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
     449     1615596 :   ulong   bit_cnt  = sha->bit_cnt;
     450             : 
     451             :   /* Append the terminating message byte */
     452             : 
     453     1615596 :   buf[ buf_used ] = (uchar)0x80;
     454     1615596 :   buf_used++;
     455             : 
     456             :   /* If there isn't enough room to save the message length in bits at
     457             :      the end of the in progress block, clear the rest of the in progress
     458             :      block, update the hash and start a new block. */
     459             : 
     460     1615596 :   if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) { /* optimize for well aligned use of append */
     461       12150 :     fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
     462       12150 :     fd_sha256_core( state, buf, 1UL );
     463       12150 :     buf_used = 0UL;
     464       12150 :   }
     465             : 
     466             :   /* Clear in progress block up to last 64-bits, append the message
     467             :      size in bits in the last 64-bits of the in progress block and
     468             :      update the hash to finalize it. */
     469             : 
     470     1615596 :   fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
     471     1615596 :   FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
     472     1615596 :   fd_sha256_core( state, buf, 1UL );
     473             : 
     474             :   /* Unpack the result into _hash (annoying bswaps here) */
     475             : 
     476     1615596 :   state[0] = fd_uint_bswap( state[0] );
     477     1615596 :   state[1] = fd_uint_bswap( state[1] );
     478     1615596 :   state[2] = fd_uint_bswap( state[2] );
     479     1615596 :   state[3] = fd_uint_bswap( state[3] );
     480     1615596 :   state[4] = fd_uint_bswap( state[4] );
     481     1615596 :   state[5] = fd_uint_bswap( state[5] );
     482     1615596 :   state[6] = fd_uint_bswap( state[6] );
     483     1615596 :   state[7] = fd_uint_bswap( state[7] );
     484     1615596 :   return memcpy( _hash, state, 32 );
     485     1615596 : }
     486             : 
     487             : void *
     488             : fd_sha256_hash( void const * _data,
     489             :                 ulong        sz,
     490   187345835 :                 void *       _hash ) {
     491   187345835 :   uchar const * data = (uchar const *)_data;
     492             : 
     493             :   /* This is just the above streamlined to eliminate all the overheads
     494             :      to support incremental hashing. */
     495             : 
     496   187345835 :   uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
     497   187345835 :   uint  state[8] __attribute__((aligned(32)));
     498             : 
     499   187345835 :   state[0] = FD_SHA256_INITIAL_A;
     500   187345835 :   state[1] = FD_SHA256_INITIAL_B;
     501   187345835 :   state[2] = FD_SHA256_INITIAL_C;
     502   187345835 :   state[3] = FD_SHA256_INITIAL_D;
     503   187345835 :   state[4] = FD_SHA256_INITIAL_E;
     504   187345835 :   state[5] = FD_SHA256_INITIAL_F;
     505   187345835 :   state[6] = FD_SHA256_INITIAL_G;
     506   187345835 :   state[7] = FD_SHA256_INITIAL_H;
     507             : 
     508   187345835 :   ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
     509   187345835 :   if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt );
     510             : 
     511   187345835 :   ulong buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL);
     512   187345835 :   if( FD_UNLIKELY( buf_used ) ) fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
     513   187345835 :   buf[ buf_used ] = (uchar)0x80;
     514   187345835 :   buf_used++;
     515             : 
     516   187345835 :   if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) {
     517     1176942 :     fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
     518     1176942 :     fd_sha256_core( state, buf, 1UL );
     519     1176942 :     buf_used = 0UL;
     520     1176942 :   }
     521             : 
     522   187345835 :   ulong bit_cnt = sz << 3;
     523   187345835 :   fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
     524   187345835 :   FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
     525   187345835 :   fd_sha256_core( state, buf, 1UL );
     526             : 
     527   187345835 :   state[0] = fd_uint_bswap( state[0] );
     528   187345835 :   state[1] = fd_uint_bswap( state[1] );
     529   187345835 :   state[2] = fd_uint_bswap( state[2] );
     530   187345835 :   state[3] = fd_uint_bswap( state[3] );
     531   187345835 :   state[4] = fd_uint_bswap( state[4] );
     532   187345835 :   state[5] = fd_uint_bswap( state[5] );
     533   187345835 :   state[6] = fd_uint_bswap( state[6] );
     534   187345835 :   state[7] = fd_uint_bswap( state[7] );
     535   187345835 :   return memcpy( _hash, state, 32 );
     536   187345835 : }
     537             : 
     538             : /* fd_sha256_hash_32_repeated: hash the 32 bytes at _data, hash that 32 byte result again, and so on for cnt passes; store the final 32 bytes at _hash and return _hash. */
     539             : /* cnt==0 runs no passes (portable path: _hash then receives the input bytes unchanged).  Built on the fd_sse.h vector ops under FD_HAS_SHANI, else on fd_sha256_core with a pre-padded one block buffer. */
     540             : void *
     541             : fd_sha256_hash_32_repeated( void const * _data,
     542             :                             void *       _hash,
     543      303099 :                             ulong        cnt ) {
     544      303099 :   uchar const * data = (uchar const *)_data;
     545      303099 :   uchar       * hash = (uchar       *)_hash;
     546      101033 : #if FD_HAS_SHANI
     547      101033 :   vu_t       w0003 = vu_bswap( vu_ldu( data      ) ); /* input bytes [0,16) via unaligned load, passed through vu_bswap */
     548      101033 :   vu_t       w0407 = vu_bswap( vu_ldu( data+16UL ) ); /* input bytes [16,32), same treatment */
     549      101033 :   vb_t const w080b = vb( 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
     550      101033 :                          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ); /* presumably the 0x80 terminator (cf. buf[ 32 ] in the portable path) held in the same swapped form as w0003/w0407 -- TODO confirm lane layout */
     551      101033 :   vb_t const w0c0f = vb( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     552      101033 :                          0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00 ); /* 32 bytes, i.e. 0x0100 == 256 bits (cf. bit_cnt in the portable path) */
     553             : /* Starting hash values, grouped F,E,B,A and H,G,D,C to match the two state vectors used in the loop */
     554      101033 :   vu_t const initialFEBA = vu( FD_SHA256_INITIAL_F, FD_SHA256_INITIAL_E, FD_SHA256_INITIAL_B, FD_SHA256_INITIAL_A );
     555      101033 :   vu_t const initialHGDC = vu( FD_SHA256_INITIAL_H, FD_SHA256_INITIAL_G, FD_SHA256_INITIAL_D, FD_SHA256_INITIAL_C );
     556             : /* One pass per iteration; w0003/w0407 carry the current 32 byte message from pass to pass */
     557   122902232 :   for( ulong iter=0UL; iter<cnt; iter++ ) {
     558   122801199 :     vu_t stateFEBA = initialFEBA;
     559   122801199 :     vu_t stateHGDC = initialHGDC;
     560             : /* NOTE(review): FULL_ROUNDS is a macro defined outside this view; presumably it runs the rounds over w0003/w0407/w080b/w0c0f and updates stateFEBA/stateHGDC -- confirm before renaming any of these locals */
     561   122801199 :     FULL_ROUNDS();
     562             : /* Add the starting values back into the round output */
     563   122801199 :     stateFEBA = vu_add( stateFEBA, initialFEBA );
     564   122801199 :     stateHGDC = vu_add( stateHGDC, initialHGDC );
     565             : /* Regroup the FEBA/HGDC pair into ABCD and EFGH vectors (ordering per the variable names) */
     566   122801199 :     vu_t stateABCD = vu_permute2( stateFEBA, stateHGDC, 3, 2, 3, 2 );
     567   122801199 :     vu_t stateEFGH = vu_permute2( stateFEBA, stateHGDC, 1, 0, 1, 0 );
     568             : /* The result becomes the next pass's message directly; vu_bswap is applied only on the way out, after the loop */
     569   122801199 :     w0003 = stateABCD;
     570   122801199 :     w0407 = stateEFGH;
     571   122801199 :   }
     572      101033 :   vu_stu( hash,      vu_bswap( w0003 ) ); /* unaligned stores of the final 32 bytes */
     573      101033 :   vu_stu( hash+16UL, vu_bswap( w0407 ) );
     574      101033 : #undef NEXT_W
     575      101033 : #undef FOUR_ROUNDS
     576      101033 : #undef FULL_ROUNDS
     577             : /* The three macros above are defined earlier in the file and are dropped here after their last use in this branch */
     578             : #else
     579             : /* Portable path: no vector ops, one fd_sha256_core call per pass */
     580      202066 :   uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
     581             : 
     582             :   /* Prepare padding once:  32 message bytes, a 0x80 byte, zero fill, then the byte swapped bit count in the last 8 bytes.  Only buf[0,32) changes between passes. */
     583      202066 :   ulong buf_used = 32UL;
     584      202066 :   memcpy( buf, data, 32UL );
     585      202066 :   buf[ buf_used ] = (uchar)0x80;
     586      202066 :   buf_used++;
     587             : 
     588      202066 :   ulong bit_cnt = 32UL << 3;
     589      202066 :   memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
     590      202066 :   FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
     591             : 
     592             :   /* This is just the above streamlined to eliminate all the overheads
     593             :      to support incremental hashing. */
     594   245804464 :   for( ulong iter=0UL; iter<cnt; iter++ ) {
     595             : /* Fresh state for every pass, loaded with the FD_SHA256_INITIAL_* constants */
     596   245602398 :     uint  state[8] __attribute__((aligned(32)));
     597             : 
     598   245602398 :     state[0] = FD_SHA256_INITIAL_A;
     599   245602398 :     state[1] = FD_SHA256_INITIAL_B;
     600   245602398 :     state[2] = FD_SHA256_INITIAL_C;
     601   245602398 :     state[3] = FD_SHA256_INITIAL_D;
     602   245602398 :     state[4] = FD_SHA256_INITIAL_E;
     603   245602398 :     state[5] = FD_SHA256_INITIAL_F;
     604   245602398 :     state[6] = FD_SHA256_INITIAL_G;
     605   245602398 :     state[7] = FD_SHA256_INITIAL_H;
     606             : /* Exactly one block per pass; buf already carries the padding prepared before the loop */
     607   245602398 :     fd_sha256_core( state, buf, 1UL );
     608             : /* Byte swap each state word, then write the 32 result bytes over buf[0,32) so they are the next pass's message */
     609   245602398 :     state[0] = fd_uint_bswap( state[0] );
     610   245602398 :     state[1] = fd_uint_bswap( state[1] );
     611   245602398 :     state[2] = fd_uint_bswap( state[2] );
     612   245602398 :     state[3] = fd_uint_bswap( state[3] );
     613   245602398 :     state[4] = fd_uint_bswap( state[4] );
     614   245602398 :     state[5] = fd_uint_bswap( state[5] );
     615   245602398 :     state[6] = fd_uint_bswap( state[6] );
     616   245602398 :     state[7] = fd_uint_bswap( state[7] );
     617   245602398 :     memcpy( buf, state, 32UL );
     618   245602398 :   }
     619      202066 :   memcpy( hash, buf, 32UL ); /* buf[0,32) holds the last result, or the untouched input when cnt==0 */
     620      202066 : #endif
     621      303099 :   return _hash;
     622      303099 : }
     623             : 
     624             : #undef fd_sha256_core

Generated by: LCOV version 1.14