LCOV - code coverage report
Current view: top level - ballet/sha256 - fd_sha256.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 360 366 98.4 %
Date: 2025-07-13 05:02:02 Functions: 13 13 100.0 %

          Line data    Source code
       1             : #include "fd_sha256.h"
       2             : #include "fd_sha256_constants.h"
       3             : 
       4             : #if FD_HAS_SHANI
       5             : /* For the optimized repeated hash */
       6             : #include "../../util/simd/fd_sse.h"
       7             : #endif
       8             : 
       9             : ulong
      10      921300 : fd_sha256_align( void ) {
      11      921300 :   return FD_SHA256_ALIGN;
      12      921300 : }
      13             : 
      14             : ulong
      15      460641 : fd_sha256_footprint( void ) {
      16      460641 :   return FD_SHA256_FOOTPRINT;
      17      460641 : }
      18             : 
      19             : void *
      20      460644 : fd_sha256_new( void * shmem ) {
      21      460644 :   fd_sha256_t * sha = (fd_sha256_t *)shmem;
      22             : 
      23      460644 :   if( FD_UNLIKELY( !shmem ) ) {
      24           3 :     FD_LOG_WARNING(( "NULL shmem" ));
      25           3 :     return NULL;
      26           3 :   }
      27             : 
      28      460641 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_sha256_align() ) ) ) {
      29           3 :     FD_LOG_WARNING(( "misaligned shmem" ));
      30           3 :     return NULL;
      31           3 :   }
      32             : 
      33      460638 :   ulong footprint = fd_sha256_footprint();
      34             : 
      35      460638 :   fd_memset( sha, 0, footprint );
      36             : 
      37      460638 :   FD_COMPILER_MFENCE();
      38      460638 :   FD_VOLATILE( sha->magic ) = FD_SHA256_MAGIC;
      39      460638 :   FD_COMPILER_MFENCE();
      40             : 
      41      460638 :   return (void *)sha;
      42      460641 : }
      43             : 
      44             : fd_sha256_t *
      45      460644 : fd_sha256_join( void * shsha ) {
      46             : 
      47      460644 :   if( FD_UNLIKELY( !shsha ) ) {
      48           3 :     FD_LOG_WARNING(( "NULL shsha" ));
      49           3 :     return NULL;
      50           3 :   }
      51             : 
      52      460641 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
      53           3 :     FD_LOG_WARNING(( "misaligned shsha" ));
      54           3 :     return NULL;
      55           3 :   }
      56             : 
      57      460638 :   fd_sha256_t * sha = (fd_sha256_t *)shsha;
      58             : 
      59      460638 :   if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
      60           0 :     FD_LOG_WARNING(( "bad magic" ));
      61           0 :     return NULL;
      62           0 :   }
      63             : 
      64      460638 :   return sha;
      65      460638 : }
      66             : 
      67             : void *
      68          15 : fd_sha256_leave( fd_sha256_t * sha ) {
      69             : 
      70          15 :   if( FD_UNLIKELY( !sha ) ) {
      71           3 :     FD_LOG_WARNING(( "NULL sha" ));
      72           3 :     return NULL;
      73           3 :   }
      74             : 
      75          12 :   return (void *)sha;
      76          15 : }
      77             : 
      78             : void *
      79          18 : fd_sha256_delete( void * shsha ) {
      80             : 
      81          18 :   if( FD_UNLIKELY( !shsha ) ) {
      82           3 :     FD_LOG_WARNING(( "NULL shsha" ));
      83           3 :     return NULL;
      84           3 :   }
      85             : 
      86          15 :   if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shsha, fd_sha256_align() ) ) ) {
      87           3 :     FD_LOG_WARNING(( "misaligned shsha" ));
      88           3 :     return NULL;
      89           3 :   }
      90             : 
      91          12 :   fd_sha256_t * sha = (fd_sha256_t *)shsha;
      92             : 
      93          12 :   if( FD_UNLIKELY( sha->magic!=FD_SHA256_MAGIC ) ) {
      94           0 :     FD_LOG_WARNING(( "bad magic" ));
      95           0 :     return NULL;
      96           0 :   }
      97             : 
      98          12 :   FD_COMPILER_MFENCE();
      99          12 :   FD_VOLATILE( sha->magic ) = 0UL;
     100          12 :   FD_COMPILER_MFENCE();
     101             : 
     102          12 :   return (void *)sha;
     103          12 : }
     104             : 
     105             : #ifndef FD_SHA256_CORE_IMPL
     106             : #if FD_HAS_SHANI
     107             : #define FD_SHA256_CORE_IMPL 1
     108             : #else
     109             : #define FD_SHA256_CORE_IMPL 0
     110             : #endif
     111             : #endif
     112             : 
     113             : #if FD_SHA256_CORE_IMPL==0
     114             : 
     115             : /* The implementation below was derived from OpenSSL's SHA-256
     116             :    implementation (Apache-2.0 licensed).  See in particular:
     117             : 
     118             :     https://github.com/openssl/openssl/blob/master/crypto/sha/sha256.c
     119             : 
     120             :    (link valid circa 2022-Dec).  It has been made more strict with more
     121             :    extensive implementation documentation, has been simplified and has
     122             :    been streamlined specifically for use inside Firedancer base machine
     123             :    model (no machine specific capabilities required).
     124             : 
     125             :    In particular, fd_sha256_core_ref is based on OpenSSL's
     126             :    OPENSSL_SMALL_FOOTPRINT SHA-256 implementation (Apache licensed).
     127             :    This should work anywhere but it is not the highest performance
     128             :    implementation possible.
     129             : 
     130             :    It is also straightforward to replace these implementations with HPC
     131             :    implementations that target specific machine capabilities without
     132             :    requiring any changes to caller code. */
     133             : 
     134             : static void
     135             : fd_sha256_core_ref( uint *        state,
     136             :                     uchar const * block,
     137   454601128 :                     ulong         block_cnt ) {
     138             : 
     139             : 
     140 >34628*10^7 : # define ROTATE     fd_uint_rotate_left
     141 38476339968 : # define Sigma0(x)  (ROTATE((x),30) ^ ROTATE((x),19) ^ ROTATE((x),10))
     142 38476339968 : # define Sigma1(x)  (ROTATE((x),26) ^ ROTATE((x),21) ^ ROTATE((x),7))
     143 28857254976 : # define sigma0(x)  (ROTATE((x),25) ^ ROTATE((x),14) ^ ((x)>>3))
     144 28857254976 : # define sigma1(x)  (ROTATE((x),15) ^ ROTATE((x),13) ^ ((x)>>10))
     145 38476339968 : # define Ch(x,y,z)  (((x) & (y)) ^ ((~(x)) & (z)))
     146 38476339968 : # define Maj(x,y,z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
     147             : 
     148   454601128 :   uint const * W = (uint const *)block;
     149   601192812 :   do {
     150   601192812 :     uint a = state[0];
     151   601192812 :     uint b = state[1];
     152   601192812 :     uint c = state[2];
     153   601192812 :     uint d = state[3];
     154   601192812 :     uint e = state[4];
     155   601192812 :     uint f = state[5];
     156   601192812 :     uint g = state[6];
     157   601192812 :     uint h = state[7];
     158             : 
     159   601192812 :     uint X[16];
     160             : 
     161   601192812 :     ulong i;
     162 10220277804 :     for( i=0UL; i<16UL; i++ ) {
     163  9619084992 :       X[i] = fd_uint_bswap( W[i] );
     164  9619084992 :       uint T1 = X[i] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
     165  9619084992 :       uint T2 = Sigma0(a) + Maj(a, b, c);
     166  9619084992 :       h = g;
     167  9619084992 :       g = f;
     168  9619084992 :       f = e;
     169  9619084992 :       e = d + T1;
     170  9619084992 :       d = c;
     171  9619084992 :       c = b;
     172  9619084992 :       b = a;
     173  9619084992 :       a = T1 + T2;
     174  9619084992 :     }
     175 29458447788 :     for( ; i<64UL; i++ ) {
     176 28857254976 :       uint s0 = X[(i +  1UL) & 0x0fUL];
     177 28857254976 :       uint s1 = X[(i + 14UL) & 0x0fUL];
     178 28857254976 :       s0 = sigma0(s0);
     179 28857254976 :       s1 = sigma1(s1);
     180 28857254976 :       X[i & 0xfUL] += s0 + s1 + X[(i + 9UL) & 0xfUL];
     181 28857254976 :       uint T1 = X[i & 0xfUL ] + h + Sigma1(e) + Ch(e, f, g) + fd_sha256_K[i];
     182 28857254976 :       uint T2 = Sigma0(a) + Maj(a, b, c);
     183 28857254976 :       h = g;
     184 28857254976 :       g = f;
     185 28857254976 :       f = e;
     186 28857254976 :       e = d + T1;
     187 28857254976 :       d = c;
     188 28857254976 :       c = b;
     189 28857254976 :       b = a;
     190 28857254976 :       a = T1 + T2;
     191 28857254976 :     }
     192             : 
     193   601192812 :     state[0] += a;
     194   601192812 :     state[1] += b;
     195   601192812 :     state[2] += c;
     196   601192812 :     state[3] += d;
     197   601192812 :     state[4] += e;
     198   601192812 :     state[5] += f;
     199   601192812 :     state[6] += g;
     200   601192812 :     state[7] += h;
     201             : 
     202   601192812 :     W += 16UL;
     203   601192812 :   } while( --block_cnt );
     204             : 
     205   454601128 : # undef ROTATE
     206   454601128 : # undef Sigma0
     207   454601128 : # undef Sigma1
     208   454601128 : # undef sigma0
     209   454601128 : # undef sigma1
     210   454601128 : # undef Ch
     211   454601128 : # undef Maj
     212             : 
     213   454601128 : }
     214             : 
     215   454601128 : #define fd_sha256_core fd_sha256_core_ref
     216             : 
     217             : #elif FD_SHA256_CORE_IMPL==1
     218             : 
     219             : /* _mm_sha256rnds2_epu32 does two rounds, one from the first uint in
     220             :    wk and one from the second.  Since wk stores four rounds worth of
     221             :    message schedule values, it makes sense for the macro to do four
     222             :    rounds at a time.  We need to permute wk in between so that the
     223             :    second call to the intrinsic will use the other values. */
     224  4966291888 : #define FOUR_ROUNDS( wk ) do {                                                               \
     225  4966291888 :       vu_t __wk = (wk);                                                                      \
     226  4966291888 :       vu_t temp_state = stateFEBA;                                                           \
     227  4966291888 :       stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, __wk );                       \
     228  4966291888 :       stateHGDC = temp_state;                                                                \
     229  4966291888 :                                                                                              \
     230  4966291888 :       temp_state = stateFEBA;                                                                \
     231  4966291888 :       stateFEBA = _mm_sha256rnds2_epu32( stateHGDC, stateFEBA, vu_permute( __wk, 2,3,0,1 ) );\
     232  4966291888 :       stateHGDC = temp_state;                                                                \
     233  4966291888 :     } while( 0 )
     234             : 
     235             : 
     236             : /* For completeness, here's the documentation for _mm_sha256msg1_epu32
     237             :    and _mm_sha256msg2_epu32 in a slightly reformatted way, where all
     238             :    values are uints, and "-" indicates a don't-care value:
     239             : 
     240             :        _mm_sha256msg1_epu32( (w[j  ], w[j+1], w[j+2], w[j+3]),
     241             :                              (w[j+4], -,      -,      -     ) )
     242             :          = ( w[j  ]+s0( w[j+1] ),  w[j+1]+s0( w[j+2] ),
     243             :              w[j+2]+s0( w[j+3] ),  w[j+3]+s0( w[j+4] ) ).
     244             : 
     245             : 
     246             :        _mm_sha256msg2_epu32( (v[j  ], v[j+1], v[j+2], v[j+3]),
     247             :                              (-,      -,      w[j-2], w[j-1]) )
     248             :          sets w[j  ] = v[j  ] + s1( w[j-2] ) and
     249             :               w[j+1] = v[j+1] + s1( w[j-1] ), and then returns
     250             : 
     251             :            ( v[j  ]+s1( w[j-2] ), v[j+1]+s1( w[j-1] ),
     252             :              v[j+2]+s1( w[j  ] ), v[j+3]+s1( w[j+1] ) )   */
     253             : 
     254             : 
     255             : /* w[i] for i>= 16 is w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2])
     256             :    Since our vector size is 4 uints, it's only s1 that is a little
     257             :    problematic, because it references items in the same vector.
     258             :    Thankfully, the msg2 intrinsic takes care of the complexity, but we
     259             :    need to execute it last.
     260             : 
     261             :    We get w[i-16] and s0(w[i-15]) using the msg1 intrinsic, setting j =
     262             :    i-16.  For example, to compute w1013, we pass in w0003 and w0407.
     263             :    Then we can get w[i-7] by using the alignr instruction on
     264             :    (w[i-8], w[i-7], w[i-6], w[i-5]) and (w[i-4], w[i-3], w[i-2], w[i-1])
     265             :    to concatenate them and shift by one uint.  Continuing with the
     266             :    example of w1013, we need w080b and w0c0f.  We then put
     267             :              v[i] = w[i-16] + s0(w[i-15]) + w[i-7],
     268             :    and invoke the msg2 intrinsic with j=i, which gives w[i], as desired.
     269             :    Each invocation of NEXT_W computes 4 values of w. */
     270             : 
     271  3724718916 : #define NEXT_W( w_minus_16, w_minus_12, w_minus_8, w_minus_4 ) (__extension__({      \
     272  3724718916 :     vu_t __w_i_16_s0_i_15 = _mm_sha256msg1_epu32( w_minus_16, w_minus_12 );          \
     273  3724718916 :     vu_t __w_i_7          = _mm_alignr_epi8( w_minus_4, w_minus_8, 4 );              \
     274  3724718916 :     _mm_sha256msg2_epu32( vu_add( __w_i_7, __w_i_16_s0_i_15 ), w_minus_4 );          \
     275  3724718916 :     }))
     276             : 
     277             : void
     278             : fd_sha256_core_shaext( uint *        state,       /* 64-byte aligned, 8 entries */
     279             :                        uchar const * block,       /* ideally 128-byte aligned (but not required), 64*block_cnt in size */
     280   105853926 :                        ulong         block_cnt ) {/* positive */
     281   105853926 :   vu_t stateABCD = vu_ld( state     );
     282   105853926 :   vu_t stateEFGH = vu_ld( state+4UL );
     283             : 
     284   105853926 :   vu_t baseFEBA = vu_permute2( stateEFGH, stateABCD, 1, 0, 1, 0 );
     285   105853926 :   vu_t baseHGDC = vu_permute2( stateEFGH, stateABCD, 3, 2, 3, 2 );
     286             : 
     287   293445970 :   for( ulong b=0UL; b<block_cnt; b++ ) {
     288   187592044 :     vu_t stateFEBA = baseFEBA;
     289   187592044 :     vu_t stateHGDC = baseHGDC;
     290             : 
     291   187592044 :     vu_t w0003 = vu_bswap( vu_ldu( block+64UL*b      ) );
     292   187592044 :     vu_t w0407 = vu_bswap( vu_ldu( block+64UL*b+16UL ) );
     293   187592044 :     vu_t w080b = vu_bswap( vu_ldu( block+64UL*b+32UL ) );
     294   187592044 :     vu_t w0c0f = vu_bswap( vu_ldu( block+64UL*b+48UL ) );
     295             : 
     296   187592044 :     /*                                              */ FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) );
     297   187592044 :     /*                                              */ FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) );
     298   187592044 :     /*                                              */ FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) );
     299   187592044 :     /*                                              */ FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) );
     300   187592044 :     vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) );
     301   187592044 :     vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) );
     302   187592044 :     vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) );
     303   187592044 :     vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) );
     304   187592044 :     vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) );
     305   187592044 :     vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) );
     306   187592044 :     vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) );
     307   187592044 :     vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) );
     308   187592044 :     vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) );
     309   187592044 :     vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) );
     310   187592044 :     vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) );
     311   187592044 :     vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) );
     312             : 
     313   187592044 :     baseFEBA = vu_add( baseFEBA, stateFEBA );
     314   187592044 :     baseHGDC = vu_add( baseHGDC, stateHGDC );
     315             : 
     316   187592044 :   }
     317             : 
     318   105853926 :   stateABCD = vu_permute2( baseFEBA, baseHGDC, 3, 2, 3, 2 );
     319   105853926 :   stateEFGH = vu_permute2( baseFEBA, baseHGDC, 1, 0, 1, 0 );
     320   105853926 :   vu_st( state,     stateABCD );
     321   105853926 :   vu_st( state+4UL, stateEFGH );
     322   105853926 : }
     323             : 
     324   105853926 : #define fd_sha256_core fd_sha256_core_shaext
     325             : 
     326             : #else
     327             : #error "Unsupported FD_SHA256_CORE_IMPL"
     328             : #endif
     329             : 
     330             : fd_sha256_t *
     331     1554381 : fd_sha256_init( fd_sha256_t * sha ) {
     332     1554381 :   sha->state[0] = FD_SHA256_INITIAL_A;
     333     1554381 :   sha->state[1] = FD_SHA256_INITIAL_B;
     334     1554381 :   sha->state[2] = FD_SHA256_INITIAL_C;
     335     1554381 :   sha->state[3] = FD_SHA256_INITIAL_D;
     336     1554381 :   sha->state[4] = FD_SHA256_INITIAL_E;
     337     1554381 :   sha->state[5] = FD_SHA256_INITIAL_F;
     338     1554381 :   sha->state[6] = FD_SHA256_INITIAL_G;
     339     1554381 :   sha->state[7] = FD_SHA256_INITIAL_H;
     340     1554381 :   sha->buf_used = 0UL;
     341     1554381 :   sha->bit_cnt  = 0UL;
     342     1554381 :   return sha;
     343     1554381 : }
     344             : 
     345             : fd_sha256_t *
     346             : fd_sha256_append( fd_sha256_t * sha,
     347             :                   void const *  _data,
     348     5700150 :                   ulong         sz ) {
     349             : 
     350             :   /* If no data to append, we are done */
     351             : 
     352     5700150 :   if( FD_UNLIKELY( !sz ) ) return sha; /* optimize for non-trivial append */
     353             : 
     354             :   /* Unpack inputs */
     355             : 
     356     5699892 :   uint *  state    = sha->state;
     357     5699892 :   uchar * buf      = sha->buf;
     358     5699892 :   ulong   buf_used = sha->buf_used;
     359     5699892 :   ulong   bit_cnt  = sha->bit_cnt;
     360             : 
     361     5699892 :   uchar const * data = (uchar const *)_data;
     362             : 
     363             :   /* Update bit_cnt */
     364             :   /* FIXME: could accumulate bytes here and do bit conversion in fini */
     365             :   /* FIXME: Overflow handling if more than 2^64 bits (unlikely) */
     366             : 
     367     5699892 :   sha->bit_cnt = bit_cnt + (sz<<3);
     368             : 
     369             :   /* Handle buffered bytes from previous appends */
     370             : 
     371     5699892 :   if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */
     372             : 
     373             :     /* If the append isn't large enough to complete the current block,
     374             :        buffer these bytes too and return */
     375             : 
     376       78888 :     ulong buf_rem = FD_SHA256_PRIVATE_BUF_MAX - buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
     377       78888 :     if( FD_UNLIKELY( sz < buf_rem ) ) { /* optimize for large append */
     378         531 :       fd_memcpy( buf + buf_used, data, sz );
     379         531 :       sha->buf_used = buf_used + sz;
     380         531 :       return sha;
     381         531 :     }
     382             : 
     383             :     /* Otherwise, buffer enough leading bytes of data to complete the
     384             :        block, update the hash and then continue processing any remaining
     385             :        bytes of data. */
     386             : 
     387       78357 :     fd_memcpy( buf + buf_used, data, buf_rem );
     388       78357 :     data += buf_rem;
     389       78357 :     sz   -= buf_rem;
     390             : 
     391       78357 :     fd_sha256_core( state, buf, 1UL );
     392       78357 :     sha->buf_used = 0UL;
     393       78357 :   }
     394             : 
     395             :   /* Append the bulk of the data */
     396             : 
     397     5699361 :   ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
     398     5699361 :   if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt ); /* optimized for large append */
     399             : 
     400             :   /* Buffer any leftover bytes */
     401             : 
     402     5699361 :   buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL); /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
     403     5699361 :   if( FD_UNLIKELY( buf_used ) ) { /* optimized for well aligned use of append */
     404     1332633 :     fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
     405     1332633 :     sha->buf_used = buf_used; /* In (0,FD_SHA256_PRIVATE_BUF_MAX) */
     406     1332633 :   }
     407             : 
     408     5699361 :   return sha;
     409     5699892 : }
     410             : 
     411             : void *
     412             : fd_sha256_fini( fd_sha256_t * sha,
     413     1602555 :                 void *        _hash ) {
     414             : 
     415             :   /* Unpack inputs */
     416             : 
     417     1602555 :   uint *  state    = sha->state;
     418     1602555 :   uchar * buf      = sha->buf;
     419     1602555 :   ulong   buf_used = sha->buf_used; /* In [0,FD_SHA256_PRIVATE_BUF_MAX) */
     420     1602555 :   ulong   bit_cnt  = sha->bit_cnt;
     421             : 
     422             :   /* Append the terminating message byte */
     423             : 
     424     1602555 :   buf[ buf_used ] = (uchar)0x80;
     425     1602555 :   buf_used++;
     426             : 
     427             :   /* If there isn't enough room to save the message length in bits at
     428             :      the end of the in progress block, clear the rest of the in progress
     429             :      block, update the hash and start a new block. */
     430             : 
     431     1602555 :   if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) { /* optimize for well aligned use of append */
     432       12048 :     fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
     433       12048 :     fd_sha256_core( state, buf, 1UL );
     434       12048 :     buf_used = 0UL;
     435       12048 :   }
     436             : 
     437             :   /* Clear in progress block up to last 64-bits, append the message
     438             :      size in bits in the last 64-bits of the in progress block and
     439             :      update the hash to finalize it. */
     440             : 
     441     1602555 :   fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
     442     1602555 :   FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
     443     1602555 :   fd_sha256_core( state, buf, 1UL );
     444             : 
     445             :   /* Unpack the result into _hash (annoying bswaps here) */
     446             : 
     447     1602555 :   state[0] = fd_uint_bswap( state[0] );
     448     1602555 :   state[1] = fd_uint_bswap( state[1] );
     449     1602555 :   state[2] = fd_uint_bswap( state[2] );
     450     1602555 :   state[3] = fd_uint_bswap( state[3] );
     451     1602555 :   state[4] = fd_uint_bswap( state[4] );
     452     1602555 :   state[5] = fd_uint_bswap( state[5] );
     453     1602555 :   state[6] = fd_uint_bswap( state[6] );
     454     1602555 :   state[7] = fd_uint_bswap( state[7] );
     455     1602555 :   return memcpy( _hash, state, 32 );
     456     1602555 : }
     457             : 
     458             : void *
     459             : fd_sha256_hash( void const * _data,
     460             :                 ulong        sz,
     461   189745613 :                 void *       _hash ) {
     462   189745613 :   uchar const * data = (uchar const *)_data;
     463             : 
     464             :   /* This is just the above streamlined to eliminate all the overheads
     465             :      to support incremental hashing. */
     466             : 
     467   189745613 :   uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
     468   189745613 :   uint  state[8] __attribute__((aligned(32)));
     469             : 
     470   189745613 :   state[0] = FD_SHA256_INITIAL_A;
     471   189745613 :   state[1] = FD_SHA256_INITIAL_B;
     472   189745613 :   state[2] = FD_SHA256_INITIAL_C;
     473   189745613 :   state[3] = FD_SHA256_INITIAL_D;
     474   189745613 :   state[4] = FD_SHA256_INITIAL_E;
     475   189745613 :   state[5] = FD_SHA256_INITIAL_F;
     476   189745613 :   state[6] = FD_SHA256_INITIAL_G;
     477   189745613 :   state[7] = FD_SHA256_INITIAL_H;
     478             : 
     479   189745613 :   ulong block_cnt = sz >> FD_SHA256_PRIVATE_LG_BUF_MAX;
     480   189745613 :   if( FD_LIKELY( block_cnt ) ) fd_sha256_core( state, data, block_cnt );
     481             : 
     482   189745613 :   ulong buf_used = sz & (FD_SHA256_PRIVATE_BUF_MAX-1UL);
     483   189745613 :   if( FD_UNLIKELY( buf_used ) ) fd_memcpy( buf, data + (block_cnt << FD_SHA256_PRIVATE_LG_BUF_MAX), buf_used );
     484   189745613 :   buf[ buf_used ] = (uchar)0x80;
     485   189745613 :   buf_used++;
     486             : 
     487   189745613 :   if( FD_UNLIKELY( buf_used > (FD_SHA256_PRIVATE_BUF_MAX-8UL) ) ) {
     488     1176942 :     fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-buf_used );
     489     1176942 :     fd_sha256_core( state, buf, 1UL );
     490     1176942 :     buf_used = 0UL;
     491     1176942 :   }
     492             : 
     493   189745613 :   ulong bit_cnt = sz << 3;
     494   189745613 :   fd_memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
     495   189745613 :   FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
     496   189745613 :   fd_sha256_core( state, buf, 1UL );
     497             : 
     498   189745613 :   state[0] = fd_uint_bswap( state[0] );
     499   189745613 :   state[1] = fd_uint_bswap( state[1] );
     500   189745613 :   state[2] = fd_uint_bswap( state[2] );
     501   189745613 :   state[3] = fd_uint_bswap( state[3] );
     502   189745613 :   state[4] = fd_uint_bswap( state[4] );
     503   189745613 :   state[5] = fd_uint_bswap( state[5] );
     504   189745613 :   state[6] = fd_uint_bswap( state[6] );
     505   189745613 :   state[7] = fd_uint_bswap( state[7] );
     506   189745613 :   return memcpy( _hash, state, 32 );
     507   189745613 : }
     508             : 
     509             : 
     510             : 
     511             : void *
     512             : fd_sha256_hash_32_repeated( void const * _data,
     513             :                             void *       _hash,
     514      303099 :                             ulong        cnt ) {
     515      303099 :   uchar const * data = (uchar const *)_data;
     516      303099 :   uchar       * hash = (uchar       *)_hash;
     517      101033 : #if FD_HAS_SHANI
     518      101033 :   vu_t       w0003 = vu_bswap( vu_ldu( data      ) );
     519      101033 :   vu_t       w0407 = vu_bswap( vu_ldu( data+16UL ) );
     520      101033 :   vb_t const w080b = vb( 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00,
     521      101033 :                          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 );
     522      101033 :   vb_t const w0c0f = vb( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     523      101033 :                          0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00 ); /* 32 bytes */
     524             : 
     525      101033 :   vu_t const initialFEBA = vu( FD_SHA256_INITIAL_F, FD_SHA256_INITIAL_E, FD_SHA256_INITIAL_B, FD_SHA256_INITIAL_A );
     526      101033 :   vu_t const initialHGDC = vu( FD_SHA256_INITIAL_H, FD_SHA256_INITIAL_G, FD_SHA256_INITIAL_D, FD_SHA256_INITIAL_C );
     527             : 
     528   122902232 :   for( ulong iter=0UL; iter<cnt; iter++ ) {
     529   122801199 :     vu_t stateFEBA = initialFEBA;
     530   122801199 :     vu_t stateHGDC = initialHGDC;
     531             : 
     532             : 
     533             : 
     534   122801199 :     /*                                              */ FOUR_ROUNDS( vu_add( w0003, vu_ld( fd_sha256_K+ 0UL ) ) );
     535   122801199 :     /*                                              */ FOUR_ROUNDS( vu_add( w0407, vu_ld( fd_sha256_K+ 4UL ) ) );
     536   122801199 :     /*                                              */ FOUR_ROUNDS( vu_add( w080b, vu_ld( fd_sha256_K+ 8UL ) ) );
     537   122801199 :     /*                                              */ FOUR_ROUNDS( vu_add( w0c0f, vu_ld( fd_sha256_K+12UL ) ) );
     538   122801199 :     vu_t w1013 = NEXT_W( w0003, w0407, w080b, w0c0f ); FOUR_ROUNDS( vu_add( w1013, vu_ld( fd_sha256_K+16UL ) ) );
     539   122801199 :     vu_t w1417 = NEXT_W( w0407, w080b, w0c0f, w1013 ); FOUR_ROUNDS( vu_add( w1417, vu_ld( fd_sha256_K+20UL ) ) );
     540   122801199 :     vu_t w181b = NEXT_W( w080b, w0c0f, w1013, w1417 ); FOUR_ROUNDS( vu_add( w181b, vu_ld( fd_sha256_K+24UL ) ) );
     541   122801199 :     vu_t w1c1f = NEXT_W( w0c0f, w1013, w1417, w181b ); FOUR_ROUNDS( vu_add( w1c1f, vu_ld( fd_sha256_K+28UL ) ) );
     542   122801199 :     vu_t w2023 = NEXT_W( w1013, w1417, w181b, w1c1f ); FOUR_ROUNDS( vu_add( w2023, vu_ld( fd_sha256_K+32UL ) ) );
     543   122801199 :     vu_t w2427 = NEXT_W( w1417, w181b, w1c1f, w2023 ); FOUR_ROUNDS( vu_add( w2427, vu_ld( fd_sha256_K+36UL ) ) );
     544   122801199 :     vu_t w282b = NEXT_W( w181b, w1c1f, w2023, w2427 ); FOUR_ROUNDS( vu_add( w282b, vu_ld( fd_sha256_K+40UL ) ) );
     545   122801199 :     vu_t w2c2f = NEXT_W( w1c1f, w2023, w2427, w282b ); FOUR_ROUNDS( vu_add( w2c2f, vu_ld( fd_sha256_K+44UL ) ) );
     546   122801199 :     vu_t w3033 = NEXT_W( w2023, w2427, w282b, w2c2f ); FOUR_ROUNDS( vu_add( w3033, vu_ld( fd_sha256_K+48UL ) ) );
     547   122801199 :     vu_t w3437 = NEXT_W( w2427, w282b, w2c2f, w3033 ); FOUR_ROUNDS( vu_add( w3437, vu_ld( fd_sha256_K+52UL ) ) );
     548   122801199 :     vu_t w383b = NEXT_W( w282b, w2c2f, w3033, w3437 ); FOUR_ROUNDS( vu_add( w383b, vu_ld( fd_sha256_K+56UL ) ) );
     549   122801199 :     vu_t w3c3f = NEXT_W( w2c2f, w3033, w3437, w383b ); FOUR_ROUNDS( vu_add( w3c3f, vu_ld( fd_sha256_K+60UL ) ) );
     550             : 
     551   122801199 :     stateFEBA = vu_add( stateFEBA, initialFEBA );
     552   122801199 :     stateHGDC = vu_add( stateHGDC, initialHGDC );
     553             : 
     554   122801199 :     vu_t stateABCD = vu_permute2( stateFEBA, stateHGDC, 3, 2, 3, 2 );
     555   122801199 :     vu_t stateEFGH = vu_permute2( stateFEBA, stateHGDC, 1, 0, 1, 0 );
     556             : 
     557   122801199 :     w0003 = stateABCD;
     558   122801199 :     w0407 = stateEFGH;
     559   122801199 :   }
     560      101033 :   vu_stu( hash,      vu_bswap( w0003 ) );
     561      101033 :   vu_stu( hash+16UL, vu_bswap( w0407 ) );
     562      101033 : #undef FOUR_ROUNDS /* helper macro invoked by the rounds loop above; undefine so it does not leak */
     563      101033 : #undef NEXT_W      /* message-schedule helper macro used by the same loop */
     564             : 
     565             : #else
     566             : 
     567      202066 :   uchar buf[ FD_SHA256_PRIVATE_BUF_MAX ] __attribute__((aligned(128)));
     568             : 
     569             :   /* Prepare padding once */
     570      202066 :   ulong buf_used = 32UL;
     571      202066 :   memcpy( buf, data, 32UL );
     572      202066 :   buf[ buf_used ] = (uchar)0x80;
     573      202066 :   buf_used++;
     574             : 
     575      202066 :   ulong bit_cnt = 32UL << 3;
     576      202066 :   memset( buf + buf_used, 0, FD_SHA256_PRIVATE_BUF_MAX-8UL-buf_used );
     577      202066 :   FD_STORE( ulong, buf+FD_SHA256_PRIVATE_BUF_MAX-8UL, fd_ulong_bswap( bit_cnt ) );
     578             : 
     579             :   /* This is just the above streamlined to eliminate all the overheads
     580             :      to support incremental hashing. */
     581   245804464 :   for( ulong iter=0UL; iter<cnt; iter++ ) {
     582             : 
     583   245602398 :     uint  state[8] __attribute__((aligned(32)));
     584             : 
     585   245602398 :     state[0] = FD_SHA256_INITIAL_A;
     586   245602398 :     state[1] = FD_SHA256_INITIAL_B;
     587   245602398 :     state[2] = FD_SHA256_INITIAL_C;
     588   245602398 :     state[3] = FD_SHA256_INITIAL_D;
     589   245602398 :     state[4] = FD_SHA256_INITIAL_E;
     590   245602398 :     state[5] = FD_SHA256_INITIAL_F;
     591   245602398 :     state[6] = FD_SHA256_INITIAL_G;
     592   245602398 :     state[7] = FD_SHA256_INITIAL_H;
     593             : 
     594   245602398 :     fd_sha256_core( state, buf, 1UL );
     595             : 
     596   245602398 :     state[0] = fd_uint_bswap( state[0] );
     597   245602398 :     state[1] = fd_uint_bswap( state[1] );
     598   245602398 :     state[2] = fd_uint_bswap( state[2] );
     599   245602398 :     state[3] = fd_uint_bswap( state[3] );
     600   245602398 :     state[4] = fd_uint_bswap( state[4] );
     601   245602398 :     state[5] = fd_uint_bswap( state[5] );
     602   245602398 :     state[6] = fd_uint_bswap( state[6] );
     603   245602398 :     state[7] = fd_uint_bswap( state[7] );
     604   245602398 :     memcpy( buf, state, 32UL );
     605   245602398 :   }
     606      202066 :   memcpy( hash, buf, 32UL );
     607      202066 : #endif
     608      303099 :   return _hash;
     609      303099 : }
     610             : 
     611             : #undef fd_sha256_core /* stop exposing the fd_sha256_core name past this point (presumably a macro alias defined earlier in this file; definition not visible here -- confirm) */

Generated by: LCOV version 1.14