LCOV - code coverage report
Current view: top level - ballet/chacha - fd_chacha_sse.c (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 52 52 100.0 %
Date: 2025-09-19 04:41:14 Functions: 3 3 100.0 %

          Line data    Source code
       1             : #include "fd_chacha.h"
       2             : #include "../../util/simd/fd_sse.h"
       3             : 
       4             : 
       5             : static void *
       6             : fd_chacha_block_sse( void *       _block,
       7             :                      void const * _key,
       8             :                      void const * _idx_nonce,
       9    39599997 :                      ulong        rnd2_cnt ) {
      10             : 
      11    39599997 :   uint *       block     = __builtin_assume_aligned( _block,     64UL );
      12    39599997 :   uint const * key       = __builtin_assume_aligned( _key,       32UL );
      13    39599997 :   uint const * idx_nonce = __builtin_assume_aligned( _idx_nonce, 16UL );
      14             : 
      15             :   /* Construct the input ChaCha20 block state as the following
      16             :      matrix of little endian uint entries:
      17             : 
      18             :      cccccccc  cccccccc  cccccccc  cccccccc
      19             :      kkkkkkkk  kkkkkkkk  kkkkkkkk  kkkkkkkk
      20             :      kkkkkkkk  kkkkkkkk  kkkkkkkk  kkkkkkkk
      21             :      bbbbbbbb  nnnnnnnn  nnnnnnnn  nnnnnnnn
      22             : 
      23             :      Where
      24             :        c are the constants 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
      25             :        k is the input key
      26             :        b is the block index
      27             :        n is the nonce */
      28             : 
      29             :   /* Remember the input state for later use */
      30    39599997 :   vu_t row0_init = vu( 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U );
      31    39599997 :   vu_t row1_init = vu_ld( key       );
      32    39599997 :   vu_t row2_init = vu_ld( key+4     );
      33    39599997 :   vu_t row3_init = vu_ld( idx_nonce );
      34             : 
      35    39599997 :   vu_t row0 = row0_init;
      36    39599997 :   vu_t row1 = row1_init;
      37    39599997 :   vu_t row2 = row2_init;
      38    39599997 :   vu_t row3 = row3_init;
      39             : 
      40             :   /* Rotations by 16 and 8 bits are done as byte shuffles: these are a
      41             :      bit faster, and they're on the critical path, so this matters. */
      42   752399976 : #define ROTATE_LEFT_16( x ) _mm_shuffle_epi8( (x), vb( 2,3,0,1, 6,7,4,5, 10,11,8,9,  14,15,12,13 ) )
      43   752399976 : #define ROTATE_LEFT_08( x ) _mm_shuffle_epi8( (x), vb( 3,0,1,2, 7,4,5,6, 11,8,9,10,  15,12,13,14 ) )
      44   752399976 : #define ROTATE_LEFT_12( x ) vu_rol( (x), 12 )
      45   752399976 : #define ROTATE_LEFT_07( x ) vu_rol( (x),  7 )
      46             : 
      47             :   /* Run rnd2_cnt ChaCha double rounds (4 for ChaCha8, 10 for ChaCha20).
      48             :      (Each iteration does a column round and a diagonal round.) */
      49   415799985 :   for( ulong i=0UL; i<rnd2_cnt; i++ ) {
      50             :     /* Column round */
      51   376199988 :     row0 = vu_add( row0, row1 ); row3 = vu_xor( row3, row0 ); row3 = ROTATE_LEFT_16( row3 );
      52   376199988 :     row2 = vu_add( row2, row3 ); row1 = vu_xor( row1, row2 ); row1 = ROTATE_LEFT_12( row1 );
      53   376199988 :     row0 = vu_add( row0, row1 ); row3 = vu_xor( row3, row0 ); row3 = ROTATE_LEFT_08( row3 );
      54   376199988 :     row2 = vu_add( row2, row3 ); row1 = vu_xor( row1, row2 ); row1 = ROTATE_LEFT_07( row1 );
      55             : 
      56   376199988 :     row1 = _mm_shuffle_epi32( row1, _MM_SHUFFLE( 0, 3, 2, 1 ) );
      57   376199988 :     row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE( 1, 0, 3, 2 ) );
      58   376199988 :     row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE( 2, 1, 0, 3 ) );
      59             : 
      60             :     /* Diagonal round */
      61   376199988 :     row0 = vu_add( row0, row1 ); row3 = vu_xor( row3, row0 ); row3 = ROTATE_LEFT_16( row3 );
      62   376199988 :     row2 = vu_add( row2, row3 ); row1 = vu_xor( row1, row2 ); row1 = ROTATE_LEFT_12( row1 );
      63   376199988 :     row0 = vu_add( row0, row1 ); row3 = vu_xor( row3, row0 ); row3 = ROTATE_LEFT_08( row3 );
      64   376199988 :     row2 = vu_add( row2, row3 ); row1 = vu_xor( row1, row2 ); row1 = ROTATE_LEFT_07( row1 );
      65             : 
      66   376199988 :     row1 = _mm_shuffle_epi32( row1, _MM_SHUFFLE( 2, 1, 0, 3 ) );
      67   376199988 :     row2 = _mm_shuffle_epi32( row2, _MM_SHUFFLE( 1, 0, 3, 2 ) );
      68   376199988 :     row3 = _mm_shuffle_epi32( row3, _MM_SHUFFLE( 0, 3, 2, 1 ) );
      69   376199988 :   }
      70    39599997 : #undef ROTATE_LEFT_07
      71    39599997 : #undef ROTATE_LEFT_12
      72    39599997 : #undef ROTATE_LEFT_08
      73    39599997 : #undef ROTATE_LEFT_16
      74             : 
      75             : 
      76             :   /* Complete the block by adding the input state */
      77    39599997 :   row0 = vu_add( row0, row0_init );
      78    39599997 :   row1 = vu_add( row1, row1_init );
      79    39599997 :   row2 = vu_add( row2, row2_init );
      80    39599997 :   row3 = vu_add( row3, row3_init );
      81             : 
      82    39599997 :   vu_st( block,    row0 );
      83    39599997 :   vu_st( block+ 4, row1 );
      84    39599997 :   vu_st( block+ 8, row2 );
      85    39599997 :   vu_st( block+12, row3 );
      86             : 
      87    39599997 :   return _block;
      88    39599997 : }
      89             : 
      90             : void *
      91             : fd_chacha8_block( void *       _block,
      92             :                   void const * _key,
      93     3299997 :                   void const * _idx_nonce ) {
      94     3299997 :   return fd_chacha_block_sse( _block, _key, _idx_nonce, 4UL );
      95     3299997 : }
      96             : 
      97             : void *
      98             : fd_chacha20_block( void *       _block,
      99             :                    void const * _key,
     100    36300000 :                    void const * _idx_nonce ) {
     101    36300000 :   return fd_chacha_block_sse( _block, _key, _idx_nonce, 10UL );
     102    36300000 : }

Generated by: LCOV version 1.14