LCOV - code coverage report
Current view: top level - ballet/reedsol - fd_reedsol_arith_gfni.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 14 14 100.0 %
Date: 2024-11-13 11:58:15 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
       2             : #define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
       3             : 
       4             : #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h
       5             : #error "Do not include this file directly; use fd_reedsol_private.h"
       6             : #endif
       7             : 
       8             : #include "../../util/simd/fd_avx.h"
       9             : 
      10             : typedef wb_t gf_t;
      11             : 
      12    10498500 : #define GF_WIDTH W_FOOTPRINT
      13             : 
      14             : FD_PROTOTYPES_BEGIN
      15             : 
      16   142367403 : #define gf_ldu  wb_ldu
      17   151944978 : #define gf_stu  wb_stu
      18   145112474 : #define gf_zero wb_zero
      19             : 
      20             : extern uchar const fd_reedsol_arith_consts_gfni_mul[]  __attribute__((aligned(128)));
      21             : 
      22  2250842064 : #define GF_ADD wb_xor
      23             : 
      24         507 : #define GF_OR  wb_or
      25             : 
      26             : /* Older versions of GCC have a bug that causes them to treat
      27             :    _mm256_gf2p8affine_epi64_epi8 as symmetric in its first two
      28             :    arguments (except that the second argument can be a memory
      29             :    address).  That's totally incorrect.  It was fixed in GCC 10.  See
      30             :    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92889 for more details. */
      31             : 
      32             : #if !FD_USING_CLANG
      33             : #define GCC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
      34             : #endif
      35             : 
      36             : #if FD_USING_CLANG || (GCC_VERSION >= 100000)
      37             : 
      38   164197698 : #define GF_MUL( a, c ) (__extension__({                                                            \
      39   164197698 :     wb_t _a = (a);                                                                                 \
      40   164197698 :     int  _c = (c);                                                                                 \
      41   164197698 :     /* c is known at compile time, so this is not a runtime branch */                              \
      42   164197698 :     ((_c==0) ? wb_zero() : ((_c==1) ? _a :                                                         \
      43   164197698 :      _mm256_gf2p8affine_epi64_epi8( _a, wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*_c ), 0 ) )); \
      44   164197698 :   }))
      45             : 
      46       96000 : #define GF_MUL_VAR( a, c ) (_mm256_gf2p8affine_epi64_epi8( (a), wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 ))
      47             : 
      48             : #else
      49             : 
      50             : #define GF_MUL( a, c ) (__extension__({                                      \
      51             :     wb_t _a = (a);                                                           \
      52             :     int  _c = (c);                                                           \
      53             :     wb_t _product;                                                           \
      54             :     __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]"                  \
      55             :            : [out]"=x"  (_product)                                           \
      56             :            : [cons]"xm" (wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*_c )), \
      57             :              [vec]"x"   (_a) );                                              \
      58             :     /* c is known at compile time, so this is not a runtime branch */        \
      59             :     (_c==0) ? wb_zero() : ( (_c==1) ? (_a) : _product );                     \
      60             :   }))
      61             : 
      62             : #define GF_MUL_VAR( a, c ) (__extension__({                                   \
      63             :     wb_t _product;                                                            \
      64             :     __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]"                   \
      65             :            : [out]"=x"  (_product)                                            \
      66             :            : [cons]"xm" (wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) )), \
      67             :              [vec]"x"   (a) );                                                \
      68             :     (_product);                                                               \
      69             :   }))
      70             : 
      71             : #endif
      72             : 
      73             : #define GF_ANY( x ) (0 != _mm256_movemask_epi8( wb_ne( (x), wb_zero() ) ))
      74             : 
      75             : FD_PROTOTYPES_END
      76             : 
      77             : #endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h */

Generated by: LCOV version 1.14