Line data Source code
/* Annotated listing of the GFNI arithmetic header used by fd_reedsol.
   Row layout follows the table header: source line number, line data
   (blank where none was recorded), then " : " and the source text.
   Lines that are not rows (such as this one) are reviewer comments. */
 1            : #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
 2            : #define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
 3            :
/* Direct-include guard: compilation stops with #error unless
   HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h is already defined,
   i.e. unless this header is reached through fd_reedsol_private.h. */
 4            : #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h
 5            : #error "Do not include this file directly; use fd_reedsol_private.h"
 6            : #endif
 7            :
 8            : #include "../../util/simd/fd_avx.h"
 9            :
/* gf_t is an alias for wb_t and GF_WIDTH expands to W_FOOTPRINT.
   NOTE(review): wb_t is presumably the 32-byte AVX byte vector from
   fd_avx.h (the code below uses 256-bit intrinsics and a 32-byte table
   stride) -- confirm against fd_avx.h. */
10            : typedef wb_t gf_t;
11            :
12   10498500 : #define GF_WIDTH W_FOOTPRINT
13            :
14            : FD_PROTOTYPES_BEGIN
15            :
/* Load / store / zero are plain renames of the wb_ldu, wb_stu and
   wb_zero helpers. */
16  142367403 : #define gf_ldu wb_ldu
17  151944978 : #define gf_stu wb_stu
18  145112474 : #define gf_zero wb_zero
19            :
/* Constant table, declared here and defined elsewhere.  GF_MUL and
   GF_MUL_VAR read the entry for a constant c at byte offset 32*c. */
20            : extern uchar const fd_reedsol_arith_consts_gfni_mul[] __attribute__((aligned(128)));
21            :
/* GF_ADD is a rename of wb_xor; GF_OR is a rename of wb_or. */
22 2250842064 : #define GF_ADD wb_xor
23            :
24        507 : #define GF_OR wb_or
25            :
26            : /* Older versions of GCC have a bug that causes them to think
27            :    _mm256_gf2p8affine_epi64_epi8 is symmetric in the first two
28            :    arguments (other than that the second argument can be a memory
29            :    address). That's totally incorrect. It was fixed in GCC 10. See
30            :    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92889 for more details. */
31            :
/* GCC_VERSION packs major/minor/patchlevel as major*10000 + minor*100 +
   patchlevel, so 100000 corresponds to 10.0.0.  It is only defined when
   FD_USING_CLANG is zero; the #if on row 36 tests FD_USING_CLANG first. */
32            : #if !FD_USING_CLANG
33            : #define GCC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
34            : #endif
35            :
36            : #if FD_USING_CLANG || (GCC_VERSION >= 100000)
37            :
/* Intrinsic variant.
   GF_MUL( a, c ): copies a and c into locals (each argument is expanded
   once) and yields
     - wb_zero()  when c==0,
     - a itself   when c==1,
     - otherwise _mm256_gf2p8affine_epi64_epi8 applied to a, with the
       table entry loaded by wb_ld at offset 32*c as the second argument
       and 0 as the immediate. */
38  164197698 : #define GF_MUL( a, c ) (__extension__({ \
39  164197698 :       wb_t _a = (a); \
40  164197698 :       int _c = (c); \
41  164197698 :       /* c is known at compile time, so this is not a runtime branch */ \
42  164197698 :       ((_c==0) ? wb_zero() : ((_c==1) ? _a : \
43  164197698 :         _mm256_gf2p8affine_epi64_epi8( _a, wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*_c ), 0 ) )); \
44  164197698 :     }))
45            :
/* GF_MUL_VAR( a, c ): same table lookup and affine intrinsic as GF_MUL,
   but with no special cases for c==0 or c==1; a and c each appear once
   in the expansion. */
46      96000 : #define GF_MUL_VAR( a, c ) (_mm256_gf2p8affine_epi64_epi8( (a), wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 ))
47            :
48            : #else
49            :
/* Fallback variant for the compilers excluded by row 36.  Instead of the
   intrinsic, the vgf2p8affineqb instruction is emitted through inline
   asm, so the template spells out the operand order explicitly
   (immediate, table entry, input vector, output).  The table entry may
   be a register or memory operand ("xm"); the input and output are
   register operands ("x").
   In GF_MUL the asm statement is written before the c==0 / c==1
   selection; _product is only used as the result in the remaining
   case. */
50            : #define GF_MUL( a, c ) (__extension__({ \
51            :       wb_t _a = (a); \
52            :       int _c = (c); \
53            :       wb_t _product; \
54            :       __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]" \
55            :           : [out]"=x" (_product) \
56            :           : [cons]"xm" (wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*_c )), \
57            :             [vec]"x" (_a) ); \
58            :       /* c is known at compile time, so this is not a runtime branch */ \
59            :       (_c==0) ? wb_zero() : ( (_c==1) ? (_a) : _product ); \
60            :     }))
61            :
/* Fallback GF_MUL_VAR: same asm as above, always yielding _product (no
   special cases for c==0 or c==1). */
62            : #define GF_MUL_VAR( a, c ) (__extension__({ \
63            :       wb_t _product; \
64            :       __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]" \
65            :           : [out]"=x" (_product) \
66            :           : [cons]"xm" (wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) )), \
67            :             [vec]"x" (a) ); \
68            :       (_product); \
69            :     }))
70            :
71            : #endif
72            :
/* GF_ANY( x ): nonzero exactly when _mm256_movemask_epi8 of
   wb_ne( x, wb_zero() ) is nonzero.
   NOTE(review): presumably this means "some byte of x is nonzero";
   wb_ne's exact semantics live in fd_avx.h -- confirm there. */
73            : #define GF_ANY( x ) (0 != _mm256_movemask_epi8( wb_ne( (x), wb_zero() ) ))
74            :
75            : FD_PROTOTYPES_END
76            :
77            : #endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h */