LCOV - code coverage report
Current view: top level - ballet/reedsol - fd_reedsol_arith_avx2.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 25 25 100.0 %
Date: 2025-01-08 12:08:44 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h
       2             : #define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h
       3             : 
       4             : #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h
       5             : #error "Do not include this file directly; use fd_reedsol_private.h"
       6             : #endif
       7             : 
       8             : #include "../../util/simd/fd_avx.h"
       9             : 
      10             : typedef wb_t gf_t; /* vector type that the GF_* macros in this header take and produce (alias of wb_t) */
      11             : 
      12    94846728 : #define GF_WIDTH W_FOOTPRINT /* alias of W_FOOTPRINT; presumably the byte size of one gf_t - confirm in fd_avx.h */
      13             : 
      14             : FD_PROTOTYPES_BEGIN
      15             : 
      16  1072465238 : #define gf_ldu  wb_ldu /* gf_t load; forwards to wb_ldu (presumably the unaligned load - confirm in fd_avx.h) */
      17  1091620388 : #define gf_stu  wb_stu /* gf_t store; forwards to wb_stu (presumably the unaligned store - confirm in fd_avx.h) */
      18   684090164 : #define gf_zero wb_zero /* forwards to wb_zero; GF_MUL uses wb_zero() as its result when c==0 */
      19             : 
      20             : extern uchar const fd_reedsol_arith_consts_avx_mul[]  __attribute__((aligned(128))); /* lookup table defined in another translation unit; GF_MUL and GF_MUL_VAR load from it at byte offsets 32*c and 32*fd_reedsol_arith_scale4[c] */
      21             : 
      22             : /* TODO: Defining this table as static in a header is wonky linkage
      23             :    (maybe use FD_FN_UNUSED) if this include gets used more generally.
      24             :    The table below currently needs to be available at compile time,
      25             :    not link time, to allow the optimizer to use it. */
      26             : 
      27             : static uchar const fd_reedsol_arith_scale4[ 256UL ] = { /* indexed by the multiplier c; picks the table row used for the high-nibble lookup in GF_MUL / GF_MUL_VAR.  Entries are consistent with 16*c in GF(2^8) reduced by 0x11D (e.g. [1]==16, [16]==29) */
      28             :     0,  16,  32,  48,  64,  80,  96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
      29             :    29,  13,  61,  45,  93,  77, 125, 109, 157, 141, 189, 173, 221, 205, 253, 237,
      30             :    58,  42,  26,  10, 122, 106,  90,  74, 186, 170, 154, 138, 250, 234, 218, 202,
      31             :    39,  55,   7,  23, 103, 119,  71,  87, 167, 183, 135, 151, 231, 247, 199, 215,
      32             :   116, 100,  84,  68,  52,  36,  20,   4, 244, 228, 212, 196, 180, 164, 148, 132,
      33             :   105, 121,  73,  89,  41,  57,   9,  25, 233, 249, 201, 217, 169, 185, 137, 153,
      34             :    78,  94, 110, 126,  14,  30,  46,  62, 206, 222, 238, 254, 142, 158, 174, 190,
      35             :    83,  67, 115,  99,  19,   3,  51,  35, 211, 195, 243, 227, 147, 131, 179, 163,
      36             :   232, 248, 200, 216, 168, 184, 136, 152, 104, 120,  72,  88,  40,  56,   8,  24,
      37             :   245, 229, 213, 197, 181, 165, 149, 133, 117, 101,  85,  69,  53,  37,  21,   5,
      38             :   210, 194, 242, 226, 146, 130, 178, 162,  82,  66, 114,  98,  18,   2,  50,  34,
      39             :   207, 223, 239, 255, 143, 159, 175, 191,  79,  95, 111, 127,  15,  31,  47,  63,
      40             :   156, 140, 188, 172, 220, 204, 252, 236,  28,  12,  60,  44,  92,  76, 124, 108,
      41             :   129, 145, 161, 177, 193, 209, 225, 241,   1,  17,  33,  49,  65,  81,  97, 113,
      42             :   166, 182, 134, 150, 230, 246, 198, 214,  38,  54,   6,  22, 102, 118,  70,  86,
      43             :   187, 171, 155, 139, 251, 235, 219, 203,  59,  43,  27,  11, 123, 107,  91,  75
      44             : };
      45             : 
      46 12378988448 : #define GF_ADD wb_xor /* addition on gf_t vectors is XOR; forwards to wb_xor */
      47             : 
      48        1014 : #define GF_OR  wb_or /* OR of gf_t vectors; forwards to wb_or */
      49             : 
      50   328395396 : #define GF_MUL( a, c ) (__extension__({ /* a: gf_t vector; c: compile-time int in [0,255]; yields a gf_t */             \
      51   328395396 :     wb_t _a  = (a); /* evaluate a exactly once */                                                                       \
      52   328395396 :     int  _c  = (c); /* evaluate c exactly once */                                                                       \
      53   328395396 :     wb_t _lo = wb_and( _a, wb_bcast( 0x0F ) ); /* AND with broadcast 0x0F: presumably low nibble of each byte */        \
      54   328395396 :     wb_t _hi = wb_shr( _a, 4 ); /* shift right by 4: presumably high nibble of each byte */                             \
      55   328395396 :     wb_t _p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*_c                            ), _lo ); \
      56   328395396 :     wb_t _p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ _c ] ), _hi ); \
      57   328395396 :     /* c is known at compile time, so this is not a runtime branch; general case XORs the two nibble lookups */         \
      58   328395396 :     (_c==0) ? wb_zero() : ( (_c==1) ? _a : wb_xor( _p0, _p1 ) );                                                        \
      59   328395396 :   }))
      60             : 
      61      192000 : #define GF_MUL_VAR( a, c ) (__extension__({ /* as GF_MUL but with no c==0 / c==1 shortcut; c must be in [0,255] */      \
      62      192000 :     wb_t _a  = (a); /* evaluate a exactly once */                                                                       \
      63      192000 :     int  _c  = (c); /* evaluate c exactly once */                                                                       \
      64      192000 :     wb_t _lo = wb_and( _a, wb_bcast( 0x0F ) ); /* AND with broadcast 0x0F: presumably low nibble of each byte */        \
      65      192000 :     wb_t _hi = wb_shr( _a, 4 ); /* shift right by 4: presumably high nibble of each byte */                             \
      66      192000 :     wb_t _p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*_c                            ), _lo ); \
      67      192000 :     wb_t _p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ _c ] ), _hi ); \
      68      192000 :     wb_xor( _p0, _p1 );                                                                                                 \
      69      192000 :   }))
      70             : 
      71             : #define GF_ANY( x ) (0 != _mm256_movemask_epi8( wb_ne( (x), wb_zero() ) )) /* 1 if the byte mask of wb_ne( x, zero ) is nonzero, else 0; presumably "any byte of x is nonzero" - confirm wb_ne in fd_avx.h */
      72             : 
      73             : FD_PROTOTYPES_END
      74             : 
      75             : #endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h */

Generated by: LCOV version 1.14