LCOV - code coverage report
Current view: top level - util/simd - fd_avx.h (source / functions) Hit Total Coverage
Test: cov.lcov Lines: 1 1 100.0 %
Date: 2025-01-08 12:08:44 Functions: 0 0 -

          Line data    Source code
       1             : #ifndef HEADER_fd_src_util_simd_fd_avx_h
       2             : #define HEADER_fd_src_util_simd_fd_avx_h
       3             : 
       4             : #if FD_HAS_AVX
       5             : 
       6             : /* An API for writing vectorized C/C++ code using 8-wide 32-bit ints,
       7             :    8-wide 32-bit uints, 8-wide 32-bit floats, 4-wide 64-bit doubles,
       8             :    4-wide 64-bit longs, 4-wide 64-bit ulongs and 8- or 4-wide logicals
       9             :    assuming a platform with AVX support.
      10             : 
      11             :    Essentially, all the usual C/C++ operations you can do on an int,
      12             :    uint, float, double, long, ulong or logical have a fast O(1)
      13             :    vectorized equivalent here.  Most operations boil down to a single
      14             :    assembly instruction in most cases and the macros are robust.
      15             : 
      16             :    Further operations commonly used to transition from scalar/vector to
      17             :    vector/scalar code, to do cross-lane data motion, etc. are also
      18             :    provided to make it much easier to convert scalar implementations
      19             :    into highly optimized vectorized implementations.
      20             : 
      21             :    That is, this is a thin wrapper around Intel's AVX intrinsics to give
      22             :    it a sane type system and robust semantics for writing mixed type and
      23             :    mixed width vectorized code (including branching).  This includes a
      24             :    lot of non-obvious tricks, fixes for the ultra high density of
      25             :    irregularities in their intrinsics, implementations of missing
      26             :    intrinsics and lots of workarounds to get Intel AVX to behave sanely.
      27             : 
      28             :    A side effect is that this API also makes it easy to port code
      29             :    vectorized for AVX to non-Intel architectures.  Just make
      30             :    implementations of these wrappers for the target platform and then,
      31             :    magically, code written in terms of this API has been ported.  (This
      32             :    is similar to how CUDA works under the hood.  Developers don't write
      33             :    GPU code ... they write CUDA code that is then adapted for the target
      34             :    architecture by the CUDA tooling at compile- or run-time.)
      35             : 
      36             :    Much like the fd_util_base.h primitive types, APIs in here generally
      37             :    aren't prefixed with fd_ given how aggressively they get used in
      38             :    writing compute intensive code.  This is unlikely to matter
      39             :    practically given this API is both optional and limited to particular
      40             :    build targets (i.e. namespace collisions are highly unlikely to occur
      41             :    accidentally). */
      42             : 
      43             : #include "../bits/fd_bits.h"
      44             : #include <x86intrin.h> /* Include the intrinsics we are going to patch up */
      45             : 
      46             : /* Some useful constants */
      47             : 
      48             : #define W_WIDTH         (8) /* Vector width, i.e. element count / number of lanes, for 32-bit elements */
      49   105345228 : #define W_FOOTPRINT    (32) /* Vector size in bytes */
      50             : #define W_ALIGN        (32) /* Byte alignment required for aligned vector operations */
      51             : #define W_LG_WIDTH      (3) /* log_2 W_WIDTH     (2^3 ==  8) */
      52             : #define W_LG_FOOTPRINT  (5) /* log_2 W_FOOTPRINT (2^5 == 32) */
      53             : #define W_LG_ALIGN      (5) /* log_2 W_ALIGN     (2^5 == 32) */
      54             : #define W_ATTR         __attribute__((aligned(W_ALIGN))) /* Attribute requesting W_ALIGN byte alignment for the annotated declaration */
      55             : 
      56             : /* Include all the APIs */
      57             : 
      58             : #include "fd_avx_wc.h" /* Vector conditional support */
      59             : #include "fd_avx_wf.h" /* Vector float support */
      60             : #include "fd_avx_wi.h" /* Vector int support */
      61             : #include "fd_avx_wu.h" /* Vector uint support */
      62             : #include "fd_avx_wd.h" /* Vector double support */
      63             : #include "fd_avx_wl.h" /* Vector long support */
      64             : #include "fd_avx_wv.h" /* Vector ulong support */
      65             : #include "fd_avx_wb.h" /* Vector uchar (byte) support */
      66             : #include "fd_avx_ws.h" /* Vector short support */
      67             : #include "fd_avx_wh.h" /* Vector ushort support */
      68             : 
      69             : #else
      70             : #error "Build target does not support AVX wrappers"
      71             : #endif
      72             : 
      73             : #endif /* HEADER_fd_src_util_simd_fd_avx_h */
      74             : 

Generated by: LCOV version 1.14