Line data Source code
1 : #ifndef HEADER_fd_src_util_simd_fd_avx512_h 2 : #error "Do not include this directly; use fd_avx512.h" 3 : #endif 4 : 5 : /* Vector ushort API **************************************************/ 6 : 7 : /* A wwh_t is a vector where each 16-bit wide lane holds an unsigned 8 : 16-bit integer (a "ushort"). 9 : 10 : These mirror the other APIs as much as possible. Macros are 11 : preferred over static inlines when it is possible to do it robustly 12 : to reduce the risk of the compiler mucking it up. */ 13 : 14 : #define wwh_t __m512i 15 : 16 : /* Constructors */ 17 : 18 : /* Given the ushort values, return ... */ 19 : 20 144303745 : #define wwh_bcast(b0) _mm512_set1_epi16( (ushort)(b0) ) /* [ b0 b0 ... b0 ] */ 21 : 22 : 23 : /* Predefined constants */ 24 : 25 : #define wwh_zero() _mm512_setzero_si512() /* wwh(0, 0, ... 0) */ 26 : #define wwh_one() _mm512_set1_epi32( 1 ) /* wwh(1, 1, ... 1) */ 27 : 28 : /* Memory operations */ 29 : /* Note: wwh_{ld,st} assume m is 64-byte aligned while wwh_{ldu,stu} 30 : allow m to have arbitrary alignment */ 31 : 32 0 : static inline wwh_t wwh_ld( ushort const * m ) { return _mm512_load_epi32( m ); } /* wwh( m[0], m[1], ... m[15] ) */ 33 11816192 : static inline void wwh_st( ushort * m, wwh_t x ) { _mm512_store_epi32( m, x ); } /* does m[0] = x0, m[1] = x1, ... m[15] = xf */ 34 : 35 0 : static inline wwh_t wwh_ldu( void const * m ) { return _mm512_loadu_epi32( m ); } /* wwh( m[0], m[1], ... m[15]) */ 36 0 : static inline void wwh_stu( void * m, wwh_t x ) { _mm512_storeu_epi32( m, x ); } /* does m[0] = x0, m[1] = x1, ... m[15] = xf */ 37 : 38 : /* Arithmetic operations */ 39 : 40 177242880 : #define wwh_add(x,y) _mm512_add_epi16( (x), (y) ) /* wwh( x0+y0, x1+y1, ... xf+y31 ) */ 41 : #define wwh_sub(x,y) _mm512_sub_epi16( (x), (y) ) /* wwh( x0-y0, x1-y1, ... xf-y31 ) */ 42 : 43 : 44 : /* Bit operations */ 45 : 46 : #define wwh_shl(a,imm) _mm512_slli_epi16( (a), (imm) ) /* [ a0<<imm a1<<imm ... a31<<imm ] */ 47 : #define wwh_shr(a,imm) _mm512_srli_epi16( (a), (imm) ) /* [ a0>>imm a1>>imm ... a31>>imm ] */