diff --git a/include/xsimd/arch/xsimd_vsx.hpp b/include/xsimd/arch/xsimd_vsx.hpp
index 122b916c7..7af3ef807 100644
--- a/include/xsimd/arch/xsimd_vsx.hpp
+++ b/include/xsimd/arch/xsimd_vsx.hpp
@@ -33,6 +33,37 @@ namespace xsimd
 
     namespace kernel
     {
+        // builtin_t - the scalar type as it would be used for a vector intrinsic
+        // VSX vector intrinsics do not support long, unsigned long, and char
+        // The builtin definition can be used to map the incoming
+        // type to the right one to be used with the intrinsics.
+        template <class T>
+        struct builtin_scalar
+        {
+            using type = T;
+        };
+
+        template <>
+        struct builtin_scalar<unsigned long>
+        {
+            using type = unsigned long long;
+        };
+
+        template <>
+        struct builtin_scalar<long>
+        {
+            using type = long long;
+        };
+
+        template <>
+        struct builtin_scalar<char>
+        {
+            using type = typename std::conditional<std::is_signed<char>::value, signed char, unsigned char>::type;
+        };
+
+        template <class T>
+        using builtin_t = typename builtin_scalar<T>::type;
+
         template <class A, class T>
         XSIMD_INLINE batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<vsx>) noexcept;
         template <class A, class T>
@@ -218,7 +249,7 @@ namespace xsimd
         template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
         XSIMD_INLINE batch<T, A> broadcast(T val, requires_arch<vsx>) noexcept
         {
-            return vec_splats(val);
+            return vec_splats(static_cast<builtin_t<T>>(val));
         }
 
         // ceil
@@ -421,18 +452,18 @@ namespace xsimd
             return ~vec_cmpeq(self.data, self.data);
         }
 
-        // load_aligned
+        // load_unaligned
         template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
-        XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<vsx>) noexcept
+        XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<vsx>) noexcept
         {
-            return vec_ld(0, reinterpret_cast<const typename batch<T, A>::register_type*>(mem));
+            return (typename batch<T, A>::register_type)vec_xl(0, (builtin_t<T>*)mem);
         }
 
-        // load_unaligned
+        // load_aligned
         template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
-        XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<vsx>) noexcept
+        XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<vsx>) noexcept
         {
-            return vec_vsx_ld(0, (typename batch<T, A>::register_type const*)mem);
+            return load_unaligned(mem, kernel::convert<T> {}, vsx {});
         }
 
         // load_complex
@@ -758,14 +789,14 @@ namespace xsimd
         template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
         XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& self, requires_arch<vsx>) noexcept
         {
-            return vec_st(self.data, 0, reinterpret_cast<typename batch<T, A>::register_type*>(mem));
+            vec_xst((typename batch<T, A>::register_type)self.data, 0, (builtin_t<T>*)mem);
         }
 
         // store_unaligned
         template <class A, class T, class = std::enable_if_t<std::is_integral<T>::value>>
         XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<vsx>) noexcept
         {
-            return vec_vsx_st(self.data, 0, reinterpret_cast<typename batch<T, A>::register_type*>(mem));
+            store_aligned(mem, self, vsx {});
         }
 
         // sub
diff --git a/include/xsimd/types/xsimd_vsx_register.hpp b/include/xsimd/types/xsimd_vsx_register.hpp
index cfd450317..36b933902 100644
--- a/include/xsimd/types/xsimd_vsx_register.hpp
+++ b/include/xsimd/types/xsimd_vsx_register.hpp
@@ -39,7 +39,7 @@ namespace xsimd
     namespace types
     {
 
-#define XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(T, Tb) \
+#define XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(T, Tv, Tb) \
     template <> \
     struct get_bool_simd_register<T, vsx> \
     { \
@@ -55,19 +55,26 @@ namespace xsimd
             operator register_type() const noexcept { return data; } \
         }; \
     }; \
-    XSIMD_DECLARE_SIMD_REGISTER(T, vsx, __vector T)
+    XSIMD_DECLARE_SIMD_REGISTER(T, vsx, __vector Tv)
 
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(signed char, char);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned char, char);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(char, char);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned short, short);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(short, short);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned int, int);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(int, int);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned long, long);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(long, long);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(float, int);
-        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(double, long);
+        // The VSX vector intrinsics do not support long, unsigned long,
+        // and char data types. Batches of these types are vectors of
+        // equivalent types.
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(signed char, signed char, char);
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned char, unsigned char, char);
+#ifdef __CHAR_UNSIGNED__
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(char, unsigned char, char);
+#else
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(char, signed char, char);
+#endif
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned short, unsigned short, short);
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(short, short, short);
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned int, unsigned int, int);
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(int, int, int);
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned long, unsigned long long, long long);
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(long, long long, long long);
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(float, float, int);
+        XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(double, double, long long);
 
 #undef XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER
     }