Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 40 additions & 9 deletions include/xsimd/arch/xsimd_vsx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,37 @@ namespace xsimd

namespace kernel
{
// builtin_scalar<T> / builtin_t<T> - map a scalar type onto the scalar
// type that VSX vector intrinsics accept.
// The VSX intrinsics do not support long, unsigned long, or plain char:
// those map to long long, unsigned long long, and explicitly-signed
// char respectively; every other scalar type maps to itself.
template <typename T>
struct builtin_scalar
{
    using type = T;
};

template <>
struct builtin_scalar<char>
{
    // Plain char keeps the platform's signedness, but with an explicit sign.
    using type = std::conditional_t<std::is_signed<char>::value, signed char, unsigned char>;
};

template <>
struct builtin_scalar<long>
{
    using type = long long;
};

template <>
struct builtin_scalar<unsigned long>
{
    using type = unsigned long long;
};

// Convenience alias used by the VSX kernels below.
template <typename T>
using builtin_t = typename builtin_scalar<T>::type;

template <class A, class T>
XSIMD_INLINE batch<T, A> avg(batch<T, A> const&, batch<T, A> const&, requires_arch<common>) noexcept;
template <class A, class T>
Expand Down Expand Up @@ -218,7 +249,7 @@ namespace xsimd
template <class A, class T, class = std::enable_if_t<std::is_scalar<T>::value>>
XSIMD_INLINE batch<T, A> broadcast(T val, requires_arch<vsx>) noexcept
{
return vec_splats(val);
return vec_splats(static_cast<builtin_t<T>>(val));
}

// ceil
Expand Down Expand Up @@ -421,18 +452,18 @@ namespace xsimd
return ~vec_cmpeq(self.data, self.data);
}

// load_aligned
// load_unaligned
template <class A, class T, class = std::enable_if_t<std::is_scalar<T>::value>>
XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<vsx>) noexcept
XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<vsx>) noexcept
{
return vec_ld(0, reinterpret_cast<const typename batch<T, A>::register_type*>(mem));
return (typename batch<T, A>::register_type)vec_xl(0, (builtin_t<T>*)mem);
}

// load_unaligned
// load_aligned
template <class A, class T, class = std::enable_if_t<std::is_scalar<T>::value>>
XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<vsx>) noexcept
XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<vsx>) noexcept
{
return vec_vsx_ld(0, (typename batch<T, A>::register_type const*)mem);
return load_unaligned<A>(mem, kernel::convert<T> {}, vsx {});
}

// load_complex
Expand Down Expand Up @@ -758,14 +789,14 @@ namespace xsimd
template <class A, class T, class = std::enable_if_t<std::is_scalar<T>::value>>
XSIMD_INLINE void store_aligned(T* mem, batch<T, A> const& self, requires_arch<vsx>) noexcept
{
return vec_st(self.data, 0, reinterpret_cast<typename batch<T, A>::register_type*>(mem));
vec_xst((typename batch<T, A>::register_type)self.data, 0, (builtin_t<T>*)mem);
}

// store_unaligned
template <class A, class T, class = std::enable_if_t<std::is_scalar<T>::value>>
XSIMD_INLINE void store_unaligned(T* mem, batch<T, A> const& self, requires_arch<vsx>) noexcept
{
return vec_vsx_st(self.data, 0, reinterpret_cast<typename batch<T, A>::register_type*>(mem));
store_aligned<A>(mem, self, vsx {});
}

// sub
Expand Down
33 changes: 20 additions & 13 deletions include/xsimd/types/xsimd_vsx_register.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace xsimd
namespace types
{

#define XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(T, Tb) \
#define XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(T, Tv, Tb) \
template <> \
struct get_bool_simd_register<T, vsx> \
{ \
Expand All @@ -55,19 +55,26 @@ namespace xsimd
operator register_type() const noexcept { return data; } \
}; \
}; \
XSIMD_DECLARE_SIMD_REGISTER(T, vsx, __vector T)
XSIMD_DECLARE_SIMD_REGISTER(T, vsx, __vector Tv)

XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(signed char, char);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned char, char);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(char, char);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned short, short);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(short, short);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned int, int);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(int, int);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned long, long);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(long, long);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(float, int);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(double, long);
// The VSX vector intrinsics do not support the long, unsigned long,
// and char data types. Batches of these types are represented as
// vectors of equivalent supported types.
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(signed char, signed char, char);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned char, unsigned char, char);
#ifdef __CHAR_UNSIGNED__
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(char, unsigned char, char);
#else
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(char, signed char, char);
#endif
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned short, unsigned short, short);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(short, short, short);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned int, unsigned int, int);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(int, int, int);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(unsigned long, unsigned long long, long long);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(long, long long, long long);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(float, float, int);
XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER(double, double, long long);

#undef XSIMD_DECLARE_SIMD_BOOL_VSX_REGISTER
}
Expand Down
Loading