diff --git a/LICENSES/XSIMD_LICENSE b/LICENSES/XSIMD_LICENSE new file mode 100644 index 0000000000000..eee7a54bc956b --- /dev/null +++ b/LICENSES/XSIMD_LICENSE @@ -0,0 +1,29 @@ +Copyright (c) 2016, Johan Mabille, Sylvain Corlay, Wolf Vollprecht and Martin Renou +Copyright (c) 2016, QuantStack +Copyright (c) 2018, Serge Guelton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/meson.build b/meson.build index d2874e85cc3a7..23d2717e237dd 100644 --- a/meson.build +++ b/meson.build @@ -10,7 +10,7 @@ project( default_options: [ 'buildtype=release', 'c_std=c17', - 'cpp_std=c++17', + 'cpp_std=c++20', 'warning_level=2', ], ) @@ -37,6 +37,7 @@ add_project_arguments( ) cc = meson.get_compiler('c') +cxx = meson.get_compiler('cpp') if cc.get_id() == 'msvc' # Tracking issue: https://github.com/pandas-dev/pandas/issues/63701 # Ignore some MSVC specific warnings: @@ -44,8 +45,9 @@ if cc.get_id() == 'msvc' # C4267: conversion from `size_t` to smaller type. # C4551: occurs due to Cython generating code with (void)func. # https://github.com/cython/cython/issues/3579 + # C4146: unary minus operator applied to unsigned type. Occurs in xsimd. add_project_arguments( - ['/wd4244', '/wd4267', '/wd4551'], + ['/wd4244', '/wd4267', '/wd4551', '/wd4146'], language: ['c', 'cpp'], ) endif diff --git a/pandas/_libs/algos.pxd b/pandas/_libs/algos.pxd index e47f7d56ba343..5cb09973983c2 100644 --- a/pandas/_libs/algos.pxd +++ b/pandas/_libs/algos.pxd @@ -1,4 +1,5 @@ cimport cython +from cython cimport size_t from libc.math cimport ( NAN, sqrt, @@ -6,6 +7,7 @@ from libc.math cimport ( from numpy cimport ( float64_t, int64_t, + uint8_t, ) from pandas._libs.dtypes cimport ( @@ -50,6 +52,22 @@ cdef inline void moments_add_value( mean[0] += delta_n +cdef extern from "pandas/moments.h": + ctypedef struct Moments: + float64_t mean + float64_t m2 + float64_t m3 + float64_t m4 + size_t n + + Moments moments_reduce( + const double *values, + size_t n, + bint skipna, + const uint8_t *mask, + int max_moment) noexcept nogil + + @cython.cdivision(True) cdef inline float64_t calc_skew( int64_t nobs, float64_t m2, float64_t m3 diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 97aa64593a3fc..0cc8ef75baab1 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -1603,9 +1603,9 @@ def diff_2d( @cython.boundscheck(False) @cython.wraparound(False) 
cdef void accumulate_moments_scalar( - const float64_t[:] values, + const float64_t[::1] values, bint skipna, - const uint8_t[:] mask, + const uint8_t[::1] mask, int64_t* nobs, float64_t* mean, float64_t* m2, @@ -1614,17 +1614,20 @@ cdef void accumulate_moments_scalar( int max_moment, ) noexcept nogil: cdef: - Py_ssize_t i, n = len(values) - bint uses_mask = mask is not None - float64_t val + Moments moments + const float64_t* values_ptr = &values[0] + const uint8_t* mask_ptr = &mask[0] if mask is not None else NULL + size_t n = values.shape[0] - for i in range(n): - val = values[i] - if uses_mask and mask[i]: - val = NaN - if skipna and isnan(val): - continue - moments_add_value(val, nobs, mean, m2, m3, m4, max_moment) + moments = moments_reduce(values_ptr, n, skipna, mask_ptr, max_moment) + if max_moment >= 4: + m4[0] = moments.m4 + if max_moment >= 3: + m3[0] = moments.m3 + + m2[0] = moments.m2 + mean[0] = moments.mean + nobs[0] = moments.n @cython.boundscheck(False) @@ -1676,9 +1679,9 @@ cdef void accumulate_moments_axis( @cython.boundscheck(False) @cython.wraparound(False) def scalar_skew( - const float64_t[:] values, + const float64_t[::1] values, bint skipna, - const uint8_t[:] mask, + const uint8_t[::1] mask, ) -> float: cdef: int64_t nobs = 0 @@ -1693,9 +1696,9 @@ def scalar_skew( @cython.boundscheck(False) @cython.wraparound(False) def scalar_kurt( - const float64_t[:] values, + const float64_t[::1] values, bint skipna, - const uint8_t[:] mask, + const uint8_t[::1] mask, ) -> float: cdef: int64_t nobs = 0 diff --git a/pandas/_libs/include/pandas/moments.h b/pandas/_libs/include/pandas/moments.h new file mode 100644 index 0000000000000..98f1f1a1e31f8 --- /dev/null +++ b/pandas/_libs/include/pandas/moments.h @@ -0,0 +1,33 @@ +/* +Copyright (c) 2026, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + +#pragma once + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + double mean; + double m2; + double m3; + double m4; + size_t n; +} Moments; + +/// Compute central moments up to `max_moment` using `n` elements of `values`. +Moments moments_reduce(const double *values, size_t n, bool skipna, + const uint8_t *mask, int max_moment); +#ifdef __cplusplus +} +#endif diff --git a/pandas/_libs/include/pandas/simd/moments_simd.hpp b/pandas/_libs/include/pandas/simd/moments_simd.hpp new file mode 100644 index 0000000000000..fbedeea0dd932 --- /dev/null +++ b/pandas/_libs/include/pandas/simd/moments_simd.hpp @@ -0,0 +1,388 @@ +/* +Copyright (c) 2026, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#pragma once + +#include "pandas/moments.h" +#include "pandas_simd_config.h" +#include "xsimd/xsimd.hpp" +#include +#include +#include +#include + +namespace pandas::moments { + +namespace detail { + +static inline void moments_add_value(Moments &moments, double val, + int max_moment) { + const auto delta = val - moments.mean; + moments.n++; + const auto n = static_cast(moments.n); + const auto delta_n = delta / n; + const auto term1 = delta * delta_n * (n - 1.0); + + if (max_moment >= 4) { + const auto m3_term = -4.0 * moments.m3; + const auto m2_term = 6.0 * moments.m2; + const auto m0_term = (n * (n - 3.0)) + 3.0; + moments.m4 += + delta_n * (m3_term + (delta_n * (m2_term + (term1 * m0_term)))); + } + if (max_moment >= 3) { + const auto m2_term = -3.0 * moments.m2; + const auto m0_term = n - 2.0; + moments.m3 += delta_n * (m2_term + (term1 * m0_term)); + } + moments.m2 += term1; + moments.mean += delta_n; +} + +template +void update_moments_batch(batch_type &mean, batch_type &m2, batch_type &m3, + batch_type &m4, batch_type &nobs, batch_type val, + typename batch_type::batch_bool_type 
nan_mask, + int max_moment) { + const batch_type zero(0.0); + const batch_type one(1.0); + const batch_type three(3.0); + + const auto nobs_increment = xsimd::select(nan_mask, zero, one); + nobs += nobs_increment; + + const auto n_nonzero = xsimd::max(nobs, one); + const auto delta = xsimd::select(nan_mask, zero, val - mean); + const auto delta_n = delta / n_nonzero; + const auto delta_n2 = delta * delta_n; + const auto term1 = delta_n2 * (nobs - one); + + if (max_moment >= 4) { + const auto m3_term = batch_type(-4.0) * m3; + const auto m2_term = batch_type(6.0) * m2; + const auto m0_term = (nobs * (nobs - three)) + three; + m4 += delta_n * (m3_term + (delta_n * (m2_term + (term1 * m0_term)))); + } + + if (max_moment >= 3) { + const auto m2_term = three * m2; + const auto m0_term = nobs - batch_type(2.0); + m3 += delta_n * ((term1 * m0_term) - m2_term); + } + + m2 += term1; + mean += delta_n; +} + +/// Merge results from moments accumulators. +/// It uses the formula for merging central moments: +/// $M_{p; N} = \sum_{k=1}^l \sum_{j=0}^p +/// \binom{p}{j} M_{p-j; k} (-\frac{\delta_k}{n})^j$ +/// where $\delta_k = \sum_{j=1}^l n_j * (\bar{x}_j - \bar{x}_k)$. 
+template +Moments merge_batches(batch_type &nobs, batch_type &mean, batch_type &m2, + batch_type &m3, batch_type &m4, int max_moment) { + constexpr std::size_t step = batch_type::size; + Moments result{}; + + const auto total_n = xsimd::reduce_add(nobs); + assert(total_n >= 0); + result.n = static_cast(total_n); + + if (result.n == 0) { + return result; + } + + const auto mean_orig = mean; + batch_type delta(0.0); + for (std::size_t i = 0; i + 1 < step; ++i) { + mean = xsimd::rotate_left<1>(mean); + nobs = xsimd::rotate_left<1>(nobs); + delta += nobs * (mean - mean_orig); + } + + // Restore mean and nobs to their original lane positions + mean = xsimd::rotate_left<1>(mean); + nobs = xsimd::rotate_left<1>(nobs); + + const batch_type total_n_v(total_n); + const auto delta_n = delta / total_n_v; + const auto delta2_n = delta_n * delta_n; + + if (max_moment >= 4) { + const auto m3_term = batch_type(-4.0) * m3; + const auto m2_term = batch_type(6.0) * m2; + const auto m4_acc = + m4 + (delta_n * (m3_term + (delta_n * (m2_term + (delta2_n * nobs))))); + result.m4 = xsimd::reduce_add(m4_acc); + } + + if (max_moment >= 3) { + const auto m2_term = batch_type(-3.0) * m2; + const auto m3_acc = m3 + (delta_n * (m2_term - (nobs * delta2_n))); + result.m3 = xsimd::reduce_add(m3_acc); + } + + const auto m2_acc = m2 + (nobs * delta2_n); + result.m2 = xsimd::reduce_add(m2_acc); + + const auto mean_v = mean + delta_n; + result.mean = mean_v.first(); + + [[maybe_unused]] constexpr double rtol = 1e-12; + [[maybe_unused]] constexpr double atol = 1e-8; + assert((xsimd::all(xsimd::isnan(mean_v)) || + xsimd::all(xsimd::abs(mean_v - result.mean) < + ((rtol * xsimd::abs(result.mean)) + atol))) && + "mean lanes aren't homogeneous after merge"); + + return result; +} + +/// https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Higher-order_statistics +static inline void moments_merge(Moments &acc, const Moments &src, + int max_moment) { + if (acc.n == 0) { + acc = src; + return; + 
} + if (src.n == 0) { + return; + } + + const auto n_a = static_cast(acc.n); + const auto n_b = static_cast(src.n); + acc.n += src.n; + + const auto delta = src.mean - acc.mean; + const auto delta_n = delta / static_cast(acc.n); + const auto term1 = delta * delta_n * n_a * n_b; + + if (max_moment >= 4) { + const auto m3_term = 4.0 * ((n_a * src.m3) - (n_b * acc.m3)); + const auto m2_term = 6.0 * ((n_a * n_a * src.m2) + (n_b * n_b * acc.m2)); + const auto m0_term = (n_a * n_a) - (n_a * n_b) + (n_b * n_b); + acc.m4 += src.m4 + + (delta_n * (m3_term + (delta_n * (m2_term + (term1 * m0_term))))); + } + + if (max_moment >= 3) { + const auto m2_term = 3.0 * ((n_a * src.m2) - (n_b * acc.m2)); + const auto m0_term = n_a - n_b; + acc.m3 += src.m3 + (delta_n * (m2_term + (term1 * m0_term))); + } + + acc.m2 += src.m2 + term1; + acc.mean += delta_n * n_b; +} + +template +static inline void set_moments_nan(batch_type &mean, batch_type &m2, + batch_type &m3, batch_type &m4, + batch_type &nobs, std::size_t total_n) { + nobs = batch_type(static_cast(total_n)); + mean = batch_type(NAN); + m2 = batch_type(NAN); + m3 = batch_type(NAN); + m4 = batch_type(NAN); +} + +template +void accumulate_moments_simd_impl(xsimd::batch &nobs, + xsimd::batch &mean, + xsimd::batch &m2, + xsimd::batch &m3, + xsimd::batch &m4, + int max_moment, + std::span values, bool skipna) { + using batch_type = xsimd::batch; + constexpr std::size_t step = batch_type::size; + assert(values.size() % step == 0); + + for (std::size_t i = 0; i < values.size(); i += step) { + auto val = xsimd::load_unaligned(&values[i]); + auto nan_mask = xsimd::isnan(val); + + if (!skipna && xsimd::any(nan_mask)) [[unlikely]] { + const std::size_t nobs_per_lane = values.size() / step; + set_moments_nan(mean, m2, m3, m4, nobs, nobs_per_lane); + return; + } + + detail::update_moments_batch(mean, m2, m3, m4, nobs, val, nan_mask, + max_moment); + } +} + +template +void accumulate_moments_simd_masked_impl( + xsimd::batch &nobs, xsimd::batch 
&mean, + xsimd::batch &m2, xsimd::batch &m3, + xsimd::batch &m4, int max_moment, std::span values, + std::span mask, bool skipna) { + using mask_batch_type = xsimd::batch; + using value_batch_type = xsimd::batch; + constexpr std::size_t mask_step = mask_batch_type::size; + constexpr std::size_t val_step = value_batch_type::size; + + assert(values.size() == mask.size()); + assert(mask.size() % mask_step == 0); + + std::size_t left = 0; + for (std::size_t right = 0; right < mask.size(); right += mask_step) { + const mask_batch_type mask_batch = xsimd::load_unaligned(&mask[right]); + const auto is_masked = mask_batch != mask_batch_type(0U); + + if (!xsimd::any(is_masked)) { + continue; + } + + if (!skipna) { + const std::size_t nobs_per_lane = values.size() / val_step; + set_moments_nan(mean, m2, m3, m4, nobs, nobs_per_lane); + return; + } + + // NaN values aren't skipped when there is a mask + accumulate_moments_simd_impl(nobs, mean, m2, m3, m4, max_moment, + values.subspan(left, right - left), + /*skipna=*/false); + + const std::uint64_t is_masked_bitmask = is_masked.mask(); + static_assert(mask_step % val_step == 0); + + const std::uint64_t lane_values_mask = ((1 << val_step) - 1); + for (std::size_t i = 0; i < mask_step; i += val_step) { + const std::uint64_t lane_mask_bits = + (is_masked_bitmask >> i) & lane_values_mask; + const auto isna_pd = + xsimd::batch_bool::from_mask(lane_mask_bits); + + const value_batch_type val = xsimd::load_unaligned(&values[right + i]); + + detail::update_moments_batch(mean, m2, m3, m4, nobs, val, isna_pd, + max_moment); + } + + left = right + mask_step; + } + + accumulate_moments_simd_impl(nobs, mean, m2, m3, m4, max_moment, + values.last(values.size() - left), false); +} +} // namespace detail + +struct accumulate_moments_simd { + template + Moments operator()(Arch, std::span values, bool skipna, + std::optional> mask, + int max_moment) noexcept; +}; + +template <> +inline Moments accumulate_moments_simd::operator()( + 
xsimd::common, std::span values, bool skipna, + std::optional> mask, int max_moment) noexcept { + Moments acc{}; + for (std::size_t i = 0; i < values.size(); i++) { + const auto val = values[i]; + const auto isna_entry = + mask.has_value() ? (*mask)[i] != 0 : std::isnan(val); + + if (skipna && isna_entry) { + continue; + } + if (isna_entry) [[unlikely]] { + return {.mean = NAN, .m2 = NAN, .m3 = NAN, .m4 = NAN, .n = values.size()}; + } + detail::moments_add_value(acc, val, max_moment); + } + return acc; +} + +template +Moments accumulate_moments_simd::operator()( + Arch, std::span values, bool skipna, + std::optional> mask, int max_moment) noexcept { + using values_batch_type = xsimd::batch; + values_batch_type mean(0.0); + values_batch_type m2(0.0); + values_batch_type m3(0.0); + values_batch_type m4(0.0); + values_batch_type nobs(0.0); + + std::size_t vec_size; + std::size_t tail_size; + + if (mask.has_value()) { + using mask_batch_type = xsimd::batch; + constexpr std::size_t batch_size = mask_batch_type::size; + tail_size = values.size() % batch_size; + vec_size = values.size() - tail_size; + + detail::accumulate_moments_simd_masked_impl( + nobs, mean, m2, m3, m4, max_moment, values.first(vec_size), + mask->first(vec_size), skipna); + } else { + constexpr std::size_t batch_size = values_batch_type::size; + tail_size = values.size() % batch_size; + vec_size = values.size() - tail_size; + + detail::accumulate_moments_simd_impl( + nobs, mean, m2, m3, m4, max_moment, values.first(vec_size), skipna); + } + + Moments moments_acc = + detail::merge_batches(nobs, mean, m2, m3, m4, max_moment); + + auto values_tail = values.last(tail_size); + std::optional> mask_tail{}; + if (mask.has_value()) { + mask_tail = mask->last(tail_size); + } + + Moments tail = accumulate_moments_simd{}(xsimd::common{}, values_tail, skipna, + mask_tail, max_moment); + detail::moments_merge(moments_acc, tail, max_moment); + + return moments_acc; +} + +extern template Moments 
accumulate_moments_simd::operator()( + xsimd::avx512cd, std::span, bool, + std::optional>, int) noexcept; +extern template Moments accumulate_moments_simd::operator()( + xsimd::avx2, std::span, bool, + std::optional>, int) noexcept; +extern template Moments accumulate_moments_simd::operator()( + xsimd::sse2, std::span, bool, + std::optional>, int) noexcept; +extern template Moments accumulate_moments_simd::operator()( + xsimd::neon64, std::span, bool, + std::optional>, int) noexcept; + +// TODO: remove macro +using arch_list = xsimd::arch_list<> +#if PANDAS_HAVE_AVX512CD + ::add +#endif +#if PANDAS_HAVE_AVX2 + ::add +#endif +#if PANDAS_HAVE_SSE2 + ::add +#endif +#if PANDAS_HAVE_NEON + ::add +#endif +#if PANDAS_HAVE_SCALAR + ::add +#endif + ; +} // namespace pandas::moments diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 56ff2a01b450c..fd758c787eb7e 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -53,6 +53,9 @@ _khash_primitive_helper_dep = declare_dependency( m_dep = cc.find_library('m', required: false) fast_float = subproject('fast_float') fast_float_dep = fast_float.get_variable('fast_float_dep') +xsimd_dep = dependency('xsimd', version: '>=14.2') + +subdir('simd') subdir('tslibs') @@ -61,7 +64,7 @@ libs_sources = { # numpy include dir is implicitly included 'algos': { 'sources': ['algos.pyx', _algos_common_helper, _algos_take_helper], - 'deps': [_khash_primitive_helper_dep, m_dep], + 'deps': [_khash_primitive_helper_dep, m_dep, moments_simd_dep], }, 'arrays': {'sources': ['arrays.pyx']}, 'groupby': {'sources': ['groupby.pyx'], 'deps': [m_dep]}, diff --git a/pandas/_libs/simd/meson.build b/pandas/_libs/simd/meson.build new file mode 100644 index 0000000000000..b59e1c25efaa7 --- /dev/null +++ b/pandas/_libs/simd/meson.build @@ -0,0 +1,55 @@ +# All architectures we might support +# Key is the architecture name used in file suffixes and macros +is_msvc_syntax = cxx.get_argument_syntax() == 'msvc' +simd_x86_flags = { + 
'sse2': is_msvc_syntax ? ['/arch:SSE2'] : ['-msse2'], + 'avx2': is_msvc_syntax ? ['/arch:AVX2'] : ['-mavx2'], + 'avx512cd': is_msvc_syntax ? ['/arch:AVX512'] : ['-mavx512cd'], +} + +simd_config = configuration_data() +supported_simd_archs = {} +if host_machine.cpu_family() == 'aarch64' + supported_simd_archs += {'neon': []} + simd_config.set('PANDAS_HAVE_NEON', 1) +elif host_machine.cpu_family() in ['x86', 'x86_64'] + foreach name, flags : simd_x86_flags + if cxx.has_multi_arguments(flags) + supported_simd_archs += {name: flags} + simd_config.set('PANDAS_HAVE_@0@'.format(name.to_upper()), 1) + endif + endforeach +endif + +# Ensure scalar version on all architectures for now... +simd_config.set('PANDAS_HAVE_SCALAR', 1) + +configure_file( + output: 'pandas_simd_config.h', + configuration: simd_config, +) + +simd_config_inc = include_directories('.') + +moments_libs = [] +foreach arch_name, arch_flags : supported_simd_archs + moments_libs += static_library( + 'moments_simd_@0@'.format(arch_name), + 'moments_inst_@0@.cpp'.format(arch_name), + include_directories: [inc_pd], + dependencies: [xsimd_dep], + cpp_args: arch_flags, + ) +endforeach + +moments_libs += static_library( + 'moments_base', + 'moments.cpp', + include_directories: [inc_pd], + dependencies: [xsimd_dep], +) + +moments_simd_dep = declare_dependency( + link_with: moments_libs, + include_directories: [inc_pd], +) diff --git a/pandas/_libs/simd/moments.cpp b/pandas/_libs/simd/moments.cpp new file mode 100644 index 0000000000000..008873cba0226 --- /dev/null +++ b/pandas/_libs/simd/moments.cpp @@ -0,0 +1,24 @@ +/* +Copyright (c) 2026, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + +#include "pandas/moments.h" +#include "pandas/simd/moments_simd.hpp" +#include +#include + +Moments moments_reduce(const double *values, size_t n, bool skipna, + const uint8_t *mask, int max_moment) { + auto moments_dispatch = xsimd::dispatch( + pandas::moments::accumulate_moments_simd{}); + std::span values_span(values, n); + std::optional> mask_span{}; + if (mask != nullptr) + mask_span = std::span(mask, n); + return moments_dispatch(values_span, skipna, mask_span, max_moment); +} diff --git a/pandas/_libs/simd/moments_inst_avx2.cpp b/pandas/_libs/simd/moments_inst_avx2.cpp new file mode 100644 index 0000000000000..974060c5d960d --- /dev/null +++ b/pandas/_libs/simd/moments_inst_avx2.cpp @@ -0,0 +1,18 @@ +/* +Copyright (c) 2026, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#include "pandas/simd/moments_simd.hpp" + +namespace pandas::moments { + +template Moments accumulate_moments_simd::operator()( + xsimd::avx2, std::span, bool, + std::optional>, int) noexcept; + +} // namespace pandas::moments diff --git a/pandas/_libs/simd/moments_inst_avx512cd.cpp b/pandas/_libs/simd/moments_inst_avx512cd.cpp new file mode 100644 index 0000000000000..cf5ad0a8d08fc --- /dev/null +++ b/pandas/_libs/simd/moments_inst_avx512cd.cpp @@ -0,0 +1,18 @@ +/* +Copyright (c) 2026, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + +#include "pandas/simd/moments_simd.hpp" + +namespace pandas::moments { + +template Moments accumulate_moments_simd::operator()( + xsimd::avx512cd, std::span, bool, + std::optional>, int) noexcept; + +} // namespace pandas::moments diff --git a/pandas/_libs/simd/moments_inst_neon.cpp b/pandas/_libs/simd/moments_inst_neon.cpp new file mode 100644 index 0000000000000..34072425e0d44 --- /dev/null +++ b/pandas/_libs/simd/moments_inst_neon.cpp @@ -0,0 +1,18 @@ +/* +Copyright (c) 2026, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + +#include "pandas/simd/moments_simd.hpp" + +namespace pandas::moments { + +template Moments accumulate_moments_simd::operator()( + xsimd::neon64, std::span, bool, + std::optional>, int) noexcept; + +} // namespace pandas::moments diff --git a/pandas/_libs/simd/moments_inst_sse2.cpp b/pandas/_libs/simd/moments_inst_sse2.cpp new file mode 100644 index 0000000000000..240bc54706664 --- /dev/null +++ b/pandas/_libs/simd/moments_inst_sse2.cpp @@ -0,0 +1,18 @@ +/* +Copyright (c) 2026, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. 
+*/ + +#include "pandas/simd/moments_simd.hpp" + +namespace pandas::moments { + +template Moments accumulate_moments_simd::operator()( + xsimd::sse2, std::span, bool, + std::optional>, int) noexcept; + +} // namespace pandas::moments diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py index a5938b2aa93e8..7e4da94cc7919 100644 --- a/pandas/tests/arrays/floating/test_function.py +++ b/pandas/tests/arrays/floating/test_function.py @@ -116,7 +116,7 @@ def test_stat_method(pandasmethname, kwargs): s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64") pandasmeth = getattr(s2, pandasmethname) expected = pandasmeth(**kwargs) - assert expected == result + tm.assert_almost_equal(result, expected) def test_value_counts_na(): diff --git a/pyproject.toml b/pyproject.toml index 9160526cf0f1a..40cfca5ac638b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ license-files = [ "LICENSES/PYUPGRADE_LICENSE", # MIT "LICENSES/SAS7BDAT_LICENSE", # MIT "LICENSES/ULTRAJSON_LICENSE", # BSD-3-Clause AND TCL + "LICENSES/XSIMD_LICENSE", # BSD-3-Clause "subprojects/fast_float-*/LICENSE-APACHE", # Apache-2.0 "subprojects/fast_float-*/LICENSE-BOOST", # BSL "subprojects/fast_float-*/LICENSE-MIT", # MIT diff --git a/subprojects/packagefiles/xsimd/meson.build b/subprojects/packagefiles/xsimd/meson.build new file mode 100644 index 0000000000000..595fb6122ef51 --- /dev/null +++ b/subprojects/packagefiles/xsimd/meson.build @@ -0,0 +1,12 @@ +project( + 'xsimd', + 'cpp', + meson_version: '>=0.58.0', + license: 'BSD-3-Clause', + version: '14.2.0', +) + +xsimd_inc = include_directories('include') + +xsimd_dep = declare_dependency(include_directories: xsimd_inc) +meson.override_dependency('xsimd', xsimd_dep) diff --git a/subprojects/xsimd.wrap b/subprojects/xsimd.wrap new file mode 100644 index 0000000000000..39706456be925 --- /dev/null +++ b/subprojects/xsimd.wrap @@ -0,0 +1,9 @@ +[wrap-file] +directory = xsimd-14.2.0 
+source_url = https://github.com/xtensor-stack/xsimd/archive/refs/tags/14.2.0.tar.gz +source_filename = xsimd-14.2.0.tar.gz +source_hash = 21e841ab684b05331e81e7f782431753a029ef7b7d9d6d3ddab837e7782a40ee +patch_directory = xsimd + +[provide] +dependency_names = xsimd