pandas-dev · jbrockmendel · May 5, 2026 · May 6, 2026 · May 6, 2026 · May 6, 2026
diff --git a/LICENSES/XSIMD_LICENSE b/LICENSES/XSIMD_LICENSE
@@ -0,0 +1,29 @@
+Copyright (c) 2016, Johan Mabille, Sylvain Corlay, Wolf Vollprecht and Martin Renou
+Copyright (c) 2016, QuantStack
+Copyright (c) 2018, Serge Guelton
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/meson.build b/meson.build
@@ -37,15 +37,17 @@ add_project_arguments(
 )
 
 cc = meson.get_compiler('c')
+cxx = meson.get_compiler('cpp')
 if cc.get_id() == 'msvc'
     # Tracking issue: https://github.com/pandas-dev/pandas/issues/63701
     # Ignore some MSVC specific warnings:
     # C4244: possible loss of data in conversion. Reproductible with `-Wconversion`.
     # C4267: conversion from `size_t` to smaller type.
     # C4551: occurs due to Cython generating code with (void)func.
     #        https://github.com/cython/cython/issues/3579
+    # C4146: unary minus operator applied to unsigned type. Occurs in xsimd.
     add_project_arguments(
-        ['/wd4244', '/wd4267', '/wd4551'],
+        ['/wd4244', '/wd4267', '/wd4551', '/wd4146'],
         language: ['c', 'cpp'],
     )
 endif

diff --git a/pandas/_libs/include/pandas/parser/simd_scan.h b/pandas/_libs/include/pandas/parser/simd_scan.h
@@ -0,0 +1,40 @@
+/*
+Copyright (c) 2026, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
+*/
+
+#pragma once
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Minimum bytes the scanner can process in one call. Callers should
+// fall through to the scalar path when fewer bytes remain.
+#define PD_SCAN_MIN_BYTES 16
+
+typedef struct pd_scanner pd_scanner;
+
+// Build a scanner that halts on any of `n` special bytes. Supported
+// values for `n` are 2 (quoted-field scan) and 6 (unquoted-field scan).
+// Returns NULL on allocation failure or unsupported `n`.
+pd_scanner *pd_scanner_create(const char *chars, int n);
+
+// Free a scanner. Accepts NULL.
+void pd_scanner_destroy(pd_scanner *scanner);
+
+// Returns the byte offset of the first special char found in data[0..len),
+// or, if no full SIMD chunk contains one, the number of bytes those chunks
+// cover (<= len). A trailing partial chunk is not scanned; the caller's
+// scalar fallback must handle every byte from the returned offset onward.
+size_t pd_scanner_scan(const pd_scanner *scanner, const char *data, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build
@@ -53,6 +53,9 @@ _khash_primitive_helper_dep = declare_dependency(
 m_dep = cc.find_library('m', required: false)
 fast_float = subproject('fast_float')
 fast_float_dep = fast_float.get_variable('fast_float_dep')
+xsimd_dep = dependency('xsimd', version: '>=14.2')
+
+subdir('simd')
 
 subdir('tslibs')
 
@@ -93,8 +96,9 @@ libs_sources = {
             'lib.pyx',
             'src/parser/tokenizer.c',
             'src/parser/fast_float_strtod.cpp',
+            'src/parser/simd_scan.cpp',
         ],
-        'deps': [fast_float_dep],
+        'deps': [fast_float_dep, xsimd_dep],
     },
     'missing': {'sources': ['missing.pyx']},
     'pandas_datetime': {
@@ -109,19 +113,21 @@ libs_sources = {
         'sources': [
             'src/parser/tokenizer.c',
             'src/parser/fast_float_strtod.cpp',
+            'src/parser/simd_scan.cpp',
             'src/parser/io.c',
             'src/parser/pd_parser.c',
         ],
-        'deps': [fast_float_dep],
+        'deps': [fast_float_dep, xsimd_dep],
     },
     'parsers': {
         'sources': [
             'parsers.pyx',
             'src/parser/tokenizer.c',
             'src/parser/fast_float_strtod.cpp',
+            'src/parser/simd_scan.cpp',
             'src/parser/io.c',
         ],
-        'deps': [fast_float_dep, _khash_primitive_helper_dep],
+        'deps': [fast_float_dep, xsimd_dep, _khash_primitive_helper_dep],
     },
     '_ujson': {
         'sources': [

diff --git a/pandas/_libs/simd/meson.build b/pandas/_libs/simd/meson.build
@@ -0,0 +1,32 @@
+# x86 SIMD levels we probe for, mapped to the compiler flags enabling them.
+# Key is the architecture name used in file suffixes and macros
+is_msvc_syntax = cxx.get_argument_syntax() == 'msvc'
+simd_x86_flags = {
+    'sse2': is_msvc_syntax ? ['/arch:SSE2'] : ['-msse2'],
+    'avx2': is_msvc_syntax ? ['/arch:AVX2'] : ['-mavx2'],
+    'avx512cd': is_msvc_syntax ? ['/arch:AVX512'] : ['-mavx512cd'],
+}
+
+simd_config = configuration_data()
+supported_simd_archs = {}
+if host_machine.cpu_family() == 'aarch64'
+    supported_simd_archs += {'neon': []}
+    simd_config.set('PANDAS_HAVE_NEON', 1)
+elif host_machine.cpu_family() in ['x86', 'x86_64']
+    foreach name, flags : simd_x86_flags
+        if cxx.has_multi_arguments(flags)
+            supported_simd_archs += {name: flags}
+            simd_config.set('PANDAS_HAVE_@0@'.format(name.to_upper()), 1)
+        endif
+    endforeach
+endif
+
+# Ensure scalar version on all architectures for now...
+simd_config.set('PANDAS_HAVE_SCALAR', 1)
+
+configure_file(
+    output: 'pandas_simd_config.h',
+    configuration: simd_config,
+)
+
+simd_config_inc = include_directories('.')
diff --git a/pandas/_libs/src/parser/simd_scan.cpp b/pandas/_libs/src/parser/simd_scan.cpp
@@ -0,0 +1,92 @@
+/*
+Copyright (c) 2026, PyData Development Team
+All rights reserved.
+
+Distributed under the terms of the BSD Simplified License.
+
+The full license is in the LICENSE file, distributed with this software.
+*/
+
+#include "pandas/parser/simd_scan.h"
+
+#include <xsimd/xsimd.hpp>
+
+#include <cstdint>
+#include <new>
+
+#if defined(_MSC_VER)
+#  include <intrin.h>
+#endif
+
+namespace {
+
+using batch_u8 = xsimd::batch<std::uint8_t>;
+constexpr std::size_t kStep = batch_u8::size;
+
+static_assert(kStep >= PD_SCAN_MIN_BYTES,
+              "xsimd batch<uint8_t> must be at least 16 lanes wide");
+
+static inline unsigned ctz64(std::uint64_t value) { // callers pass nonzero
+#if !defined(_MSC_VER)
+  return static_cast<unsigned>(__builtin_ctzll(value));
+#else
+  unsigned long bit_pos;
+  _BitScanForward64(&bit_pos, value);
+  return static_cast<unsigned>(bit_pos);
+#endif
+}
+
+template <int N>
+static inline std::size_t scan_impl(const batch_u8 *v, const char *data,
+                                    std::size_t len) {
+  // Test whole kStep-byte chunks of data against the first N batches in v.
+  const auto *bytes = reinterpret_cast<const std::uint8_t *>(data);
+  std::size_t pos = 0;
+  while (pos + kStep <= len) { // a trailing partial chunk is never loaded
+    const auto block = batch_u8::load_unaligned(bytes + pos);
+    auto hits = (block == v[0]);
+    for (int k = 1; k < N; ++k)
+      hits = hits | (block == v[k]);
+    if (xsimd::any(hits)) // offset = chunk start + lowest set mask bit
+      return pos + ctz64(hits.mask());
+    pos += kStep;
+  }
+  return pos; // bytes covered by full chunks without a match
+}
+
+} // namespace
+
+struct pd_scanner {
+  batch_u8 v[6]; // one broadcast batch per special byte; only v[0..n) is set
+  int n;         // number of special bytes in use (2 or 6)
+};
+
+extern "C" {
+
+pd_scanner *pd_scanner_create(const char *chars, int n) {
+  // Reject a NULL byte list and any n the scan dispatch does not handle.
+  if (chars == nullptr || (n != 2 && n != 6))
+    return nullptr;
+  auto *scanner = new (std::nothrow) pd_scanner; // nullptr on alloc failure
+  if (!scanner)
+    return nullptr;
+  scanner->n = n;
+  for (int j = 0; j < n; ++j) // broadcast each special byte into its own batch
+    scanner->v[j] = batch_u8::broadcast(static_cast<std::uint8_t>(chars[j]));
+  return scanner;
+}
+
+void pd_scanner_destroy(pd_scanner *scanner) { delete scanner; } // NULL is ok
+
+size_t pd_scanner_scan(const pd_scanner *scanner, const char *data,
+                       size_t len) {
+  // A NULL scanner or unsupported n scans nothing, so report 0 clean bytes
+  // (returning len would let the caller bulk-copy unscanned data).
+  if (scanner == nullptr)
+    return 0;
+  if (scanner->n == 2)
+    return scan_impl<2>(scanner->v, data, len);
+  return scanner->n == 6 ? scan_impl<6>(scanner->v, data, len) : 0;
+}
+
+} // extern "C"
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
@@ -25,6 +25,7 @@ GitHub. See Python Software Foundation License and BSD licenses for these.
 #include <stdbool.h>
 #include <stdlib.h>
 
+#include "pandas/parser/simd_scan.h"
 #include "pandas/portable.h"
 #include "pandas/vendored/klib/khash.h" // for kh_int64_t, kh_destroy_int64
 
@@ -583,6 +584,8 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes,
   ((!delim_whitespace && c == delimiter) || (delim_whitespace && isblank(c)))
 
 #define _TOKEN_CLEANUP()                                                       \
+  pd_scanner_destroy(unquoted_scanner);                                        \
+  pd_scanner_destroy(quoted_scanner);                                          \
   self->stream_len = slen;                                                     \
   self->datapos = i;
 
@@ -633,7 +636,27 @@ static int tokenize_bytes(parser_t *self, uint64_t line_limit,
   const bool has_skip = (self->skipfunc != NULL || self->skipset != NULL ||
                          self->skip_first_N_rows >= 0);
 
+  // Build SIMD scanners over the chars that halt a bulk scan. Disabled
+  // features alias to lineterminator so the scanners always see 6/2 chars
+  // and the call sites stay branch-free.
+  const char unquoted_chars[6] = {
+      delimiter,
+      lineterminator,
+      has_carriage ? carriage_symbol : lineterminator,
+      (self->quoting != QUOTE_NONE) ? self->quotechar : lineterminator,
+      has_escape ? escape_symbol : lineterminator,
+      has_comment ? comment_symbol : lineterminator,
+  };
+  const char quoted_chars[2] = {
+      (self->quoting != QUOTE_NONE) ? self->quotechar : lineterminator,
+      has_escape ? escape_symbol : lineterminator,
+  };
+  pd_scanner *unquoted_scanner = pd_scanner_create(unquoted_chars, 6);
+  pd_scanner *quoted_scanner = pd_scanner_create(quoted_chars, 2);
+
   if (make_stream_space(self, self->datalen - self->datapos) < 0) {
+    pd_scanner_destroy(unquoted_scanner);
+    pd_scanner_destroy(quoted_scanner);
     const size_t bufsize = 100;
     self->error_msg = malloc(bufsize);
     snprintf(self->error_msg, bufsize, "out of memory");
@@ -922,8 +945,23 @@ static int tokenize_bytes(parser_t *self, uint64_t line_limit,
         // normal character - save in field
         PUSH_CHAR(c);
 
-        // Bulk scan: copy remaining ordinary characters directly,
-        // bypassing the per-char state machine overhead.
+        // SIMD bulk scan: process a full SIMD chunk at a time, copying
+        // normal characters directly without state-machine overhead.
+        if (unquoted_scanner && !self->delim_whitespace) {
+          size_t remaining = self->datalen - (i + 1);
+          if (remaining >= PD_SCAN_MIN_BYTES) {
+            size_t skip = pd_scanner_scan(unquoted_scanner, buf, remaining);
+            if (skip > 0) {
+              memcpy(stream, buf, skip);
+              stream += skip;
+              slen += skip;
+              buf += skip;
+              i += skip;
+            }
+          }
+        }
+        // Scalar bulk scan fallback: copy remaining ordinary characters
+        // directly, bypassing the per-char state machine overhead.
         while (i + 1 < self->datalen &&
                !(breaks_field_scan[(uint8_t)*buf] & 0x1)) {
           *stream++ = *buf++;
@@ -950,8 +988,23 @@ static int tokenize_bytes(parser_t *self, uint64_t line_limit,
         // normal character - save in field
         PUSH_CHAR(c);
 
-        // Bulk scan: copy remaining ordinary characters directly,
-        // bypassing the per-char state machine overhead.
+        // SIMD bulk scan for quoted fields: only quote and escape
+        // chars are special, so use a lighter scan.
+        if (quoted_scanner) {
+          size_t remaining = self->datalen - (i + 1);
+          if (remaining >= PD_SCAN_MIN_BYTES) {
+            size_t skip = pd_scanner_scan(quoted_scanner, buf, remaining);
+            if (skip > 0) {
+              memcpy(stream, buf, skip);
+              stream += skip;
+              slen += skip;
+              buf += skip;
+              i += skip;
+            }
+          }
+        }
+        // Scalar bulk scan fallback: copy remaining ordinary characters
+        // directly, bypassing the per-char state machine overhead.
         while (i + 1 < self->datalen &&
                !(breaks_field_scan[(uint8_t)*buf] & 0x2)) {
           *stream++ = *buf++;

diff --git a/pandas/_libs/tslibs/meson.build b/pandas/_libs/tslibs/meson.build
@@ -14,8 +14,9 @@ tslibs_sources = {
             'parsing.pyx',
             '../src/parser/tokenizer.c',
             '../src/parser/fast_float_strtod.cpp',
+            '../src/parser/simd_scan.cpp',
         ],
-        'deps': [fast_float_dep],
+        'deps': [fast_float_dep, xsimd_dep],
     },
     'period': {'sources': ['period.pyx']},
     'strptime': {'sources': ['strptime.pyx']},

diff --git a/pyproject.toml b/pyproject.toml
@@ -38,6 +38,7 @@ license-files = [
   "LICENSES/PYUPGRADE_LICENSE", # MIT
   "LICENSES/SAS7BDAT_LICENSE", # MIT
   "LICENSES/ULTRAJSON_LICENSE", # BSD-3-Clause AND TCL
+  "LICENSES/XSIMD_LICENSE", # BSD-3-Clause
   "subprojects/fast_float-*/LICENSE-APACHE", # Apache-2.0
   "subprojects/fast_float-*/LICENSE-BOOST", # BSL
   "subprojects/fast_float-*/LICENSE-MIT", # MIT

diff --git a/subprojects/packagefiles/xsimd/meson.build b/subprojects/packagefiles/xsimd/meson.build
@@ -0,0 +1,12 @@
+project(
+    'xsimd',
+    'cpp',
+    meson_version: '>=0.58.0',
+    license: 'BSD-3-Clause',
+    version: '14.2.0',
+)
+
+xsimd_inc = include_directories('include')
+
+xsimd_dep = declare_dependency(include_directories: xsimd_inc)
+meson.override_dependency('xsimd', xsimd_dep)
diff --git a/subprojects/xsimd.wrap b/subprojects/xsimd.wrap
@@ -0,0 +1,9 @@
+[wrap-file]
+directory = xsimd-14.2.0
+source_url = https://github.com/xtensor-stack/xsimd/archive/refs/tags/14.2.0.tar.gz
+source_filename = xsimd-14.2.0.tar.gz
+source_hash = 21e841ab684b05331e81e7f782431753a029ef7b7d9d6d3ddab837e7782a40ee
+patch_directory = xsimd
+
+[provide]
+dependency_names = xsimd