From 2c6be00c766244f847feb23a8a9b6e44bbc85212 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 9 Jun 2026 17:06:24 +0000 Subject: [PATCH] Fix xdist race on shared scanpy dataset cache in tests Under pytest -n auto, multiple xdist workers invoked sc.datasets.pbmc3k() concurrently, racing on the single shared cache file pbmc3k_raw.h5ad. One worker reading while another wrote/downloaded intermittently produced an HDF5 "filter returned failure during read" OSError during adata_pbmc3k setup. Give each worker its own scanpy datasetdir keyed by PYTEST_XDIST_WORKER via a pytest_configure hook, eliminating the shared-file contention. The fixture stays function-scoped, so downstream mutation behavior is unchanged. --- .gitignore | 1 + CHANGELOG.md | 3 +++ tests/conftest.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/.gitignore b/.gitignore index e09293e..a08610a 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,7 @@ uv.lock # Tests and coverage /data/ +/tests/data/scanpy_cache/ /node_modules/ /.coverage* diff --git a/CHANGELOG.md b/CHANGELOG.md index 1422f70..5f2fd49 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning][]. ## [Unreleased] +### Fixed +- Fixed intermittent `OSError: Can't synchronously read data (filter returned failure during read)` when running the test suite under `pytest -n auto`, caused by xdist workers racing on scanpy's shared `pbmc3k_raw.h5ad` dataset cache. Each worker now uses its own cache directory. + ## [v0.2.5] ### Added diff --git a/tests/conftest.py b/tests/conftest.py index 63281d1..7168a3c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import numpy as np @@ -11,6 +12,24 @@ TESTS_DIR = Path(__file__).parent +def pytest_configure(config): + """Give each pytest-xdist worker its own scanpy dataset cache directory. 
+ + ``sc.datasets.pbmc3k()`` (used by the ``adata_pbmc3k`` fixture) downloads and + reads a single shared cache file (``<datasetdir>/pbmc3k_raw.h5ad``). Under + ``pytest -n auto`` multiple worker processes race on that file -- one worker + reading it while another is still downloading/writing -- which intermittently + surfaces as an HDF5 ``OSError: Can't synchronously read data (filter returned + failure during read)``. Pointing each worker at its own cache directory + removes the shared-file contention entirely. + """ + worker_id = os.environ.get("PYTEST_XDIST_WORKER") + if worker_id is not None: + cache_dir = TESTS_DIR / "data" / "scanpy_cache" / worker_id + cache_dir.mkdir(parents=True, exist_ok=True) + sc.settings.datasetdir = cache_dir + + @pytest.fixture def sample_distances(): # 3 samples, 2 neighbors each