From 6cbce5e4ad35ba51e8d41ae8e6ff93eb7fb873a6 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 18 Mar 2026 16:50:24 +0100 Subject: [PATCH 01/24] chore: upgrade to zarr v3 sharding default --- src/anndata/_settings.py | 4 ++-- src/anndata/_settings.pyi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py index 8c569cbc1..4fe9a68e3 100644 --- a/src/anndata/_settings.py +++ b/src/anndata/_settings.py @@ -470,7 +470,7 @@ def validate_zarr_sharding(auto_shard: bool, settings: SettingsManager): # noqa settings.register( "zarr_write_format", - default_value=2, + default_value=3, description="Which version of zarr to write to when anndata must internally open a write-able zarr group.", validate=validate_zarr_write_format, get_from_env=lambda name, default: check_and_get_environ_var( @@ -517,7 +517,7 @@ def validate_sparse_settings(val: Any, settings: SettingsManager) -> None: settings.register( "auto_shard_zarr_v3", - default_value=False, + default_value=True, description="Whether or not to use zarr's auto computation of sharding for v3. For v2 this setting will be ignored. The setting will apply to all calls to anndata's writing mechanism (write_zarr / write_elem) and will **not** override any user-defined kwargs for shards.", validate=validate_zarr_sharding, get_from_env=check_and_get_bool, diff --git a/src/anndata/_settings.pyi b/src/anndata/_settings.pyi index d55afdeda..c0bf5b3ac 100644 --- a/src/anndata/_settings.pyi +++ b/src/anndata/_settings.pyi @@ -41,11 +41,11 @@ class _AnnDataSettingsManager(SettingsManager): check_uniqueness: bool = True copy_on_write_X: bool = False allow_write_nullable_strings: bool | None = None - zarr_write_format: Literal[2, 3] = 2 + zarr_write_format: Literal[2, 3] = 3 use_sparse_array_on_read: bool = False min_rows_for_chunked_h5_copy: int = 1000 disallow_forward_slash_in_h5ad: bool = False write_csr_csc_indices_with_min_possible_dtype: bool = False - auto_shard_zarr_v3: bool = False + auto_shard_zarr_v3: bool = True settings: _AnnDataSettingsManager From a15cdca3475905d2e4adb9c300af4a4b585b4223 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 18 Mar 2026 17:33:39 +0100 Subject: [PATCH 02/24] fix: setting --- tests/conftest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bfc37a7b4..1abe7f822 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -43,7 +43,9 @@ def diskfmt( if (fmt := request.param[0]) == "h5ad": yield fmt else: - with ad.settings.override(zarr_write_format=request.param[1]): + with ad.settings.override( + zarr_write_format=request.param[1], auto_shard_zarr_v3=request.param[1] == 3 + ): yield fmt @@ -52,7 +54,7 @@ def diskfmt2( diskfmt: Literal["h5ad", "zarr"], ) -> Generator[Literal["zarr", "h5ad"], None, None]: if diskfmt == "h5ad": - with ad.settings.override(zarr_write_format=2): + with ad.settings.override(auto_shard_zarr_v3=False, zarr_write_format=2): yield "zarr" else: yield "h5ad" From de96c4d1b43f88fba19228f8865b29eb23daba88 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 18 Mar 2026 17:39:13 +0100 Subject: [PATCH 03/24] fix: shard first --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1abe7f822..473909954 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -44,7 +44,7 @@ def diskfmt( yield fmt else: with ad.settings.override( - zarr_write_format=request.param[1], auto_shard_zarr_v3=request.param[1] == 3 + auto_shard_zarr_v3=request.param[1] == 3, zarr_write_format=request.param[1] ): yield fmt From 1edeaed3827ba4a6250a68cfe20f2303c01a6453 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 11:21:27 +0200 Subject: [PATCH 04/24] fix: reverse check order --- src/anndata/_settings.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py index 4fe9a68e3..dd6a406aa 100644 --- a/src/anndata/_settings.py +++ b/src/anndata/_settings.py @@ -463,11 +463,19 @@ def validate_zarr_write_format(format: int, settings: SettingsManager): def validate_zarr_sharding(auto_shard: bool, settings: SettingsManager): # noqa: FBT001 validate_bool(auto_shard, settings) - if auto_shard and settings.zarr_write_format == 2: + if auto_shard and getattr(settings, "zarr_write_format", 3) == 2: msg = "Cannot shard v2 format data. Please set `anndata.settings.zarr_write_format` to 3." raise ValueError(msg) +settings.register( + "auto_shard_zarr_v3", + default_value=True, + description="Whether or not to use zarr's auto computation of sharding for v3. For v2 this setting will be ignored. The setting will apply to all calls to anndata's writing mechanism (write_zarr / write_elem) and will **not** override any user-defined kwargs for shards.", + validate=validate_zarr_sharding, + get_from_env=check_and_get_bool, +) + settings.register( "zarr_write_format", default_value=3, @@ -515,15 +523,6 @@ def validate_sparse_settings(val: Any, settings: SettingsManager) -> None: get_from_env=check_and_get_bool, ) -settings.register( - "auto_shard_zarr_v3", - default_value=True, - description="Whether or not to use zarr's auto computation of sharding for v3. For v2 this setting will be ignored. The setting will apply to all calls to anndata's writing mechanism (write_zarr / write_elem) and will **not** override any user-defined kwargs for shards.", - validate=validate_zarr_sharding, - get_from_env=check_and_get_bool, -) - - settings.register( "copy_on_write_X", default_value=False, From ff3daf4230beba6ee8fc2208fa3a00dda37a58ec Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 11:23:34 +0200 Subject: [PATCH 05/24] chore: relnote --- docs/release-notes/2368.feat.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 docs/release-notes/2368.feat.md diff --git a/docs/release-notes/2368.feat.md b/docs/release-notes/2368.feat.md new file mode 100644 index 000000000..7bc30bab6 --- /dev/null +++ b/docs/release-notes/2368.feat.md @@ -0,0 +1 @@ +Write zarr sharding + v3 by default {user}`ilan-gold` From 47c6f2194ce98b2a63006458cd5fe1bee05f6a04 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 11:35:16 +0200 Subject: [PATCH 06/24] fix: int as chunks --- src/anndata/_io/specs/registry.py | 2 +- tests/lazy/test_read.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py index 51726e4e2..4e7cbb777 100644 --- a/src/anndata/_io/specs/registry.py +++ b/src/anndata/_io/specs/registry.py @@ -485,7 +485,7 @@ def read_elem_lazy( >>> adata.layers["dense"] = ad.experimental.read_elem_lazy(g["layers/dense"]) >>> adata.layers["dense"] - dask.array + dask.array Making a new anndata object from on-disk, with custom chunks: diff --git a/tests/lazy/test_read.py b/tests/lazy/test_read.py index c8e3a3104..b721e58b4 100644 --- a/tests/lazy/test_read.py +++ b/tests/lazy/test_read.py @@ -225,7 +225,7 @@ def df_group(tmp_path_factory) -> zarr.Group: df = gen_typed_df(120) path = tmp_path_factory.mktemp("foo.zarr") g = zarr.open_group(path, mode="w", zarr_format=2) - write_elem(g, "foo", df, dataset_kwargs={"chunks": 25}) + write_elem(g, "foo", df, dataset_kwargs={"chunks": (25,)}) return zarr.open(path, mode="r")["foo"] From df1eedcb94c44aaa30c7c13e544a2bdbb9dfacfc Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 11:42:42 +0200 Subject: [PATCH 07/24] fix: check shard v3 for default --- src/anndata/tests/helpers.py | 3 ++- tests/test_concatenate_disk.py | 4 ++-- tests/test_dask.py | 4 ++-- tests/test_io_elementwise.py | 4 ++-- tests/test_readwrite.py | 13 +++++++++++++ 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index 784951da9..e21fbe722 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -1278,9 +1278,10 @@ def visititems_zarr( visitor(key, maybe_group) -def check_all_sharded(g: ZarrGroup): +def check_all_sharded_v3(g: ZarrGroup): def visit(key: str, arr: zarr.Array | zarr.Group): # Check for recarray via https://numpy.org/doc/stable/user/basics.rec.html#manipulating-and-displaying-structured-datatypes + assert arr.metadata.zarr_format == 3 if isinstance(arr, zarr.Array) and arr.shape != () and arr.dtype.names is None: assert arr.shards is not None diff --git a/tests/test_concatenate_disk.py b/tests/test_concatenate_disk.py index 194649166..5f8b4d190 100644 --- a/tests/test_concatenate_disk.py +++ b/tests/test_concatenate_disk.py @@ -16,7 +16,7 @@ from anndata._core.merge import _resolve_axis from anndata.experimental.merge import as_group, concat_on_disk from anndata.io import read_elem, write_elem -from anndata.tests.helpers import assert_equal, check_all_sharded, gen_adata +from anndata.tests.helpers import assert_equal, check_all_sharded_v3, gen_adata from anndata.utils import asarray if TYPE_CHECKING: @@ -269,7 +269,7 @@ def test_concatenate_zarr_v3_shard(xxxm_adatas, tmp_path): g = zarr.open(tmp_path) assert g.metadata.zarr_format == 3 - check_all_sharded(g) + check_all_sharded_v3(g) def test_singleton(xxxm_adatas, tmp_path, file_format): diff --git a/tests/test_dask.py b/tests/test_dask.py index a88139c90..af857a373 100644 --- a/tests/test_dask.py +++ b/tests/test_dask.py @@ -25,7 +25,7 @@ as_sparse_dask_array, as_sparse_dask_matrix, assert_equal, - check_all_sharded, + check_all_sharded_v3, gen_adata, ) @@ -145,7 +145,7 @@ def test_dask_distributed_write( # TODO: See https://github.com/zarr-developers/zarr-python/issues/2716 with as_group(pth, mode="r") as g: if auto_shard_zarr_v3: - check_all_sharded(g) + check_all_sharded_v3(g) curr = ad.io.read_elem(g) with pytest.raises(AssertionError): diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 7b4d4a356..77354eed1 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -31,7 +31,7 @@ as_cupy_sparse_dask_array, as_dense_cupy_dask_array, assert_equal, - check_all_sharded, + check_all_sharded_v3, gen_adata, visititems_zarr, ) @@ -927,7 +927,7 @@ def test_write_auto_sharded(tmp_path: Path, override: dict): with ad.settings.override(**override): adata.write_zarr(path) - check_all_sharded(zarr.open(path)) + check_all_sharded_v3(zarr.open(path)) @pytest.mark.zarr_io diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index 3359b2ff8..b12a3a332 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -29,9 +29,11 @@ _read_attr, ) from anndata.tests.helpers import ( + DEFAULT_KEY_TYPES, GEN_ADATA_NO_XARRAY_ARGS, as_dense_dask_array, assert_equal, + check_all_sharded_v3, gen_adata, jnp, jnp_array_or_idempotent, @@ -902,6 +904,17 @@ def test_io_dtype(tmp_path, diskfmt, dtype, roundtrip): assert curr.X.dtype == dtype +def test_zarr_v3_sharded_default(tmp_path): + pth = tmp_path / "adata.zarr" + + orig = gen_adata( + (10, 20), obsm_types=DEFAULT_KEY_TYPES, varm_types=DEFAULT_KEY_TYPES + ) + orig.write_zarr(pth) + + check_all_sharded_v3(zarr.open(pth)) + + def test_h5py_attr_limit(tmp_path): N = 10_000 a = ad.AnnData(np.ones((5, 10))) From ccf4936d0f8ec0cb7a1fdbc30f87b8e2c1345eea Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 11:47:55 +0200 Subject: [PATCH 08/24] fix: group check --- tests/test_dask.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_dask.py b/tests/test_dask.py index af857a373..bb8450da7 100644 --- a/tests/test_dask.py +++ b/tests/test_dask.py @@ -10,6 +10,7 @@ import numpy as np import pandas as pd import pytest +import zarr import anndata as ad from anndata._core.anndata import AnnData @@ -144,7 +145,7 @@ def test_dask_distributed_write( ad.io.write_elem(g, "", orig) # TODO: See https://github.com/zarr-developers/zarr-python/issues/2716 with as_group(pth, mode="r") as g: - if auto_shard_zarr_v3: + if auto_shard_zarr_v3 and isinstance(g, zarr.Group): check_all_sharded_v3(g) curr = ad.io.read_elem(g) From 33e4627b7b820d35b3e6252027856faeebdccffd Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 12:01:22 +0200 Subject: [PATCH 09/24] fixL why? --- tests/lazy/test_read.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/lazy/test_read.py b/tests/lazy/test_read.py index b721e58b4..625f47357 100644 --- a/tests/lazy/test_read.py +++ b/tests/lazy/test_read.py @@ -224,7 +224,7 @@ def test_h5_file_obj(tmp_path: Path): def df_group(tmp_path_factory) -> zarr.Group: df = gen_typed_df(120) path = tmp_path_factory.mktemp("foo.zarr") - g = zarr.open_group(path, mode="w", zarr_format=2) + g = zarr.open_group(path, mode="w") write_elem(g, "foo", df, dataset_kwargs={"chunks": (25,)}) return zarr.open(path, mode="r")["foo"] From abdc4bafe9f71f4295aa70754a5869ef172efa3e Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 15:44:33 +0200 Subject: [PATCH 10/24] fix: more 1d chunking --- tests/test_io_elementwise.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 77354eed1..a65c09994 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -857,7 +857,7 @@ def test_chunking_1d_array( chunks: tuple[int] | None, expected_chunks: tuple[int], ): - write_elem(store, "foo", arr, dataset_kwargs={"chunks": 25}) + write_elem(store, "foo", arr, dataset_kwargs={"chunks": (25,)}) arr = read_elem_lazy(store["foo"], chunks=chunks) assert arr.chunksize == expected_chunks From 2dfac0541c4035f172964cfc67bb4e9f006a9d82 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 16:55:03 +0200 Subject: [PATCH 11/24] fix: consolidated metadata + default sharding --- tests/lazy/test_read.py | 30 ++++++++++++++++++++++++------ tests/test_io_elementwise.py | 7 +++---- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/tests/lazy/test_read.py b/tests/lazy/test_read.py index 625f47357..36be8a8ab 100644 --- a/tests/lazy/test_read.py +++ b/tests/lazy/test_read.py @@ -1,6 +1,8 @@ from __future__ import annotations +import json from importlib.util import find_spec +from pathlib import Path from typing import TYPE_CHECKING import numpy as np @@ -9,6 +11,7 @@ import zarr from anndata import AnnData +from anndata._settings import settings from anndata.compat import DaskArray from anndata.experimental import read_elem_lazy, read_lazy from anndata.experimental.backed._io import ANNDATA_ELEMS @@ -23,7 +26,7 @@ if TYPE_CHECKING: from collections.abc import Callable - from pathlib import Path + from typing import Literal from anndata._types import AnnDataElem @@ -184,18 +187,33 @@ def test_view_of_view_to_memory(adata_remote: AnnData, adata_orig: AnnData): @pytest.mark.zarr_io -def test_unconsolidated(tmp_path: Path, mtx_format): - adata = gen_adata((10, 10), mtx_format, **GEN_ADATA_NO_XARRAY_ARGS) +@pytest.mark.parametrize("zarr_version", [2, 3]) +def test_unconsolidated(tmp_path: Path, zarr_version: Literal[2, 3]): + if zarr_version == 2: + settings.auto_shard_zarr_v3 = False + settings.zarr_write_format = zarr_version + adata = gen_adata((10, 10), **GEN_ADATA_NO_XARRAY_ARGS) orig_pth = tmp_path / "orig.zarr" adata.write_zarr(orig_pth) - (orig_pth / ".zmetadata").unlink() + if zarr_version == 2: + (orig_pth / ".zmetadata").unlink() + else: + z = zarr.open(orig_pth) + metadata = z.metadata.to_dict() + del metadata["consolidated_metadata"] + with Path.open(orig_pth / "zarr.json", mode="w") as f: + f.write(json.dumps(metadata)) store = AccessTrackingStore(orig_pth, read_only=True) - store.initialize_key_trackers(["obs/.zgroup", ".zgroup"]) + store.initialize_key_trackers( + ["obs/.zgroup"] if zarr_version == 2 else ["obs/zarr.json"] + ) with pytest.warns(UserWarning, match=r"Did not read zarr as consolidated"): remote = read_lazy(store) remote_to_memory = remote.to_memory() assert_equal(remote_to_memory, adata) - store.assert_access_count("obs/.zgroup", 1) + store.assert_access_count( + f"obs/{'.zgroup' if zarr_version == 2 else 'zarr.json'}", 1 + ) @pytest.mark.zarr_io diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index a65c09994..075d1baa4 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -931,11 +931,10 @@ def test_write_auto_sharded(tmp_path: Path, override: dict): @pytest.mark.zarr_io -def test_write_auto_sharded_against_v2_format(): - with pytest.raises(ValueError, match=r"Cannot shard v2 format data."): # noqa: PT012, SIM117 +def test_write_auto_sharded_against_v2_format_default(): + with pytest.raises(ValueError, match=r"Cannot set `zarr_write_format` to 2"): # noqa: SIM117 with ad.settings.override(zarr_write_format=2): - with ad.settings.override(auto_shard_zarr_v3=True): - pass + pass @pytest.mark.zarr_io From daf059edebaf604ea77d28b50910d2a9efac5f49 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 17:49:10 +0200 Subject: [PATCH 12/24] fix: tests again --- tests/test_backed_sparse.py | 5 ++--- tests/test_readwrite.py | 9 ++++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/test_backed_sparse.py b/tests/test_backed_sparse.py index 3112dd75e..5ff7a2328 100644 --- a/tests/test_backed_sparse.py +++ b/tests/test_backed_sparse.py @@ -395,10 +395,9 @@ def test_lazy_array_cache( a_disk[3:5] a_disk[6:7] a_disk[8:9] - # Three hits for metadata in zarr v3: - # see https://github.com/zarr-developers/zarr-python/discussions/2760 for more info on the difference. + # 1 hit for metadata in zarr v3 for zarr.json: # Then there is actual data access, 1 more when cached, 4 more otherwise. - c_expected = 4 if should_cache_indptr else 7 + c_expected = 2 if should_cache_indptr else 5 assert store.get_access_count("X/indptr") == c_expected for elem_not_indptr in elems - {"indptr"}: assert ( diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index b12a3a332..81e5f2a13 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -412,6 +412,8 @@ def check_compressed(key, value): def test_zarr_compression( tmp_path: Path, zarr_write_format: Literal[2, 3], *, use_compression: bool ): + if zarr_write_format == 2: + ad.settings.auto_shard_zarr_v3 = False ad.settings.zarr_write_format = zarr_write_format pth = str(Path(tmp_path) / "adata.zarr") adata = gen_adata((10, 8), **GEN_ADATA_NO_XARRAY_ARGS) @@ -985,8 +987,13 @@ def test_write_elem_version_mismatch(tmp_path: Path): g = zarr.open_group( zarr_path, mode="w", - zarr_format=2 if ad.settings.zarr_write_format == 3 else 3, + zarr_format=2, ) + with pytest.raises( + ValueError, match=r"Zarr format 2 arrays can only be created with `shard_shape`" + ): + ad.io.write_elem(g, "/", adata) + ad.settings.auto_shard_zarr_v3 = False ad.io.write_elem(g, "/", adata) adata_roundtripped = ad.read_zarr(g) assert_equal(adata_roundtripped, adata) From acc41693d4a622f39618d58f674db23d96379caf Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 18:08:10 +0200 Subject: [PATCH 13/24] fix: documented intention --- src/anndata/_io/specs/methods.py | 12 ++++++++---- src/anndata/_settings.py | 3 --- tests/conftest.py | 2 +- tests/lazy/test_read.py | 2 -- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index 43b084a00..3a079fa86 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -438,7 +438,8 @@ def write_basic( f.create_dataset(k, data=elem, shape=elem.shape, dtype=dtype, **dataset_kwargs) else: dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs) - dataset_kwargs = zarr_v3_sharding(dataset_kwargs) + if f.metadata.zarr_format == 3: + dataset_kwargs = zarr_v3_sharding(dataset_kwargs) f.create_array(k, shape=elem.shape, dtype=dtype, **dataset_kwargs) # see https://github.com/zarr-developers/zarr-python/discussions/2712 if isinstance(elem, ZarrArray | H5Array): @@ -518,7 +519,8 @@ def write_basic_dask_dask_dense( is_h5 = isinstance(f, H5Group) if not is_h5: dataset_kwargs = zarr_v3_compressor_compat(dataset_kwargs) - dataset_kwargs = zarr_v3_sharding(dataset_kwargs) + if f.metadata.zarr_format == 3: + dataset_kwargs = zarr_v3_sharding(dataset_kwargs) if is_h5: g = f.require_dataset(k, shape=elem.shape, dtype=elem.dtype, **dataset_kwargs) else: @@ -602,7 +604,8 @@ def write_vlen_string_array_zarr( filters, fill_value = None, None if f.metadata.zarr_format == 2: filters, fill_value = [VLenUTF8()], "" - dataset_kwargs = zarr_v3_sharding(dataset_kwargs) + if f.metadata.zarr_format == 3: + dataset_kwargs = zarr_v3_sharding(dataset_kwargs) f.create_array( k, shape=elem.shape, @@ -727,7 +730,8 @@ def write_sparse_compressed( attr_name, data=attr, shape=attr.shape, dtype=dtype, **dataset_kwargs ) else: - dataset_kwargs = zarr_v3_sharding(dataset_kwargs) + if f.metadata.zarr_format == 3: + dataset_kwargs = zarr_v3_sharding(dataset_kwargs) arr = g.create_array( attr_name, shape=attr.shape, dtype=dtype, **dataset_kwargs ) diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py index dd6a406aa..82f5e99f4 100644 --- a/src/anndata/_settings.py +++ b/src/anndata/_settings.py @@ -456,9 +456,6 @@ def validate_zarr_write_format(format: int, settings: SettingsManager): if format not in {2, 3}: msg = "non-v2 zarr on-disk format not supported" raise ValueError(msg) - if format == 2 and getattr(settings, "auto_shard_zarr_v3", False): - msg = "Cannot set `zarr_write_format` to 2 with autosharding on. Please set to `False` `anndata.settings.auto_shard_zarr_v3`" - raise ValueError(msg) def validate_zarr_sharding(auto_shard: bool, settings: SettingsManager): # noqa: FBT001 diff --git a/tests/conftest.py b/tests/conftest.py index 473909954..e2379c766 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,7 +54,7 @@ def diskfmt2( diskfmt: Literal["h5ad", "zarr"], ) -> Generator[Literal["zarr", "h5ad"], None, None]: if diskfmt == "h5ad": - with ad.settings.override(auto_shard_zarr_v3=False, zarr_write_format=2): + with ad.settings.override(zarr_write_format=2): yield "zarr" else: yield "h5ad" diff --git a/tests/lazy/test_read.py b/tests/lazy/test_read.py index 36be8a8ab..77e2ce81f 100644 --- a/tests/lazy/test_read.py +++ b/tests/lazy/test_read.py @@ -189,8 +189,6 @@ def test_view_of_view_to_memory(adata_remote: AnnData, adata_orig: AnnData): @pytest.mark.zarr_io @pytest.mark.parametrize("zarr_version", [2, 3]) def test_unconsolidated(tmp_path: Path, zarr_version: Literal[2, 3]): - if zarr_version == 2: - settings.auto_shard_zarr_v3 = False settings.zarr_write_format = zarr_version adata = gen_adata((10, 10), **GEN_ADATA_NO_XARRAY_ARGS) orig_pth = tmp_path / "orig.zarr" From fe4a9a61cd259275f93a7c053980c238985a0ec4 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 18:09:09 +0200 Subject: [PATCH 14/24] fix: revert old change --- src/anndata/_settings.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py index 82f5e99f4..7d7b54c7f 100644 --- a/src/anndata/_settings.py +++ b/src/anndata/_settings.py @@ -465,14 +465,6 @@ def validate_zarr_sharding(auto_shard: bool, settings: SettingsManager): # noqa raise ValueError(msg) -settings.register( - "auto_shard_zarr_v3", - default_value=True, - description="Whether or not to use zarr's auto computation of sharding for v3. For v2 this setting will be ignored. The setting will apply to all calls to anndata's writing mechanism (write_zarr / write_elem) and will **not** override any user-defined kwargs for shards.", - validate=validate_zarr_sharding, - get_from_env=check_and_get_bool, -) - settings.register( "zarr_write_format", default_value=3, @@ -520,6 +512,14 @@ def validate_sparse_settings(val: Any, settings: SettingsManager) -> None: get_from_env=check_and_get_bool, ) +settings.register( + "auto_shard_zarr_v3", + default_value=True, + description="Whether or not to use zarr's auto computation of sharding for v3. For v2 this setting will be ignored. The setting will apply to all calls to anndata's writing mechanism (write_zarr / write_elem) and will **not** override any user-defined kwargs for shards.", + validate=validate_zarr_sharding, + get_from_env=check_and_get_bool, +) + settings.register( "copy_on_write_X", default_value=False, From fb65c9378a31c00396732024175e7244b9ad4910 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 18:18:19 +0200 Subject: [PATCH 15/24] fix: more validation removal --- src/anndata/_settings.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/anndata/_settings.py b/src/anndata/_settings.py index 7d7b54c7f..f064a2fcc 100644 --- a/src/anndata/_settings.py +++ b/src/anndata/_settings.py @@ -460,9 +460,6 @@ def validate_zarr_write_format(format: int, settings: SettingsManager): def validate_zarr_sharding(auto_shard: bool, settings: SettingsManager): # noqa: FBT001 validate_bool(auto_shard, settings) - if auto_shard and getattr(settings, "zarr_write_format", 3) == 2: - msg = "Cannot shard v2 format data. Please set `anndata.settings.zarr_write_format` to 3." - raise ValueError(msg) settings.register( From 597614770faa1629c7de79145249551f35ffa657 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 18:22:35 +0200 Subject: [PATCH 16/24] fix: more check --- tests/test_dask.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_dask.py b/tests/test_dask.py index bb8450da7..025ffa0d0 100644 --- a/tests/test_dask.py +++ b/tests/test_dask.py @@ -128,8 +128,6 @@ def test_dask_distributed_write( *, auto_shard_zarr_v3: bool, ) -> None: - if auto_shard_zarr_v3 and ad.settings.zarr_write_format == 2: - pytest.skip(reason="Cannot shard v2 data") import dask.array as da import dask.distributed as dd import numpy as np @@ -145,7 +143,11 @@ def test_dask_distributed_write( ad.io.write_elem(g, "", orig) # TODO: See https://github.com/zarr-developers/zarr-python/issues/2716 with as_group(pth, mode="r") as g: - if auto_shard_zarr_v3 and isinstance(g, zarr.Group): + if ( + auto_shard_zarr_v3 + and ad.settings.zarr_write_format == 3 + and isinstance(g, zarr.Group) + ): check_all_sharded_v3(g) curr = ad.io.read_elem(g) From bba236bdcd2141c45ba877c4131062e89164eb20 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 18:24:17 +0200 Subject: [PATCH 17/24] fix: remove test --- tests/test_io_elementwise.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 075d1baa4..54447dcf1 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -31,7 +31,6 @@ as_cupy_sparse_dask_array, as_dense_cupy_dask_array, assert_equal, - check_all_sharded_v3, gen_adata, visititems_zarr, ) @@ -912,24 +911,6 @@ def test_h5_unchunked( assert arr.chunksize == expected_chunks -@pytest.mark.zarr_io -@pytest.mark.parametrize( - "override", - [ - {"auto_shard_zarr_v3": True, "zarr_write_format": 3}, - {"zarr_write_format": 3, "auto_shard_zarr_v3": True}, - ], - ids=["shard_first", "write_format_first"], -) -def test_write_auto_sharded(tmp_path: Path, override: dict): - path = tmp_path / "check.zarr" - adata = gen_adata((1000, 100), **GEN_ADATA_NO_XARRAY_ARGS) - with ad.settings.override(**override): - adata.write_zarr(path) - - check_all_sharded_v3(zarr.open(path)) - - @pytest.mark.zarr_io def test_write_auto_sharded_against_v2_format_default(): with pytest.raises(ValueError, match=r"Cannot set `zarr_write_format` to 2"): # noqa: SIM117 From 254d705e430baae7d178a7b4c0ed284099dc32fa Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 18:34:11 +0200 Subject: [PATCH 18/24] fix: oops! --- src/anndata/_io/specs/methods.py | 4 ++-- src/anndata/tests/helpers.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py index 3a079fa86..d818bd50d 100644 --- a/src/anndata/_io/specs/methods.py +++ b/src/anndata/_io/specs/methods.py @@ -604,8 +604,8 @@ def write_vlen_string_array_zarr( filters, fill_value = None, None if f.metadata.zarr_format == 2: filters, fill_value = [VLenUTF8()], "" - if f.metadata.zarr_format == 3: - dataset_kwargs = zarr_v3_sharding(dataset_kwargs) + if f.metadata.zarr_format == 3: + dataset_kwargs = zarr_v3_sharding(dataset_kwargs) f.create_array( k, shape=elem.shape, diff --git a/src/anndata/tests/helpers.py b/src/anndata/tests/helpers.py index e21fbe722..8813c1f9f 100644 --- a/src/anndata/tests/helpers.py +++ b/src/anndata/tests/helpers.py @@ -1283,6 +1283,6 @@ def visit(key: str, arr: zarr.Array | zarr.Group): # Check for recarray via https://numpy.org/doc/stable/user/basics.rec.html#manipulating-and-displaying-structured-datatypes assert arr.metadata.zarr_format == 3 if isinstance(arr, zarr.Array) and arr.shape != () and arr.dtype.names is None: - assert arr.shards is not None + assert arr.shards is not None, arr visititems_zarr(g, visitor=visit) From 534f5b5200ea106d49465694fefa4465ef4c953a Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 1 Apr 2026 19:11:38 +0200 Subject: [PATCH 19/24] fix: last ones! --- tests/test_io_elementwise.py | 16 ---------------- tests/test_readwrite.py | 5 ----- 2 files changed, 21 deletions(-) diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py index 54447dcf1..b17b1f028 100644 --- a/tests/test_io_elementwise.py +++ b/tests/test_io_elementwise.py @@ -911,22 +911,6 @@ def test_h5_unchunked( assert arr.chunksize == expected_chunks -@pytest.mark.zarr_io -def test_write_auto_sharded_against_v2_format_default(): - with pytest.raises(ValueError, match=r"Cannot set `zarr_write_format` to 2"): # noqa: SIM117 - with ad.settings.override(zarr_write_format=2): - pass - - -@pytest.mark.zarr_io -def test_write_auto_cannot_set_v2_format_after_sharding(): - with pytest.raises(ValueError, match=r"Cannot set `zarr_write_format` to 2"): # noqa: PT012, SIM117 - with ad.settings.override(zarr_write_format=3): - with ad.settings.override(auto_shard_zarr_v3=True): - with ad.settings.override(zarr_write_format=2): - pass - - @pytest.mark.zarr_io def test_write_auto_sharded_does_not_override(tmp_path: Path): z = open_write_group(tmp_path / "arr.zarr", zarr_format=3) diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py index 81e5f2a13..467b16b44 100644 --- a/tests/test_readwrite.py +++ b/tests/test_readwrite.py @@ -989,11 +989,6 @@ def test_write_elem_version_mismatch(tmp_path: Path): mode="w", zarr_format=2, ) - with pytest.raises( - ValueError, match=r"Zarr format 2 arrays can only be created with `shard_shape`" - ): - ad.io.write_elem(g, "/", adata) - ad.settings.auto_shard_zarr_v3 = False ad.io.write_elem(g, "/", adata) adata_roundtripped = ad.read_zarr(g) assert_equal(adata_roundtripped, adata) From bd77b1a19db87a1e6715fbfae4d944f13d265b2b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 2 Apr 2026 09:36:16 +0200 Subject: [PATCH 20/24] final one? --- src/anndata/_io/specs/registry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py index 4e7cbb777..6ef4aa17d 100644 --- a/src/anndata/_io/specs/registry.py +++ b/src/anndata/_io/specs/registry.py @@ -485,7 +485,7 @@ def read_elem_lazy( >>> adata.layers["dense"] = ad.experimental.read_elem_lazy(g["layers/dense"]) >>> adata.layers["dense"] - dask.array + dask.array Making a new anndata object from on-disk, with custom chunks: From 8998fce436a922db4e1c0adc2e7df5d2ec1a8720 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 2 Apr 2026 10:18:48 +0200 Subject: [PATCH 21/24] fix: specify chunks --- src/anndata/_io/specs/registry.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py index 6ef4aa17d..fbb0421ea 100644 --- a/src/anndata/_io/specs/registry.py +++ b/src/anndata/_io/specs/registry.py @@ -483,9 +483,11 @@ def read_elem_lazy( Reading a dense matrix from a zarr store lazily: - >>> adata.layers["dense"] = ad.experimental.read_elem_lazy(g["layers/dense"]) + >>> adata.layers["dense"] = ad.experimental.read_elem_lazy( + ... g["layers/dense"], chunks=(500, 500) + ... ) >>> adata.layers["dense"] - dask.array + dask.array Making a new anndata object from on-disk, with custom chunks: From e2aecd5f3f1b880617eabf14d176636dfcaa5c54 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 2 Apr 2026 10:53:58 +0200 Subject: [PATCH 22/24] fix: remove unneeded warnings --- pyproject.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e849e3ebc..895285ed3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -171,12 +171,8 @@ filterwarnings_when_strict = [ "default:(Observation|Variable) names are not unique. To make them unique:UserWarning", "default::scipy.sparse.SparseEfficiencyWarning", "default::dask.array.core.PerformanceWarning", - "default:anndata will no longer support zarr v2:DeprecationWarning", "default:Consolidated metadata is:UserWarning", - "default:.*Structured:zarr.core.dtype.common.UnstableSpecificationWarning", - "default:.*FixedLengthUTF32:zarr.core.dtype.common.UnstableSpecificationWarning", "default:Automatic shard shape inference is experimental", - "default:Writing zarr v2:UserWarning", # TODO: Remove in conjunction with or before https://github.com/scverse/anndata/pull/1707 "default:.*will obey copy-on-write semantics:FutureWarning", ] From 926667b03b942d2826b1c99f06a7bdec082370c8 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 2 Apr 2026 10:58:39 +0200 Subject: [PATCH 23/24] fix: structured still unstable --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 895285ed3..e1cdf142a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -172,6 +172,7 @@ filterwarnings_when_strict = [ "default::scipy.sparse.SparseEfficiencyWarning", "default::dask.array.core.PerformanceWarning", "default:Consolidated metadata is:UserWarning", + "default:.*Structured:zarr.core.dtype.common.UnstableSpecificationWarning", "default:Automatic shard shape inference is experimental", # TODO: Remove in conjunction with or before https://github.com/scverse/anndata/pull/1707 "default:.*will obey copy-on-write semantics:FutureWarning", From f47f04ec6de5d8a10efc13d70eee72273a0606f1 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Thu, 2 Apr 2026 11:35:29 +0200 Subject: [PATCH 24/24] fix: fixedlenutf32 still unstable --- pyproject.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index e1cdf142a..55704dc5c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -172,7 +172,9 @@ filterwarnings_when_strict = [ "default::scipy.sparse.SparseEfficiencyWarning", "default::dask.array.core.PerformanceWarning", "default:Consolidated metadata is:UserWarning", + # https://github.com/zarr-developers/zarr-python/pull/3781 "default:.*Structured:zarr.core.dtype.common.UnstableSpecificationWarning", + "default:.*FixedLengthUTF32:zarr.core.dtype.common.UnstableSpecificationWarning", "default:Automatic shard shape inference is experimental", # TODO: Remove in conjunction with or before https://github.com/scverse/anndata/pull/1707 "default:.*will obey copy-on-write semantics:FutureWarning",