Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
155 commits
Select commit Hold shift + click to select a range
19698c3
WIP
keller-mark May 6, 2024
5100a40
More tests passing
keller-mark May 6, 2024
4c9d01f
Fix df read bug
keller-mark May 6, 2024
eb8b69c
More tests passing after fixing zero-dimensional get bug in pizzarr
keller-mark May 7, 2024
58f5ce4
WIP: writing
keller-mark May 7, 2024
a15513a
Fix more tests
keller-mark May 7, 2024
1ef1be6
Zarr df writing
keller-mark May 7, 2024
77a7ee2
WIP: ZarrAnnData class
keller-mark May 7, 2024
27ce6c1
Tests passing
keller-mark May 7, 2024
8790715
Tests that compare h5ad to zarr
keller-mark May 8, 2024
bb0c6c7
Use Rarr to read full numeric arrays
keller-mark May 17, 2024
0456ecd
Fix bugs. Add test for from_SingleCellExperiment with Zarr
keller-mark Jun 18, 2024
acac772
Add a to_dense param to ZarrAnnData constructor. Add overwrite params…
keller-mark Jun 20, 2024
30316ed
Update
keller-mark Jun 20, 2024
c6b4d89
Backwards dense/sparse
keller-mark Jun 20, 2024
9cadc26
Merge branch 'keller-mark/zarr' of https://github.com/keller-mark/ann…
Artur-man Nov 1, 2024
a40a618
Merge branch 'keller-mark/zarr' into zarr
Artur-man Nov 1, 2024
1afd6eb
Simplify how obs and var names handled in ZarrAnnData (similar to #171)
Artur-man Nov 1, 2024
7f53049
update extdata and documentation
Artur-man Nov 1, 2024
3800f38
fix set/get zarr _index, update text example.zarr and update tests si…
Artur-man Nov 2, 2024
4a7d4c2
Merge pull request #5 from Artur-man/zarr
keller-mark Nov 5, 2024
c881bb9
Merge
keller-mark Nov 5, 2024
4a1bbde
Fix test
keller-mark Nov 5, 2024
15dfbde
Revert unnecessary changes
keller-mark Nov 5, 2024
438809a
Formatting
keller-mark Nov 5, 2024
2215402
Merge pull request #6 from keller-mark/keller-mark/zarr-2
keller-mark Nov 5, 2024
087ffb7
Add comments
keller-mark Nov 5, 2024
37d1ae5
Merge pull request #7 from keller-mark/keller-mark/comments
keller-mark Nov 5, 2024
357a8d7
remove unnecessary example zarr store
Artur-man Nov 6, 2024
d192e68
lintr and R check for zarr related utilities and functions, updated s…
Artur-man Nov 6, 2024
1e0e868
add pizzarr to Suggests and README
Artur-man Nov 6, 2024
fe07028
proj
Artur-man Mar 10, 2025
7ef94f8
Merge branch 'main' into keller-mark/zarr
Artur-man Mar 10, 2025
bf8e797
add keller-mark/pizzarr to Remotes
Artur-man Mar 10, 2025
5abcc75
zip example.zarr
Artur-man Mar 14, 2025
c5ec1c0
Merge branch 'main' into keller-mark/zarr
Artur-man Apr 10, 2025
84ad61f
Merge branch 'main' into keller-mark/zarr
Artur-man Apr 12, 2025
c3cb8aa
adapt read_zarr to Rarr
Artur-man Apr 12, 2025
63f102c
adapt write_zarr to Rarr
Artur-man Apr 12, 2025
a31ff9b
update to most recent anndataR
Artur-man Nov 10, 2025
e98f877
remove old scripts
Artur-man Nov 23, 2025
b0bfad4
update write_zarr
Artur-man Nov 23, 2025
7ebe151
initial update to ZarrAnnData
Artur-man Nov 23, 2025
96c5824
update ZarrAnnData, documentation, and implement read_zarr_rec_array
Artur-man Nov 23, 2025
c41a042
review read zarr helpers, and update tests
Artur-man Nov 23, 2025
370ac17
update read_zarr, read tests pass
Artur-man Nov 23, 2025
ddb5271
some updates for writing zarr
Artur-man Nov 23, 2025
755904d
update write_empty_zarr
Artur-man Nov 24, 2025
4290aed
remove pizzarr, update documentation
Artur-man Nov 24, 2025
e43c819
remove pizzarr from tests
Artur-man Nov 24, 2025
a98b58f
fix test-ZarrAnnData
Artur-man Nov 24, 2025
2d551d8
update ZarrAnnData to imitate HDF5AnnData
Artur-man Nov 24, 2025
42fcbb1
check redundant files, correct lines
Artur-man Nov 24, 2025
205dee4
update example_h5ad.py, add zarr and change to example_files.py
Artur-man Nov 24, 2025
f7638eb
add new test example
Artur-man Nov 25, 2025
acede3c
some linting changes
Artur-man Nov 26, 2025
07c92f7
remove read/write_zattrs since implemented in Rarr
Artur-man Nov 26, 2025
2b672ab
access read/write_zarr_attr
Artur-man Nov 26, 2025
555a634
Merge branch 'main' into keller-mark/zarr
Artur-man Dec 1, 2025
dcaf157
add some missing tests
Artur-man Dec 1, 2025
b10faa5
Merge branch 'main' into keller-mark/zarr
Artur-man Dec 4, 2025
e3d08f8
update readers, update tests
Artur-man Dec 4, 2025
1e2addc
correct nullable string zarr array write/read, introduce ordering in …
Artur-man Dec 5, 2025
570325b
do some linting, fix commented out code
Artur-man Dec 5, 2025
0fac149
update some zarr writers and classes
Artur-man Dec 5, 2025
79023b4
fix documentation
Artur-man Dec 5, 2025
bece447
fix compression interface for zarr
Artur-man Dec 5, 2025
a46c9e1
full lint check
Artur-man Dec 5, 2025
f90d70a
fix examples
Artur-man Dec 5, 2025
73934a7
check, biocheck and lintr
Artur-man Dec 5, 2025
f42a6df
fix development status
Artur-man Dec 5, 2025
a373973
air format
Artur-man Dec 5, 2025
2f73501
air format test
Artur-man Dec 5, 2025
540852d
update example.zarr.zip, skip some test (waiting for Rarr)
Artur-man Dec 5, 2025
a22d007
update example.zarr, fix some read_zarr_
Artur-man Dec 6, 2025
1cc5ff6
fix examples
Artur-man Dec 6, 2025
2499d5c
remove overwrite
Artur-man Dec 6, 2025
bd6238a
R code styling
Artur-man Dec 9, 2025
aaf9801
fixes from @lazappi
Artur-man Dec 12, 2025
43d4f1f
Merge branch 'main' into keller-mark/zarr
Artur-man Dec 12, 2025
73ee0e3
air format
Artur-man Dec 12, 2025
d70a011
update some documentation
Artur-man Dec 20, 2025
ccb0cdf
fix some tests
Artur-man Dec 21, 2025
fe8f196
more fixes on anndata-zarr integration
Artur-man Dec 21, 2025
e6efbf2
update ZarrAnnData$initialize
Artur-man Jan 1, 2026
8b0d1b0
update zarr compression
Artur-man Jan 1, 2026
bafae8e
fix column-order here, C based ordering for arrays
Artur-man Jan 2, 2026
eead040
implement roundtrip tests for anndata-zarr
Artur-man Jan 2, 2026
64e4289
add zarr to vignettes
Artur-man Jan 2, 2026
23f8ac5
update README and software_design.rmd
Artur-man Jan 2, 2026
0852908
update AnnData-usage
Artur-man Jan 2, 2026
8677fad
update write_zarr documentation
Artur-man Jan 2, 2026
efa2ca0
update write_zarr_null
Artur-man Jan 8, 2026
5025035
fix rec_array, update tests and example datasets
Artur-man Jan 16, 2026
0c53019
fix duplicate chunks in Rmd
Artur-man Jan 16, 2026
5011c2c
add write_zarr_null
Artur-man Jan 16, 2026
e90e7b9
update write string array (zarr), air and lint
Artur-man Jan 19, 2026
6b06099
implement writing empty zarr elements
Artur-man Jan 20, 2026
2ac2eba
update tests for rec_array conformance of h5ad and zarr
Artur-man Jan 20, 2026
b85e706
update mapping conformance test for h5ad and zarr
Artur-man Jan 20, 2026
b26c9b5
implement H5_ITER like ordering and fix h5ad vs zarr testing
Artur-man Jan 21, 2026
2c7b5b3
air and lint
Artur-man Jan 21, 2026
978482f
fix test bug
Artur-man Jan 21, 2026
36c2960
do not call expect_equal outside of test
Artur-man Jan 21, 2026
4b657d1
implement examples, test and datasets for zarr v3
Artur-man Feb 25, 2026
20bb482
Merge branch 'main' into keller-mark/zarr
Artur-man Feb 25, 2026
ee635b1
fix issues, lint and update example datasets to new anndata version
Artur-man Feb 25, 2026
4d98248
Merge branch 'main' into keller-mark/zarr
Artur-man Feb 25, 2026
c6bb382
Merge branch 'main' into keller-mark/zarr
Artur-man Mar 13, 2026
3c5c22a
lint and merge
Artur-man Mar 13, 2026
5bcaa5d
revert some lines
Artur-man Mar 13, 2026
d6021e9
small changes
Artur-man Mar 13, 2026
493d707
revert small changes
Artur-man Mar 13, 2026
5698d7b
air format some tests
Artur-man Mar 13, 2026
e483e8e
Merge remote-tracking branch 'origin/devel' into keller-mark/zarr
lazappi Apr 13, 2026
b7080c3
Set v2 in write_zarr_* helpers
lazappi Apr 14, 2026
fb90100
Fix stop message in write_zarr_element()
lazappi Apr 14, 2026
417fd77
Fix roxygen comment in write_zarr_element()
lazappi Apr 14, 2026
5bcfed9
Expand compression list in as_ZarrAnnData()
lazappi Apr 14, 2026
8be4459
Fix H5_ITER_INC_ORDERING docs
lazappi Apr 14, 2026
9d3e8aa
Fix as_ZarrAnnData() compression docs
lazappi Apr 14, 2026
2b50657
Fix Zarr varm roundtrip test
lazappi Apr 14, 2026
6d567f8
Review duplicate entry in README
lazappi Apr 14, 2026
bff6b83
Fix typo in AnnData-usage docs
lazappi Apr 14, 2026
2ee61eb
Fix comma in software design vignette
lazappi Apr 14, 2026
c7ebd33
Adjust class descriptions in software design vignette
lazappi Apr 14, 2026
5263b6f
Roxygenise
lazappi Apr 14, 2026
492cdb7
Minor text fixes
lazappi Apr 14, 2026
5b5aeed
Document .get_compressor
lazappi Apr 14, 2026
08b48f0
Minor fixes to function docs
lazappi Apr 14, 2026
3a2410c
Comment logic in create_zarr_group()
lazappi Apr 14, 2026
21c389e
Fix indentation in read_zarr_sparse_array()
lazappi Apr 14, 2026
8084f44
Add construct sparse matrix helper
lazappi Apr 14, 2026
535deb4
Add ZARR_METADATA_FILES vector
lazappi Apr 14, 2026
beec0d2
Eval Zarr chunks in vignettes
lazappi Apr 14, 2026
b6977ff
Merge test-Zarrv3-read.R into test-Zarr-read.R
lazappi Apr 14, 2026
7c29c7e
Combine roundtrip tests
lazappi Apr 15, 2026
f438aba
Add roundtrip test helpers
lazappi Apr 15, 2026
c67d6bf
Refactor test-h5ad-zarr.R to use helper
lazappi Apr 15, 2026
6deaf01
Refactor example files script
lazappi Apr 15, 2026
6197b79
Remove H5_ITER_INC_ORDERING()
lazappi Apr 15, 2026
28f8a6a
Remove Zarr compression comment
lazappi Apr 15, 2026
01768ec
Fix factor creation in read_zarr_categorical()
lazappi Apr 15, 2026
11b6816
Pin Rarr version
lazappi Apr 15, 2026
6f17dfa
Delete existing Zarr path before writing
lazappi Apr 15, 2026
bd2e1b4
Add helper functions for accessing Zarr keys
lazappi Apr 15, 2026
3a233e4
Update read_zarr_element() error message
lazappi Apr 15, 2026
8fc907b
Add dimname warnings to ZarrAnnData
lazappi Apr 15, 2026
dc9a5b5
Add Zarr writeability checks/tests
lazappi Apr 15, 2026
a236045
Roxygenise, lint, style
lazappi Apr 15, 2026
6b6993d
Use setup-bioc for all GHA
lazappi Apr 15, 2026
9ecfd0c
Update WORDLIST
lazappi Apr 15, 2026
5213860
Add .venv to .Rbuildignore
lazappi Apr 15, 2026
8fbbd77
Clean up test output
lazappi Apr 15, 2026
e3bd989
Add Zarr to benchmarks
lazappi Apr 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,4 @@
^data$
^codecov\.yml$
^benchmarks$
^\.venv$
5 changes: 3 additions & 2 deletions .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ jobs:
steps:
- uses: actions/checkout@v6

- uses: r-lib/actions/setup-r@v2
- name: Setup R and Bioconductor
uses: grimbough/bioc-actions/setup-bioc@v1
with:
use-public-rspm: true
bioc-version: devel

- name: Install air
run: curl -LsSf https://github.com/posit-dev/air/releases/latest/download/air-installer.sh | sh
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,10 @@ jobs:
with:
python-version: "3.x"

- uses: r-lib/actions/setup-r@v2
- name: Setup R and Bioconductor
uses: grimbough/bioc-actions/setup-bioc@v1
with:
use-public-rspm: true
bioc-version: devel

- uses: r-lib/actions/setup-r-dependencies@v2
with:
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/pr-commands.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@ jobs:
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}

- uses: r-lib/actions/setup-r@v2
- name: Setup R and Bioconductor
uses: grimbough/bioc-actions/setup-bioc@v1
with:
use-public-rspm: true
bioc-version: devel

- uses: r-lib/actions/setup-r-dependencies@v2
with:
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test-coverage.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ jobs:
sudo apt-get update
sudo apt-get -y install hdf5-tools libsz2 libaec-dev
- name: Setup R
uses: r-lib/actions/setup-r@v2
- name: Setup R and Bioconductor
uses: grimbough/bioc-actions/setup-bioc@v1
with:
use-public-rspm: true
bioc-version: devel

- name: Install R dependencies
uses: r-lib/actions/setup-r-dependencies@v2
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,4 @@ benchmarks/results_*.txt
vignettes/data/*.h5ad
/doc/
/Meta/
/data/
/data/
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Suggests:
knitr,
processx,
rhdf5 (>= 2.52.1),
Rarr (>= 1.11.12),
rmarkdown,
S4Vectors,
Seurat,
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ export(as_AnnData)
export(generate_dataset)
export(get_generator_types)
export(read_h5ad)
export(read_zarr)
export(write_h5ad)
export(write_zarr)
importFrom(Matrix,as.matrix)
importFrom(Matrix,sparseMatrix)
importFrom(Matrix,t)
Expand All @@ -35,3 +37,4 @@ importFrom(reticulate,r_to_py)
importFrom(rlang,`%||%`)
importFrom(rlang,caller_env)
importFrom(stats,setNames)
importFrom(utils,tail)
62 changes: 62 additions & 0 deletions R/AbstractAnnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,37 @@ AbstractAnnData <- R6::R6Class(
)
},
#' @description
#' Convert this object to a [`ZarrAnnData`]
#'
#' Delegates to [as_ZarrAnnData()], see there for details on the conversion
#'
#' @param file See [as_ZarrAnnData()]
#' @param compression See [as_ZarrAnnData()]
#' @param mode See [as_ZarrAnnData()]
#'
#' @return A [`ZarrAnnData`] object
as_ZarrAnnData = function(
  file,
  compression = c(
    "none", "gzip", "blosc", "zstd",
    "lzma", "bz2", "zlib", "lz4"
  ),
  mode = c("w-", "r", "r+", "a", "w", "x")
) {
  # Forward every argument unchanged, passing this object as `adata`
  as_ZarrAnnData(
    adata = self,
    file = file,
    compression = compression,
    mode = mode
  )
},
#' @description
#' Write the `AnnData` object to an H5AD file
#'
#' See [write_h5ad()] for details
Expand All @@ -313,6 +344,37 @@ AbstractAnnData <- R6::R6Class(
chunk_size = chunk_size,
mode = mode
)
},
#' @description
#' Write the `AnnData` object to a Zarr store
#'
#' Delegates to [write_zarr()], see there for details
#'
#' @param path See [write_zarr()]
#' @param compression See [write_zarr()]
#' @param mode See [write_zarr()]
#'
#' @return `path` invisibly
write_zarr = function(
  path,
  compression = c(
    "none", "gzip", "blosc", "zstd",
    "lzma", "bz2", "zlib", "lz4"
  ),
  mode = c("w-", "r", "r+", "a", "w", "x")
) {
  # Forward every argument unchanged, passing this object as `object`
  write_zarr(object = self, path, compression = compression, mode = mode)
}
),
private = list(
Expand Down
12 changes: 12 additions & 0 deletions R/AnnData-usage.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#'
#' - [InMemoryAnnData] stores data in memory
#' - [HDF5AnnData] provides an interface to a H5AD file
#' - [ZarrAnnData] provides an interface to a Zarr store
#' - [ReticulateAnnData] wraps a Python `AnnData` object via \pkg{reticulate}
#'
#' See the class documentation for details.
Expand Down Expand Up @@ -89,6 +90,7 @@
#' \item{`as_Seurat()`}{Convert to [`SeuratObject::Seurat`], see [as_Seurat()]}
#' \item{`as_InMemoryAnnData()`}{Convert to [`InMemoryAnnData`], see [as_InMemoryAnnData()]}
#' \item{`as_HDF5AnnData()`}{Convert to [`HDF5AnnData`], see [as_HDF5AnnData()]}
#' \item{`as_ZarrAnnData()`}{Convert to [`ZarrAnnData`], see [as_ZarrAnnData()]}
#' \item{`as_ReticulateAnnData()`}{Convert to [`ReticulateAnnData`], see [as_ReticulateAnnData()]}
#' }
#'
Expand All @@ -102,6 +104,14 @@
#' }
#' }
#'
#' \describe{
#' \item{
#' `write_zarr()`
#' }{
#' Write the `AnnData` object to a Zarr store, see [write_zarr()]
#' }
#' }
#'
#' ## General methods:
#'
#' \describe{
Expand All @@ -113,6 +123,7 @@
#' \describe{
#' \item{[AnnData()]}{Create an [InMemoryAnnData] object}
#' \item{[read_h5ad()]}{Read an `AnnData` from a H5AD file}
#' \item{[read_zarr()]}{Read an `AnnData` from a Zarr store}
#' \item{[as_AnnData()]}{Convert other objects to an `AnnData` object}
#' }
#'
Expand All @@ -122,6 +133,7 @@
#' inherit from
#' @seealso [InMemoryAnnData] for the in-memory implementation of `AnnData`
#' @seealso [HDF5AnnData] for the HDF5-backed implementation of `AnnData`
#' @seealso [ZarrAnnData] for the Zarr-backed implementation of `AnnData`
#' @seealso [ReticulateAnnData] for the reticulate-based implementation that wraps Python AnnData objects
#'
#' @name AnnData-usage
Expand Down
105 changes: 105 additions & 0 deletions R/Rarr_utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Zarr metadata files used to identify valid Zarr nodes (arrays or groups).
# A directory containing at least one of these file names is treated as a Zarr
# node by zarr_path_exists(), and is_zarr_empty() ignores these names when
# deciding whether a store has any content.
ZARR_METADATA_FILES <- c(".zarray", ".zattrs", ".zgroup", "zarr.json")

#' create_zarr_group
#'
#' Create a Zarr group, creating any missing parent groups first
#'
#' @param store The location of the Zarr store
#' @param name Name of the group, nested groups are separated by `"/"`
#'   (e.g. `"a/b/c"`)
#' @param version Zarr version, currently only `"v2"` is supported
#'
#' @details
#' The group is created as a directory under `store` containing a `.zgroup`
#' file. `version` is validated before anything is written, so requesting an
#' unsupported version does not leave a partially created group on disk.
#'
#' @return `NULL`, invisibly
#'
#' @noRd
create_zarr_group <- function(store, name, version = "v2") {
  # Fail early: reject unsupported versions before touching the file system
  switch(
    version,
    v2 = NULL,
    v3 = cli_abort("Currently only zarr v2 is supported!"),
    cli_abort("Only zarr v2 is supported. Use version = 'v2'")
  )

  # Split "a/b/c" into c("a", "b", "c")
  parts <- strsplit(name, split = "/", fixed = TRUE)[[1]]
  # Path of the group to create, relative to the store
  target <- paste(parts, collapse = "/")

  if (length(parts) > 1) {
    # Path of the immediate parent group, e.g. "a/b" for "a/b/c"
    parent <- paste(parts[-length(parts)], collapse = "/")
    # Recursively ensure the parent group exists before creating the target,
    # using the same Zarr version as the target
    if (!dir.exists(file.path(store, parent))) {
      create_zarr_group(store = store, name = parent, version = version)
    }
  }

  # showWarnings = FALSE: it is fine if the directory already exists
  dir.create(file.path(store, target), showWarnings = FALSE)
  write(
    "{\"zarr_format\":2}",
    file = file.path(store, target, ".zgroup")
  )

  invisible(NULL)
}

#' create_zarr
#'
#' Create Zarr store
#'
#' @param store The location of the Zarr store
#' @param version Zarr version
#'
#' @details
#' The store is created as a root Zarr group: the last component of `store`
#' is used as the group name and the remainder as the directory to create it
#' in. The path is split with `dirname()`/`basename()` so that a trailing
#' slash, a bare relative name (e.g. `"example.zarr"`) or regular expression
#' metacharacters in the store name are handled correctly.
#'
#' @return `NULL`
#'
#' @noRd
create_zarr <- function(store, version = "v2") {
  create_zarr_group(
    # dirname() yields "." for a bare relative name, keeping the store in the
    # working directory instead of resolving to the file system root
    store = dirname(store),
    name = basename(store),
    version = version
  )
}

#' is_zarr_empty
#'
#' Check if a Zarr store is empty
#'
#' A store counts as empty when its top level contains nothing other than
#' Zarr metadata files (see `ZARR_METADATA_FILES`). Only entries returned by
#' `list.files()` with its default `all.files = FALSE` are considered, so
#' hidden entries are not taken into account.
#'
#' @param store The location of the Zarr store
#'
#' @return Returns `TRUE` if the Zarr store is empty
#'
#' @noRd
is_zarr_empty <- function(store) {
  entries <- list.files(store, full.names = FALSE, recursive = FALSE)
  # Anything that is not a metadata file means the store has content
  non_metadata <- entries[!(entries %in% ZARR_METADATA_FILES)]
  length(non_metadata) == 0
}

#' Zarr path exists
#'
#' Check whether a path inside a Zarr store points to a Zarr node
#'
#' @param store Path to a Zarr store
#' @param target_path The path within the store to test for
#'
#' @return `TRUE` if `target_path` is a directory in `store` that contains at
#'   least one of the `ZARR_METADATA_FILES`, otherwise `FALSE`
#'
#' @noRd
zarr_path_exists <- function(store, target_path) {
  node_dir <- file.path(store, target_path)
  # A missing directory can never be a Zarr node
  if (!dir.exists(node_dir)) {
    return(FALSE)
  }
  # all.files = TRUE is required because several of the metadata file names
  # start with a dot and would otherwise not be listed
  entries <- list.files(
    path = node_dir,
    all.files = TRUE,
    full.names = FALSE,
    recursive = FALSE
  )
  any(ZARR_METADATA_FILES %in% entries)
}
Loading
Loading