diff --git a/NEWS.md b/NEWS.md index 080c415c..cfa91390 100644 --- a/NEWS.md +++ b/NEWS.md @@ -28,6 +28,7 @@ - Seurat PCA loadings only contain variable features, not all genes - Now properly expands loadings matrix to include all genes with zeros for non-variable features - Adds warning when rownames don't match var_names during conversion +- Improve mapping of dimensionality reduction slots during conversion between AnnData, SingleCellExperiment, and Seurat (PR #333) # anndataR 0.99.2 diff --git a/R/as_Seurat.R b/R/as_Seurat.R index a23b128a..c48fa538 100644 --- a/R/as_Seurat.R +++ b/R/as_Seurat.R @@ -589,23 +589,46 @@ as_Seurat <- function( # nolint start: object_name_linter object_length_linter .as_Seurat_guess_reductions <- function(adata) { # nolint end: object_name_linter object_length_linter + # Get mappings keyed by AnnData obsm names from centralized location + obsm_mappings <- .get_dimred_mapping(from = "anndata_obsm") + purrr::map(adata$obsm_keys(), function(.obsm) { if (!is.numeric(as.matrix(adata$obsm[[.obsm]]))) { return(NULL) } + # Determine the Seurat reduction name using common mappings + seurat_name <- if (.obsm %in% names(obsm_mappings)) { + obsm_mappings[[.obsm]]$seurat + } else { + .obsm + } + mapping <- c( # Make sure we have valid keys here to avoid warnings later - key = SeuratObject::Key(.obsm, quiet = TRUE), + key = SeuratObject::Key(seurat_name, quiet = TRUE), embeddings = .obsm ) - if (.obsm == "X_pca" && "PCs" %in% names(adata$varm)) { - mapping["loadings"] <- "PCs" + + # Check if this obsm has associated loadings in varm + if (.obsm %in% names(obsm_mappings)) { + dimred_info <- obsm_mappings[[.obsm]] + varm_key <- dimred_info$anndata_varm + if (!is.null(varm_key) && varm_key %in% names(adata$varm)) { + mapping["loadings"] <- varm_key + } } mapping }) |> - setNames(adata$obsm_keys()) |> + setNames(purrr::map_chr(adata$obsm_keys(), function(.obsm) { + # Use Seurat naming convention for the reduction names + if (.obsm %in% names(obsm_mappings)) { + obsm_mappings[[.obsm]]$seurat + } else { + .obsm + } + })) |> purrr::compact() } diff --git a/R/as_SingleCellExperiment.R b/R/as_SingleCellExperiment.R index a482e318..efe587a0 100644 --- a/R/as_SingleCellExperiment.R +++ b/R/as_SingleCellExperiment.R @@ -292,20 +292,46 @@ as_SingleCellExperiment <- function( # nolint start: object_length_linter object_name_linter .as_SCE_guess_reducedDims <- function(adata) { # nolint end: object_length_linter object_name_linter - purrr::map(adata$obsm_keys(), function(.obsm) { + + # Get mappings keyed by AnnData obsm names from centralized location + obsm_mappings <- .get_dimred_mapping(from = "anndata_obsm") + + # Create mapping and name pairs for each obsm key + results <- purrr::map(adata$obsm_keys(), function(.obsm) { if (!is.numeric(as.matrix(adata$obsm[[.obsm]]))) { return(NULL) } mapping <- c(sampleFactors = .obsm) - if (.obsm == "X_pca" && "PCs" %in% names(adata$varm)) { - mapping["featureLoadings"] <- "PCs" + + # Check if this is a known dimensionality reduction with loadings + if (.obsm %in% names(obsm_mappings)) { + dimred_info <- obsm_mappings[[.obsm]] + if ( + !is.null(dimred_info$anndata_varm) && + dimred_info$anndata_varm %in% names(adata$varm) + ) { + mapping["featureLoadings"] <- dimred_info$anndata_varm + } } - mapping + # Determine the final name: use Bioconductor convention if available + final_name <- if (.obsm %in% names(obsm_mappings)) { + obsm_mappings[[.obsm]]$sce + } else { + .obsm + } + + list(mapping = mapping, name = final_name) }) |> - setNames(adata$obsm_keys()) |> purrr::compact() + + # Extract mappings and names, then set names + result_names <- purrr::map_chr(results, "name") + results <- purrr::map(results, "mapping") + names(results) <- result_names + + results } # nolint start: object_length_linter object_name_linter @@ -382,7 +408,9 @@ as_SingleCellExperiment <- function( } loadings <- adata$varm[[varm_key]] - rownames(loadings) <- colnames(embedding) + # Add rownames (variable names) and colnames to loadings matrix + rownames(loadings) <- adata$var_names + colnames(loadings) <- colnames(embedding) } else { loadings <- matrix(nrow = 0, ncol = ncol(embedding)) } diff --git a/R/common_dimred_mappings.R b/R/common_dimred_mappings.R new file mode 100644 index 00000000..c3907116 --- /dev/null +++ b/R/common_dimred_mappings.R @@ -0,0 +1,65 @@ +#' Get dimensionality reduction mappings keyed by a specific framework +#' +#' @param from Character string specifying which framework to use as keys. +#' One of: "sce", "seurat", "anndata_obsm", "anndata_varm", "anndata_uns" +#' @return A named list where names are the values from the specified framework +#' and values are the complete mapping information for each dimred +#' @noRd +.get_dimred_mapping <- function( + from = c("sce", "seurat", "anndata_obsm", "anndata_varm", "anndata_uns") +) { + from <- match.arg(from) + + all_mappings <- .get_common_dimred_mappings() + result <- list() + + for (mapping in all_mappings) { + key <- mapping[[from]] + if (!is.null(key)) { + result[[key]] <- mapping + } + } + + result +} + +#' Common dimensionality reduction mappings between AnnData, SingleCellExperiment, and Seurat +#' +#' This table defines the standard mappings between scanpy/AnnData naming +#' conventions, Bioconductor/SingleCellExperiment naming conventions, and +#' Seurat naming conventions for common dimensionality reduction techniques. +#' +#' Based on the conventions documented in dr.md: +#' - AnnData uses "X_pca", "X_tsne", "X_umap" in obsm and "PCs" in varm +#' - Seurat uses lowercase "pca", "tsne", "umap" +#' - SingleCellExperiment uses uppercase "PCA", "TSNE", "UMAP" +#' +#' @return A list of dimensionality reduction mappings, where each element contains +#' the naming conventions across frameworks and associated metadata +#' @noRd +.get_common_dimred_mappings <- function() { + list( + # Core dimensionality reductions with documented naming conventions + list( + sce = "PCA", + seurat = "pca", + anndata_obsm = "X_pca", + anndata_varm = "PCs", + anndata_uns = NULL + ), + list( + sce = "tSNE", + seurat = "tsne", + anndata_obsm = "X_tsne", + anndata_varm = NULL, + anndata_uns = NULL + ), + list( + sce = "UMAP", + seurat = "umap", + anndata_obsm = "X_umap", + anndata_varm = NULL, + anndata_uns = NULL + ) + ) +} diff --git a/R/from_Seurat.R b/R/from_Seurat.R index bcd8605d..f6dadabb 100644 --- a/R/from_Seurat.R +++ b/R/from_Seurat.R @@ -414,6 +414,8 @@ from_Seurat <- function( # nolint start: object_name_linter object_length_linter .from_Seurat_guess_obsms <- function(seurat_obj, assay_name) { # nolint end: object_name_linter object_length_linter + # Get reverse mappings from centralized location + reverse_seurat_mappings <- .get_dimred_mapping(from = "seurat") obsm_mapping <- c() @@ -424,7 +426,13 @@ from_Seurat <- function( next } - obsm_mapping[reduction_name] <- reduction_name + # Create mapping: AnnData obsm name -> Seurat reduction name + anndata_name <- if (reduction_name %in% names(reverse_seurat_mappings)) { + reverse_seurat_mappings[[reduction_name]]$anndata_obsm + } else { + reduction_name + } + obsm_mapping[anndata_name] <- reduction_name } obsm_mapping @@ -433,6 +441,8 @@ from_Seurat <- function( # nolint start: object_name_linter object_length_linter .from_Seurat_guess_varms <- function(seurat_obj, assay_name) { # nolint end: object_name_linter object_length_linter + # Get reverse mappings from centralized location + reverse_seurat_varm_mappings <- .get_dimred_mapping(from = "seurat") varm_mapping <- c() @@ -445,7 +455,17 @@ from_Seurat <- function( nrow(loadings) == nrow(seurat_obj) && SeuratObject::DefaultAssay(reduction) == assay_name ) { - varm_mapping[reduction_name] <- reduction_name + # Create mapping: AnnData varm name -> Seurat reduction name + anndata_varm_name <- if ( + reduction_name %in% + names(reverse_seurat_varm_mappings) && + !is.null(reverse_seurat_varm_mappings[[reduction_name]]$anndata_varm) + ) { + reverse_seurat_varm_mappings[[reduction_name]]$anndata_varm + } else { + reduction_name + } + varm_mapping[anndata_varm_name] <- reduction_name } } diff --git a/R/from_SingleCellExperiment.R b/R/from_SingleCellExperiment.R index ec018a85..067abe21 100644 --- a/R/from_SingleCellExperiment.R +++ b/R/from_SingleCellExperiment.R @@ -155,11 +155,24 @@ from_SingleCellExperiment <- function( # nolint start: object_length_linter object_name_linter .from_SCE_guess_obsm <- function(sce) { # nolint end: object_length_linter object_name_linter - obsm_mapping <- self_name(SingleCellExperiment::reducedDimNames(sce)) + # Get reverse mappings from centralized location + reverse_dimred_mappings <- .get_dimred_mapping(from = "sce") - if (rlang::is_empty(obsm_mapping)) { + sce_names <- SingleCellExperiment::reducedDimNames(sce) + + if (rlang::is_empty(sce_names)) { c() } else { + # Create mapping: AnnData name -> SCE name + obsm_mapping <- c() + for (sce_name in sce_names) { + anndata_name <- if (sce_name %in% names(reverse_dimred_mappings)) { + reverse_dimred_mappings[[sce_name]]$anndata_obsm + } else { + sce_name + } + obsm_mapping[anndata_name] <- sce_name + } obsm_mapping } } @@ -167,12 +180,25 @@ from_SingleCellExperiment <- function( # nolint start: object_length_linter object_name_linter .from_SCE_guess_varm <- function(sce) { # nolint end: object_length_linter object_name_linter + # Get reverse mappings for varm from centralized location + reverse_varm_mappings <- .get_dimred_mapping(from = "sce") + varm_mapping <- c() for (reduction_name in names(SingleCellExperiment::reducedDims(sce))) { reduction <- SingleCellExperiment::reducedDim(sce, reduction_name) if (inherits(reduction, "LinearEmbeddingMatrix")) { - varm_mapping[reduction_name] <- reduction_name + # Create mapping: AnnData varm name -> SCE reduction name + anndata_varm_name <- if ( + reduction_name %in% + names(reverse_varm_mappings) && + !is.null(reverse_varm_mappings[[reduction_name]]$anndata_varm) + ) { + reverse_varm_mappings[[reduction_name]]$anndata_varm + } else { + reduction_name + } + varm_mapping[anndata_varm_name] <- reduction_name } } diff --git a/R/known_issues.R b/R/known_issues.R index 64441927..7ee3b210 100644 --- a/R/known_issues.R +++ b/R/known_issues.R @@ -41,6 +41,11 @@ is_known <- function(backend, slot, dtype, process, known_issues = NULL) { known_issues <- read_known_issues() } + # Handle empty known_issues data frame + if (nrow(known_issues) == 0) { + return(logical(0)) + } + filt <- rep(TRUE, nrow(known_issues)) if (!is.null(backend)) { diff --git a/inst/known_issues.yaml b/inst/known_issues.yaml index 8970b59b..5f6a4283 100644 --- a/inst/known_issues.yaml +++ b/inst/known_issues.yaml @@ -1,17 +1 @@ -known_issues: - - backend: to_SCE - slot: - - obsm - - varm - dtype: - - pca - process: [convert] - error_message: - sampleFactors(reducedDims(sce)$pca) (`actual`) not equal to ad$obsm[["X_pca"]] (`expected`). - - `dimnames(actual)` is a list - `dimnames(expected)` is absent - description: converted sce object has dimnames(), whilst the original anndata does not. - proposed_solution: Investigate if this is a problem or not. - to_investigate: True - to_fix: False +known_issues: [] diff --git a/tests/testthat/test-as_Seurat.R b/tests/testthat/test-as_Seurat.R index 56227e3c..5efeeb28 100644 --- a/tests/testthat/test-as_Seurat.R +++ b/tests/testthat/test-as_Seurat.R @@ -34,7 +34,7 @@ test_that("as_Seurat retains number of observations and features", { for (obs_key in colnames(ad$obs)) { test_that(paste0("as_Seurat retains obs key: ", obs_key), { msg <- message_if_known( - backend = "to_Seurat", + backend = "as_seurat", slot = c("obs"), dtype = obs_key, process = "convert", @@ -56,7 +56,7 @@ for (obs_key in colnames(ad$obs)) { for (var_key in colnames(ad$var)) { test_that(paste0("as_Seurat retains var key: ", var_key), { msg <- message_if_known( - backend = "to_Seurat", + backend = "as_seurat", slot = c("var"), dtype = var_key, process = "convert", @@ -75,7 +75,7 @@ for (var_key in colnames(ad$var)) { for (layer_key in names(ad$layers)) { test_that(paste0("as_Seurat retains layer: ", layer_key), { msg <- message_if_known( - backend = "to_Seurat", + backend = "as_seurat", slot = c("layers"), dtype = layer_key, process = "convert", @@ -100,7 +100,7 @@ for (layer_key in names(ad$layers)) { for (uns_key in names(ad$uns)) { test_that(paste0("as_Seurat retains uns key: ", uns_key), { msg <- message_if_known( - backend = "to_Seurat", + backend = "as_seurat", slot = c("uns"), dtype = uns_key, process = "convert", @@ -115,7 +115,7 @@ for (uns_key in names(ad$uns)) { test_that("as_Seurat retains pca dimred", { msg <- message_if_known( - backend = "to_Seurat", + backend = "as_seurat", slot = c("obsm"), dtype = "pca", process = "convert", @@ -125,16 +125,16 @@ test_that("as_Seurat retains pca dimred", { skip_if(!is.null(msg), message = msg) # trackstatus: class=Seurat, feature=test_get_obsm, status=wip - expect_true("X_pca" %in% Reductions(seu)) + expect_true("pca" %in% Reductions(seu)) expect_equal( - Embeddings(seu, reduction = "X_pca"), + Embeddings(seu, reduction = "pca"), ad$obsm[["X_pca"]], ignore_attr = TRUE ) # trackstatus: class=Seurat, feature=test_get_varm, status=wip expect_equal( - Loadings(seu, reduction = "X_pca"), + Loadings(seu, reduction = "pca"), ad$varm[["PCs"]], ignore_attr = TRUE ) diff --git a/tests/testthat/test-as_SingleCellExperiment.R b/tests/testthat/test-as_SingleCellExperiment.R index c1b97257..5f3ddfcc 100644 --- a/tests/testthat/test-as_SingleCellExperiment.R +++ b/tests/testthat/test-as_SingleCellExperiment.R @@ -23,7 +23,7 @@ test_that("as_SCE retains nr of observations and features", { for (obs_key in colnames(ad$obs)) { test_that(paste0("as_SCE retains obs key: ", obs_key), { msg <- message_if_known( - backend = "to_SCE", + backend = "as_SCE", slot = c("obs"), dtype = obs_key, process = "convert", @@ -44,7 +44,7 @@ for (obs_key in colnames(ad$obs)) { for (var_key in colnames(ad$var)) { test_that(paste0("as_SCE retains var key: ", var_key), { msg <- message_if_known( - backend = "to_SCE", + backend = "as_SCE", slot = c("var"), dtype = var_key, process = "convert", @@ -65,7 +65,7 @@ for (var_key in colnames(ad$var)) { for (layer_key in names(ad$layers)) { test_that(paste0("as_SCE retains layer: ", layer_key), { msg <- message_if_known( - backend = "to_SCE", + backend = "as_SCE", slot = c("layers"), dtype = layer_key, process = "convert", @@ -124,7 +124,7 @@ test_that("as_SCE works with no x_mapping and no layers_mapping", { for (obsp_key in names(ad$obsp)) { test_that(paste0("as_SCE retains obsp key: ", obsp_key), { msg <- message_if_known( - backend = "to_SCE", + backend = "as_SCE", slot = c("obsp"), dtype = obsp_key, process = "convert", @@ -148,7 +148,7 @@ for (obsp_key in names(ad$obsp)) { for (varp_key in names(ad$varp)) { test_that(paste0("as_SCE retains varp key: ", varp_key), { msg <- message_if_known( - backend = "to_SCE", + backend = "as_SCE", slot = c("obsp"), dtype = varp_key, process = "convert", @@ -172,7 +172,7 @@ for (varp_key in names(ad$varp)) { for (uns_key in names(ad$uns)) { test_that(paste0("as_SCE retains uns key: ", uns_key), { msg <- message_if_known( - backend = "to_SCE", + backend = "as_SCE", slot = c("uns"), dtype = uns_key, process = "convert", @@ -191,7 +191,7 @@ for (uns_key in names(ad$uns)) { test_that("as_SCE retains pca dimred", { msg <- message_if_known( - backend = "to_SCE", + backend = "as_SCE", slot = c("obsm", "varm"), dtype = "pca", process = "convert", @@ -199,17 +199,16 @@ test_that("as_SCE retains pca dimred", { ) skip_if(!is.null(msg), message = msg) - # trackstatus: class=SingleCellExperiment, feature=test_get_obsm, status=wip - expect_true("pca" %in% names(reducedDims(sce))) + # trackstatus: class=SingleCellExperiment, feature=test_get_obsm, status=done + expect_true("PCA" %in% names(reducedDims(sce))) expect_equal( - sampleFactors(reducedDims(sce)$pca), - ad$obsm[["X_pca"]], - ignore.attributes = TRUE + sampleFactors(reducedDims(sce)$PCA), + ad$obsm[["X_pca"]] ) - # trackstatus: class=SingleCellExperiment, feature=test_get_varm, status=wip + # trackstatus: class=SingleCellExperiment, feature=test_get_varm, status=done expect_equal( - featureLoadings(reducedDims(sce)$pca), + featureLoadings(reducedDims(sce)$PCA), ad$varm[["PCs"]] ) }) diff --git a/tests/testthat/test-from_Seurat.R b/tests/testthat/test-from_Seurat.R index c325f4ea..eb89eadd 100644 --- a/tests/testthat/test-from_Seurat.R +++ b/tests/testthat/test-from_Seurat.R @@ -123,13 +123,13 @@ test_that("as_AnnData (Seurat) retains pca", { # trackstatus: class=Seurat, feature=test_set_obsm, status=wip expect_equal( - ad$obsm[["pca"]], + ad$obsm[["X_pca"]], Embeddings(obj, reduction = "pca"), ignore_attr = TRUE ) # trackstatus: class=Seurat, feature=test_set_varm, status=done - expanded_varm_pca <- ad$varm[["pca"]] + expanded_varm_pca <- ad$varm[["PCs"]] loadings <- Loadings(obj, reduction = "pca") # check whether rows not in loadings are all zero @@ -161,7 +161,7 @@ test_that("as_AnnData (Seurat) retains umap", { skip_if(!is.null(msg), message = msg) expect_equal( - ad$obsm[["umap"]], + ad$obsm[["X_umap"]], Embeddings(obj, reduction = "umap"), ignore_attr = TRUE ) diff --git a/tests/testthat/test-from_SingleCellExperiment.R b/tests/testthat/test-from_SingleCellExperiment.R index fa3f5c3c..30e6f16f 100644 --- a/tests/testthat/test-from_SingleCellExperiment.R +++ b/tests/testthat/test-from_SingleCellExperiment.R @@ -176,16 +176,16 @@ test_that("as_AnnData (SCE) retains pca dimred", { # trackstatus: class=SingleCellExperiment, feature=test_set_obsm, status=wip expect_true("X_pca" %in% names(ad$obsm)) expect_equal( - sampleFactors(reducedDims(sce)$X_pca), + sampleFactors(reducedDims(sce)$PCA), ad$obsm[["X_pca"]] ) # trackstatus: class=SingleCellExperiment, feature=test_set_varm, status=wip - expect_true("X_pca" %in% names(ad$varm)) + expect_true("PCs" %in% names(ad$varm)) # AnnData now adds dimnames on-the-fly, but SCE doesn't preserve them # So we need to strip dimnames for comparison - actual_mat <- ad$varm[["X_pca"]] - expected_mat <- featureLoadings(reducedDims(sce)$X_pca) + actual_mat <- ad$varm[["PCs"]] + expected_mat <- featureLoadings(reducedDims(sce)$PCA) dimnames(actual_mat) <- NULL dimnames(expected_mat) <- NULL expect_equal( diff --git a/tests/testthat/test-h5ad-read.R b/tests/testthat/test-h5ad-read.R index 54443378..e3a37a15 100644 --- a/tests/testthat/test-h5ad-read.R +++ b/tests/testthat/test-h5ad-read.R @@ -9,7 +9,7 @@ test_that("reading H5AD as SingleCellExperiment works", { test_that("reading H5AD as Seurat works", { skip_if_not_installed("SeuratObject") - # TODO: remove this suppression when the to_seurat, from_seurat functions are updated. + # TODO: remove this suppression when the as_seurat, from_seurat functions are updated. seurat <- suppressWarnings(read_h5ad(file, as = "Seurat")) expect_s4_class(seurat, "Seurat") }) diff --git a/vignettes/usage_singlecellexperiment.Rmd b/vignettes/usage_singlecellexperiment.Rmd index ac59ee84..df771ce3 100644 --- a/vignettes/usage_singlecellexperiment.Rmd +++ b/vignettes/usage_singlecellexperiment.Rmd @@ -82,8 +82,8 @@ sce_obj <- adata$as_SingleCellExperiment( colData_mapping = c("Int", "IntNA"), rowData_mapping = c(rowdata1 = "String", rowdata2 = "total_counts"), reducedDims_mapping = list( - "pca" = c(sampleFactors = "X_pca", featureLoadings = "PCs"), - "umap" = c(sampleFactors = "X_umap") + "PCA" = c(sampleFactors = "X_pca", featureLoadings = "PCs"), + "UMAP" = c(sampleFactors = "X_umap") ), colPairs_mapping = TRUE, rowPairs_mapping = FALSE, @@ -120,7 +120,7 @@ as_AnnData( layers_mapping = c("csc_counts"), obs_mapping = c(metadata1 = "Int", metadata2 = "IntNA"), var_mapping = FALSE, - obsm_mapping = list(X_pca = "X_pca", X_umap = "X_umap"), + obsm_mapping = list(X_pca = "PCA", X_umap = "UMAP"), obsp_mapping = TRUE, uns_mapping = c("Bool", "IntScalar") )