Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
- Seurat PCA loadings only contain variable features, not all genes
- Now properly expands loadings matrix to include all genes with zeros for non-variable features
- Adds warning when rownames don't match var_names during conversion
- Improve mapping of dimensionality reduction slots during conversion between AnnData, SingleCellExperiment, and Seurat (PR #333)

# anndataR 0.99.2

Expand Down
31 changes: 27 additions & 4 deletions R/as_Seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -589,23 +589,46 @@ as_Seurat <- function(
# nolint start: object_name_linter object_length_linter
.as_Seurat_guess_reductions <- function(adata) {
# nolint end: object_name_linter object_length_linter
# Get mappings keyed by AnnData obsm names from centralized location
obsm_mappings <- .get_dimred_mapping(from = "anndata_obsm")

purrr::map(adata$obsm_keys(), function(.obsm) {
if (!is.numeric(as.matrix(adata$obsm[[.obsm]]))) {
return(NULL)
}

# Determine the Seurat reduction name using common mappings
seurat_name <- if (.obsm %in% names(obsm_mappings)) {
obsm_mappings[[.obsm]]$seurat
} else {
.obsm
}

mapping <- c(
# Make sure we have valid keys here to avoid warnings later
key = SeuratObject::Key(.obsm, quiet = TRUE),
key = SeuratObject::Key(seurat_name, quiet = TRUE),
embeddings = .obsm
)
if (.obsm == "X_pca" && "PCs" %in% names(adata$varm)) {
mapping["loadings"] <- "PCs"

# Check if this obsm has associated loadings in varm
if (.obsm %in% names(obsm_mappings)) {
dimred_info <- obsm_mappings[[.obsm]]
varm_key <- dimred_info$anndata_varm
if (!is.null(varm_key) && varm_key %in% names(adata$varm)) {
mapping["loadings"] <- varm_key
}
}

mapping
}) |>
setNames(adata$obsm_keys()) |>
setNames(purrr::map_chr(adata$obsm_keys(), function(.obsm) {
# Use Seurat naming convention for the reduction names
if (.obsm %in% names(obsm_mappings)) {
obsm_mappings[[.obsm]]$seurat
} else {
.obsm
}
})) |>
purrr::compact()
}

Expand Down
40 changes: 34 additions & 6 deletions R/as_SingleCellExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -292,20 +292,46 @@ as_SingleCellExperiment <- function(
# nolint start: object_length_linter object_name_linter
.as_SCE_guess_reducedDims <- function(adata) {
# nolint end: object_length_linter object_name_linter
purrr::map(adata$obsm_keys(), function(.obsm) {

# Get mappings keyed by AnnData obsm names from centralized location
obsm_mappings <- .get_dimred_mapping(from = "anndata_obsm")

# Create mapping and name pairs for each obsm key
results <- purrr::map(adata$obsm_keys(), function(.obsm) {
if (!is.numeric(as.matrix(adata$obsm[[.obsm]]))) {
return(NULL)
}

mapping <- c(sampleFactors = .obsm)
if (.obsm == "X_pca" && "PCs" %in% names(adata$varm)) {
mapping["featureLoadings"] <- "PCs"

# Check if this is a known dimensionality reduction with loadings
if (.obsm %in% names(obsm_mappings)) {
dimred_info <- obsm_mappings[[.obsm]]
if (
!is.null(dimred_info$anndata_varm) &&
dimred_info$anndata_varm %in% names(adata$varm)
) {
mapping["featureLoadings"] <- dimred_info$anndata_varm
}
}

mapping
# Determine the final name: use Bioconductor convention if available
final_name <- if (.obsm %in% names(obsm_mappings)) {
obsm_mappings[[.obsm]]$sce
} else {
.obsm
}

list(mapping = mapping, name = final_name)
}) |>
setNames(adata$obsm_keys()) |>
purrr::compact()

# Extract mappings and names, then set names
result_names <- purrr::map_chr(results, "name")
results <- purrr::map(results, "mapping")
names(results) <- result_names

results
}

# nolint start: object_length_linter object_name_linter
Expand Down Expand Up @@ -382,7 +408,9 @@ as_SingleCellExperiment <- function(
}

loadings <- adata$varm[[varm_key]]
rownames(loadings) <- colnames(embedding)
# Add rownames (variable names) and colnames to loadings matrix
rownames(loadings) <- adata$var_names
colnames(loadings) <- colnames(embedding)
} else {
loadings <- matrix(nrow = 0, ncol = ncol(embedding))
}
Expand Down
65 changes: 65 additions & 0 deletions R/common_dimred_mappings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#' Get dimensionality reduction mappings keyed by a specific framework
#'
#' @param from Character string specifying which framework to use as keys.
#' One of: "sce", "seurat", "anndata_obsm", "anndata_varm", "anndata_uns"
#' @return A named list where names are the values from the specified framework
#' and values are the complete mapping information for each dimred
#' @noRd
.get_dimred_mapping <- function(
from = c("sce", "seurat", "anndata_obsm", "anndata_varm", "anndata_uns")
) {
from <- match.arg(from)

all_mappings <- .get_common_dimred_mappings()
result <- list()

for (mapping in all_mappings) {
key <- mapping[[from]]
if (!is.null(key)) {
result[[key]] <- mapping
}
}

result
}

#' Common dimensionality reduction mappings between AnnData, SingleCellExperiment, and Seurat
#'
#' This table defines the standard mappings between scanpy/AnnData naming
#' conventions, Bioconductor/SingleCellExperiment naming conventions, and
#' Seurat naming conventions for common dimensionality reduction techniques.
#'
#' Based on the conventions documented in dr.md:
#' - AnnData uses "X_pca", "X_tsne", "X_umap" in obsm and "PCs" in varm
#' - Seurat uses lowercase "pca", "tsne", "umap"
#' - SingleCellExperiment uses uppercase "PCA", "TSNE", "UMAP"
#'
#' @return A list of dimensionality reduction mappings, where each element contains
#' the naming conventions across frameworks and associated metadata
#' @noRd
.get_common_dimred_mappings <- function() {
list(
# Core dimensionality reductions with documented naming conventions
list(
sce = "PCA",
seurat = "pca",
anndata_obsm = "X_pca",
anndata_varm = "PCs",
anndata_uns = NULL
),
list(
sce = "tSNE",
seurat = "tsne",
anndata_obsm = "X_tsne",
anndata_varm = NULL,
anndata_uns = NULL
),
list(
sce = "UMAP",
seurat = "umap",
anndata_obsm = "X_umap",
anndata_varm = NULL,
anndata_uns = NULL
)
)
}
24 changes: 22 additions & 2 deletions R/from_Seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,8 @@ from_Seurat <- function(
# nolint start: object_name_linter object_length_linter
.from_Seurat_guess_obsms <- function(seurat_obj, assay_name) {
# nolint end: object_name_linter object_length_linter
# Get reverse mappings from centralized location
reverse_seurat_mappings <- .get_dimred_mapping(from = "seurat")

obsm_mapping <- c()

Expand All @@ -424,7 +426,13 @@ from_Seurat <- function(
next
}

obsm_mapping[reduction_name] <- reduction_name
# Create mapping: AnnData obsm name -> Seurat reduction name
anndata_name <- if (reduction_name %in% names(reverse_seurat_mappings)) {
reverse_seurat_mappings[[reduction_name]]$anndata_obsm
} else {
reduction_name
}
obsm_mapping[anndata_name] <- reduction_name
}

obsm_mapping
Expand All @@ -433,6 +441,8 @@ from_Seurat <- function(
# nolint start: object_name_linter object_length_linter
.from_Seurat_guess_varms <- function(seurat_obj, assay_name) {
# nolint end: object_name_linter object_length_linter
# Get reverse mappings from centralized location
reverse_seurat_varm_mappings <- .get_dimred_mapping(from = "seurat")

varm_mapping <- c()

Expand All @@ -445,7 +455,17 @@ from_Seurat <- function(
nrow(loadings) == nrow(seurat_obj) &&
SeuratObject::DefaultAssay(reduction) == assay_name
) {
varm_mapping[reduction_name] <- reduction_name
# Create mapping: AnnData varm name -> Seurat reduction name
anndata_varm_name <- if (
reduction_name %in%
names(reverse_seurat_varm_mappings) &&
!is.null(reverse_seurat_varm_mappings[[reduction_name]]$anndata_varm)
) {
reverse_seurat_varm_mappings[[reduction_name]]$anndata_varm
} else {
reduction_name
}
varm_mapping[anndata_varm_name] <- reduction_name
}
}

Expand Down
32 changes: 29 additions & 3 deletions R/from_SingleCellExperiment.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,24 +155,50 @@ from_SingleCellExperiment <- function(
# nolint start: object_length_linter object_name_linter
.from_SCE_guess_obsm <- function(sce) {
# nolint end: object_length_linter object_name_linter
obsm_mapping <- self_name(SingleCellExperiment::reducedDimNames(sce))
# Get reverse mappings from centralized location
reverse_dimred_mappings <- .get_dimred_mapping(from = "sce")

if (rlang::is_empty(obsm_mapping)) {
sce_names <- SingleCellExperiment::reducedDimNames(sce)

if (rlang::is_empty(sce_names)) {
c()
} else {
# Create mapping: AnnData name -> SCE name
obsm_mapping <- c()
for (sce_name in sce_names) {
anndata_name <- if (sce_name %in% names(reverse_dimred_mappings)) {
reverse_dimred_mappings[[sce_name]]$anndata_obsm
} else {
sce_name
}
obsm_mapping[anndata_name] <- sce_name
}
obsm_mapping
}
}

# nolint start: object_length_linter object_name_linter
.from_SCE_guess_varm <- function(sce) {
# nolint end: object_length_linter object_name_linter
# Get reverse mappings for varm from centralized location
reverse_varm_mappings <- .get_dimred_mapping(from = "sce")

varm_mapping <- c()

for (reduction_name in names(SingleCellExperiment::reducedDims(sce))) {
reduction <- SingleCellExperiment::reducedDim(sce, reduction_name)
if (inherits(reduction, "LinearEmbeddingMatrix")) {
varm_mapping[reduction_name] <- reduction_name
# Create mapping: AnnData varm name -> SCE reduction name
anndata_varm_name <- if (
reduction_name %in%
names(reverse_varm_mappings) &&
!is.null(reverse_varm_mappings[[reduction_name]]$anndata_varm)
) {
reverse_varm_mappings[[reduction_name]]$anndata_varm
} else {
reduction_name
}
varm_mapping[anndata_varm_name] <- reduction_name
}
}

Expand Down
5 changes: 5 additions & 0 deletions R/known_issues.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ is_known <- function(backend, slot, dtype, process, known_issues = NULL) {
known_issues <- read_known_issues()
}

# Handle empty known_issues data frame
if (nrow(known_issues) == 0) {
return(logical(0))
}

filt <- rep(TRUE, nrow(known_issues))

if (!is.null(backend)) {
Expand Down
18 changes: 1 addition & 17 deletions inst/known_issues.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1 @@
known_issues:
- backend: to_SCE
slot:
- obsm
- varm
dtype:
- pca
process: [convert]
error_message:
sampleFactors(reducedDims(sce)$pca) (`actual`) not equal to ad$obsm[["X_pca"]] (`expected`).

`dimnames(actual)` is a list
`dimnames(expected)` is absent
description: converted sce object has dimnames(), whilst the original anndata does not.
proposed_solution: Investigate if this is a problem or not.
to_investigate: True
to_fix: False
known_issues: []
16 changes: 8 additions & 8 deletions tests/testthat/test-as_Seurat.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ test_that("as_Seurat retains number of observations and features", {
for (obs_key in colnames(ad$obs)) {
test_that(paste0("as_Seurat retains obs key: ", obs_key), {
msg <- message_if_known(
backend = "to_Seurat",
backend = "as_seurat",
slot = c("obs"),
dtype = obs_key,
process = "convert",
Expand All @@ -56,7 +56,7 @@ for (obs_key in colnames(ad$obs)) {
for (var_key in colnames(ad$var)) {
test_that(paste0("as_Seurat retains var key: ", var_key), {
msg <- message_if_known(
backend = "to_Seurat",
backend = "as_seurat",
slot = c("var"),
dtype = var_key,
process = "convert",
Expand All @@ -75,7 +75,7 @@ for (var_key in colnames(ad$var)) {
for (layer_key in names(ad$layers)) {
test_that(paste0("as_Seurat retains layer: ", layer_key), {
msg <- message_if_known(
backend = "to_Seurat",
backend = "as_seurat",
slot = c("layers"),
dtype = layer_key,
process = "convert",
Expand All @@ -100,7 +100,7 @@ for (layer_key in names(ad$layers)) {
for (uns_key in names(ad$uns)) {
test_that(paste0("as_Seurat retains uns key: ", uns_key), {
msg <- message_if_known(
backend = "to_Seurat",
backend = "as_seurat",
slot = c("uns"),
dtype = uns_key,
process = "convert",
Expand All @@ -115,7 +115,7 @@ for (uns_key in names(ad$uns)) {

test_that("as_Seurat retains pca dimred", {
msg <- message_if_known(
backend = "to_Seurat",
backend = "as_seurat",
slot = c("obsm"),
dtype = "pca",
process = "convert",
Expand All @@ -125,16 +125,16 @@ test_that("as_Seurat retains pca dimred", {
skip_if(!is.null(msg), message = msg)

# trackstatus: class=Seurat, feature=test_get_obsm, status=wip
expect_true("X_pca" %in% Reductions(seu))
expect_true("pca" %in% Reductions(seu))
expect_equal(
Embeddings(seu, reduction = "X_pca"),
Embeddings(seu, reduction = "pca"),
ad$obsm[["X_pca"]],
ignore_attr = TRUE
)

# trackstatus: class=Seurat, feature=test_get_varm, status=wip
expect_equal(
Loadings(seu, reduction = "X_pca"),
Loadings(seu, reduction = "pca"),
ad$varm[["PCs"]],
ignore_attr = TRUE
)
Expand Down
Loading
Loading