Skip to content
Draft
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
70f122b
Create SeuratUploader.py
dlesper May 21, 2026
db8bb98
Update process_uploaded_expression_dataset.cgi
dlesper May 21, 2026
eaa4612
Update BioCManager to 3.22
dlesper May 22, 2026
663753c
add packages necessary for SeuratUploader
dlesper May 22, 2026
24e8794
Update Dockerfile.r
dlesper May 22, 2026
93adac2
Enabled Seurat/RDS
dlesper May 22, 2026
cc67206
SeuratUploader: Change gene_names to gene_symbol
dlesper May 27, 2026
b398e82
Add packages to apt-get for Seurat
dlesper May 28, 2026
0d9456c
Add RDATA to list of dataset formats
dlesper May 28, 2026
04411ad
Change button text from 'Selected' to 'Choose'
dlesper May 28, 2026
ffed734
Package installation finalization
dlesper May 28, 2026
f18e569
Update requirements and setup documentation for additional packages
adkinsrs Jun 3, 2026
1b841b2
Fix formatting in Python setup instructions for package installation
adkinsrs Jun 3, 2026
2b61749
Merge branch 'devel' into feature_seurat_upload
adkinsrs Jun 3, 2026
2f9a249
Fixing squashed commits already in devel. Adjusting permissions of l…
adkinsrs Jun 3, 2026
9b6b51f
Rename SeuratUploader.py to seuratuploader.py
adkinsrs Jun 3, 2026
a1c923e
Enhance error handling and parameter retrieval in dataset processing …
adkinsrs Jun 3, 2026
5568761
Fix file extension check for RDS format in store_expression_dataset.cgi
adkinsrs Jun 3, 2026
ee9ef8c
Update Docker output descriptions to include "latest" tag for images
adkinsrs Jun 4, 2026
df93f5d
Clarify Docker image tag description in setup documentation
adkinsrs Jun 4, 2026
3dee550
Merge branch 'devel' into feature_seurat_upload
adkinsrs Jun 4, 2026
e450944
Fix font color assignment in update_stacked_violin_annotations to def…
adkinsrs Jun 4, 2026
4d59376
Refactor error handling in SeuratUploader and update progress trackin…
adkinsrs Jun 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions docker/Dockerfile
Comment thread
adkinsrs marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ RUN apt -qq update \
libpcre2-dev \
fonts-roboto \
fontconfig \
libgfortran5 \
libuv1 \
libhdf5-dev \
&& apt -qq clean autoclean \
&& apt -qq autoremove -y \
&& rm -rf /var/lib/apt/lists/*
Expand All @@ -55,12 +58,12 @@ RUN fc-cache -f -v
ENV LLVM_CONFIG=/usr/bin/llvm-config-14

# Copy compiled Python from builder stage
COPY --from=adkinsrs/gear-python-base:2026-04-27 /opt/Python-${PYTHON_FULL_VERSION} /opt/Python-${PYTHON_FULL_VERSION}
COPY --from=gear-python-base:latest /opt/Python-${PYTHON_FULL_VERSION} /opt/Python-${PYTHON_FULL_VERSION}

# Copy compiled R from r-builder stage
COPY --from=adkinsrs/gear-r-base:2026-04-27 /usr/local/lib/R /usr/local/lib/R
COPY --from=adkinsrs/gear-r-base:2026-04-27 /usr/local/bin/R /usr/local/bin/R
COPY --from=adkinsrs/gear-r-base:2026-04-27 /usr/local/bin/Rscript /usr/local/bin/Rscript
COPY --from=gear-r-base:latest /usr/local/lib/R /usr/local/lib/R
COPY --from=gear-r-base:latest /usr/local/bin/R /usr/local/bin/R
COPY --from=gear-r-base:latest /usr/local/bin/Rscript /usr/local/bin/Rscript

# Link Python and shared library
RUN mkdir -p /opt/bin \
Expand Down
2 changes: 2 additions & 0 deletions docker/Dockerfile.r
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ RUN apt -qq update \
tzdata \
git \
unzip \
libgfortran5 \
libhdf5-dev \
&& apt -qq clean autoclean \
&& apt -qq autoremove -y \
&& rm -rf /var/lib/apt/lists/*
Expand Down
2 changes: 1 addition & 1 deletion docker/install_bioc.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Install required packages
tryCatch( {
install.packages(c("BiocManager", "remotes"), dependencies=NA, repos="http://lib.stat.cmu.edu/R/CRAN/")
BiocManager::install(version = "3.21", ask=FALSE)
BiocManager::install(version = "3.22", ask=FALSE)
}, error = function(e) {
message("Error: ", e$message)
quit(status = 1, save = "no")
Expand Down
7 changes: 6 additions & 1 deletion docker/install_packages.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ library(remotes) # for install_version
tryCatch( {
remotes::install_version("reticulate", version="1.46.0", repos="https://cloud.r-project.org/", ask=FALSE, dependencies=NA) # Sanity check with rpy2
remotes::install_github("ctlab/fgsea") # needed for projectR
remotes::install_github("genesofeve/projectR@d3dd79e2b14172a9561059d58462c97f0a78d4c8") # version 1.23.2
remotes::install_github("genesofeve/projectR@d3dd79e2b14172a9561059d58462c97f0a78d4c8")
remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE, ask=FALSE)
install.packages('httpuv', ask=FALSE, repos="https://cloud.r-project.org/")
install.packages("hdf5r",dependencies=TRUE, ask=FALSE, repos="https://cloud.r-project.org/")
BiocManager::install("rhdf5",ask=FALSE)
BiocManager::install("anndataR", ask=FALSE)# version 1.23.2
BiocManager::install("biomaRt", ask=FALSE) # version 2.60.0
remotes::install_github("CHuanSite/SJD")
}, error = function(e) {
Expand Down
8 changes: 8 additions & 0 deletions docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,11 @@ spatialdata_io==0.6.0
shadows==0.1a2
tables==3.11.1
watchfiles==1.1.1
Bio==1.8.3
biopython==1.87
biothings-client==0.5.0
gprofiler-official==1.0.0
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
mygene==3.2.2
13 changes: 12 additions & 1 deletion docs/developer/setup/python.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ fixed paths have worked fine for decades.
libicu-dev \
libdeflate-dev \
libssl3 \
libgfortran5 \
libuv1 \
libhdf5-dev \
pkg-config \
llvm \
apache2 \
Expand Down Expand Up @@ -148,7 +151,15 @@ I cannot add comments to the bash code without breaking the command. So consult
spatialdata_io==0.6.0 \
shadows==0.1a2 \
tables==3.11.1 \
watchfiles==1.1.1
watchfiles==1.1.1 \
Bio==1.8.3 \
biopython==1.87 \
biothings-client==0.5.0 \
gprofiler-official==1.0.0 \
h11==0.16.0 \
httpcore==1.0.9 \
httpx==0.28.1 \
mygene==3.2.2
./pip3 install git+https://github.com/adkinsrs/diffxpy.git@ffd828c280882ca98adc6e42c934625fab0011f6
./pip3 uninstall dask-expr -y

Expand Down
169 changes: 169 additions & 0 deletions lib/gear/SeuratUploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
import argparse

import rpy2.robjects as ro
from rpy2.robjects.packages import importr
import rpy2.rinterface_lib.callbacks as r_cbs
import rpy2.robjects.packages as rpackages
import sys
import mygene
import pandas as pd
import scanpy
import os
import argparse


def silent_handler(s: str) -> None:
    """Discard the text *s* handed over by an R console callback.

    Installed in place of rpy2's console-write callbacks so that R's
    console output is dropped instead of being printed.
    """
    return None

def argument_parser():
    """Parse the command-line options of the Seurat uploader.

    Options:
        -r / --rds       (required) path to the RDS file holding the Seurat object
        -s / --share-id  (required) share ID used to name the resulting h5ad
        -t / --taxid     (optional) taxonomy ID forwarded to the gene-symbol ->
                         Ensembl lookup; defaults to None when omitted

    Returns:
        dict with the keys 'rds', 'share_id' and 'taxid'.
    """
    parser = argparse.ArgumentParser(
        usage="%(prog)s -r [RDS Object] -s [Share ID] [-t [Taxonomy ID]]",
        add_help=True,
    )
    parser.add_argument('-r', '--rds', required=True, type=str,
                        help="Path to the RDS file containing the Seurat object")
    parser.add_argument('-s', '--share-id', required=True, type=str,
                        help="Share ID used to name the output h5ad file")
    # Optional so that existing "-r ... -s ..." invocations still parse;
    # without it the Ensembl mapping step has no species to query.
    parser.add_argument('-t', '--taxid', required=False, default=None, type=str,
                        help="Taxonomy ID of the organism, used for the Ensembl ID lookup")
    return vars(parser.parse_args())

def r_package_installer() -> None:
    """Install the R packages used by the uploader when they are missing.

    BiocManager is handled first because it is imported and then used to
    install the Bioconductor packages. 'reticulate' and 'Seurat' are installed
    through the R 'utils' package; 'anndataR' and 'rhdf5' through BiocManager.
    Packages that rpy2 already reports as installed are left untouched.
    """
    r_utils = rpackages.importr('utils')

    # BiocManager has to exist before it can be imported below.
    if not rpackages.isinstalled('BiocManager'):
        r_utils.install_packages('BiocManager')
    bioc_manager = importr('BiocManager')

    for cran_pkg in ('reticulate', 'Seurat'):
        if not rpackages.isinstalled(cran_pkg):
            r_utils.install_packages(cran_pkg)

    for bioc_pkg in ('anndataR', 'rhdf5'):
        if not rpackages.isinstalled(bioc_pkg):
            bioc_manager.install(bioc_pkg)


def r_package_importer(package_name: str):
    """
    Import an installed R package, exiting the program if that is not possible.

    Input:
        package_name: R package name to import
    Output:
        The R package object returned by importr
    Raises:
        SystemExit: carrying the message
            "<package_name> not installed or can not be imported"
            when the import fails; the underlying error is chained as the cause.
    """
    try:
        return importr(package_name)
    except Exception as err:
        # A bare "except:" would also trap KeyboardInterrupt/SystemExit and
        # throw away the reason for the failure; keep the same exit message
        # but preserve the original error as __cause__.
        raise SystemExit(
            f"{package_name} not installed or can not be imported"
        ) from err



def seurat_to_anndata(file_path: str, share_name: str, output_dir: str = "."):
    """
    Convert a Seurat object stored in an RDS file into a temporary h5ad file.

    file_path: path to the RDS file (it is loaded with R's readRDS)
    share_name: final h5ad string name to be expected (without h5ad)
    output_dir: directory to write the temporary h5ad file into

    return:
        path to the tmp h5ad, i.e. output_dir joined with "tmp_<share_name>.h5ad"
        (relative when output_dir is relative), or False if writing it failed
    """
    # Suppress R console output and ensure required packages are loaded,
    # since this function may be called as a module in cgi script (not via main()).
    r_cbs.consolewrite_print = silent_handler
    r_cbs.consolewrite_warnerror = silent_handler

    # Import required R packages (r_package_importer exits the program on failure)
    base = rpackages.importr('base')
    r_package_importer('Seurat')
    r_package_importer('rhdf5')
    r_package_importer('anndataR')

    # Use R's readRDS to load the object and expose it to R code by name.
    r_seurat_obj = base.readRDS(file_path)
    ro.globalenv['seurat_obj'] = r_seurat_obj

    # Convert with anndataR inside the R session.
    ro.r('adata <- as_AnnData(seurat_obj)')

    output_path = os.path.join(output_dir, f'tmp_{share_name}.h5ad')
    # Hand the path to R as a variable instead of splicing it into R source
    # text, so quotes or backslashes in the path cannot break (or alter) the
    # evaluated R expression.
    ro.globalenv['h5ad_output_path'] = output_path
    try:
        ro.r('write_h5ad(adata, h5ad_output_path)')
    except Exception as err:
        # An already-existing file is one likely cause, but report the real
        # error rather than assuming it.
        print(f"Could not write h5ad {output_path} (it may already exist): {err}")
        return False
    return output_path


def openh5ad(h5ad_name):
    """Read the h5ad file at *h5ad_name* with scanpy and return the result."""
    return scanpy.read_h5ad(h5ad_name)

def genes_to_ensembl(adata, taxid=None):
    """Replace adata.var with a table indexed by Ensembl ID.

    The current adata.var index is treated as gene symbols and looked up
    through the external MyGene service. The new adata.var has one row per
    original gene, in the original order, indexed by the Ensembl gene ID and
    holding the symbol in a "gene_symbol" column. Any other columns previously
    in adata.var are dropped.

    adata: object whose .var index holds the gene symbols
    taxid: species identifier passed to the MyGene query; when None nothing
        is queried or modified

    return:
        the modified adata, or None when taxid is None
    """
    # We are calling an external API for genes to ensembl mapping
    # Potentially problematic down the road if this shuts down
    if taxid is None:
        return None
    genes = adata.var.index.tolist()
    mg = mygene.MyGeneInfo()
    mg_genes = mg.querymany(genes, scopes="symbol", fields="ensembl.gene", species=f"{taxid}")

    ensembl_mapping_dict = {}
    for mg_gene in mg_genes:
        if 'ensembl' not in mg_gene:
            continue
        ensembl = mg_gene['ensembl']
        if isinstance(ensembl, list):
            # Currently taking first value, not sure of a better way to handle
            # one gene having multiple ensembl IDs
            ensembl = ensembl[0]
        ensembl_mapping_dict[mg_gene['query']] = ensembl['gene']

    # We still need an ensembl id for the genes that do not actually have them.
    # So here we create a Fake# for each one so that it can be searchable in gEAR
    count = 0
    for gene in genes:
        if gene not in ensembl_mapping_dict:
            ensembl_mapping_dict[gene] = f"Fake{count}"
            count += 1

    # Overwrite the current adata.var.
    # Rows are emitted in the ORIGINAL gene order (one per entry of `genes`):
    # using the dict's insertion order would list mapped genes first and the
    # Fake ones last, mislabelling the columns of the expression matrix, and
    # would yield too few rows when a symbol occurs more than once.
    adata.var = pd.DataFrame(
        index=[ensembl_mapping_dict[gene] for gene in genes],
        data={"gene_symbol": genes},
    )
    return adata


def reduction_to_metadata(adata):
    """Copy the first two components of each reduction in adata.obsm into adata.obs.

    For every entry of adata.obsm with more than one column, columns 0 and 1
    are stored in adata.obs as "<reduction>_1" and "<reduction>_2". Entries
    with a single column are skipped. Returns the same adata object.
    """
    # Discussion with Carlo and Brian resulted in us determining we would like to
    # take the first 2 values of each reduction
    # PCA in the future, and potentially other reductions may need more
    for name in adata.obsm:
        if adata.obsm[name].shape[1] <= 1:
            continue
        for component in (1, 2):
            adata.obs[f'{name}_{component}'] = adata.obsm[name][:, component - 1]
    return adata


def layer_to_X(adata, layer_name):
    """Set adata.X to the layer stored under *layer_name* and return adata.

    Intended for cases where the Seurat -> AnnData conversion does not create
    the X matrix, e.g. by using adata.layers['data'] as X.
    """
    selected_layer = adata.layers[layer_name]
    adata.X = selected_layer
    return adata

def main():
    """Command-line entry point: turn a Seurat RDS file into a gEAR-ready h5ad.

    Steps: parse the arguments, make sure the R packages are installed, convert
    the RDS to a temporary h5ad, map gene symbols to Ensembl IDs, copy the
    reductions into the observation metadata, and write "<share_id>.h5ad" next
    to the temporary file. The temporary file is removed afterwards.

    Exits with a message when no taxonomy ID is available or when the
    RDS -> h5ad conversion fails.
    """
    arguments = argument_parser()
    # Args
    rds_path = arguments['rds']
    share_name = arguments['share_id']
    # .get() so this still runs against a parser that defines no 'taxid' option.
    taxid = arguments.get('taxid')
    if taxid is None:
        # The Ensembl mapping cannot run without a species; fail before doing
        # the expensive R conversion.
        sys.exit("TaxID not supplied")

    r_package_installer()
    # Take the RDS and output the most basic h5ad
    tmp_h5ad_path = seurat_to_anndata(rds_path, share_name)
    if not tmp_h5ad_path:
        sys.exit(f"Could not convert {rds_path} to h5ad")

    # seurat_to_anndata already returns the full temp path
    # (".../tmp_<share_id>.h5ad"), so use it as-is and derive the final name
    # from the share ID rather than string-replacing "tmp_".
    final_h5ad_path = os.path.join(os.path.dirname(tmp_h5ad_path), f'{share_name}.h5ad')

    # Below are some changes and checks to the h5ad to correctly format for gEAR
    try:
        adata = openh5ad(tmp_h5ad_path)
        adata = genes_to_ensembl(adata, taxid)
        adata = reduction_to_metadata(adata)
        adata.write(final_h5ad_path)
    finally:
        # Always clean up the intermediate file, even if formatting failed.
        os.remove(tmp_h5ad_path)


if __name__ == "__main__":
    main()
Loading