Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
70f122b
Create SeuratUploader.py
dlesper May 21, 2026
db8bb98
Update process_uploaded_expression_dataset.cgi
dlesper May 21, 2026
eaa4612
Update BioCManager to 3.22
dlesper May 22, 2026
663753c
add packages necessary for SeuratUploader
dlesper May 22, 2026
24e8794
Update Dockerfile.r
dlesper May 22, 2026
93adac2
Enabled Seurat/RDS
dlesper May 22, 2026
cc67206
SeuratUploader: Change gene_names to gene_symbol
dlesper May 27, 2026
b398e82
Add packages to apt-get for Seurat
dlesper May 28, 2026
0d9456c
Add RDATA to list of dataset formats
dlesper May 28, 2026
04411ad
Change button text from 'Selected' to 'Choose'
dlesper May 28, 2026
ffed734
Package installation finalization
dlesper May 28, 2026
f18e569
Update requirements and setup documentation for additional packages
adkinsrs Jun 3, 2026
1b841b2
Fix formatting in Python setup instructions for package installation
adkinsrs Jun 3, 2026
2b61749
Merge branch 'devel' into feature_seurat_upload
adkinsrs Jun 3, 2026
2f9a249
Fixing squashed commits already in devel. Adjusting permissions of l…
adkinsrs Jun 3, 2026
9b6b51f
Rename SeuratUploader.py to seuratuploader.py
adkinsrs Jun 3, 2026
a1c923e
Enhance error handling and parameter retrieval in dataset processing …
adkinsrs Jun 3, 2026
5568761
Fix file extension check for RDS format in store_expression_dataset.cgi
adkinsrs Jun 3, 2026
ee9ef8c
Update Docker output descriptions to include "latest" tag for images
adkinsrs Jun 4, 2026
df93f5d
Clarify Docker image tag description in setup documentation
adkinsrs Jun 4, 2026
3dee550
Merge branch 'devel' into feature_seurat_upload
adkinsrs Jun 4, 2026
e450944
Fix font color assignment in update_stacked_violin_annotations to def…
adkinsrs Jun 4, 2026
4d59376
Refactor error handling in SeuratUploader and update progress trackin…
adkinsrs Jun 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions docker/Dockerfile
Comment thread
adkinsrs marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ RUN apt -qq update \
libpcre2-dev \
fonts-roboto \
fontconfig \
libgfortran5 \
libuv1 \
libhdf5-dev \
&& apt -qq clean autoclean \
&& apt -qq autoremove -y \
&& rm -rf /var/lib/apt/lists/*
Expand All @@ -55,12 +58,12 @@ RUN fc-cache -f -v
ENV LLVM_CONFIG=/usr/bin/llvm-config-14

# Copy compiled Python from builder stage
COPY --from=adkinsrs/gear-python-base:2026-04-27 /opt/Python-${PYTHON_FULL_VERSION} /opt/Python-${PYTHON_FULL_VERSION}
COPY --from=gear-python-base:latest /opt/Python-${PYTHON_FULL_VERSION} /opt/Python-${PYTHON_FULL_VERSION}

# Copy compiled R from r-builder stage
COPY --from=adkinsrs/gear-r-base:2026-04-27 /usr/local/lib/R /usr/local/lib/R
COPY --from=adkinsrs/gear-r-base:2026-04-27 /usr/local/bin/R /usr/local/bin/R
COPY --from=adkinsrs/gear-r-base:2026-04-27 /usr/local/bin/Rscript /usr/local/bin/Rscript
COPY --from=gear-r-base:latest /usr/local/lib/R /usr/local/lib/R
COPY --from=gear-r-base:latest /usr/local/bin/R /usr/local/bin/R
COPY --from=gear-r-base:latest /usr/local/bin/Rscript /usr/local/bin/Rscript

# Link Python and shared library
RUN mkdir -p /opt/bin \
Expand Down
2 changes: 2 additions & 0 deletions docker/Dockerfile.r
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ RUN apt -qq update \
tzdata \
git \
unzip \
libgfortran5 \
libhdf5-dev \
&& apt -qq clean autoclean \
&& apt -qq autoremove -y \
&& rm -rf /var/lib/apt/lists/*
Expand Down
2 changes: 1 addition & 1 deletion docker/install_bioc.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Install required packages
tryCatch( {
install.packages(c("BiocManager", "remotes"), dependencies=NA, repos="http://lib.stat.cmu.edu/R/CRAN/")
BiocManager::install(version = "3.21", ask=FALSE)
BiocManager::install(version = "3.22", ask=FALSE)
}, error = function(e) {
message("Error: ", e$message)
quit(status = 1, save = "no")
Expand Down
7 changes: 6 additions & 1 deletion docker/install_packages.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,12 @@ library(remotes) # for install_version
tryCatch( {
remotes::install_version("reticulate", version="1.46.0", repos="https://cloud.r-project.org/", ask=FALSE, dependencies=NA) # Sanity check with rpy2
remotes::install_github("ctlab/fgsea") # needed for projectR
remotes::install_github("genesofeve/projectR@d3dd79e2b14172a9561059d58462c97f0a78d4c8") # version 1.23.2
remotes::install_github("genesofeve/projectR@d3dd79e2b14172a9561059d58462c97f0a78d4c8")
remotes::install_github("satijalab/seurat", "seurat5", quiet = TRUE, ask=FALSE)
install.packages('httpuv', ask=FALSE, repos="https://cloud.r-project.org/")
install.packages("hdf5r",dependencies=TRUE, ask=FALSE, repos="https://cloud.r-project.org/")
BiocManager::install("rhdf5",ask=FALSE)
BiocManager::install("anndataR", ask=FALSE)# version 1.23.2
BiocManager::install("biomaRt", ask=FALSE) # version 2.60.0
remotes::install_github("CHuanSite/SJD")
}, error = function(e) {
Expand Down
8 changes: 7 additions & 1 deletion docker/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ aiohttp_retry==2.9.1
anndata==0.12.11
bio==1.8.3
biocode==0.10.0
biopython==1.87
biothings-client==0.5.0
cairosvg==2.7.1
colorcet==3.1.0
datashader==0.19.0
Expand All @@ -11,7 +13,10 @@ Flask==3.1.3
Flask-RESTful==0.3.9
google-analytics-data==0.21.0
gosling==0.3.0
h11==0.16.0
hic2cool==0.8.3
httpcore==1.0.9
httpx==0.28.1
jupyterlab==4.0.5
jupyter==1.0.0
kaleido==0.2.1
Expand All @@ -21,6 +26,7 @@ llvmlite==0.47.0
matplotlib==3.10.7
mod-wsgi==5.0.2
more_itertools==11.0.2
mygene==3.2.2
mysql-connector-python==8.0.28
numba==0.65.0
numpy==2.4.0
Expand All @@ -44,4 +50,4 @@ spatialdata==0.7.2
spatialdata_io==0.6.0
shadows==0.1a2
tables==3.11.1
watchfiles==1.1.1
watchfiles==1.1.1
8 changes: 5 additions & 3 deletions docs/developer/setup/docker.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,23 +66,25 @@ This file is dedicated entirely to compiling Python 3.x and installing requireme

**RPy2**: The "rpy2" package is actually built in the final Docker (umgear) image, due to some dependencies on R.

**The output**: This is currently built and pushed as adkinsrs/gear-python-base:YYYY-MM-DD
**The output**: This is currently built and pushed as adkinsrs/gear-python-base:YYYY-MM-DD and also tagged with the "latest" tag.

#### Dockerfile.r (The R Base)

This file is dedicated entirely to compiling R and running your Bioconductor scripts.

**When you build it**: Almost never. Only touch this if the team specifically requests a new version of Bioconductor or a brand-new R system library.

**The output**: This is currently built and pushed as adkinsrs/gear-r-base:YYYY-MM-DD
**The output**: This is currently built and pushed as adkinsrs/gear-r-base:YYYY-MM-DD and also tagged with the "latest" tag.

#### Dockerfile (The Final App)

This is your main daily-driver file. It starts with a clean Ubuntu image, uses COPY --from=... to pull in the pre-compiled folders from your registry, installs Apache, and copies over your Flask API and HTML/JS files.

Currently the inherited R and Python images are set to use the "latest" tag of a locally built image, as most of the time we want the most up-to-date version. If for some reason you need an earlier version, edit the Dockerfile to use one of the existing `adkinsrs/<image>:YYYY-MM-DD` tags stored in Docker Hub.

**When you build it**: Every time you update the website, tweak the Apache configuration, or change a CGI script. Anything gEAR-code related, basically.

**The output**: This builds in seconds and becomes your final production image. This is pushed as adkinsrs/umgear:YYYY-MM-DD
**The output**: This builds in seconds and becomes your final production image. This is pushed as adkinsrs/umgear:YYYY-MM-DD and also tagged with the "latest" tag.

## Starting the stack

Expand Down
9 changes: 9 additions & 0 deletions docs/developer/setup/python.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ fixed paths have worked fine for decades.
libicu-dev \
libdeflate-dev \
libssl3 \
libgfortran5 \
libuv1 \
libhdf5-dev \
pkg-config \
llvm \
apache2 \
Expand Down Expand Up @@ -109,14 +112,19 @@ I cannot add comments to the bash code without breaking the command. So consult
anndata==0.12.11 \
bio==1.8.3 \
biocode==0.10.0 \
biopython==1.87 \
biothings-client==0.5.0 \
cairosvg==2.7.1 \
colorcet==3.1.0 \
datashader==0.19.0 \
Flask==3.1.3 \
Flask-RESTful==0.3.9 \
google-analytics-data==0.21.0 \
gosling==0.3.0 \
h11==0.16.0 \
hic2cool==0.8.3 \
httpcore==1.0.9 \
httpx==0.28.1 \
jupyterlab==4.0.5 \
jupyter==1.0.0 \
kaleido==0.2.1 \
Expand All @@ -126,6 +134,7 @@ I cannot add comments to the bash code without breaking the command. So consult
matplotlib==3.10.7 \
mod-wsgi==5.0.2 \
more_itertools==11.0.2 \
mygene==3.2.2 \
mysql-connector-python==8.0.28 \
numba==0.65.0 \
numpy==2.4.0 \
Expand Down
Empty file modified lib/gear/__init__.py
100644 → 100755
Empty file.
Empty file modified lib/gear/analysis.py
100644 → 100755
Empty file.
Empty file modified lib/gear/dataarchive.py
100644 → 100755
Empty file.
Empty file modified lib/gear/db.py
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion lib/gear/mg_plotting.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -1185,7 +1185,7 @@ def update_stacked_violin_annotations(fig, primary_groups, color_map):
# Am attempting to do this based on the assumption that row facet titles will never have yanchor of bottom
# (or y-pos of 1) or have certain text shared with the axes titles
lambda a: a.update(
font=dict(color=color_map[a.text])
font=dict(color=color_map.get(a.text, "black"))
, textangle=0
, x=0
, xanchor="right"
Expand Down
Empty file modified lib/gear/orthology.py
100644 → 100755
Empty file.
Empty file modified lib/gear/plotting.py
100644 → 100755
Empty file.
Empty file modified lib/gear/primary_analysis.py
100644 → 100755
Empty file.
Empty file modified lib/gear/serverconfig.py
100644 → 100755
Empty file.
172 changes: 172 additions & 0 deletions lib/gear/seuratuploader.py
Comment thread
adkinsrs marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import argparse
import os
import sys

import mygene
import pandas as pd
import rpy2.rinterface_lib.callbacks as r_cbs
import rpy2.robjects as ro
import rpy2.robjects.packages as rpackages
import scanpy
from rpy2.robjects.packages import importr


def silent_handler(s: str) -> None:
    """Discard a chunk of R console output.

    Intended to be installed as an rpy2 console-write callback so that text
    R would print is dropped instead of being emitted.
    """
    return None

def argument_parser():
    """Parse the command-line arguments and return them as a dict.

    Returns:
        dict with the keys ``rds`` (path to the RDS object), ``share_id``
        (share ID used to name the output) and ``taxid`` (NCBI taxonomy ID
        used for the gene symbol -> Ensembl lookup, or ``None`` when the
        option is not given).
    """
    parser = argparse.ArgumentParser(
        usage="%(prog)s -r [RDS Object] -s [Share ID] [-t Taxon ID]",
        add_help=True,
    )
    parser.add_argument('-r', '--rds', required=True, type=str)
    parser.add_argument('-s', '--share-id', required=True, type=str)
    # Optional so existing invocations keep working; genes_to_ensembl() needs
    # a taxon ID, and without this option the CLI had no way to provide one.
    parser.add_argument('-t', '--taxid', required=False, type=str, default=None)
    return vars(parser.parse_args())

def r_package_installer() -> None:
    """Install any R packages this module needs that are not yet installed.

    BiocManager, reticulate and Seurat are installed with R's
    ``install.packages``; anndataR and rhdf5 are installed through
    BiocManager. Packages that are already installed are left untouched.
    """
    # An explicit mirror is passed because install.packages() cannot pick one
    # on its own in a session where no CRAN mirror has been configured.
    # Same mirror URL as docker/install_packages.R.
    cran_repo = "https://cloud.r-project.org/"
    utils = rpackages.importr('utils')

    # BiocManager has to exist before it can be imported below.
    if not rpackages.isinstalled('BiocManager'):
        utils.install_packages('BiocManager', repos=cran_repo)
    BiocManager = importr('BiocManager')

    for cran_package in ('reticulate', 'Seurat'):
        if not rpackages.isinstalled(cran_package):
            utils.install_packages(cran_package, repos=cran_repo)

    for bioc_package in ('anndataR', 'rhdf5'):
        if not rpackages.isinstalled(bioc_package):
            # ask=False mirrors the ask=FALSE used in docker/install_packages.R
            BiocManager.install(bioc_package, ask=False)


def r_package_importer(package_name: str):
    """Import an R package through rpy2, exiting the process on failure.

    Input:
        package_name: R package name to import
    Output:
        The imported R package object.
    Raises:
        SystemExit: carrying a "<package_name> not installed or can not be
        imported" message when the import raises any exception.
    """
    try:
        return importr(package_name)
    except Exception:
        sys.exit(f"{package_name} not installed or can not be imported")



def seurat_to_anndata(file_path: str, share_name: str, output_dir: str = "."):
    """Convert a Seurat object stored in an RDS file into a temporary h5ad file.

    file_path: path to the file handed to R's readRDS
    share_name: final h5ad string name to be expected (without h5ad)
    output_dir: directory to write the temporary h5ad file into

    return:
        path of the written file, i.e. ``output_dir`` joined with
        ``tmp_<share_name>.h5ad`` (relative when ``output_dir`` is relative)

    raises:
        SystemExit if a required R package cannot be imported; any exception
        raised by the R conversion or write step is re-raised.
    """
    # Suppress R console output and ensure required packages are loaded,
    # since this function may be called as a module in cgi script (not via main()).
    r_cbs.consolewrite_print = silent_handler
    r_cbs.consolewrite_warnerror = silent_handler
    # Import required R packages
    base = rpackages.importr('base')
    r_package_importer('Seurat')
    r_package_importer('rhdf5')
    r_package_importer('anndataR')
    # Use R's readRDS to load the object.
    # The result is an R object within the Python environment.
    r_seurat_obj = base.readRDS(file_path)
    ro.globalenv['seurat_obj'] = r_seurat_obj
    # Using anndataR write out a converted h5ad
    ro.r('adata <- as_AnnData(seurat_obj)')
    output_path = os.path.join(output_dir, f'tmp_{share_name}.h5ad')
    # Hand the path to R as a variable instead of splicing it into R source:
    # a quote or backslash in the path would otherwise break (or alter) the
    # evaluated R code.
    ro.globalenv['h5ad_output_path'] = output_path
    try:
        ro.r('write_h5ad(adata, h5ad_output_path)')
    except Exception:
        # The cause is not known here (an existing file is only one
        # possibility), so report the failure neutrally and on stderr so that
        # stdout stays clean for callers such as CGI scripts.
        print(f"Failed to write h5ad {output_path}", file=sys.stderr)
        raise
    return output_path

def openh5ad(h5ad_name):
    """Read the h5ad file at ``h5ad_name`` with scanpy and return the result."""
    return scanpy.read_h5ad(h5ad_name)

def genes_to_ensembl(adata, taxid=None):
    """Re-index ``adata.var`` by Ensembl gene ID, keeping the symbol as a column.

    The current ``adata.var`` index is treated as gene symbols and looked up
    through the MyGene service. ``adata.var`` is then replaced by a table
    indexed by Ensembl ID with a single ``gene_symbol`` column, in the same
    row order as before. Symbols with no Ensembl ID get a ``Fake<N>``
    placeholder ID.

    adata: object whose ``var`` table is rewritten
    taxid: species passed to the MyGene query; when None nothing is done

    return:
        the modified ``adata``, or None when ``taxid`` is None

    raises:
        any exception raised by the MyGene query (logged to stderr first)
    """
    # We are calling an external API for genes to ensembl mapping
    # Potentially problematic down the road if this shuts down
    if taxid is None:
        return None
    genes = adata.var.index.tolist()
    try:
        # TODO: Perhaps add a retry mechanism in case the API returns 500
        mg = mygene.MyGeneInfo()
        mg_genes = mg.querymany(genes, scopes="symbol", fields="ensembl.gene", species=f"{taxid}")
    except Exception as e:
        print(f"Error occurred while querying MyGene: {e}", file=sys.stderr)
        raise

    ensembl_mapping_dict = {}
    for mg_gene in mg_genes:
        ensembl = mg_gene.get('ensembl')
        if not ensembl:
            # No Ensembl annotation in this hit (or an empty list of them)
            continue
        if isinstance(ensembl, list):
            # Currently taking first value, not sure of a better way to handle one gene having multiple ensembl IDs
            ensembl = ensembl[0]
        ensembl_mapping_dict[mg_gene['query']] = ensembl['gene']

    # Build the new index by walking the genes in their ORIGINAL order. Row i
    # of var describes column i of the expression matrix, so the new table
    # must keep one row per original gene, in the same position; deriving it
    # from the dict's insertion order would shuffle the labels and drop
    # duplicated symbols.
    ensembl_ids = []
    count = 0
    for gene in genes:
        ensembl_id = ensembl_mapping_dict.get(gene)
        if ensembl_id is None:
            # We still need an ensembl id for the genes that do not actually have them.
            # So here we create a FAKE# for each one so that it can be searchable in gEAR
            ensembl_id = f"Fake{count}"
            count += 1
        ensembl_ids.append(ensembl_id)

    # Overwrite the current adata.var
    adata.var = pd.DataFrame(index=ensembl_ids, data={"gene_symbol": genes})
    return adata


def reduction_to_metadata(adata):
    """Copy the first two columns of each reduction in ``adata.obsm`` into ``adata.obs``.

    Every ``obsm`` entry with more than one column produces the ``obs``
    columns ``<name>_1`` and ``<name>_2``; entries with a single column are
    skipped. Returns the same ``adata`` object.
    """
    # Discussion with Carlo and Brian settled on keeping the first 2 values of
    # each reduction. PCA, and potentially other reductions, may need more in
    # the future.
    for name in adata.obsm:
        coords = adata.obsm[name]
        if coords.shape[1] <= 1:
            continue
        for axis in (0, 1):
            adata.obs[f'{name}_{axis+1}'] = coords[:, axis]
    return adata


def layer_to_X(adata, layer_name):
    """Set ``adata.X`` to the layer named ``layer_name`` and return ``adata``.

    Useful when the Seurat -> AnnData conversion does not create the X
    matrix, e.g. to promote ``adata.layers['data']`` to X.
    """
    selected_layer = adata.layers[layer_name]
    adata.X = selected_layer
    return adata

def main():
    """Command-line entry point: convert an RDS Seurat object into a gEAR-ready h5ad.

    Steps: parse arguments, make sure the R packages are installed, convert
    the RDS to a temporary h5ad, re-index genes by Ensembl ID, copy reduction
    coordinates into obs, write ``<share_id>.h5ad`` next to the temporary
    file, and remove the temporary file.
    """
    arguments = argument_parser()
    # Args
    rds_path = arguments['rds']
    share_name = arguments['share_id']
    # .get() so this still works with a parser that defines no taxid option;
    # in that case the run ends with "TaxID not supplied" below.
    taxid = arguments.get('taxid')
    r_package_installer()
    # Take the RDS and output the most basic h5ad
    tmp_h5ad_path = seurat_to_anndata(rds_path, share_name)
    if not tmp_h5ad_path:
        return
    # seurat_to_anndata() already returns the path of the tmp_ file, so it is
    # used as-is; the final file goes in the same directory without the prefix.
    final_h5ad_path = os.path.join(os.path.dirname(tmp_h5ad_path), f'{share_name}.h5ad')
    try:
        # Below are some changes and checks to the h5ad to correctly format for gEAR
        adata = openh5ad(tmp_h5ad_path)
        adata = genes_to_ensembl(adata, taxid)
        if adata is None:
            sys.exit("TaxID not supplied")
        adata = reduction_to_metadata(adata)
        adata.write(final_h5ad_path)
    finally:
        # Never leave the intermediate file behind, even when a step fails
        if os.path.exists(tmp_h5ad_path):
            os.remove(tmp_h5ad_path)


if __name__ == "__main__":
    main()
Empty file modified lib/gear/spatialhandler.py
100644 → 100755
Empty file.
Empty file modified lib/gear/trackhub.py
100644 → 100755
Empty file.
Empty file modified lib/gear/userhistory.py
100644 → 100755
Empty file.
Empty file modified lib/gear/utils.py
100644 → 100755
Empty file.
12 changes: 12 additions & 0 deletions www/cgi/finalize_uploaded_expression_dataset.cgi
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,18 @@ def main() -> dict:
result['message'] = 'Error migrating Excel file: {}'.format(str(e))
return result

elif dataset_format == 'rds':
# migrate the RDS file
rds_file = dataset_upload_dir / f'{share_uid}.rds'
rds_dest = dataset_final_dir / f'{dataset_id}.rds'

try:
shutil.move(rds_file, rds_dest)
result['userdata_migrated'] = 1
except Exception as e:
result['message'] = 'Error migrating RDS file: {}'.format(str(e))
return result

elif dataset_format == "spatial":
# migrate the spatial tarball
spatial_src = dataset_upload_dir / f'{share_uid}.tar.gz'
Expand Down
Loading