From ef946fa3b7d0416a6effe61399fc09eead68fe8e Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 19 Mar 2025 21:24:39 -0400 Subject: [PATCH 1/6] feat: draft adding a couple covid workflows (#357) --- .../py/iwc_manifest_to_workflows_yaml.py | 1 + catalog/output/workflows.json | 16 ++ catalog/source/workflows.yml | 188 ++++++++++++++++++ 3 files changed, 205 insertions(+) diff --git a/catalog/build/py/iwc_manifest_to_workflows_yaml.py b/catalog/build/py/iwc_manifest_to_workflows_yaml.py index 533c1ddc3..88f50f4f7 100644 --- a/catalog/build/py/iwc_manifest_to_workflows_yaml.py +++ b/catalog/build/py/iwc_manifest_to_workflows_yaml.py @@ -16,6 +16,7 @@ "Transcriptomics": WorkflowCategoryId.TRANSCRIPTOMICS, "Epigenetics": WorkflowCategoryId.REGULATION, "Genome assembly": WorkflowCategoryId.ASSEMBLY, + "SARS-COV-2": WorkflowCategoryId.VARIANT_CALLING } MANIFEST_SOURCE_OF_TRUTH = ("trs_id", "workflow_name", "categories", "workflow_description") diff --git a/catalog/output/workflows.json b/catalog/output/workflows.json index b5e193bc8..ddf8a6f6f 100644 --- a/catalog/output/workflows.json +++ b/catalog/output/workflows.json @@ -20,6 +20,22 @@ "trsId": "#workflow/github.com/iwc-workflows/haploid-variant-calling-wgs-pe/main/versions/v0.1", "workflowDescription": "Workflow for variant analysis against a reference genome in GenBank format", "workflowName": "Paired end variant calling in haploid system" + }, + { + "parameters": [], + "ploidy": "HAPLOID", + "taxonomyId": "694009", + "trsId": "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-artic-variant-calling/COVID-19-PE-ARTIC-ILLUMINA/versions/v0.5.3", + "workflowDescription": "The workflow for Illumina-sequenced ARTIC data builds on the RNASeq workflow for paired-end data using the same steps for mapping and variant calling, but adds extra logic for trimming ARTIC primer sequences off reads with the ivar package. In addition, this workflow uses ivar also to identify amplicons affected by ARTIC primer-binding site mutations and tries to exclude reads derived from such tainted amplicons when calculating allele-frequencies of other variants.", + "workflowName": "COVID-19: variation analysis on ARTIC PE data" + }, + { + "parameters": [], + "ploidy": "HAPLOID", + "taxonomyId": "694009", + "trsId": "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-wgs-variant-calling/COVID-19-PE-WGS-ILLUMINA/versions/v0.2.4", + "workflowDescription": "This workflows performs paired end read mapping with bwa-mem followed by sensitive variant calling across a wide range of AFs with lofreq", + "workflowName": "COVID-19: variation analysis on WGS PE data" } ] }, diff --git a/catalog/source/workflows.yml b/catalog/source/workflows.yml index 212f4ffe7..2bd22649b 100644 --- a/catalog/source/workflows.yml +++ b/catalog/source/workflows.yml @@ -685,6 +685,194 @@ workflows: - key: GTF file of annotation variable: GENE_MODEL_URL active: true + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-consensus-from-variation/COVID-19-CONSENSUS-CONSTRUCTION/versions/v0.4.2" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: consensus construction" + workflow_description: + "Build a consensus sequence from FILTER PASS variants with + intrasample allele-frequency above a configurable consensus threshold. + + Hard-mask regions with low coverage (but not consensus variants within them) and + ambiguous sites." + ploidy: ANY + parameters: + - key: Variant calls + type_guide: + class: Collection + - key: min-AF for consensus variant + type_guide: + class: float + - key: min-AF for failed variants + type_guide: + class: float + - key: aligned reads data for depth calculation + type_guide: + class: Collection + ext: bam + - key: Depth-threshold for masking + type_guide: + class: integer + - key: Reference genome + type_guide: + class: File + active: false + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-ont-artic-variant-calling/COVID-19-ARTIC-ONT/versions/v0.3.2" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis of ARTIC ONT data" + workflow_description: + This workflow for ONT-sequenced ARTIC data is modeled after + the alignment/variant-calling steps of the [ARTIC pipeline](https://artic.readthedocs.io/en/latest/). + It performs, essentially, the same steps as that pipeline’s minion command, i.e. + read mapping with minimap2 and variant calling with medaka. Like the Illumina + ARTIC workflow it uses ivar for primer trimming. Since ONT-sequenced reads have + a much higher error rate than Illumina-sequenced reads and are therefor plagued + more by false-positive variant calls, this workflow does make no attempt to handle + amplicons affected by potential primer-binding site mutations. + ploidy: ANY + parameters: + - key: ONT-sequenced reads + type_guide: + class: Collection + ext: + - fastqsanger + - fastqsanger.gz + - key: Minimum read length + type_guide: + class: integer + - key: Maximum read length + type_guide: + class: integer + - key: NC_045512.2 FASTA sequence of SARS-CoV-2 + type_guide: + class: File + ext: + - fasta + - fasta.gz + - key: Primer binding sites info in BED format + type_guide: + class: File + ext: bed + active: false + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-artic-variant-calling/COVID-19-PE-ARTIC-ILLUMINA/versions/v0.5.3" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis on ARTIC PE data" + workflow_description: + The workflow for Illumina-sequenced ARTIC data builds on the + RNASeq workflow for paired-end data using the same steps for mapping and variant + calling, but adds extra logic for trimming ARTIC primer sequences off reads with + the ivar package. In addition, this workflow uses ivar also to identify amplicons + affected by ARTIC primer-binding site mutations and tries to exclude reads derived + from such tainted amplicons when calculating allele-frequencies of other variants. + ploidy: HAPLOID + taxonomy_id: 694009 + parameters: + - key: Paired Collection + type_guide: + class: Collection + ext: + - fastqsanger + - fastqsanger.gz + - key: NC_045512.2 FASTA sequence of SARS-CoV-2 + variable: ASSEMBLY_ID + - key: ARTIC primer BED + type_guide: + class: File + ext: bed + - key: ARTIC primers to amplicon assignments + type_guide: + class: File + ext: tabular + - key: Read removal minimum AF + type_guide: + class: float + - key: Read removal maximum AF + type_guide: + class: float + - key: Minimum DP required after amplicon bias correction + type_guide: + class: integer + - key: Minimum DP_ALT required after amplicon bias correction + type_guide: + class: integer + active: true + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-wgs-variant-calling/COVID-19-PE-WGS-ILLUMINA/versions/v0.2.4" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis on WGS PE data" + workflow_description: + This workflows performs paired end read mapping with bwa-mem + followed by sensitive variant calling across a wide range of AFs with lofreq + ploidy: HAPLOID + taxonomy_id: 694009 + parameters: + - key: Paired Collection + type_guide: + class: Collection + ext: + - fastqsanger + - fastqsanger.gz + - key: NC_045512.2 FASTA sequence of SARS-CoV-2 + variable: ASSEMBLY_ID + active: true + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-se-illumina-wgs-variant-calling/COVID-19-SE-WGS-ILLUMINA/versions/v0.1.5" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis on WGS SE data" + workflow_description: + This workflows performs single end read mapping with bowtie2 + followed by sensitive variant calling across a wide range of AFs with lofreq + ploidy: ANY + parameters: + - key: Single End Collection + type_guide: + class: Collection + ext: + - fastqsanger + - fastqsanger.gz + - key: NC_045512.2 FASTA sequence of SARS-CoV-2 + type_guide: + class: File + ext: + - fasta + - fasta.gz + active: false + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-variation-reporting/COVID-19-VARIATION-REPORTING/versions/v0.3.4" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis reporting" + workflow_description: + This workflow takes a VCF dataset of variants produced by + any of the *-variant-calling workflows in https://github.com/galaxyproject/iwc/tree/main/workflows/sars-cov-2-variant-calling + and generates tabular lists of variants by Samples and by Variant, and an overview + plot of variants and their allele-frequencies. + ploidy: ANY + parameters: + - key: Variation data to report + type_guide: + class: Collection + ext: + - vcf + - vcf_bgzip + - key: AF Filter + type_guide: + class: float + - key: DP Filter + type_guide: + class: integer + - key: DP_ALT Filter + type_guide: + class: integer + - key: gene products translations + type_guide: + class: File + ext: tabular + - key: Number of Clusters + type_guide: + class: integer + active: false - trs_id: "#workflow/github.com/iwc-workflows/variation-reporting/main/versions/v0.1.1" categories: - VARIANT_CALLING From f83695cb8b9d90b8ca07645bf39310134dd52d50 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 25 Mar 2025 14:52:04 -0400 Subject: [PATCH 2/6] feat: add new workflow category for consensus sequences --- catalog/build/py/generated_schema/schema.py | 1 + catalog/schema/enums/workflow_category_id.yaml | 1 + catalog/schema/generated/schema.ts | 1 + catalog/schema/generated/workflow_categories.json | 1 + catalog/schema/generated/workflows.json | 1 + 5 files changed, 5 insertions(+) diff --git a/catalog/build/py/generated_schema/schema.py b/catalog/build/py/generated_schema/schema.py index cc4eb45e5..9418e9a71 100644 --- a/catalog/build/py/generated_schema/schema.py +++ b/catalog/build/py/generated_schema/schema.py @@ -94,6 +94,7 @@ class WorkflowCategoryId(str, Enum): ASSEMBLY = "ASSEMBLY" GENOME_COMPARISONS = "GENOME_COMPARISONS" PROTEIN_FOLDING = "PROTEIN_FOLDING" + CONSENSUS_SEQUENCES = "CONSENSUS_SEQUENCES" OTHER = "OTHER" diff --git a/catalog/schema/enums/workflow_category_id.yaml b/catalog/schema/enums/workflow_category_id.yaml index ea831f266..473193e26 100644 --- a/catalog/schema/enums/workflow_category_id.yaml +++ b/catalog/schema/enums/workflow_category_id.yaml @@ -12,4 +12,5 @@ enums: ASSEMBLY: GENOME_COMPARISONS: PROTEIN_FOLDING: + CONSENSUS_SEQUENCES: OTHER: diff --git a/catalog/schema/generated/schema.ts b/catalog/schema/generated/schema.ts index 12b3ed6bb..cd7a8d341 100644 --- a/catalog/schema/generated/schema.ts +++ b/catalog/schema/generated/schema.ts @@ -18,6 +18,7 @@ export enum WorkflowCategoryId { ASSEMBLY = "ASSEMBLY", GENOME_COMPARISONS = "GENOME_COMPARISONS", PROTEIN_FOLDING = "PROTEIN_FOLDING", + CONSENSUS_SEQUENCES = "CONSENSUS_SEQUENCES", OTHER = "OTHER", }; /** diff --git a/catalog/schema/generated/workflow_categories.json b/catalog/schema/generated/workflow_categories.json index cbec45ed0..5d877235f 100644 --- a/catalog/schema/generated/workflow_categories.json +++ b/catalog/schema/generated/workflow_categories.json @@ -52,6 +52,7 @@ "ASSEMBLY", "GENOME_COMPARISONS", "PROTEIN_FOLDING", + "CONSENSUS_SEQUENCES", "OTHER" ], "title": "WorkflowCategoryId", diff --git a/catalog/schema/generated/workflows.json b/catalog/schema/generated/workflows.json index 91b9ba232..1e6b0d72b 100644 --- a/catalog/schema/generated/workflows.json +++ b/catalog/schema/generated/workflows.json @@ -79,6 +79,7 @@ "ASSEMBLY", "GENOME_COMPARISONS", "PROTEIN_FOLDING", + "CONSENSUS_SEQUENCES", "OTHER" ], "title": "WorkflowCategoryId", From d1e77b46b850cc6da0cab633c20b3def64e0072a Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Tue, 25 Mar 2025 14:54:30 -0400 Subject: [PATCH 3/6] feat: adding mpox workflow and cleaning up covid ones --- .../py/iwc_manifest_to_workflows_yaml.py | 2 +- catalog/output/workflows.json | 49 ++++++++++- catalog/source/workflow_categories.yml | 4 + catalog/source/workflows.yml | 84 +++++++++---------- 4 files changed, 91 insertions(+), 48 deletions(-) diff --git a/catalog/build/py/iwc_manifest_to_workflows_yaml.py b/catalog/build/py/iwc_manifest_to_workflows_yaml.py index 88f50f4f7..b50251e05 100644 --- a/catalog/build/py/iwc_manifest_to_workflows_yaml.py +++ b/catalog/build/py/iwc_manifest_to_workflows_yaml.py @@ -16,7 +16,7 @@ "Transcriptomics": WorkflowCategoryId.TRANSCRIPTOMICS, "Epigenetics": WorkflowCategoryId.REGULATION, "Genome assembly": WorkflowCategoryId.ASSEMBLY, - "SARS-COV-2": WorkflowCategoryId.VARIANT_CALLING + "Virology": WorkflowCategoryId.CONSENSUS_SEQUENCES } MANIFEST_SOURCE_OF_TRUTH = ("trs_id", "workflow_name", "categories", "workflow_description") diff --git a/catalog/output/workflows.json b/catalog/output/workflows.json index ddf8a6f6f..119d1aa7a 100644 --- a/catalog/output/workflows.json +++ b/catalog/output/workflows.json @@ -22,20 +22,43 @@ "workflowName": "Paired end variant calling in haploid system" }, { - "parameters": [], + "parameters": [ + { + "key": "NC_045512.2 FASTA sequence of SARS-CoV-2", + "variable": "ASSEMBLY_FASTA_URL" + } + ], "ploidy": "HAPLOID", "taxonomyId": "694009", - "trsId": "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-artic-variant-calling/COVID-19-PE-ARTIC-ILLUMINA/versions/v0.5.3", + "trsId": "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-artic-variant-calling/COVID-19-PE-ARTIC-ILLUMINA/versions/v0.5.4", "workflowDescription": "The workflow for Illumina-sequenced ARTIC data builds on the RNASeq workflow for paired-end data using the same steps for mapping and variant calling, but adds extra logic for trimming ARTIC primer sequences off reads with the ivar package. In addition, this workflow uses ivar also to identify amplicons affected by ARTIC primer-binding site mutations and tries to exclude reads derived from such tainted amplicons when calculating allele-frequencies of other variants.", "workflowName": "COVID-19: variation analysis on ARTIC PE data" }, { - "parameters": [], + "parameters": [ + { + "key": "NC_045512.2 FASTA sequence of SARS-CoV-2", + "variable": "ASSEMBLY_FASTA_URL" + } + ], "ploidy": "HAPLOID", "taxonomyId": "694009", "trsId": "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-wgs-variant-calling/COVID-19-PE-WGS-ILLUMINA/versions/v0.2.4", "workflowDescription": "This workflows performs paired end read mapping with bwa-mem followed by sensitive variant calling across a wide range of AFs with lofreq", "workflowName": "COVID-19: variation analysis on WGS PE data" + }, + { + "parameters": [ + { + "key": "NC_045512.2 FASTA sequence of SARS-CoV-2", + "variable": "ASSEMBLY_FASTA_URL" + } + ], + "ploidy": "HAPLOID", + "taxonomyId": "694009", + "trsId": "#workflow/github.com/iwc-workflows/sars-cov-2-se-illumina-wgs-variant-calling/COVID-19-SE-WGS-ILLUMINA/versions/v0.1.6", + "workflowDescription": "This workflows performs single end read mapping with bowtie2 followed by sensitive variant calling across a wide range of AFs with lofreq", + "workflowName": "COVID-19: variation analysis on WGS SE data" } ] }, @@ -181,6 +204,26 @@ } ] }, + { + "category": "CONSENSUS_SEQUENCES", + "description": "Build consensus sequences for related isolates.", + "name": "Consensus sequences", + "workflows": [ + { + "parameters": [ + { + "key": "Reference FASTA", + "variable": "ASSEMBLY_FASTA_URL" + } + ], + "ploidy": "HAPLOID", + "taxonomyId": "10244", + "trsId": "#workflow/github.com/iwc-workflows/pox-virus-amplicon/main/versions/v0.3", + "workflowDescription": "A workflow for the analysis of pox virus genomes sequenced as half-genomes (for ITR resolution) in a tiled-amplicon approach", + "workflowName": "Pox Virus Illumina Amplicon Workflow from half-genomes" + } + ] + }, { "category": "ASSEMBLY", "description": "Assemble prokaryotic and eukaryotic genomes sequenced with a variety of technologies.", diff --git a/catalog/source/workflow_categories.yml b/catalog/source/workflow_categories.yml index 7cdb63c90..98e8076c7 100644 --- a/catalog/source/workflow_categories.yml +++ b/catalog/source/workflow_categories.yml @@ -11,6 +11,10 @@ workflow_categories: name: "Regulation" description: "Workflows for the analysis of ChIP-seq, ATAC-Seq, and beyond." + - category: "CONSENSUS_SEQUENCES" + name: "Consensus sequences" + description: "Build consensus sequences for related isolates." + - category: "ASSEMBLY" name: "Assembly" description: "Assemble prokaryotic and eukaryotic genomes sequenced with a variety of technologies." diff --git a/catalog/source/workflows.yml b/catalog/source/workflows.yml index 2bd22649b..afd92cc63 100644 --- a/catalog/source/workflows.yml +++ b/catalog/source/workflows.yml @@ -541,6 +541,37 @@ workflows: type_guide: class: text active: false + - trs_id: "#workflow/github.com/iwc-workflows/pox-virus-amplicon/main/versions/v0.3" + categories: + - CONSENSUS_SEQUENCES + workflow_name: Pox Virus Illumina Amplicon Workflow from half-genomes + workflow_description: + A workflow for the analysis of pox virus genomes sequenced + as half-genomes (for ITR resolution) in a tiled-amplicon approach + ploidy: HAPLOID + taxonomy_id: 10244 + parameters: + - key: Reference FASTA + variable: ASSEMBLY_FASTA_URL + - key: Primer Scheme + type_guide: + class: File + - key: PE Reads Pool1 + type_guide: + class: Collection + - key: PE Reads Pool2 + type_guide: + class: Collection + - key: Minimum quality score to call base + type_guide: + class: integer + - key: Allele frequency to call SNV + type_guide: + class: float + - key: Allele frequency to call indel + type_guide: + class: float + active: true - trs_id: "#workflow/github.com/iwc-workflows/pseudobulk-worflow-decoupler-edger/main/versions/v0.1.1" categories: - TRANSCRIPTOMICS @@ -614,7 +645,7 @@ workflows: type_guide: class: text active: false - - trs_id: "#workflow/github.com/iwc-workflows/rnaseq-de/main/versions/v0.3" + - trs_id: "#workflow/github.com/iwc-workflows/rnaseq-de/main/versions/v0.4" categories: - TRANSCRIPTOMICS workflow_name: RNA-Seq Differential Expression Analysis with Visualization @@ -685,38 +716,6 @@ workflows: - key: GTF file of annotation variable: GENE_MODEL_URL active: true - - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-consensus-from-variation/COVID-19-CONSENSUS-CONSTRUCTION/versions/v0.4.2" - categories: - - VARIANT_CALLING - workflow_name: "COVID-19: consensus construction" - workflow_description: - "Build a consensus sequence from FILTER PASS variants with - intrasample allele-frequency above a configurable consensus threshold. - - Hard-mask regions with low coverage (but not consensus variants within them) and - ambiguous sites." - ploidy: ANY - parameters: - - key: Variant calls - type_guide: - class: Collection - - key: min-AF for consensus variant - type_guide: - class: float - - key: min-AF for failed variants - type_guide: - class: float - - key: aligned reads data for depth calculation - type_guide: - class: Collection - ext: bam - - key: Depth-threshold for masking - type_guide: - class: integer - - key: Reference genome - type_guide: - class: File - active: false - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-ont-artic-variant-calling/COVID-19-ARTIC-ONT/versions/v0.3.2" categories: - VARIANT_CALLING @@ -755,7 +754,7 @@ workflows: class: File ext: bed active: false - - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-artic-variant-calling/COVID-19-PE-ARTIC-ILLUMINA/versions/v0.5.3" + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-artic-variant-calling/COVID-19-PE-ARTIC-ILLUMINA/versions/v0.5.4" categories: - VARIANT_CALLING workflow_name: "COVID-19: variation analysis on ARTIC PE data" @@ -776,7 +775,7 @@ workflows: - fastqsanger - fastqsanger.gz - key: NC_045512.2 FASTA sequence of SARS-CoV-2 - variable: ASSEMBLY_ID + variable: ASSEMBLY_FASTA_URL - key: ARTIC primer BED type_guide: class: File @@ -815,16 +814,17 @@ workflows: - fastqsanger - fastqsanger.gz - key: NC_045512.2 FASTA sequence of SARS-CoV-2 - variable: ASSEMBLY_ID + variable: ASSEMBLY_FASTA_URL active: true - - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-se-illumina-wgs-variant-calling/COVID-19-SE-WGS-ILLUMINA/versions/v0.1.5" + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-se-illumina-wgs-variant-calling/COVID-19-SE-WGS-ILLUMINA/versions/v0.1.6" categories: - VARIANT_CALLING workflow_name: "COVID-19: variation analysis on WGS SE data" workflow_description: This workflows performs single end read mapping with bowtie2 followed by sensitive variant calling across a wide range of AFs with lofreq - ploidy: ANY + ploidy: HAPLOID + taxonomy_id: 694009 parameters: - key: Single End Collection type_guide: @@ -833,12 +833,8 @@ workflows: - fastqsanger - fastqsanger.gz - key: NC_045512.2 FASTA sequence of SARS-CoV-2 - type_guide: - class: File - ext: - - fasta - - fasta.gz - active: false + variable: ASSEMBLY_FASTA_URL + active: true - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-variation-reporting/COVID-19-VARIATION-REPORTING/versions/v0.3.4" categories: - VARIANT_CALLING From b2122bdfab92cbf39b513eff247c0c529b344381 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 26 Mar 2025 10:28:55 -0400 Subject: [PATCH 4/6] feat: support explicit url params passed from workflow yml --- .../brc-analytics-catalog/common/entities.ts | 21 ++++++++++++++++++- app/utils/galaxy-api.ts | 20 ++++++++++++------ catalog/output/workflows.json | 16 ++++++++++++++ catalog/source/workflows.yml | 10 +++++---- 4 files changed, 56 insertions(+), 11 deletions(-) diff --git a/app/apis/catalog/brc-analytics-catalog/common/entities.ts b/app/apis/catalog/brc-analytics-catalog/common/entities.ts index 5cd1e8184..726024fd7 100644 --- a/app/apis/catalog/brc-analytics-catalog/common/entities.ts +++ b/app/apis/catalog/brc-analytics-catalog/common/entities.ts @@ -4,6 +4,25 @@ import { WORKFLOW_PLOIDY, } from "./schema-entities"; +export interface WorkflowUrlParameter { + ext: string; + src: string; + url: string; +} + +export function isWorkflowUrlParameter( + // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Necessary for type guard pattern + value: any +): value is WorkflowUrlParameter { + return ( + typeof value === "object" && + value !== null && + "ext" in value && + "src" in value && + "url" in value + ); +} + export type BRCCatalog = BRCDataCatalogGenome; export interface BRCDataCatalogGenome { @@ -87,5 +106,5 @@ export interface Workflow { export interface WorkflowParameter { key: string; - variable: WORKFLOW_PARAMETER_VARIABLE; + variable: WORKFLOW_PARAMETER_VARIABLE | WorkflowUrlParameter; } diff --git a/app/utils/galaxy-api.ts b/app/utils/galaxy-api.ts index 343b38be0..7df0ab29c 100644 --- a/app/utils/galaxy-api.ts +++ b/app/utils/galaxy-api.ts @@ -1,5 +1,9 @@ import { WORKFLOW_PARAMETER_VARIABLE } from "../apis/catalog/brc-analytics-catalog/common/schema-entities"; -import { WorkflowParameter } from "../apis/catalog/brc-analytics-catalog/common/entities"; +import { + WorkflowParameter, + WorkflowUrlParameter, + isWorkflowUrlParameter, +} from "../apis/catalog/brc-analytics-catalog/common/entities"; import ky from "ky"; import { GALAXY_ENVIRONMENT } from "site-config/common/galaxy"; @@ -11,7 +15,7 @@ interface WorkflowLandingsBody { } type WorkflowLandingsBodyRequestState = { - [key: string]: { [key: string]: string } | string; + [key: string]: { [key: string]: string } | string | WorkflowUrlParameter; }; interface WorkflowLanding { @@ -69,10 +73,14 @@ function buildFastaUrl(identifier: string): string { } function paramVariableToRequestValue( - variable: WORKFLOW_PARAMETER_VARIABLE, + variable: WORKFLOW_PARAMETER_VARIABLE | WorkflowUrlParameter, geneModelUrl: string | null, referenceGenome: string ): WorkflowLandingsBodyRequestState[string] | undefined { + if (isWorkflowUrlParameter(variable)) { + return variable; + } + switch (variable) { case WORKFLOW_PARAMETER_VARIABLE.ASSEMBLY_ID: return referenceGenome; @@ -81,14 +89,14 @@ function paramVariableToRequestValue( ext: "fasta.gz", src: "url", url: buildFastaUrl(referenceGenome), - }; + } as WorkflowUrlParameter; case WORKFLOW_PARAMETER_VARIABLE.GENE_MODEL_URL: return geneModelUrl - ? { + ? ({ ext: "gtf.gz", src: "url", url: geneModelUrl, - } + } as WorkflowUrlParameter) : undefined; } } diff --git a/catalog/output/workflows.json b/catalog/output/workflows.json index 119d1aa7a..8cd8867f0 100644 --- a/catalog/output/workflows.json +++ b/catalog/output/workflows.json @@ -26,6 +26,22 @@ { "key": "NC_045512.2 FASTA sequence of SARS-CoV-2", "variable": "ASSEMBLY_FASTA_URL" + }, + { + "key": "ARTIC primer BED", + "variable": { + "ext": "bed", + "src": "url", + "url": "https://zenodo.org/record/4555735/files/ARTIC_nCoV-2019_v3.bed" + } + }, + { + "key": "ARTIC primers to amplicon assignments", + "variable": { + "ext": "tabular", + "src": "url", + "url": "https://zenodo.org/record/4555735/files/ARTIC_amplicon_info_v3.tsv" + } } ], "ploidy": "HAPLOID", diff --git a/catalog/source/workflows.yml b/catalog/source/workflows.yml index afd92cc63..7851b9ac6 100644 --- a/catalog/source/workflows.yml +++ b/catalog/source/workflows.yml @@ -777,13 +777,15 @@ workflows: - key: NC_045512.2 FASTA sequence of SARS-CoV-2 variable: ASSEMBLY_FASTA_URL - key: ARTIC primer BED - type_guide: - class: File + variable: ext: bed + src: url + url: https://zenodo.org/record/4555735/files/ARTIC_nCoV-2019_v3.bed - key: ARTIC primers to amplicon assignments - type_guide: - class: File + variable: ext: tabular + src: url + url: https://zenodo.org/record/4555735/files/ARTIC_amplicon_info_v3.tsv - key: Read removal minimum AF type_guide: class: float From f0dc310bf3b0a119f9fa6b894d9d33925dff2951 Mon Sep 17 00:00:00 2001 From: Danielle Callan Date: Wed, 26 Mar 2025 11:01:43 -0400 Subject: [PATCH 5/6] fix: clean up explicit url specs for workflow params --- .../brc-analytics-catalog/common/entities.ts | 16 ++------- app/utils/galaxy-api.ts | 29 ++++++++------- catalog/build/py/generated_schema/schema.py | 13 +++++++ catalog/build/ts/build-catalog.ts | 4 ++- catalog/output/workflows.json | 4 +-- catalog/schema/generated/schema.ts | 15 ++++++++ catalog/schema/generated/workflows.json | 36 +++++++++++++++++++ catalog/schema/workflows.yaml | 20 +++++++++++ catalog/source/workflows.yml | 4 +-- 9 files changed, 107 insertions(+), 34 deletions(-) diff --git a/app/apis/catalog/brc-analytics-catalog/common/entities.ts b/app/apis/catalog/brc-analytics-catalog/common/entities.ts index 726024fd7..f8fbe7bff 100644 --- a/app/apis/catalog/brc-analytics-catalog/common/entities.ts +++ b/app/apis/catalog/brc-analytics-catalog/common/entities.ts @@ -10,19 +10,6 @@ export interface WorkflowUrlParameter { url: string; } -export function isWorkflowUrlParameter( - // eslint-disable-next-line @typescript-eslint/no-explicit-any -- Necessary for type guard pattern - value: any -): value is WorkflowUrlParameter { - return ( - typeof value === "object" && - value !== null && - "ext" in value && - "src" in value && - "url" in value - ); -} - export type BRCCatalog = BRCDataCatalogGenome; export interface BRCDataCatalogGenome { @@ -106,5 +93,6 @@ export interface Workflow { export interface WorkflowParameter { key: string; - variable: WORKFLOW_PARAMETER_VARIABLE | WorkflowUrlParameter; + url_spec?: WorkflowUrlParameter; + variable?: WORKFLOW_PARAMETER_VARIABLE; } diff --git a/app/utils/galaxy-api.ts b/app/utils/galaxy-api.ts index 7df0ab29c..fa65d50df 100644 --- a/app/utils/galaxy-api.ts +++ b/app/utils/galaxy-api.ts @@ -2,7 +2,6 @@ import { WORKFLOW_PARAMETER_VARIABLE } from "../apis/catalog/brc-analytics-catal import { WorkflowParameter, WorkflowUrlParameter, - isWorkflowUrlParameter, } from "../apis/catalog/brc-analytics-catalog/common/entities"; import ky from "ky"; import { GALAXY_ENVIRONMENT } from "site-config/common/galaxy"; @@ -73,14 +72,10 @@ function buildFastaUrl(identifier: string): string { } function paramVariableToRequestValue( - variable: WORKFLOW_PARAMETER_VARIABLE | WorkflowUrlParameter, + variable: WORKFLOW_PARAMETER_VARIABLE, geneModelUrl: string | null, referenceGenome: string ): WorkflowLandingsBodyRequestState[string] | undefined { - if (isWorkflowUrlParameter(variable)) { - return variable; - } - switch (variable) { case WORKFLOW_PARAMETER_VARIABLE.ASSEMBLY_ID: return referenceGenome; @@ -114,15 +109,19 @@ function getWorkflowLandingsRequestState( parameters: WorkflowParameter[] ): WorkflowLandingsBodyRequestState { const result: WorkflowLandingsBodyRequestState = {}; - parameters.forEach(({ key, variable }) => { - const maybeParam = paramVariableToRequestValue( - variable, - geneModelUrl, - referenceGenome - ); - if (maybeParam !== undefined) { - result[key] = maybeParam; + for (const { key, url_spec, variable } of parameters) { + if (url_spec) { + // If url_spec is provided, use it directly + result[key] = url_spec as WorkflowUrlParameter; + } else if (variable) { + // Otherwise, use the variable to determine the value + const value = paramVariableToRequestValue( + variable, + geneModelUrl, + referenceGenome + ); + if (value !== undefined) result[key] = value; } - }); + } return result; } diff --git a/catalog/build/py/generated_schema/schema.py b/catalog/build/py/generated_schema/schema.py index 9418e9a71..3b1bfb8bf 100644 --- a/catalog/build/py/generated_schema/schema.py +++ b/catalog/build/py/generated_schema/schema.py @@ -212,9 +212,21 @@ class WorkflowParameter(ConfiguredBaseModel): key: str = Field(default=..., description="""The key in which the parameter will be set.""", json_schema_extra = { "linkml_meta": {'alias': 'key', 'domain_of': ['WorkflowParameter']} }) variable: Optional[WorkflowParameterVariable] = Field(default=None, description="""A variable to substitute in as the value of the parameter.""", json_schema_extra = { "linkml_meta": {'alias': 'variable', 'domain_of': ['WorkflowParameter']} }) + url_spec: Optional[WorkflowUrlSpec] = Field(default=None, description="""A direct URL specification for the parameter.""", json_schema_extra = { "linkml_meta": {'alias': 'url_spec', 'domain_of': ['WorkflowParameter']} }) type_guide: Optional[Any] = Field(default=None, description="""Arbitrary data describing the type of the parameter, intended only as convenient reference for maintainers.""", json_schema_extra = { "linkml_meta": {'alias': 'type_guide', 'domain_of': ['WorkflowParameter']} }) +class WorkflowUrlSpec(ConfiguredBaseModel): + """ + A URL specification for a workflow parameter. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#'}) + + ext: str = Field(default=..., description="""The file extension of the URL.""", json_schema_extra = { "linkml_meta": {'alias': 'ext', 'domain_of': ['WorkflowUrlSpec']} }) + src: str = Field(default=..., description="""The source type, typically 'url'.""", json_schema_extra = { "linkml_meta": {'alias': 'src', 'domain_of': ['WorkflowUrlSpec']} }) + url: str = Field(default=..., description="""The URL to the resource.""", json_schema_extra = { "linkml_meta": {'alias': 'url', 'domain_of': ['WorkflowUrlSpec']} }) + + # Model rebuild # see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model Assemblies.model_rebuild() @@ -226,4 +238,5 @@ class WorkflowParameter(ConfiguredBaseModel): Workflows.model_rebuild() Workflow.model_rebuild() WorkflowParameter.model_rebuild() +WorkflowUrlSpec.model_rebuild() diff --git a/catalog/build/ts/build-catalog.ts b/catalog/build/ts/build-catalog.ts index e5d204d4d..c47cdbe10 100644 --- a/catalog/build/ts/build-catalog.ts +++ b/catalog/build/ts/build-catalog.ts @@ -198,8 +198,10 @@ function buildWorkflow( }: SourceWorkflow ): void { const parameters = []; - for (const { key, variable } of sourceParameters) { + for (const { key, url_spec, variable } of sourceParameters) { + // Add parameter if either variable or url_spec is defined if (variable) parameters.push({ key, variable }); + else if (url_spec) parameters.push({ key, url_spec }); } const workflow: Workflow = { parameters, diff --git a/catalog/output/workflows.json b/catalog/output/workflows.json index 8cd8867f0..387d32c32 100644 --- a/catalog/output/workflows.json +++ b/catalog/output/workflows.json @@ -29,7 +29,7 @@ }, { "key": "ARTIC primer BED", - "variable": { + "url_spec": { "ext": "bed", "src": "url", "url": "https://zenodo.org/record/4555735/files/ARTIC_nCoV-2019_v3.bed" @@ -37,7 +37,7 @@ }, { "key": "ARTIC primers to amplicon assignments", - "variable": { + "url_spec": { "ext": "tabular", "src": "url", "url": "https://zenodo.org/record/4555735/files/ARTIC_amplicon_info_v3.tsv" diff --git a/catalog/schema/generated/schema.ts b/catalog/schema/generated/schema.ts index cd7a8d341..a5bf10fb6 100644 --- a/catalog/schema/generated/schema.ts +++ b/catalog/schema/generated/schema.ts @@ -149,9 +149,24 @@ export interface WorkflowParameter { key: string, /** A variable to substitute in as the value of the parameter. */ variable?: WorkflowParameterVariable | null, + /** A direct URL specification for the parameter. */ + url_spec?: WorkflowUrlSpec | null, /** Arbitrary data describing the type of the parameter, intended only as convenient reference for maintainers. */ type_guide?: Any | null, } +/** + * A URL specification for a workflow parameter. + */ +export interface WorkflowUrlSpec { + /** The file extension of the URL. */ + ext: string, + /** The source type, typically 'url'. */ + src: string, + /** The URL to the resource. */ + url: string, +} + + diff --git a/catalog/schema/generated/workflows.json b/catalog/schema/generated/workflows.json index 1e6b0d72b..168092410 100644 --- a/catalog/schema/generated/workflows.json +++ b/catalog/schema/generated/workflows.json @@ -104,6 +104,17 @@ ], "description": "Arbitrary data describing the type of the parameter, intended only as convenient reference for maintainers." }, + "url_spec": { + "anyOf": [ + { + "$ref": "#/$defs/WorkflowUrlSpec" + }, + { + "type": "null" + } + ], + "description": "A direct URL specification for the parameter." + }, "variable": { "$ref": "#/$defs/WorkflowParameterVariable", "description": "A variable to substitute in as the value of the parameter." @@ -136,6 +147,31 @@ "title": "WorkflowPloidy", "type": "string" }, + "WorkflowUrlSpec": { + "additionalProperties": false, + "description": "A URL specification for a workflow parameter.", + "properties": { + "ext": { + "description": "The file extension of the URL.", + "type": "string" + }, + "src": { + "description": "The source type, typically 'url'.", + "type": "string" + }, + "url": { + "description": "The URL to the resource.", + "type": "string" + } + }, + "required": [ + "ext", + "src", + "url" + ], + "title": "WorkflowUrlSpec", + "type": "object" + }, "Workflows": { "additionalProperties": false, "description": "Object containing list of workflows.", diff --git a/catalog/schema/workflows.yaml b/catalog/schema/workflows.yaml index 243457573..37c9cd91e 100644 --- a/catalog/schema/workflows.yaml +++ b/catalog/schema/workflows.yaml @@ -75,7 +75,27 @@ classes: description: A variable to substitute in as the value of the parameter. required: false range: WorkflowParameterVariable + url_spec: + description: A direct URL specification for the parameter. + required: false + range: WorkflowUrlSpec type_guide: description: Arbitrary data describing the type of the parameter, intended only as convenient reference for maintainers. required: false range: Any + + WorkflowUrlSpec: + description: A URL specification for a workflow parameter. + attributes: + ext: + description: The file extension of the URL. + required: true + range: string + src: + description: The source type, typically 'url'. + required: true + range: string + url: + description: The URL to the resource. + required: true + range: string diff --git a/catalog/source/workflows.yml b/catalog/source/workflows.yml index 7851b9ac6..52528cf59 100644 --- a/catalog/source/workflows.yml +++ b/catalog/source/workflows.yml @@ -777,12 +777,12 @@ workflows: - key: NC_045512.2 FASTA sequence of SARS-CoV-2 variable: ASSEMBLY_FASTA_URL - key: ARTIC primer BED - variable: + url_spec: ext: bed src: url url: https://zenodo.org/record/4555735/files/ARTIC_nCoV-2019_v3.bed - key: ARTIC primers to amplicon assignments - variable: + url_spec: ext: tabular src: url url: https://zenodo.org/record/4555735/files/ARTIC_amplicon_info_v3.tsv From ee5e03fd681f32a76331e384822202cd8b7799a0 Mon Sep 17 00:00:00 2001 From: hunterckx <118154470+hunterckx@users.noreply.github.com> Date: Sat, 29 Mar 2025 20:36:27 -0700 Subject: [PATCH 6/6] refactor: simplify typing in `galaxy-api` (#357) --- app/utils/galaxy-api.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/app/utils/galaxy-api.ts b/app/utils/galaxy-api.ts index fa65d50df..fb7859637 100644 --- a/app/utils/galaxy-api.ts +++ b/app/utils/galaxy-api.ts @@ -14,7 +14,7 @@ interface WorkflowLandingsBody { } type WorkflowLandingsBodyRequestState = { - [key: string]: { [key: string]: string } | string | WorkflowUrlParameter; + [key: string]: string | WorkflowUrlParameter; }; interface WorkflowLanding { @@ -84,14 +84,14 @@ function paramVariableToRequestValue( ext: "fasta.gz", src: "url", url: buildFastaUrl(referenceGenome), - } as WorkflowUrlParameter; + }; case WORKFLOW_PARAMETER_VARIABLE.GENE_MODEL_URL: return geneModelUrl - ? ({ + ? { ext: "gtf.gz", src: "url", url: geneModelUrl, - } as WorkflowUrlParameter) + } : undefined; } } @@ -112,7 +112,7 @@ function getWorkflowLandingsRequestState( for (const { key, url_spec, variable } of parameters) { if (url_spec) { // If url_spec is provided, use it directly - result[key] = url_spec as WorkflowUrlParameter; + result[key] = url_spec; } else if (variable) { // Otherwise, use the variable to determine the value const value = paramVariableToRequestValue(