diff --git a/app/apis/catalog/brc-analytics-catalog/common/entities.ts b/app/apis/catalog/brc-analytics-catalog/common/entities.ts index 5cd1e8184..f8fbe7bff 100644 --- a/app/apis/catalog/brc-analytics-catalog/common/entities.ts +++ b/app/apis/catalog/brc-analytics-catalog/common/entities.ts @@ -4,6 +4,12 @@ import { WORKFLOW_PLOIDY, } from "./schema-entities"; +export interface WorkflowUrlParameter { + ext: string; + src: string; + url: string; +} + export type BRCCatalog = BRCDataCatalogGenome; export interface BRCDataCatalogGenome { @@ -87,5 +93,6 @@ export interface Workflow { export interface WorkflowParameter { key: string; - variable: WORKFLOW_PARAMETER_VARIABLE; + url_spec?: WorkflowUrlParameter; + variable?: WORKFLOW_PARAMETER_VARIABLE; } diff --git a/app/utils/galaxy-api.ts b/app/utils/galaxy-api.ts index 343b38be0..fb7859637 100644 --- a/app/utils/galaxy-api.ts +++ b/app/utils/galaxy-api.ts @@ -1,5 +1,8 @@ import { WORKFLOW_PARAMETER_VARIABLE } from "../apis/catalog/brc-analytics-catalog/common/schema-entities"; -import { WorkflowParameter } from "../apis/catalog/brc-analytics-catalog/common/entities"; +import { + WorkflowParameter, + WorkflowUrlParameter, +} from "../apis/catalog/brc-analytics-catalog/common/entities"; import ky from "ky"; import { GALAXY_ENVIRONMENT } from "site-config/common/galaxy"; @@ -11,7 +14,7 @@ interface WorkflowLandingsBody { } type WorkflowLandingsBodyRequestState = { - [key: string]: { [key: string]: string } | string; + [key: string]: string | WorkflowUrlParameter; }; interface WorkflowLanding { @@ -106,15 +109,19 @@ function getWorkflowLandingsRequestState( parameters: WorkflowParameter[] ): WorkflowLandingsBodyRequestState { const result: WorkflowLandingsBodyRequestState = {}; - parameters.forEach(({ key, variable }) => { - const maybeParam = paramVariableToRequestValue( - variable, - geneModelUrl, - referenceGenome - ); - if (maybeParam !== undefined) { - result[key] = maybeParam; + for (const { key, url_spec, variable } of parameters) { + if (url_spec) { + // If url_spec is provided, use it directly + result[key] = url_spec; + } else if (variable) { + // Otherwise, use the variable to determine the value + const value = paramVariableToRequestValue( + variable, + geneModelUrl, + referenceGenome + ); + if (value !== undefined) result[key] = value; } - }); + } return result; } diff --git a/catalog/build/py/generated_schema/schema.py b/catalog/build/py/generated_schema/schema.py index cc4eb45e5..3b1bfb8bf 100644 --- a/catalog/build/py/generated_schema/schema.py +++ b/catalog/build/py/generated_schema/schema.py @@ -94,6 +94,7 @@ class WorkflowCategoryId(str, Enum): ASSEMBLY = "ASSEMBLY" GENOME_COMPARISONS = "GENOME_COMPARISONS" PROTEIN_FOLDING = "PROTEIN_FOLDING" + CONSENSUS_SEQUENCES = "CONSENSUS_SEQUENCES" OTHER = "OTHER" @@ -211,9 +212,21 @@ class WorkflowParameter(ConfiguredBaseModel): key: str = Field(default=..., description="""The key in which the parameter will be set.""", json_schema_extra = { "linkml_meta": {'alias': 'key', 'domain_of': ['WorkflowParameter']} }) variable: Optional[WorkflowParameterVariable] = Field(default=None, description="""A variable to substitute in as the value of the parameter.""", json_schema_extra = { "linkml_meta": {'alias': 'variable', 'domain_of': ['WorkflowParameter']} }) + url_spec: Optional[WorkflowUrlSpec] = Field(default=None, description="""A direct URL specification for the parameter.""", json_schema_extra = { "linkml_meta": {'alias': 'url_spec', 'domain_of': ['WorkflowParameter']} }) type_guide: Optional[Any] = Field(default=None, description="""Arbitrary data describing the type of the parameter, intended only as convenient reference for maintainers.""", json_schema_extra = { "linkml_meta": {'alias': 'type_guide', 'domain_of': ['WorkflowParameter']} }) +class WorkflowUrlSpec(ConfiguredBaseModel): + """ + A URL specification for a workflow parameter. + """ + linkml_meta: ClassVar[LinkMLMeta] = LinkMLMeta({'from_schema': 'https://github.com/galaxyproject/brc-analytics/blob/main/catalog/schema/workflows.yaml#'}) + + ext: str = Field(default=..., description="""The file extension of the URL.""", json_schema_extra = { "linkml_meta": {'alias': 'ext', 'domain_of': ['WorkflowUrlSpec']} }) + src: str = Field(default=..., description="""The source type, typically 'url'.""", json_schema_extra = { "linkml_meta": {'alias': 'src', 'domain_of': ['WorkflowUrlSpec']} }) + url: str = Field(default=..., description="""The URL to the resource.""", json_schema_extra = { "linkml_meta": {'alias': 'url', 'domain_of': ['WorkflowUrlSpec']} }) + + # Model rebuild # see https://pydantic-docs.helpmanual.io/usage/models/#rebuilding-a-model Assemblies.model_rebuild() @@ -225,4 +238,5 @@ class WorkflowParameter(ConfiguredBaseModel): Workflows.model_rebuild() Workflow.model_rebuild() WorkflowParameter.model_rebuild() +WorkflowUrlSpec.model_rebuild() diff --git a/catalog/build/py/iwc_manifest_to_workflows_yaml.py b/catalog/build/py/iwc_manifest_to_workflows_yaml.py index 533c1ddc3..b50251e05 100644 --- a/catalog/build/py/iwc_manifest_to_workflows_yaml.py +++ b/catalog/build/py/iwc_manifest_to_workflows_yaml.py @@ -16,6 +16,7 @@ "Transcriptomics": WorkflowCategoryId.TRANSCRIPTOMICS, "Epigenetics": WorkflowCategoryId.REGULATION, "Genome assembly": WorkflowCategoryId.ASSEMBLY, + "Virology": WorkflowCategoryId.CONSENSUS_SEQUENCES } MANIFEST_SOURCE_OF_TRUTH = ("trs_id", "workflow_name", "categories", "workflow_description") diff --git a/catalog/build/ts/build-catalog.ts b/catalog/build/ts/build-catalog.ts index e5d204d4d..c47cdbe10 100644 --- a/catalog/build/ts/build-catalog.ts +++ b/catalog/build/ts/build-catalog.ts @@ -198,8 +198,10 @@ function buildWorkflow( }: SourceWorkflow ): void { const parameters = []; - for (const { key, variable } of sourceParameters) { + for (const { key, url_spec, variable } of sourceParameters) { + // Add parameter if either variable or url_spec is defined if (variable) parameters.push({ key, variable }); + else if (url_spec) parameters.push({ key, url_spec }); } const workflow: Workflow = { parameters, diff --git a/catalog/output/workflows.json b/catalog/output/workflows.json index b5e193bc8..387d32c32 100644 --- a/catalog/output/workflows.json +++ b/catalog/output/workflows.json @@ -20,6 +20,61 @@ "trsId": "#workflow/github.com/iwc-workflows/haploid-variant-calling-wgs-pe/main/versions/v0.1", "workflowDescription": "Workflow for variant analysis against a reference genome in GenBank format", "workflowName": "Paired end variant calling in haploid system" + }, + { + "parameters": [ + { + "key": "NC_045512.2 FASTA sequence of SARS-CoV-2", + "variable": "ASSEMBLY_FASTA_URL" + }, + { + "key": "ARTIC primer BED", + "url_spec": { + "ext": "bed", + "src": "url", + "url": "https://zenodo.org/record/4555735/files/ARTIC_nCoV-2019_v3.bed" + } + }, + { + "key": "ARTIC primers to amplicon assignments", + "url_spec": { + "ext": "tabular", + "src": "url", + "url": "https://zenodo.org/record/4555735/files/ARTIC_amplicon_info_v3.tsv" + } + } + ], + "ploidy": "HAPLOID", + "taxonomyId": "694009", + "trsId": "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-artic-variant-calling/COVID-19-PE-ARTIC-ILLUMINA/versions/v0.5.4", + "workflowDescription": "The workflow for Illumina-sequenced ARTIC data builds on the RNASeq workflow for paired-end data using the same steps for mapping and variant calling, but adds extra logic for trimming ARTIC primer sequences off reads with the ivar package. In addition, this workflow uses ivar also to identify amplicons affected by ARTIC primer-binding site mutations and tries to exclude reads derived from such tainted amplicons when calculating allele-frequencies of other variants.", + "workflowName": "COVID-19: variation analysis on ARTIC PE data" + }, + { + "parameters": [ + { + "key": "NC_045512.2 FASTA sequence of SARS-CoV-2", + "variable": "ASSEMBLY_FASTA_URL" + } + ], + "ploidy": "HAPLOID", + "taxonomyId": "694009", + "trsId": "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-wgs-variant-calling/COVID-19-PE-WGS-ILLUMINA/versions/v0.2.4", + "workflowDescription": "This workflows performs paired end read mapping with bwa-mem followed by sensitive variant calling across a wide range of AFs with lofreq", + "workflowName": "COVID-19: variation analysis on WGS PE data" + }, + { + "parameters": [ + { + "key": "NC_045512.2 FASTA sequence of SARS-CoV-2", + "variable": "ASSEMBLY_FASTA_URL" + } + ], + "ploidy": "HAPLOID", + "taxonomyId": "694009", + "trsId": "#workflow/github.com/iwc-workflows/sars-cov-2-se-illumina-wgs-variant-calling/COVID-19-SE-WGS-ILLUMINA/versions/v0.1.6", + "workflowDescription": "This workflows performs single end read mapping with bowtie2 followed by sensitive variant calling across a wide range of AFs with lofreq", + "workflowName": "COVID-19: variation analysis on WGS SE data" } ] }, @@ -165,6 +220,26 @@ } ] }, + { + "category": "CONSENSUS_SEQUENCES", + "description": "Build consensus sequences for related isolates.", + "name": "Consensus sequences", + "workflows": [ + { + "parameters": [ + { + "key": "Reference FASTA", + "variable": "ASSEMBLY_FASTA_URL" + } + ], + "ploidy": "HAPLOID", + "taxonomyId": "10244", + "trsId": "#workflow/github.com/iwc-workflows/pox-virus-amplicon/main/versions/v0.3", + "workflowDescription": "A workflow for the analysis of pox virus genomes sequenced as half-genomes (for ITR resolution) in a tiled-amplicon approach", + "workflowName": "Pox Virus Illumina Amplicon Workflow from half-genomes" + } + ] + }, { "category": "ASSEMBLY", "description": "Assemble prokaryotic and eukaryotic genomes sequenced with a variety of technologies.", diff --git a/catalog/schema/enums/workflow_category_id.yaml b/catalog/schema/enums/workflow_category_id.yaml index ea831f266..473193e26 100644 --- a/catalog/schema/enums/workflow_category_id.yaml +++ b/catalog/schema/enums/workflow_category_id.yaml @@ -12,4 +12,5 @@ enums: ASSEMBLY: GENOME_COMPARISONS: PROTEIN_FOLDING: + CONSENSUS_SEQUENCES: OTHER: diff --git a/catalog/schema/generated/schema.ts b/catalog/schema/generated/schema.ts index 12b3ed6bb..a5bf10fb6 100644 --- a/catalog/schema/generated/schema.ts +++ b/catalog/schema/generated/schema.ts @@ -18,6 +18,7 @@ export enum WorkflowCategoryId { ASSEMBLY = "ASSEMBLY", GENOME_COMPARISONS = "GENOME_COMPARISONS", PROTEIN_FOLDING = "PROTEIN_FOLDING", + CONSENSUS_SEQUENCES = "CONSENSUS_SEQUENCES", OTHER = "OTHER", }; /** @@ -148,9 +149,24 @@ export interface WorkflowParameter { key: string, /** A variable to substitute in as the value of the parameter. */ variable?: WorkflowParameterVariable | null, + /** A direct URL specification for the parameter. */ + url_spec?: WorkflowUrlSpec | null, /** Arbitrary data describing the type of the parameter, intended only as convenient reference for maintainers. */ type_guide?: Any | null, } +/** + * A URL specification for a workflow parameter. + */ +export interface WorkflowUrlSpec { + /** The file extension of the URL. */ + ext: string, + /** The source type, typically 'url'. */ + src: string, + /** The URL to the resource. */ + url: string, +} + + diff --git a/catalog/schema/generated/workflow_categories.json b/catalog/schema/generated/workflow_categories.json index cbec45ed0..5d877235f 100644 --- a/catalog/schema/generated/workflow_categories.json +++ b/catalog/schema/generated/workflow_categories.json @@ -52,6 +52,7 @@ "ASSEMBLY", "GENOME_COMPARISONS", "PROTEIN_FOLDING", + "CONSENSUS_SEQUENCES", "OTHER" ], "title": "WorkflowCategoryId", diff --git a/catalog/schema/generated/workflows.json b/catalog/schema/generated/workflows.json index 91b9ba232..168092410 100644 --- a/catalog/schema/generated/workflows.json +++ b/catalog/schema/generated/workflows.json @@ -79,6 +79,7 @@ "ASSEMBLY", "GENOME_COMPARISONS", "PROTEIN_FOLDING", + "CONSENSUS_SEQUENCES", "OTHER" ], "title": "WorkflowCategoryId", @@ -103,6 +104,17 @@ ], "description": "Arbitrary data describing the type of the parameter, intended only as convenient reference for maintainers." }, + "url_spec": { + "anyOf": [ + { + "$ref": "#/$defs/WorkflowUrlSpec" + }, + { + "type": "null" + } + ], + "description": "A direct URL specification for the parameter." + }, "variable": { "$ref": "#/$defs/WorkflowParameterVariable", "description": "A variable to substitute in as the value of the parameter." @@ -135,6 +147,31 @@ "title": "WorkflowPloidy", "type": "string" }, + "WorkflowUrlSpec": { + "additionalProperties": false, + "description": "A URL specification for a workflow parameter.", + "properties": { + "ext": { + "description": "The file extension of the URL.", + "type": "string" + }, + "src": { + "description": "The source type, typically 'url'.", + "type": "string" + }, + "url": { + "description": "The URL to the resource.", + "type": "string" + } + }, + "required": [ + "ext", + "src", + "url" + ], + "title": "WorkflowUrlSpec", + "type": "object" + }, "Workflows": { "additionalProperties": false, "description": "Object containing list of workflows.", diff --git a/catalog/schema/workflows.yaml b/catalog/schema/workflows.yaml index 243457573..37c9cd91e 100644 --- a/catalog/schema/workflows.yaml +++ b/catalog/schema/workflows.yaml @@ -75,7 +75,27 @@ classes: description: A variable to substitute in as the value of the parameter. required: false range: WorkflowParameterVariable + url_spec: + description: A direct URL specification for the parameter. + required: false + range: WorkflowUrlSpec type_guide: description: Arbitrary data describing the type of the parameter, intended only as convenient reference for maintainers. required: false range: Any + + WorkflowUrlSpec: + description: A URL specification for a workflow parameter. + attributes: + ext: + description: The file extension of the URL. + required: true + range: string + src: + description: The source type, typically 'url'. + required: true + range: string + url: + description: The URL to the resource. + required: true + range: string diff --git a/catalog/source/workflow_categories.yml b/catalog/source/workflow_categories.yml index 7cdb63c90..98e8076c7 100644 --- a/catalog/source/workflow_categories.yml +++ b/catalog/source/workflow_categories.yml @@ -11,6 +11,10 @@ workflow_categories: name: "Regulation" description: "Workflows for the analysis of ChIP-seq, ATAC-Seq, and beyond." + - category: "CONSENSUS_SEQUENCES" + name: "Consensus sequences" + description: "Build consensus sequences for related isolates." + - category: "ASSEMBLY" name: "Assembly" description: "Assemble prokaryotic and eukaryotic genomes sequenced with a variety of technologies." diff --git a/catalog/source/workflows.yml b/catalog/source/workflows.yml index 212f4ffe7..52528cf59 100644 --- a/catalog/source/workflows.yml +++ b/catalog/source/workflows.yml @@ -541,6 +541,37 @@ workflows: type_guide: class: text active: false + - trs_id: "#workflow/github.com/iwc-workflows/pox-virus-amplicon/main/versions/v0.3" + categories: + - CONSENSUS_SEQUENCES + workflow_name: Pox Virus Illumina Amplicon Workflow from half-genomes + workflow_description: + A workflow for the analysis of pox virus genomes sequenced + as half-genomes (for ITR resolution) in a tiled-amplicon approach + ploidy: HAPLOID + taxonomy_id: 10244 + parameters: + - key: Reference FASTA + variable: ASSEMBLY_FASTA_URL + - key: Primer Scheme + type_guide: + class: File + - key: PE Reads Pool1 + type_guide: + class: Collection + - key: PE Reads Pool2 + type_guide: + class: Collection + - key: Minimum quality score to call base + type_guide: + class: integer + - key: Allele frequency to call SNV + type_guide: + class: float + - key: Allele frequency to call indel + type_guide: + class: float + active: true - trs_id: "#workflow/github.com/iwc-workflows/pseudobulk-worflow-decoupler-edger/main/versions/v0.1.1" categories: - TRANSCRIPTOMICS @@ -614,7 +645,7 @@ workflows: type_guide: class: text active: false - - trs_id: "#workflow/github.com/iwc-workflows/rnaseq-de/main/versions/v0.3" + - trs_id: "#workflow/github.com/iwc-workflows/rnaseq-de/main/versions/v0.4" categories: - TRANSCRIPTOMICS workflow_name: RNA-Seq Differential Expression Analysis with Visualization @@ -685,6 +716,161 @@ workflows: - key: GTF file of annotation variable: GENE_MODEL_URL active: true + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-ont-artic-variant-calling/COVID-19-ARTIC-ONT/versions/v0.3.2" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis of ARTIC ONT data" + workflow_description: + This workflow for ONT-sequenced ARTIC data is modeled after + the alignment/variant-calling steps of the [ARTIC pipeline](https://artic.readthedocs.io/en/latest/). + It performs, essentially, the same steps as that pipeline’s minion command, i.e. + read mapping with minimap2 and variant calling with medaka. Like the Illumina + ARTIC workflow it uses ivar for primer trimming. Since ONT-sequenced reads have + a much higher error rate than Illumina-sequenced reads and are therefor plagued + more by false-positive variant calls, this workflow does make no attempt to handle + amplicons affected by potential primer-binding site mutations. + ploidy: ANY + parameters: + - key: ONT-sequenced reads + type_guide: + class: Collection + ext: + - fastqsanger + - fastqsanger.gz + - key: Minimum read length + type_guide: + class: integer + - key: Maximum read length + type_guide: + class: integer + - key: NC_045512.2 FASTA sequence of SARS-CoV-2 + type_guide: + class: File + ext: + - fasta + - fasta.gz + - key: Primer binding sites info in BED format + type_guide: + class: File + ext: bed + active: false + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-artic-variant-calling/COVID-19-PE-ARTIC-ILLUMINA/versions/v0.5.4" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis on ARTIC PE data" + workflow_description: + The workflow for Illumina-sequenced ARTIC data builds on the + RNASeq workflow for paired-end data using the same steps for mapping and variant + calling, but adds extra logic for trimming ARTIC primer sequences off reads with + the ivar package. In addition, this workflow uses ivar also to identify amplicons + affected by ARTIC primer-binding site mutations and tries to exclude reads derived + from such tainted amplicons when calculating allele-frequencies of other variants. + ploidy: HAPLOID + taxonomy_id: 694009 + parameters: + - key: Paired Collection + type_guide: + class: Collection + ext: + - fastqsanger + - fastqsanger.gz + - key: NC_045512.2 FASTA sequence of SARS-CoV-2 + variable: ASSEMBLY_FASTA_URL + - key: ARTIC primer BED + url_spec: + ext: bed + src: url + url: https://zenodo.org/record/4555735/files/ARTIC_nCoV-2019_v3.bed + - key: ARTIC primers to amplicon assignments + url_spec: + ext: tabular + src: url + url: https://zenodo.org/record/4555735/files/ARTIC_amplicon_info_v3.tsv + - key: Read removal minimum AF + type_guide: + class: float + - key: Read removal maximum AF + type_guide: + class: float + - key: Minimum DP required after amplicon bias correction + type_guide: + class: integer + - key: Minimum DP_ALT required after amplicon bias correction + type_guide: + class: integer + active: true + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-pe-illumina-wgs-variant-calling/COVID-19-PE-WGS-ILLUMINA/versions/v0.2.4" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis on WGS PE data" + workflow_description: + This workflows performs paired end read mapping with bwa-mem + followed by sensitive variant calling across a wide range of AFs with lofreq + ploidy: HAPLOID + taxonomy_id: 694009 + parameters: + - key: Paired Collection + type_guide: + class: Collection + ext: + - fastqsanger + - fastqsanger.gz + - key: NC_045512.2 FASTA sequence of SARS-CoV-2 + variable: ASSEMBLY_FASTA_URL + active: true + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-se-illumina-wgs-variant-calling/COVID-19-SE-WGS-ILLUMINA/versions/v0.1.6" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis on WGS SE data" + workflow_description: + This workflows performs single end read mapping with bowtie2 + followed by sensitive variant calling across a wide range of AFs with lofreq + ploidy: HAPLOID + taxonomy_id: 694009 + parameters: + - key: Single End Collection + type_guide: + class: Collection + ext: + - fastqsanger + - fastqsanger.gz + - key: NC_045512.2 FASTA sequence of SARS-CoV-2 + variable: ASSEMBLY_FASTA_URL + active: true + - trs_id: "#workflow/github.com/iwc-workflows/sars-cov-2-variation-reporting/COVID-19-VARIATION-REPORTING/versions/v0.3.4" + categories: + - VARIANT_CALLING + workflow_name: "COVID-19: variation analysis reporting" + workflow_description: + This workflow takes a VCF dataset of variants produced by + any of the *-variant-calling workflows in https://github.com/galaxyproject/iwc/tree/main/workflows/sars-cov-2-variant-calling + and generates tabular lists of variants by Samples and by Variant, and an overview + plot of variants and their allele-frequencies. + ploidy: ANY + parameters: + - key: Variation data to report + type_guide: + class: Collection + ext: + - vcf + - vcf_bgzip + - key: AF Filter + type_guide: + class: float + - key: DP Filter + type_guide: + class: integer + - key: DP_ALT Filter + type_guide: + class: integer + - key: gene products translations + type_guide: + class: File + ext: tabular + - key: Number of Clusters + type_guide: + class: integer + active: false - trs_id: "#workflow/github.com/iwc-workflows/variation-reporting/main/versions/v0.1.1" categories: - VARIANT_CALLING