diff --git a/README.md b/README.md index d0375a43..211d6f53 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,7 @@ nextflow run nf-core/multiplesequencealign \
FAQ: Can I specify the arguments of the tools (tree and aligner)? - Yes, use the --args_tree and --args_aligner flags. More info: usage and parameters. + Yes, use the --args_guidetree and --args_aligner flags. More info: usage and parameters.
### CASE 2: Multiple datasets, multiple tools. @@ -191,12 +191,12 @@ Please check: filename.equals('versions.yml') ? null : filename } - ] - } + ext.prefix = { "${meta.id}_${meta.treealign}-args-${meta.args_treealign_clean}_${meta.guidetree}-args-${meta.args_guidetree_clean}" } + ext.args = { "${meta.args_treealign}" == "null" ? '' : "${meta.args_treealign}" } + publishDir = [ + path: { "${params.outdir}/alignment/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } - withName: "LEARNMSA_ALIGN" { - tag = { - [ - "${meta.id}", - meta.tree ? "tree: ${meta.tree}" : "", - meta.args_tree ? "argstree: ${meta.args_tree}" : "", - meta.args_aligner ? "args: ${meta.args_aligner_clean}" : "" - ].join(' ').trim() - } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } - ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" } + // + // alignments: the selector alternation is parenthesised so the MSA_ALIGNMENT prefix scopes every aligner, not only the first + // - if(params.skip_compression){ - publishDir = [ - path: { "${params.outdir}/alignments/${meta.id}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: "CREATE_TEMPLATE" { + ext.prefix = { "${meta.id}" } } - withName: "PIGZ_COMPRESS_LEARNMSA"{ + withName: "NFCORE_MULTIPLESEQUENCEALIGN:MULTIPLESEQUENCEALIGN:MSA_ALIGNMENT:(CLUSTALO_ALIGN|FAMSA_ALIGN|LEARNMSA_ALIGN|KALIGN_ALIGN|MUSCLE5_SUPER5|MAGUS_ALIGN|TCOFFEE_ALIGN)" { tag = { [ "${meta.id}", - meta.tree ? "tree: ${meta.tree}" : "", - meta.args_tree ? "argstree: ${meta.args_tree}" : "", - meta.args_aligner ? "args: ${meta.args_aligner}" : "" + meta.args_alignment ? "args: ${meta.args_alignment_clean}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } - ext.args = { "${meta.args_aligner}" == "null" ? 
'' : "${meta.args_aligner}" } - + ext.prefix = { "${meta.id}_${meta.alignment}-args-${meta.args_alignment_clean}" } + ext.args = { "${meta.args_alignment}" == "null" ? '' : "${meta.args_alignment}" } publishDir = [ path: { "${params.outdir}/alignments/${meta.id}" }, mode: params.publish_dir_mode, @@ -156,11 +133,11 @@ [ "${meta.id}", meta.tree ? "tree: ${meta.tree}" : "", - meta.args_tree ? "argstree: ${meta.args_tree}" : "", + meta.args_guidetree ? "argstree: ${meta.args_guidetree}" : "", meta.args_aligner ? "args: ${meta.args_aligner}" : "" ].join(' ').trim() } - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}" } ext.args = { "${meta.args_aligner}" == "null" ? '' : "${meta.args_aligner}" } if(params.skip_compression){ publishDir = [ @@ -215,21 +192,21 @@ // withName: 'PARSE_IRMSD' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_irmsd" } } withName: 'TCOFFEE_ALNCOMPARE_SP' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_sp" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_sp" } ext.args = "-compare_mode sp" } withName: 'TCOFFEE_ALNCOMPARE_TC' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tc" } + ext.prefix = { 
"${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_tc" } ext.args = "-compare_mode tc" } withName: 'TCOFFEE_IRMSD' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_irmsd" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_irmsd" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -239,7 +216,7 @@ } withName: "CALC_GAPS" { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_gaps" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_gaps" } } withName: "CONCAT_IRMSD" { @@ -263,7 +240,7 @@ } withName: 'TCOFFEE_TCS' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}_tcs" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}_tcs" } publishDir = [ path: { "${params.outdir}/evaluation/${task.process.tokenize(':')[-1].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -310,7 +287,7 @@ // Visualization // withName: 'FOLDMASON_MSA2LDDTREPORT' { - ext.prefix = { "${meta.id}_${meta.tree}-args-${meta.args_tree_clean}_${meta.aligner}-args-${meta.args_aligner_clean}" } + ext.prefix = { "${meta.id}_${meta.guidetree}-args-${meta.args_guidetree_clean}_${meta.alignment ?: meta.treealign}-args-${meta.args_alignment_clean ?: meta.args_treealign_clean}" } publishDir = [ path: { 
"${params.outdir}/reports/visualization" }, mode: params.publish_dir_mode, diff --git a/modules.json b/modules.json index 78514f49..a81fabd0 100644 --- a/modules.json +++ b/modules.json @@ -2,18 +2,44 @@ "name": "nf-core/multiplesequencealign", "homePage": "https://github.com/nf-core/multiplesequencealign", "repos": { + "https://github.com/mirpedrol/class-modules.git": { + "subworkflows": { + "mirpedrol": { + "msa_alignment": { + "branch": "main", + "git_sha": "caf40830bd920d881a6b1a553b3140c422f3952f", + "installed_by": ["subworkflows"] + }, + "msa_guidetree": { + "branch": "main", + "git_sha": "d59da459719d7a2df943a98b129a587e7c4bc7ed", + "installed_by": ["subworkflows"] + }, + "msa_structural_alignment": { + "branch": "main", + "git_sha": "4dc4da3795b6910ab508171843c33779f59f63fc", + "installed_by": ["subworkflows"] + }, + "msa_treealign": { + "branch": "main", + "git_sha": "4dc4da3795b6910ab508171843c33779f59f63fc", + "installed_by": ["subworkflows"] + } + } + } + }, "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { "clustalo/align": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] + "installed_by": ["modules", "msa_alignment", "msa_treealign"] }, "clustalo/guidetree": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] + "installed_by": ["modules", "msa_guidetree"] }, "csvtk/concat": { "branch": "master", @@ -29,12 +55,12 @@ "famsa/align": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] + "installed_by": ["modules", "msa_alignment", "msa_treealign"] }, "famsa/guidetree": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] + "installed_by": ["modules", "msa_guidetree"] }, "fastavalidator": { "branch": "master", @@ -46,6 +72,11 @@ "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", "installed_by": ["modules"] }, + 
"fastqc": { + "branch": "master", + "git_sha": "dc94b6ee04a05ddb9f7ae050712ff30a13149164", + "installed_by": ["modules"] + }, "foldmason/createdb": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", @@ -54,7 +85,7 @@ "foldmason/easymsa": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"], + "installed_by": ["modules", "msa_structural_alignment"], "patch": "modules/nf-core/foldmason/easymsa/foldmason-easymsa.diff" }, "foldmason/msa2lddtreport": { @@ -65,13 +96,13 @@ "kalign/align": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"], + "installed_by": ["modules", "msa_alignment"], "patch": "modules/nf-core/kalign/align/kalign-align.diff" }, "learnmsa/align": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] + "installed_by": ["modules", "msa_alignment"] }, "mafft/align": { "branch": "master", @@ -86,12 +117,17 @@ "magus/align": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] + "installed_by": ["modules", "msa_alignment", "msa_treealign"] + }, + "magus/guidetree": { + "branch": "master", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", + "installed_by": ["msa_guidetree"] }, "mtmalign/align": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] + "installed_by": ["modules", "msa_structural_alignment"] }, "multiqc": { "branch": "master", @@ -101,7 +137,7 @@ "muscle5/super5": { "branch": "master", "git_sha": "81880787133db07d9b4c1febd152c090eb8325dc", - "installed_by": ["modules"] + "installed_by": ["modules", "msa_alignment"] }, "pigz/compress": { "branch": "master", @@ -116,7 +152,7 @@ "tcoffee/align": { "branch": "master", "git_sha": "a380f1dade8d24fea92176d0ffaa5ab6235b8e15", - "installed_by": ["modules"] + "installed_by": ["modules", "msa_alignment", 
"msa_treealign"] }, "tcoffee/alncompare": { "branch": "master", diff --git a/modules/nf-core/magus/guidetree/environment.yml b/modules/nf-core/magus/guidetree/environment.yml new file mode 100644 index 00000000..9fb4aa7a --- /dev/null +++ b/modules/nf-core/magus/guidetree/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::magus-msa=0.2.0 diff --git a/modules/nf-core/magus/guidetree/main.nf b/modules/nf-core/magus/guidetree/main.nf new file mode 100644 index 00000000..eb37fb9d --- /dev/null +++ b/modules/nf-core/magus/guidetree/main.nf @@ -0,0 +1,48 @@ +process MAGUS_GUIDETREE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/magus-msa:0.2.0--pyhdfd78af_0': + 'biocontainers/magus-msa:0.2.0--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.tree"), emit: tree + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + magus \\ + -np $task.cpus \\ + -i $fasta \\ + -o ${prefix}.tree \\ + --onlyguidetree TRUE \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MAGUS: \$(magus --version) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tree + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + MAGUS: \$(magus --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/magus/guidetree/meta.yml b/modules/nf-core/magus/guidetree/meta.yml new file mode 100644 index 00000000..d1dde6bb --- /dev/null +++ 
b/modules/nf-core/magus/guidetree/meta.yml @@ -0,0 +1,53 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "magus_guidetree" +description: Multiple Sequence Alignment using Graph Clustering +keywords: + - MSA + - guidetree + - genomics + - graph +tools: + - "magus": + description: "Multiple Sequence Alignment using Graph Clustering" + homepage: "https://github.com/vlasmirnov/MAGUS" + documentation: "https://github.com/vlasmirnov/MAGUS" + tool_dev_url: "https://github.com/vlasmirnov/MAGUS" + doi: "10.1093/bioinformatics/btaa992" + licence: ["MIT"] + identifier: biotools:magus + +input: + - - meta: + type: map + description: | + Groovy Map containing fasta meta information + e.g. `[ id:'test', single_end:false ]` + - fasta: + type: file + description: Input sequences in FASTA format. + pattern: "*.{fa,fna,fasta}" + ontologies: + - edam: http://edamontology.org/format_1919 # SEQUENCE-LIKE + - edam: http://edamontology.org/format_1929 # FASTA +output: + tree: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - "*.tree": + type: file + description: File containing the output guidetree, in newick format. 
+ pattern: "*.tree" + ontologies: + - edam: http://edamontology.org/format_2006 # PHYLOGENETIC TREE + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@lrauschning" diff --git a/modules/nf-core/magus/guidetree/tests/main.nf.test b/modules/nf-core/magus/guidetree/tests/main.nf.test new file mode 100644 index 00000000..96d33b8d --- /dev/null +++ b/modules/nf-core/magus/guidetree/tests/main.nf.test @@ -0,0 +1,39 @@ +nextflow_process { + + name "Test Process MAGUS_GUIDETREE" + script "../main.nf" + process "MAGUS_GUIDETREE" + + tag "modules" + tag "modules_nfcore" + tag "magus" + tag "magus/guidetree" + + test("setoxin - fasta") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + //{ assert snapshot(process.out.tree).match("tree")}, + // tests seem to be reproducible on a single machine, but not across different machines + // test the correct samples are in there + { assert path(process.out.tree[0][1]).getText().contains("1apf") }, + { assert path(process.out.tree[0][1]).getText().contains("1ahl") }, + { assert path(process.out.tree[0][1]).getText().contains("1atx") }, + { assert path(process.out.tree[0][1]).getText().contains("1sh1") }, + { assert path(process.out.tree[0][1]).getText().contains("1bds") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/magus/guidetree/tests/main.nf.test.snap b/modules/nf-core/magus/guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..d564be3d --- /dev/null +++ b/modules/nf-core/magus/guidetree/tests/main.nf.test.snap @@ -0,0 +1,23 @@ +{ + "versions": { + "content": [ + [ + 
"versions.yml:md5,5a2ad92c9ea945c4bf4890f02ca2562f" + ] + ], + "timestamp": "2024-03-28T18:25:41.292337485" + }, + "tree": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.tree:md5,c742636229d166322a2824d409595738" + ] + ] + ], + "timestamp": "2024-03-28T18:25:41.226027114" + } +} \ No newline at end of file diff --git a/subworkflows/local/ALIGN/main.nf b/subworkflows/local/ALIGN/main.nf deleted file mode 100644 index da2a357c..00000000 --- a/subworkflows/local/ALIGN/main.nf +++ /dev/null @@ -1,402 +0,0 @@ -/* - * Compute trees if needed and run alignment - */ - -// -// Include the subworkflows -// -include { COMPUTE_TREES } from '../../../subworkflows/local/COMPUTE_TREES' - -// Include the nf-core modules -include { CLUSTALO_ALIGN } from '../../../modules/nf-core/clustalo/align/main' -include { FAMSA_ALIGN } from '../../../modules/nf-core/famsa/align/main' -include { FOLDMASON_EASYMSA as FOLDMASON_ALIGN } from '../../../modules/nf-core/foldmason/easymsa/main' -include { KALIGN_ALIGN } from '../../../modules/nf-core/kalign/align/main' -include { LEARNMSA_ALIGN } from '../../../modules/nf-core/learnmsa/align/main' -include { MAFFT_ALIGN } from '../../../modules/nf-core/mafft/align/main' -include { MAGUS_ALIGN } from '../../../modules/nf-core/magus/align/main' -include { MTMALIGN_ALIGN } from '../../../modules/nf-core/mtmalign/align/main' -include { MUSCLE5_SUPER5 as MUSCLE5_ALIGN } from '../../../modules/nf-core/muscle5/super5/main' -include { TCOFFEE_ALIGN } from '../../../modules/nf-core/tcoffee/align/main' -include { TCOFFEE_ALIGN as TCOFFEE3D_ALIGN } from '../../../modules/nf-core/tcoffee/align/main' -include { TCOFFEE_REGRESSIVE as REGRESSIVE_ALIGN } from '../../../modules/nf-core/tcoffee/regressive/main' -include { TCOFFEE_CONSENSUS as CONSENSUS } from '../../../modules/nf-core/tcoffee/consensus/main' -include { UPP_ALIGN } from '../../../modules/nf-core/upp/align/main' -include { PIGZ_COMPRESS as PIGZ_COMPRESS_LEARNMSA } from 
'../../../modules/nf-core/pigz/compress/main' - - -workflow ALIGN { - take: - ch_fastas // channel: [ val(meta), [ path(fastas) ] ] - ch_tools // channel: [ val(meta_tree), val(meta_aligner) ] - // [[tree:, args_tree:, args_tree_clean: ], [aligner:, args_aligner:, args_aligner_clean:]] - // e.g.[[tree:FAMSA, args_tree:-gt upgma -parttree, args_tree_clean:-gt_upgma_-parttree], [aligner:FAMSA, args_aligner:null, args_aligner_clean:null]] - // e.g.[[tree:null, args_tree:null, args_tree_clean:null], [aligner:TCOFFEE, args_aligner:-output fasta_aln, args_aligner_clean:-output_fasta_aln]] - ch_optional_data // channel: meta, [e.g. /path/to/file.pdb,/path/to/file.pdb,/path/to/file.pdb] - compress // boolean: true or false - - main: - - ch_msa = Channel.empty() - ch_versions = Channel.empty() - - // Branch the toolsheet information into two channels - // This way, it can direct the computation of guidetrees - // and aligners separately - ch_tools - .multiMap { - it -> - tree: it[0] - align: it[1] - } - .set { ch_tools_split } - - // ------------------------------------------------ - // Compute the required trees - // ------------------------------------------------ - COMPUTE_TREES ( - ch_fastas, - ch_optional_data, - ch_tools_split.tree.unique() - ) - trees = COMPUTE_TREES.out.trees - ch_versions = ch_versions.mix(COMPUTE_TREES.out.versions) - - ch_fastas.combine(ch_tools) - .map { - metafasta, fasta, metatree, metaalign -> - [ metafasta+metatree , metaalign, fasta ] - } - .set { ch_fasta_tools } - - // ------------------------------------------------ - // Add back trees to the fasta channel - // And prepare the input channels for the aligners - // ------------------------------------------------ - - // Tools that accept sequence and tree - ch_fasta_tools - .join(trees, by: [0], remainder:true ) - .filter{ - it[1] != null - } - .map { - metafasta_tree, metaalign, fasta, tree -> - [ metafasta_tree + metaalign, fasta, tree ] - } - .map { - meta, fasta, tree -> - tree ? 
[ meta,fasta, tree ] : [meta, fasta, [ ] ] - } - .branch { - clustalo: it[0]["aligner"] == "CLUSTALO" - famsa: it[0]["aligner"] == "FAMSA" - kalign: it[0]["aligner"] == "KALIGN" - learnmsa: it[0]["aligner"] == "LEARNMSA" - mafft: it[0]["aligner"] == "MAFFT" - magus: it[0]["aligner"] == "MAGUS" - muscle5: it[0]["aligner"] == "MUSCLE5" - mtmalign: it[0]["aligner"] == "MTMALIGN" - regressive: it[0]["aligner"] == "REGRESSIVE" - tcoffee: it[0]["aligner"] == "TCOFFEE" - tcoffee3d: it[0]["aligner"] == "3DCOFFEE" - upp: it[0]["aligner"] == "UPP" - } - .set { ch_fasta_trees } - - // tools that accept only optional data - ch_optional_data.combine(ch_tools) - .map { - metadependency, template, dependency, metatree, metaalign -> - [ metadependency+metatree+metaalign, template, dependency ] - } - .branch { - mtmalign: it[0]["aligner"] == "MTMALIGN" - } - .set { ch_optional_data_tools } - - - // tools that accept optional data and tree - ch_optional_data.combine(ch_tools) - .map { - metadependency, template, dependency, metatree, metaalign -> - [ metadependency + metatree , metaalign, template, dependency ] - } - .join(trees, by: 0, remainder: true) - .filter{ - it.size() == 5 - } - .map { - metratreeanddep, metaalign, template, dependency, tree -> - tree ? [ metratreeanddep + metaalign, tree, template, dependency ]:[ metratreeanddep + metaalign, [ ], template, dependency ] - } - .branch { - foldmason: it[0]["aligner"] == "FOLDMASON" - } - .set { ch_optional_data_tools_tree } - - - // ------------------------------------------------ - // Compute the alignments - // ------------------------------------------------ - - // 1. 
SEQUENCE BASED - // ----------------- CLUSTALO ------------------ - ch_fasta_trees.clustalo - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_clustalo } - - CLUSTALO_ALIGN ( - ch_fasta_trees_clustalo.fasta, - ch_fasta_trees_clustalo.tree, - [], - [], - [], - [], - compress - ) - ch_msa = ch_msa.mix(CLUSTALO_ALIGN.out.alignment) - ch_versions = ch_versions.mix(CLUSTALO_ALIGN.out.versions.first()) - - // ----------------- FAMSA --------------------- - ch_fasta_trees.famsa - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_famsa} - - FAMSA_ALIGN (ch_fasta_trees_famsa.fasta, - ch_fasta_trees_famsa.tree, - compress - ) - ch_msa = ch_msa.mix(FAMSA_ALIGN.out.alignment) - ch_versions = ch_versions.mix(FAMSA_ALIGN.out.versions.first()) - - // ---------------- KALIGN ----------------------- - ch_fasta_trees.kalign - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_kalign } - - KALIGN_ALIGN ( - ch_fasta_kalign.fasta, - compress - ) - ch_msa = ch_msa.mix(KALIGN_ALIGN.out.alignment) - ch_versions = ch_versions.mix(KALIGN_ALIGN.out.versions.first()) - - // ---------------- LEARNMSA ---------------------- - ch_fasta_trees.learnmsa - .multiMap { - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_learnmsa } - - LEARNMSA_ALIGN ( - ch_fasta_learnmsa.fasta - ) - - if(compress){ - PIGZ_COMPRESS_LEARNMSA(LEARNMSA_ALIGN.out.alignment) - ch_versions = ch_versions.mix(PIGZ_COMPRESS_LEARNMSA.out.versions.first()) - ch_msa = ch_msa.mix(PIGZ_COMPRESS_LEARNMSA.out.archive) - }else{ - ch_msa = ch_msa.mix(LEARNMSA_ALIGN.out.alignment) - } - - ch_versions = ch_versions.mix(LEARNMSA_ALIGN.out.versions.first()) - - // ---------------- MAFFT ----------------------- - ch_fasta_trees.mafft - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set 
{ ch_fasta_mafft } - - MAFFT_ALIGN ( - ch_fasta_mafft.fasta, - [ [:], [] ], - [ [:], [] ], - [ [:], [] ], - [ [:], [] ], - [ [:], [] ], - compress - ) - ch_msa = ch_msa.mix(MAFFT_ALIGN.out.fas) // the MAFFT module calls its output fas instead of alignment - ch_versions = ch_versions.mix(MAFFT_ALIGN.out.versions.first()) - - // ----------------- MAGUS ------------------ - ch_fasta_trees.magus - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_magus } - - MAGUS_ALIGN ( - ch_fasta_trees_magus.fasta, - ch_fasta_trees_magus.tree, - compress - ) - ch_msa = ch_msa.mix(MAGUS_ALIGN.out.alignment) - ch_versions = ch_versions.mix(MAGUS_ALIGN.out.versions.first()) - - // ----------------- MUSCLE5 ------------------ - ch_fasta_trees.muscle5 - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - } - .set { ch_fasta_muscle5 } - - MUSCLE5_ALIGN ( - ch_fasta_muscle5.fasta, - compress - ) - ch_msa = ch_msa.mix(MUSCLE5_ALIGN.out.alignment.first()) - ch_versions = ch_versions.mix(MUSCLE5_ALIGN.out.versions.first()) - - // ----------------- TCOFFEE ------------------ - ch_fasta_trees.tcoffee - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_tcoffee } - - TCOFFEE_ALIGN ( - ch_fasta_trees_tcoffee.fasta, - ch_fasta_trees_tcoffee.tree, - [ [:], [], [] ], - compress - ) - ch_msa = ch_msa.mix(TCOFFEE_ALIGN.out.alignment) - ch_versions = ch_versions.mix(TCOFFEE_ALIGN.out.versions.first()) - - // ----------------- REGRESSIVE ------------------ - ch_fasta_trees.regressive - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_regressive } - - REGRESSIVE_ALIGN ( - ch_fasta_trees_regressive.fasta, - ch_fasta_trees_regressive.tree, - [ [:], [], [] ], - compress - ) - ch_msa = ch_msa.mix(REGRESSIVE_ALIGN.out.alignment) - ch_versions = 
ch_versions.mix(REGRESSIVE_ALIGN.out.versions.first()) - - // ----------------- UPP ------------------- - ch_fasta_trees.upp - .multiMap{ - meta, fastafile, treefile -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - } - .set { ch_fasta_trees_upp } - - UPP_ALIGN ( - ch_fasta_trees_upp.fasta, - ch_fasta_trees_upp.tree, - compress - ) - ch_msa = ch_msa.mix(UPP_ALIGN.out.alignment) - ch_versions = ch_versions.mix(UPP_ALIGN.out.versions.first()) - - // 2. SEQUENCE + STRUCTURE BASED - - if(params.templates_suffix == ".pdb"){ - // ----------------- 3DCOFFEE ------------------ - ch_fasta_trees.tcoffee3d - .map{ meta, fasta, tree -> [ meta["id"], meta, fasta, tree ] } - .combine(ch_optional_data.map{ meta, template, optional_data -> [ meta["id"], template, optional_data ] }, by: 0) - .multiMap{ - merging_id, meta, fastafile, treefile, templatefile, datafiles -> - fasta: [ meta, fastafile ] - tree: [ meta, treefile ] - optional_data: [ meta, templatefile, datafiles ] - } - .set { ch_fasta_trees_3dcoffee } - - TCOFFEE3D_ALIGN ( - ch_fasta_trees_3dcoffee.fasta, - ch_fasta_trees_3dcoffee.tree, - ch_fasta_trees_3dcoffee.optional_data, - compress - ) - ch_msa = ch_msa.mix(TCOFFEE3D_ALIGN.out.alignment) - ch_versions = ch_versions.mix(TCOFFEE3D_ALIGN.out.versions.first()) - - // 3. 
STRUCTURE BASED - - // ----------------- MTMALIGN ------------------ - ch_optional_data_tools.mtmalign - .multiMap { - meta, template, dependency -> - pdbs: [ meta, dependency ] - } - .set { ch_pdb_mtmalign } - - MTMALIGN_ALIGN ( - ch_pdb_mtmalign.pdbs, - compress - ) - ch_msa = ch_msa.mix(MTMALIGN_ALIGN.out.alignment) - ch_versions = ch_versions.mix(MTMALIGN_ALIGN.out.versions.first()) - - - // ----------------- FOLDMASON ------------------ - - ch_optional_data_tools_tree.foldmason - .multiMap { - meta, tree, template, dependency -> - pdbs: [ meta, dependency ] - trees: [ meta, tree ] - } - .set { ch_pdb_foldmason } - - FOLDMASON_ALIGN ( - ch_pdb_foldmason.pdbs, - ch_pdb_foldmason.trees, - compress - ) - ch_msa = ch_msa.mix(FOLDMASON_ALIGN.out.msa_aa) - ch_versions = ch_versions.mix(FOLDMASON_ALIGN.out.versions.first()) - } - - // ----------------- CONSENSUS ------------------ - if(params.build_consensus){ - ch_msa.map{ meta, msa -> [ meta["id"], msa]} - .groupTuple() - .filter { it[1].size() > 1 } - .map { id_meta, msas -> [ ["id": id_meta, "tree":"DEFAULT", "args_tree":"", "args_tree_clean":"default", "aligner":"CONSENSUS", "args_aligner":"", "args_aligner_clean":"default" ], msas ]} - .set { ch_msa_consensus } - - CONSENSUS(ch_msa_consensus, [[:],[]], compress) - ch_msa = ch_msa.mix(CONSENSUS.out.alignment) - ch_versions = ch_versions.mix(CONSENSUS.out.versions.first()) - } - - - emit: - msa = ch_msa // channel: [ val(meta), path(msa) ] - trees = trees // channel: [ val(meta), path(tree) ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/COMPUTE_TREES/main.nf b/subworkflows/local/COMPUTE_TREES/main.nf deleted file mode 100644 index bd33593a..00000000 --- a/subworkflows/local/COMPUTE_TREES/main.nf +++ /dev/null @@ -1,55 +0,0 @@ -// -// Compute guide trees either with FAMSA or Clusta Omega -// - -include { FAMSA_GUIDETREE } from '../../../modules/nf-core/famsa/guidetree/main' -include { CLUSTALO_GUIDETREE } from 
'../../../modules/nf-core/clustalo/guidetree/main' -include { MAFFT_GUIDETREE } from '../../../modules/nf-core/mafft/guidetree/main' - -include { CUSTOM_PDBSTOFASTA } from '../../../modules/local/custom/pdbtofasta' -include { FASTAVALIDATOR } from '../../../modules/nf-core/fastavalidator/main' - -workflow COMPUTE_TREES { - - take: - ch_fastas //channel: [ meta, /path/to/file.fasta ] - ch_optional_data //channel: [ meta, template, [ /path/to/file1, /path/to/file2, ... ] ] - tree_tools //channel: [ meta ] ( tools to be run: meta.tree, meta.args_tree ) - - main: - ch_versions = Channel.empty() - ch_trees = Channel.empty() - - // - // Render the required guide trees - // - ch_fastas - .combine(tree_tools) - .map { - metafasta, fasta, metatree -> - [ metafasta + metatree, fasta ] - } - .branch { - famsa: it[0]["tree"] == "FAMSA" - clustalo: it[0]["tree"] == "CLUSTALO" - mafft: it[0]["tree"] == "MAFFT" - } - .set { ch_fastas_fortrees } - - - FAMSA_GUIDETREE (ch_fastas_fortrees.famsa) - ch_trees = FAMSA_GUIDETREE.out.tree - ch_versions = ch_versions.mix(FAMSA_GUIDETREE.out.versions.first()) - - CLUSTALO_GUIDETREE (ch_fastas_fortrees.clustalo) - ch_trees = ch_trees.mix(CLUSTALO_GUIDETREE.out.tree) - ch_versions = ch_versions.mix(CLUSTALO_GUIDETREE.out.versions.first()) - - MAFFT_GUIDETREE (ch_fastas_fortrees.mafft) - ch_trees = ch_trees.mix(MAFFT_GUIDETREE.out.tree) - ch_versions = ch_versions.mix(MAFFT_GUIDETREE.out.versions.first()) - - emit: - trees = ch_trees // channel: [ val(meta), path(tree) ] - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/local/VISUALIZATION/main.nf b/subworkflows/local/VISUALIZATION/main.nf index 6dcb0c0b..9ea0e3ad 100644 --- a/subworkflows/local/VISUALIZATION/main.nf +++ b/subworkflows/local/VISUALIZATION/main.nf @@ -18,7 +18,7 @@ workflow VISUALIZATION { // split the msa meta to be able to merge with the tree meta ch_msa .map { - meta, file -> [ meta.subMap([ "id", "tree", "args_tree", "args_tree_clean" ]), 
meta, file ] + meta, file -> [ meta.subMap([ "id", "tree", "args_guidetree", "args_guidetree_clean" ]), meta, file ] } .join(ch_trees, by: [0], remainder:true ) .filter { diff --git a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf index 862409aa..28abdcd8 100644 --- a/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_multiplesequencealign_pipeline/main.nf @@ -92,7 +92,7 @@ workflow PIPELINE_INITIALISATION { if (params.aligner){ ch_tools = Channel.fromList([ - [["aligner": params.aligner, "tree": params.tree, "args_tree": params.args_tree, "args_aligner": params.args_aligner]] + [["aligner": params.aligner, "tree": params.tree, "args_guidetree": params.args_guidetree, "args_aligner": params.args_aligner]] ]) }else{ @@ -105,13 +105,17 @@ workflow PIPELINE_INITIALISATION { def tree_map = [:] def align_map = [:] - tree_map["tree"] = Utils.clean_tree(meta_clone["tree"].toString()) - tree_map["args_tree"] = meta_clone["args_tree"] - tree_map["args_tree_clean"] = Utils.cleanArgs(meta_clone.args_tree) + tree_map["guidetree"] = Utils.clean_tree(meta_clone["guidetree"]) + tree_map["args_guidetree"] = meta_clone["args_guidetree"] + tree_map["args_guidetree_clean"] = Utils.cleanArgs(meta_clone.args_guidetree) - align_map["aligner"] = meta_clone["aligner"].toString() - align_map["args_aligner"] = Utils.check_required_args(meta_clone["aligner"], meta_clone["args_aligner"]) - align_map["args_aligner_clean"] = Utils.cleanArgs(meta_clone.args_aligner) + tree_map["treealign"] = Utils.clean_tree(meta_clone["treealign"]) + tree_map["args_treealign"] = meta_clone["args_treealign"] + tree_map["args_treealign_clean"] = Utils.cleanArgs(meta_clone.args_treealign) + + align_map["alignment"] = meta_clone["alignment"] + align_map["args_alignment"] = Utils.check_required_args(meta_clone["alignment"], meta_clone["args_alignment"]) + 
align_map["args_alignment_clean"] = Utils.cleanArgs(meta_clone.args_alignment) [ tree_map, align_map ] }.unique() @@ -188,11 +192,11 @@ workflow PIPELINE_COMPLETION { // Output file naming def summary_file_with_traces = "${outdir}/summary/complete_summary_stats_eval_times.csv" - if (!skip_shiny) { - merge_summary_and_traces(summary_file, trace_dir_path, versions_path, summary_file_with_traces, "${shiny_dir_path}/complete_summary_stats_eval_times.csv") - }else{ - merge_summary_and_traces(summary_file, trace_dir_path, versions_path, summary_file_with_traces, "") - } + //if (!skip_shiny) { + // merge_summary_and_traces(summary_file, trace_dir_path, versions_path, summary_file_with_traces, "${shiny_dir_path}/complete_summary_stats_eval_times.csv") + //}else{ + // merge_summary_and_traces(summary_file, trace_dir_path, versions_path, summary_file_with_traces, "") + //} } workflow.onError { @@ -575,7 +579,7 @@ def processTraceFile(String traceDirPath) { keys_to_add = keys - ["id", "tree", "args", "aligner"] keys_to_add.each { key -> empty_trace[key+"_tree"] = null } empty_trace["tree"] = "DEFAULT" - empty_trace["args_tree_clean"] = "default" + empty_trace["args_guidetree_clean"] = "default" traceTrees.add(empty_trace) // Return the extracted traces as a map @@ -614,14 +618,14 @@ def prepTrace(trace, suffix_to_replace, subworkflow, keys) { newRow.tree = treeMatch ? treeMatch[0][1] : "DEFAULT" def treeArgsMatch = (row.tag =~ /argstree: (.*)/) - newRow.args_tree_clean = treeArgsMatch ? Utils.cleanArgs(treeArgsMatch[0][1]) : "default" + newRow.args_guidetree_clean = treeArgsMatch ? 
Utils.cleanArgs(treeArgsMatch[0][1]) : "default" - // remove tree and args_tree from keys - keys_iterator = keys - ["tree", "args_tree_clean"] + // remove tree and args_guidetree from keys + keys_iterator = keys - ["guidetree", "args_guidetree_clean"] } else if(subworkflow == "COMPUTE_TREES") { - suffix = "_tree" - specific_key = "tree" + suffix = "_guidetree" + specific_key = "guidetree" } @@ -747,12 +751,12 @@ def merge_summary_and_traces(summary_file, trace_dir_path, versions_path, outFil def treeMatch = [:] if(row.tree == "DEFAULT"){ - treeMatch = trace_file.traceTrees.find {it.tree == row.tree && it.args_tree_clean == row.args_tree_clean} + treeMatch = trace_file.traceTrees.find {it.tree == row.tree && it.args_guidetree_clean == row.args_guidetree_clean} } else { - treeMatch = trace_file.traceTrees.find { it.id == row.id && it.tree == row.tree && it.args_tree_clean == row.args_tree_clean} + treeMatch = trace_file.traceTrees.find { it.id == row.id && it.tree == row.tree && it.args_guidetree_clean == row.args_guidetree_clean} } - def alignMatch = trace_file.traceAlign.find { it.id == row.id && it.tree == row.tree && row.args_tree_clean == it.args_tree_clean && it.aligner == row.aligner && it.args_aligner_clean == row.args_aligner_clean} + def alignMatch = trace_file.traceAlign.find { it.id == row.id && it.tree == row.tree && row.args_guidetree_clean == it.args_guidetree_clean && it.aligner == row.aligner && it.args_aligner_clean == row.args_aligner_clean} def mergedRow = row + (treeMatch ?: [:]) + (alignMatch ?: [:]) mergedData << mergedRow } diff --git a/subworkflows/mirpedrol/msa_alignment/main.nf b/subworkflows/mirpedrol/msa_alignment/main.nf new file mode 100644 index 00000000..0f215606 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/main.nf @@ -0,0 +1,74 @@ +include { CLUSTALO_ALIGN } from '../../../modules/nf-core/clustalo/align/main' +include { FAMSA_ALIGN } from '../../../modules/nf-core/famsa/align/main' +include { KALIGN_ALIGN } from 
'../../../modules/nf-core/kalign/align/main' +include { LEARNMSA_ALIGN } from '../../../modules/nf-core/learnmsa/align/main' +include { MAGUS_ALIGN } from '../../../modules/nf-core/magus/align/main' +include { MUSCLE5_SUPER5 } from '../../../modules/nf-core/muscle5/super5/main' +include { TCOFFEE_ALIGN } from '../../../modules/nf-core/tcoffee/align/main' + + +workflow MSA_ALIGNMENT { + + take: + ch_fasta + + main: + def ch_out_alignment = Channel.empty() + def ch_out_versions = Channel.empty() + + ch_fasta + .branch { + meta, fasta, tool -> + clustalo_align: tool == "clustalo_align" + return [ meta, fasta ] + famsa_align: tool == "famsa_align" + return [ meta, fasta ] + kalign_align: tool == "kalign_align" + return [ meta, fasta ] + learnmsa_align: tool == "learnmsa_align" + return [ meta, fasta ] + magus_align: tool == "magus_align" + return [ meta, fasta ] + muscle5_super5: tool == "muscle5_super5" + return [ meta, fasta ] + tcoffee_align: tool == "tcoffee_align" + return [ meta, fasta ] + } + .set { ch_fasta_branch } + + CLUSTALO_ALIGN( ch_fasta_branch.clustalo_align, [[], []], [], [], [], [], [] ) + ch_out_alignment = ch_out_alignment.mix(CLUSTALO_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(CLUSTALO_ALIGN.out.versions) + + FAMSA_ALIGN( ch_fasta_branch.famsa_align, [[], []], [] ) + ch_out_alignment = ch_out_alignment.mix(FAMSA_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(FAMSA_ALIGN.out.versions) + + KALIGN_ALIGN( ch_fasta_branch.kalign_align, [] ) + ch_out_alignment = ch_out_alignment.mix(KALIGN_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(KALIGN_ALIGN.out.versions) + + LEARNMSA_ALIGN( ch_fasta_branch.learnmsa_align ) + ch_out_alignment = ch_out_alignment.mix(LEARNMSA_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(LEARNMSA_ALIGN.out.versions) + + MAGUS_ALIGN( ch_fasta_branch.magus_align, [[], []], [] ) + ch_out_alignment = ch_out_alignment.mix(MAGUS_ALIGN.out.alignment) + ch_out_versions = 
ch_out_versions.mix(MAGUS_ALIGN.out.versions) + + MUSCLE5_SUPER5( ch_fasta_branch.muscle5_super5, [] ) + ch_out_alignment = ch_out_alignment.mix(MUSCLE5_SUPER5.out.alignment) + ch_out_versions = ch_out_versions.mix(MUSCLE5_SUPER5.out.versions) + + TCOFFEE_ALIGN( ch_fasta_branch.tcoffee_align, [[], []], [[], [], []], [] ) + ch_out_alignment = ch_out_alignment.mix(TCOFFEE_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(TCOFFEE_ALIGN.out.versions) + + + + emit: + alignment = ch_out_alignment + versions = ch_out_versions + +} + diff --git a/subworkflows/mirpedrol/msa_alignment/meta.yml b/subworkflows/mirpedrol/msa_alignment/meta.yml new file mode 100644 index 00000000..4f5d7f34 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/meta.yml @@ -0,0 +1,72 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json +name: "msa_alignment" +description: Perform multiple sequence alignment +keywords: ["alignment", "msa", "align"] +components: + - clustalo/align: + git_remote: "https://github.com/nf-core/modules.git" + - famsa/align: + git_remote: "https://github.com/nf-core/modules.git" + - kalign/align: + git_remote: "https://github.com/nf-core/modules.git" + - learnmsa/align: + git_remote: "https://github.com/nf-core/modules.git" + - magus/align: + git_remote: "https://github.com/nf-core/modules.git" + - muscle5/super5: + git_remote: "https://github.com/nf-core/modules.git" + - tcoffee/align: + git_remote: "https://github.com/nf-core/modules.git" + +input: + - ch_fasta: + description: "Channel containing: meta, fasta" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. 
`[ id:'sample1', single_end:false ]` + + " + type: map + - fasta: + description: Input sequences in FASTA format + ontologies: + - edam: http://edamontology.org/format_1929 + pattern: "*.{fa,fasta}" + type: file + - tool: + description: The name of the tool to run + type: string + +output: + - alignment: + description: Output channel alignment + structure: + - - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - "*.aln.gz": + description: Alignment file, in FASTA format. + ontologies: + - edam: http://edamontology.org/format_1984 + pattern: "*.aln.gz" + type: file + - versions: + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + ontologies: + - edam: http://edamontology.org/format_3750 + pattern: versions.yml + type: file + +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_alignment/msa_alignment.diff b/subworkflows/mirpedrol/msa_alignment/msa_alignment.diff new file mode 100644 index 00000000..b17f15c0 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/msa_alignment.diff @@ -0,0 +1,51 @@ +Changes in component 'mirpedrol/msa_alignment' +'subworkflows/mirpedrol/msa_alignment/meta.yml' is unchanged +'subworkflows/mirpedrol/msa_alignment/main.nf' is unchanged +'subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap' was created +'subworkflows/mirpedrol/msa_alignment/tests/nextflow.config' was created +Changes in 'msa_alignment/tests/main.nf.test': +--- subworkflows/mirpedrol/msa_alignment/tests/main.nf.test ++++ subworkflows/mirpedrol/msa_alignment/tests/main.nf.test +@@ -15,6 +15,7 @@ + tag "subworkflows/../../modules/nf-core/muscle5/super5" + tag "subworkflows/../../modules/nf-core/tcoffee/align" + ++ config './nextflow.config' + + test("clustalo/align") { + +@@ -70,7 +71,8 @@ + } + } + +- test("learnmsa/align") { ++ test("learnmsa/align - stub") { ++ options "-stub" + + when { + 
workflow { +@@ -83,7 +85,7 @@ + then { + assertAll( + { assert workflow.success }, +- { assert snapshot(workflow.out).match("learnmsa/align") }, ++ { assert snapshot(workflow.out).match("learnmsa/align - stub") }, + ) + } + } +@@ -101,7 +103,12 @@ + then { + assertAll( + { assert workflow.success }, +- { assert snapshot(workflow.out).match("magus/align") }, ++ { assert path(workflow.out.alignment[0][1]).getText().contains("1atx") }, ++ { assert path(workflow.out.alignment[0][1]).getText().contains("1bds") }, ++ { assert path(workflow.out.alignment[0][1]).getText().contains("1sh1") }, ++ { assert path(workflow.out.alignment[0][1]).getText().contains("1apf") }, ++ { assert path(workflow.out.alignment[0][1]).getText().contains("1ahl") }, ++ { assert snapshot(workflow.out.versions).match("magus/align") }, + ) + } + } + +************************************************************ diff --git a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test new file mode 100644 index 00000000..720acb79 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test @@ -0,0 +1,153 @@ +nextflow_workflow { + + name "Test Subworkflow MSA_ALIGNMENT" + script "../main.nf" + workflow "MSA_ALIGNMENT" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_alignment" + tag "subworkflows/../../modules/nf-core/clustalo/align" + tag "subworkflows/../../modules/nf-core/famsa/align" + tag "subworkflows/../../modules/nf-core/kalign/align" + tag "subworkflows/../../modules/nf-core/learnmsa/align" + tag "subworkflows/../../modules/nf-core/magus/align" + tag "subworkflows/../../modules/nf-core/muscle5/super5" + tag "subworkflows/../../modules/nf-core/tcoffee/align" + + config './nextflow.config' + + test("clustalo/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: 
true), 'clustalo_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("clustalo/align") }, + ) + } + } + + test("famsa/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'famsa_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("famsa/align") }, + ) + } + } + + test("kalign/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'kalign_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("kalign/align") }, + ) + } + } + + test("learnmsa/align - stub") { + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'learnmsa_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("learnmsa/align - stub") }, + ) + } + } + + test("magus/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'magus_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path(workflow.out.alignment[0][1]).getText().contains("1atx") }, + { assert path(workflow.out.alignment[0][1]).getText().contains("1bds") }, + { assert path(workflow.out.alignment[0][1]).getText().contains("1sh1") }, + { assert path(workflow.out.alignment[0][1]).getText().contains("1apf") }, + { assert 
path(workflow.out.alignment[0][1]).getText().contains("1ahl") }, + { assert snapshot(workflow.out.versions).match("magus/align") }, + ) + } + } + + test("muscle5/super5") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'muscle5_super5'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("muscle5/super5") }, + ) + } + } + + test("tcoffee/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'tcoffee_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("tcoffee/align") }, + ) + } + } + + +} \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap new file mode 100644 index 00000000..7ec9c2a8 --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/tests/main.nf.test.snap @@ -0,0 +1,212 @@ +{ + "clustalo/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,09ec06af06bf2cf6e0cb9a1cabdcd5ab" + ] + ], + "1": [ + "versions.yml:md5,75356bf56559adcb33a9c93aba830309" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,09ec06af06bf2cf6e0cb9a1cabdcd5ab" + ] + ], + "versions": [ + "versions.yml:md5,75356bf56559adcb33a9c93aba830309" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-07-31T13:37:39.239390311" + }, + "famsa/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,efec3f0fffa0bca1f9c87073de81af06" + ] + ], + "1": [ + "versions.yml:md5,c74ca8b91c442fc4ea29219ee1b724fd" + ], + "alignment": [ + [ + { + "id": "test" + }, + 
"test.aln:md5,efec3f0fffa0bca1f9c87073de81af06" + ] + ], + "versions": [ + "versions.yml:md5,c74ca8b91c442fc4ea29219ee1b724fd" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-07-31T13:38:19.076634506" + }, + "learnmsa/align - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,527ae9e8a292ad5c7342f26233e9e131" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,527ae9e8a292ad5c7342f26233e9e131" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-07-31T13:47:27.224686397" + }, + "muscle5/super5": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,50d236a9fbcf62d0aa229593dc8bbf6b" + ] + ], + "1": [ + "versions.yml:md5,48ec1b7cf99109e8495f3bc00d67a1eb" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,50d236a9fbcf62d0aa229593dc8bbf6b" + ] + ], + "versions": [ + "versions.yml:md5,48ec1b7cf99109e8495f3bc00d67a1eb" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T07:01:54.454247483" + }, + "magus/align": { + "content": [ + [ + "versions.yml:md5,9258e7c6deb7c3d816ba75cf111e09a8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T06:56:07.991213481" + }, + "kalign/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,e9831df8f8c092e67935bc03eff8dde6" + ] + ], + "1": [ + "versions.yml:md5,e7d33c95bb5d69e8573c8ad4eb2aca2c" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,e9831df8f8c092e67935bc03eff8dde6" + ] + ], + "versions": [ + "versions.yml:md5,e7d33c95bb5d69e8573c8ad4eb2aca2c" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-07-31T13:38:38.673198876" + }, + 
"tcoffee/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,39441d8270972f23df19106708ee2dcb" + ] + ], + "1": [ + "versions.yml:md5,c5208e86b43e8c973c39c2bd8ca2932a" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,39441d8270972f23df19106708ee2dcb" + ] + ], + "versions": [ + "versions.yml:md5,c5208e86b43e8c973c39c2bd8ca2932a" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T07:27:07.276478754" + } +} diff --git a/subworkflows/mirpedrol/msa_alignment/tests/nextflow.config b/subworkflows/mirpedrol/msa_alignment/tests/nextflow.config new file mode 100644 index 00000000..466c2bcf --- /dev/null +++ b/subworkflows/mirpedrol/msa_alignment/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: 'MSA_ALIGNMENT:LEARNMSA_ALIGN'{ + container = "nf-core/ubuntu:22.04" + } + withName: 'MSA_ALIGNMENT:TCOFFEE_ALIGN'{ + ext.args = { "-output fasta_aln" } + } +} diff --git a/subworkflows/mirpedrol/msa_guidetree/main.nf b/subworkflows/mirpedrol/msa_guidetree/main.nf new file mode 100644 index 00000000..594308ff --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/main.nf @@ -0,0 +1,46 @@ +include { CLUSTALO_GUIDETREE } from '../../../modules/nf-core/clustalo/guidetree/main' +include { FAMSA_GUIDETREE } from '../../../modules/nf-core/famsa/guidetree/main' +include { MAGUS_GUIDETREE } from '../../../modules/nf-core/magus/guidetree/main' + + +workflow MSA_GUIDETREE { + + take: + ch_fasta + + main: + def ch_out_tree = Channel.empty() + def ch_out_versions = Channel.empty() + + ch_fasta + .branch { + meta, fasta, tool -> + clustalo_guidetree: tool == "clustalo_guidetree" + return [ meta, fasta ] + famsa_guidetree: tool == "famsa_guidetree" + return [ meta, fasta ] + magus_guidetree: tool == "magus_guidetree" + return [ meta, fasta ] + } + .set { ch_fasta_branch } + + CLUSTALO_GUIDETREE( ch_fasta_branch.clustalo_guidetree ) + ch_out_tree = 
ch_out_tree.mix(CLUSTALO_GUIDETREE.out.tree) + ch_out_versions = ch_out_versions.mix(CLUSTALO_GUIDETREE.out.versions) + + FAMSA_GUIDETREE( ch_fasta_branch.famsa_guidetree ) + ch_out_tree = ch_out_tree.mix(FAMSA_GUIDETREE.out.tree) + ch_out_versions = ch_out_versions.mix(FAMSA_GUIDETREE.out.versions) + + MAGUS_GUIDETREE( ch_fasta_branch.magus_guidetree ) + ch_out_tree = ch_out_tree.mix(MAGUS_GUIDETREE.out.tree) + ch_out_versions = ch_out_versions.mix(MAGUS_GUIDETREE.out.versions) + + + + emit: + tree = ch_out_tree + versions = ch_out_versions + +} + diff --git a/subworkflows/mirpedrol/msa_guidetree/meta.yml b/subworkflows/mirpedrol/msa_guidetree/meta.yml new file mode 100644 index 00000000..1fa9b2af --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/meta.yml @@ -0,0 +1,64 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json +name: "msa_guidetree" +description: Compute a guide tree for multiple sequence alignment +keywords: ["align", "guide tree", "guidetree", "msa"] +components: + - clustalo/guidetree: + git_remote: "https://github.com/nf-core/modules.git" + - famsa/guidetree: + git_remote: "https://github.com/nf-core/modules.git" + - magus/guidetree: + git_remote: "https://github.com/nf-core/modules.git" + +input: + - ch_fasta: + description: "Channel containing: meta, fasta" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - fasta: + description: Input sequences in FASTA format + ontologies: + - edam: http://edamontology.org/format_1929 + pattern: "*.{fa,fasta}" + type: file + - tool: + description: The name of the tool to run + type: string + +output: + - tree: + description: Output channel tree + structure: + - - meta: + description: "Groovy Map containing sample information + + e.g. 
`[ id:'test']` + + " + type: map + - "*.dnd": + description: Guide tree file in Newick format + ontologies: + - edam: http://edamontology.org/format_2006 + pattern: "*.{dnd}" + type: file + - versions: + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + ontologies: + - edam: http://edamontology.org/format_3750 + pattern: versions.yml + type: file + +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_guidetree/msa_guidetree.diff b/subworkflows/mirpedrol/msa_guidetree/msa_guidetree.diff new file mode 100644 index 00000000..f793a89f --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/msa_guidetree.diff @@ -0,0 +1,22 @@ +Changes in component 'mirpedrol/msa_guidetree' +'subworkflows/mirpedrol/msa_guidetree/meta.yml' is unchanged +'subworkflows/mirpedrol/msa_guidetree/main.nf' is unchanged +'subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap' was created +Changes in 'msa_guidetree/tests/main.nf.test': +--- subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test ++++ subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test +@@ -61,7 +61,11 @@ + then { + assertAll( + { assert workflow.success }, +- { assert snapshot(workflow.out).match("magus/guidetree") }, ++ { assert path(workflow.out.tree[0][1]).getText().contains("1atx") }, ++ { assert path(workflow.out.tree[0][1]).getText().contains("1bds") }, ++ { assert path(workflow.out.tree[0][1]).getText().contains("1sh1") }, ++ { assert path(workflow.out.tree[0][1]).getText().contains("1apf") }, ++ { assert path(workflow.out.tree[0][1]).getText().contains("1ahl") }, + ) + } + } + +************************************************************ diff --git a/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test new file mode 100644 index 00000000..20ca74a5 --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test @@ -0,0 +1,75 @@ 
+nextflow_workflow { + + name "Test Subworkflow MSA_GUIDETREE" + script "../main.nf" + workflow "MSA_GUIDETREE" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_guidetree" + tag "subworkflows/../../modules/nf-core/clustalo/guidetree" + tag "subworkflows/../../modules/nf-core/famsa/guidetree" + tag "subworkflows/../../modules/nf-core/magus/guidetree" + + + test("clustalo/guidetree") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'clustalo_guidetree'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("clustalo/guidetree") }, + ) + } + } + + test("famsa/guidetree") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'famsa_guidetree'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("famsa/guidetree") }, + ) + } + } + + test("magus/guidetree") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'magus_guidetree'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path(workflow.out.tree[0][1]).getText().contains("1atx") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1bds") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1sh1") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1apf") }, + { assert path(workflow.out.tree[0][1]).getText().contains("1ahl") }, + { assert snapshot(workflow.out.versions).match("magus/guidetree") }, + ) + } + } + + +} \ No newline at end of file diff --git 
a/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap new file mode 100644 index 00000000..be8ab655 --- /dev/null +++ b/subworkflows/mirpedrol/msa_guidetree/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "famsa/guidetree": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dnd:md5,3fddb6ad96904101d105d090ddff061d" + ] + ], + "1": [ + "versions.yml:md5,9362f729d456eebde15c076a3927b340" + ], + "tree": [ + [ + { + "id": "test" + }, + "test.dnd:md5,3fddb6ad96904101d105d090ddff061d" + ] + ], + "versions": [ + "versions.yml:md5,9362f729d456eebde15c076a3927b340" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T12:21:33.436132015" + }, + "clustalo/guidetree": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.dnd:md5,90cf248dac7bf20a706e2b1552fd41f1" + ] + ], + "1": [ + "versions.yml:md5,08d14ffe74bfaff6d04f86a87f1adb4f" + ], + "tree": [ + [ + { + "id": "test" + }, + "test.dnd:md5,90cf248dac7bf20a706e2b1552fd41f1" + ] + ], + "versions": [ + "versions.yml:md5,08d14ffe74bfaff6d04f86a87f1adb4f" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T12:21:24.785688977" + }, + "magus/guidetree": { + "content": [ + [ + "versions.yml:md5,7234c3d5bc0089242622028903d2f4d1" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-19T13:46:43.306273138" + } +} diff --git a/subworkflows/mirpedrol/msa_structural_alignment/main.nf b/subworkflows/mirpedrol/msa_structural_alignment/main.nf new file mode 100644 index 00000000..c113145f --- /dev/null +++ b/subworkflows/mirpedrol/msa_structural_alignment/main.nf @@ -0,0 +1,39 @@ +include { FOLDMASON_EASYMSA } from '../../../modules/nf-core/foldmason/easymsa/main' +include { MTMALIGN_ALIGN } from '../../../modules/nf-core/mtmalign/align/main' + + +workflow MSA_STRUCTURAL_ALIGNMENT { + + take: + ch_pdbs + 
+ main: + def ch_out_alignment = Channel.empty() + def ch_out_versions = Channel.empty() + + ch_pdbs + .branch { + meta, pdbs, tool -> + foldmason_easymsa: tool == "foldmason_easymsa" + return [ meta, pdbs ] + mtmalign_align: tool == "mtmalign_align" + return [ meta, pdbs ] + } + .set { ch_pdbs_branch } + + FOLDMASON_EASYMSA( ch_pdbs_branch.foldmason_easymsa, [[], []], [] ) + ch_out_alignment = ch_out_alignment.mix(FOLDMASON_EASYMSA.out.msa_aa) + ch_out_versions = ch_out_versions.mix(FOLDMASON_EASYMSA.out.versions) + + MTMALIGN_ALIGN( ch_pdbs_branch.mtmalign_align, [] ) + ch_out_alignment = ch_out_alignment.mix(MTMALIGN_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(MTMALIGN_ALIGN.out.versions) + + + + emit: + alignment = ch_out_alignment + versions = ch_out_versions + +} + diff --git a/subworkflows/mirpedrol/msa_structural_alignment/meta.yml b/subworkflows/mirpedrol/msa_structural_alignment/meta.yml new file mode 100644 index 00000000..e62eeb75 --- /dev/null +++ b/subworkflows/mirpedrol/msa_structural_alignment/meta.yml @@ -0,0 +1,63 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json +name: "msa_structural_alignment" +description: Perform multiple sequence alignment of protein structures +keywords: ["alignment", "msa", "structure"] +components: + - foldmason/easymsa: + git_remote: "https://github.com/nf-core/modules.git" + - mtmalign/align: + git_remote: "https://github.com/nf-core/modules.git" + +input: + - ch_pdbs: + description: "Channel containing: meta, pdbs" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'sample1' ]` + + " + type: map + - pdbs: + description: Input protein structures in PDB format. 
+ ontologies: + - edam: http://edamontology.org/format_1476 + - edam: http://edamontology.org/format_1477 + pattern: "*.{pdb,mmcif}" + type: file + - tool: + description: The name of the tool to run + type: string + +output: + - alignment: + description: Output channel alignment + structure: + - - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - ${prefix}.aln.gz: + description: Alignment in FASTA format. May be gzipped or uncompressed. + ontologies: + - edam: http://edamontology.org/format_1984 + pattern: "*.{aln.gz,aln,fa,fa.gz,fasta,fasta.gz}" + type: file + - versions: + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + ontologies: + - edam: http://edamontology.org/format_3750 + pattern: versions.yml + type: file + +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_structural_alignment/tests/main.nf.test b/subworkflows/mirpedrol/msa_structural_alignment/tests/main.nf.test new file mode 100644 index 00000000..81aa7c44 --- /dev/null +++ b/subworkflows/mirpedrol/msa_structural_alignment/tests/main.nf.test @@ -0,0 +1,51 @@ +nextflow_workflow { + + name "Test Subworkflow MSA_STRUCTURAL_ALIGNMENT" + script "../main.nf" + workflow "MSA_STRUCTURAL_ALIGNMENT" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_structural_alignment" + tag "subworkflows/../../modules/nf-core/foldmason/easymsa" + tag "subworkflows/../../modules/nf-core/mtmalign/align" + + + test("foldmason/easymsa") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test_tree' ], [file(params.modules_testdata_base_path + 'proteomics/pdb/1tim.pdb', checkIfExists: true), file(params.modules_testdata_base_path + 'proteomics/pdb/8tim.pdb', checkIfExists: true)], 'foldmason_easymsa'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert 
snapshot(workflow.out).match("foldmason/easymsa") }, + ) + } + } + + test("mtmalign/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test_tree' ], [file(params.modules_testdata_base_path + 'proteomics/pdb/1tim.pdb', checkIfExists: true), file(params.modules_testdata_base_path + 'proteomics/pdb/8tim.pdb', checkIfExists: true)], 'mtmalign_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("mtmalign/align") }, + ) + } + } + + +} \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_structural_alignment/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_structural_alignment/tests/main.nf.test.snap new file mode 100644 index 00000000..338bb6b1 --- /dev/null +++ b/subworkflows/mirpedrol/msa_structural_alignment/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "foldmason/easymsa": { + "content": [ + { + "0": [ + [ + { + "id": "test_tree" + }, + "test_tree_aa.fa:md5,2c20e002e5efd0d43cde8c5e896e998e" + ] + ], + "1": [ + "versions.yml:md5,079537a12ecb1a4e69170051f18a9c73" + ], + "alignment": [ + [ + { + "id": "test_tree" + }, + "test_tree_aa.fa:md5,2c20e002e5efd0d43cde8c5e896e998e" + ] + ], + "versions": [ + "versions.yml:md5,079537a12ecb1a4e69170051f18a9c73" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T12:22:19.477678328" + }, + "mtmalign/align": { + "content": [ + { + "0": [ + [ + { + "id": "test_tree" + }, + "test_tree.aln:md5,393e2f3480c2daddceef798c03201a9d" + ] + ], + "1": [ + "versions.yml:md5,3c92191a04b0f0a03555dbb7fed62a5c" + ], + "alignment": [ + [ + { + "id": "test_tree" + }, + "test_tree.aln:md5,393e2f3480c2daddceef798c03201a9d" + ] + ], + "versions": [ + "versions.yml:md5,3c92191a04b0f0a03555dbb7fed62a5c" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T12:22:27.340342692" + } +} diff --git a/subworkflows/mirpedrol/msa_treealign/main.nf 
b/subworkflows/mirpedrol/msa_treealign/main.nf new file mode 100644 index 00000000..a150661e --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/main.nf @@ -0,0 +1,67 @@ +include { CLUSTALO_ALIGN } from '../../../modules/nf-core/clustalo/align/main' +include { FAMSA_ALIGN } from '../../../modules/nf-core/famsa/align/main' +include { MAGUS_ALIGN } from '../../../modules/nf-core/magus/align/main' +include { TCOFFEE_ALIGN } from '../../../modules/nf-core/tcoffee/align/main' + + +workflow MSA_TREEALIGN { + + take: + ch_fasta + ch_tree + + main: + def ch_out_alignment = Channel.empty() + def ch_out_versions = Channel.empty() + + ch_fasta + .branch { + meta, fasta, tool -> + clustalo_align: tool == "clustalo_align" + return [ meta, fasta ] + famsa_align: tool == "famsa_align" + return [ meta, fasta ] + magus_align: tool == "magus_align" + return [ meta, fasta ] + tcoffee_align: tool == "tcoffee_align" + return [ meta, fasta ] + } + .set { ch_fasta_branch } + ch_tree + .branch { + meta, tree, tool -> + clustalo_align: tool == "clustalo_align" + return [ meta, tree ] + famsa_align: tool == "famsa_align" + return [ meta, tree ] + magus_align: tool == "magus_align" + return [ meta, tree ] + tcoffee_align: tool == "tcoffee_align" + return [ meta, tree ] + } + .set { ch_tree_branch } + + CLUSTALO_ALIGN( ch_fasta_branch.clustalo_align, ch_tree_branch.clustalo_align, [], [], [], [], [] ) + ch_out_alignment = ch_out_alignment.mix(CLUSTALO_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(CLUSTALO_ALIGN.out.versions) + + FAMSA_ALIGN( ch_fasta_branch.famsa_align, ch_tree_branch.famsa_align, [] ) + ch_out_alignment = ch_out_alignment.mix(FAMSA_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(FAMSA_ALIGN.out.versions) + + MAGUS_ALIGN( ch_fasta_branch.magus_align, ch_tree_branch.magus_align, [] ) + ch_out_alignment = ch_out_alignment.mix(MAGUS_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(MAGUS_ALIGN.out.versions) + + TCOFFEE_ALIGN( 
ch_fasta_branch.tcoffee_align, ch_tree_branch.tcoffee_align, [[], [], []], [] ) + ch_out_alignment = ch_out_alignment.mix(TCOFFEE_ALIGN.out.alignment) + ch_out_versions = ch_out_versions.mix(TCOFFEE_ALIGN.out.versions) + + + + emit: + alignment = ch_out_alignment + versions = ch_out_versions + +} + diff --git a/subworkflows/mirpedrol/msa_treealign/meta.yml b/subworkflows/mirpedrol/msa_treealign/meta.yml new file mode 100644 index 00000000..7056d1ef --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/meta.yml @@ -0,0 +1,83 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/mirpedrol/class-modules/main/subworkflows/yaml-schema.json +name: "msa_treealign" +description: Perform multiple sequence alignment from a provided guide tree +keywords: ["alignment", "treealignment", "msa"] +components: + - clustalo/align: + git_remote: "https://github.com/nf-core/modules.git" + - famsa/align: + git_remote: "https://github.com/nf-core/modules.git" + - magus/align: + git_remote: "https://github.com/nf-core/modules.git" + - tcoffee/align: + git_remote: "https://github.com/nf-core/modules.git" + +input: + - ch_fasta: + description: "Channel containing: meta, fasta" + structure: + - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - fasta: + description: Input sequences in FASTA format + ontologies: + - edam: http://edamontology.org/format_1929 + pattern: "*.{fa,fasta}" + type: file + - tool: + description: The name of the tool to run + type: string + - ch_tree: + description: "Channel containing: meta, tree" + structure: + - meta2: + description: "Groovy Map containing tree information + + e.g.
`[ id:'test_tree']` + + " + type: map + - tree: + description: Input guide tree in Newick format + pattern: "*.{dnd}" + type: file + - tool: + description: The name of the tool to run + type: string + +output: + - alignment: + description: Output channel alignment + structure: + - - meta: + description: "Groovy Map containing sample information + + e.g. `[ id:'test']` + + " + type: map + - "*.aln.gz": + description: Alignment file, in gzipped fasta format + ontologies: + - edam: http://edamontology.org/format_1984 + pattern: "*.aln.gz" + type: file + - versions: + description: Output channel versions + structure: + - versions.yml: + description: File containing software versions + ontologies: + - edam: http://edamontology.org/format_3750 + pattern: versions.yml + type: file + +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test new file mode 100644 index 00000000..80a9d239 --- /dev/null +++ b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test @@ -0,0 +1,93 @@ +nextflow_workflow { + + name "Test Subworkflow MSA_TREEALIGN" + script "../main.nf" + workflow "MSA_TREEALIGN" + + tag "subworkflows" + tag "subworkflows_mirpedrol" + tag "subworkflows/msa_treealign" + tag "subworkflows/../../modules/nf-core/clustalo/align" + tag "subworkflows/../../modules/nf-core/famsa/align" + tag "subworkflows/../../modules/nf-core/magus/align" + tag "subworkflows/../../modules/nf-core/tcoffee/align" + + + test("clustalo/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'clustalo_align'] ) + input[1] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.dnd', checkIfExists: true), 'clustalo_align'] ) + """ + } + } + + then { + assertAll( + 
{ assert workflow.success }, + { assert snapshot(workflow.out).match("clustalo/align") }, + ) + } + } + + test("famsa/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'famsa_align'] ) + input[1] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.dnd', checkIfExists: true), 'famsa_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("famsa/align") }, + ) + } + } + + test("magus/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'magus_align'] ) + input[1] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.dnd', checkIfExists: true), 'magus_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("magus/align") }, + ) + } + } + + test("tcoffee/align") { + + when { + workflow { + """ + input[0] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.aln', checkIfExists: true), 'tcoffee_align'] ) + input[1] = Channel.of( [[ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryotes/anemonia_sulcata/seatoxin-ref.dnd', checkIfExists: true), 'tcoffee_align'] ) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match("tcoffee/align") }, + ) + } + } + + +} \ No newline at end of file diff --git a/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap new file mode 100644 index 00000000..76afd485 --- /dev/null +++ 
b/subworkflows/mirpedrol/msa_treealign/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "clustalo/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,09ec06af06bf2cf6e0cb9a1cabdcd5ab" + ] + ], + "1": [ + "versions.yml:md5,895c03496923ed549de8c050b0b78978" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,09ec06af06bf2cf6e0cb9a1cabdcd5ab" + ] + ], + "versions": [ + "versions.yml:md5,895c03496923ed549de8c050b0b78978" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T14:07:58.070189402" + }, + "famsa/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,efec3f0fffa0bca1f9c87073de81af06" + ] + ], + "1": [ + "versions.yml:md5,dc23c65f66a62f337556459f11efb7b3" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,efec3f0fffa0bca1f9c87073de81af06" + ] + ], + "versions": [ + "versions.yml:md5,dc23c65f66a62f337556459f11efb7b3" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T14:08:10.972913645" + }, + "magus/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,20844d321995ed865277254366ad9523" + ] + ], + "1": [ + "versions.yml:md5,4ee49a1541b5aa2a45bcf93fdb3958a1" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,20844d321995ed865277254366ad9523" + ] + ], + "versions": [ + "versions.yml:md5,4ee49a1541b5aa2a45bcf93fdb3958a1" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T14:08:36.990307238" + }, + "tcoffee/align": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.aln:md5,a4c80aa63979cfe7fc770ff01dbe744e" + ] + ], + "1": [ + "versions.yml:md5,bb9ac2316dd7f099b964f31fbf3a1b48" + ], + "alignment": [ + [ + { + "id": "test" + }, + "test.aln:md5,a4c80aa63979cfe7fc770ff01dbe744e" + ] + ], + "versions": [ + "versions.yml:md5,bb9ac2316dd7f099b964f31fbf3a1b48" + ] + } + ], + "meta": 
{ + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-01T14:09:48.216260207" + } +} \ No newline at end of file diff --git a/workflows/multiplesequencealign.nf b/workflows/multiplesequencealign.nf index d9202ef4..fbb76b01 100644 --- a/workflows/multiplesequencealign.nf +++ b/workflows/multiplesequencealign.nf @@ -19,7 +19,6 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mult // SUBWORKFLOW: Local subworkflows // include { STATS } from '../subworkflows/local/STATS' -include { ALIGN } from '../subworkflows/local/ALIGN' include { EVALUATE } from '../subworkflows/local/EVALUATE' include { TEMPLATES } from '../subworkflows/local/TEMPLATES' include { PREPROCESS } from '../subworkflows/local/PREPROCESS' @@ -47,6 +46,16 @@ include { CSVTK_JOIN as MERGE_STATS_EVAL } from '../modules/nf-core/csvtk/join/m include { PIGZ_COMPRESS } from '../modules/nf-core/pigz/compress/main' include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main' +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT CLASS-MODULES MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { MSA_ALIGNMENT } from '../subworkflows/mirpedrol/msa_alignment/main' +include { MSA_GUIDETREE } from '../subworkflows/mirpedrol/msa_guidetree/main' +include { MSA_TREEALIGN } from '../subworkflows/mirpedrol/msa_treealign/main' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -57,7 +66,7 @@ workflow MULTIPLESEQUENCEALIGN{ take: ch_input // channel: [ meta, path(sequence.fasta), path(reference.fasta), path(dependency_files.tar.gz), path(templates.txt) ] - ch_tools // channel: [ val(guide_tree_tool), val(args_guide_tree_tool), val(alignment_tool), val(args_alignment_tool) ] + ch_tools // channel: [ meta_guidetree_treealign, meta_alignment ] main: ch_multiqc_files = 
Channel.empty() @@ -281,20 +290,90 @@ workflow MULTIPLESEQUENCEALIGN{ stats_summary = stats_summary.mix(STATS.out.stats_summary) } + ch_seqs + .combine(ch_tools) + // Add tools and arguments to the meta + .multiMap { + meta, fasta, meta_guidetree_treealign, meta_alignment -> + guidetree: [ meta + ["guidetree":meta_guidetree_treealign.guidetree, "args_guidetree":meta_guidetree_treealign.args_guidetree, "args_guidetree_clean":meta_guidetree_treealign.args_guidetree_clean], fasta, meta_guidetree_treealign.guidetree] + alignment: [ meta + ["alignment":meta_alignment.alignment, "args_alignment":meta_alignment.args_alignment, "args_alignment_clean":meta_alignment.args_alignment_clean], fasta, meta_alignment.alignment] + } + .set { ch_fasta_tools } + + ch_fasta_tools.guidetree + .filter{ it -> it[0].guidetree } + .unique() + .set { ch_fasta_guidetree } + ch_fasta_tools.alignment + .filter{ it -> it[0].alignment } + .unique() + .set { ch_fasta_alignment } + + ch_fasta_guidetree.dump( tag: 'ch_fasta_guidetree' ) + ch_fasta_alignment.dump( tag: 'ch_fasta_alignment' ) + + // + // Compute tree + // + MSA_GUIDETREE (ch_fasta_guidetree) + ch_versions = ch_versions.mix(MSA_GUIDETREE.out.versions) + + ch_seqs + .map { meta, fasta -> + [ meta.id, meta, fasta ] + } + .combine( + MSA_GUIDETREE.out.tree + .map { meta, tree -> + [ meta.id, meta, tree ] + } + , by:0 + ) // combine by meta ID + .map { meta_id, meta_fasta, fasta, meta_tree, tree -> + [ meta_tree.guidetree, meta_tree, fasta, tree ] + } + .combine( + ch_tools + .map { meta_guidetree_treealign, meta_alignment -> + [ meta_guidetree_treealign.guidetree, meta_guidetree_treealign ] + } + , by: 0 + ) // combine by guidetree + .map { + guidetree, meta, fasta, tree, meta_guidetree_treealign -> + [meta + ["treealign":meta_guidetree_treealign.treealign, "args_treealign":meta_guidetree_treealign.args_treealign, "args_treealign_clean":meta_guidetree_treealign.args_treealign_clean], fasta, tree, meta_guidetree_treealign.treealign] + } +
.multiMap { + meta, fasta, tree, treealign -> + fastas: [ meta, fasta, treealign ] + trees: [ meta, tree, treealign ] + } + .set { ch_tree_treealign } + + ch_alignment_output = Channel.empty() + + // + // Align with a given tree + // + MSA_TREEALIGN ( + ch_tree_treealign.fastas, + ch_tree_treealign.trees + ) + ch_versions = ch_versions.mix(MSA_TREEALIGN.out.versions) + ch_alignment_output = ch_alignment_output.mix(MSA_TREEALIGN.out.alignment) + + // // Align // compress_during_align = !(params.skip_compression || (!params.skip_eval || params.build_consensus)) - ALIGN ( - ch_seqs, - ch_tools, - ch_optional_data_template, - compress_during_align - ) - ch_versions = ch_versions.mix(ALIGN.out.versions) + + MSA_ALIGNMENT ( ch_fasta_alignment ) + ch_versions = ch_versions.mix(MSA_ALIGNMENT.out.versions) + ch_alignment_output = ch_alignment_output.mix(MSA_ALIGNMENT.out.alignment) if (!params.skip_compression && !compress_during_align) { - PIGZ_COMPRESS (ALIGN.out.msa) + PIGZ_COMPRESS (ch_alignment_output) ch_versions = ch_versions.mix(PIGZ_COMPRESS.out.versions) } @@ -302,7 +381,7 @@ workflow MULTIPLESEQUENCEALIGN{ // Evaluate the quality of the alignment // if (!params.skip_eval) { - EVALUATE (ALIGN.out.msa, ch_refs, ch_optional_data_template) + EVALUATE (ch_alignment_output, ch_refs, ch_optional_data_template) ch_versions = ch_versions.mix(EVALUATE.out.versions) evaluation_summary = evaluation_summary.mix(EVALUATE.out.eval_summary) } else { @@ -351,8 +430,8 @@ workflow MULTIPLESEQUENCEALIGN{ if (!params.skip_visualisation) { VISUALIZATION ( - ALIGN.out.msa, - ALIGN.out.trees, + ch_alignment_output, + MSA_GUIDETREE.out.tree, ch_optional_data ) }