diff --git a/conf/containers_conda_lock_files_amd64.config b/conf/containers_conda_lock_files_amd64.config index d3ee1b4ee..f6b0997ed 100644 --- a/conf/containers_conda_lock_files_amd64.config +++ b/conf/containers_conda_lock_files_amd64.config @@ -1,2 +1 @@ -process { withName: 'FASTQC' { container = 'modules/nf-core/fastqc/.conda-lock/linux_amd64-bd-5cb1a2fa2f18c7c2_1.txt' } } -process { withName: 'MULTIQC' { container = 'modules/nf-core/multiqc/.conda-lock/linux_amd64-bd-db7c73dae76bc9e6_1.txt' } } +process { withName: 'MULTIQC' { container = 'https://wave.seqera.io/v1alpha1/builds/bd-ee7739d47738383b_1/condalock' } } diff --git a/conf/containers_conda_lock_files_arm64.config b/conf/containers_conda_lock_files_arm64.config index 2b90ac4fc..0cd12a281 100644 --- a/conf/containers_conda_lock_files_arm64.config +++ b/conf/containers_conda_lock_files_arm64.config @@ -1,2 +1 @@ -process { withName: 'FASTQC' { container = 'modules/nf-core/fastqc/.conda-lock/linux_arm64-bd-e455e32f745abe68_1.txt' } } -process { withName: 'MULTIQC' { container = 'modules/nf-core/multiqc/.conda-lock/linux_arm64-bd-d167b8012595a136_1.txt' } } +process { withName: 'MULTIQC' { container = 'https://wave.seqera.io/v1alpha1/builds/bd-58d7dee710ab3aa8_1/condalock' } } diff --git a/conf/containers_docker_amd64.config b/conf/containers_docker_amd64.config index 65f1814a0..dd93726bc 100644 --- a/conf/containers_docker_amd64.config +++ b/conf/containers_docker_amd64.config @@ -1,2 +1 @@ -process { withName: 'FASTQC' { container = 'community.wave.seqera.io/library/fastqc:0.12.1--5cb1a2fa2f18c7c2' } } -process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc:1.34--db7c73dae76bc9e6' } } +process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b' } } diff --git a/conf/containers_docker_arm64.config b/conf/containers_docker_arm64.config index 6c845ba4b..23418fef7 100644 --- a/conf/containers_docker_arm64.config +++ 
b/conf/containers_docker_arm64.config @@ -1,2 +1 @@ -process { withName: 'FASTQC' { container = 'community.wave.seqera.io/library/fastqc:0.12.1--e455e32f745abe68' } } -process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc:1.34--d167b8012595a136' } } +process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc:1.33--58d7dee710ab3aa8' } } diff --git a/conf/containers_singularity_https_amd64.config b/conf/containers_singularity_https_amd64.config index 838f2484f..d04c5be34 100644 --- a/conf/containers_singularity_https_amd64.config +++ b/conf/containers_singularity_https_amd64.config @@ -1,2 +1 @@ -process { withName: 'FASTQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f2/f20b021476d1d87658820f971ebecc1e8cdbde0f338eb0d9cea2b0a8fc54a54b/data' } } -process { withName: 'MULTIQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/1b/1bef8af6be88c5733461959c46ac8ef73d18f65277f62a1695d0e1633054f9c2/data' } } +process { withName: 'MULTIQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data' } } diff --git a/conf/containers_singularity_https_arm64.config b/conf/containers_singularity_https_arm64.config index 090173be1..4a0312379 100644 --- a/conf/containers_singularity_https_arm64.config +++ b/conf/containers_singularity_https_arm64.config @@ -1,2 +1 @@ -process { withName: 'FASTQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/46/46daf2dad0169afd2ae047c3e50ed3776259f664bf07e5e06b045dc23449e994/data' } } -process { withName: 'MULTIQC' { container = 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9a/9a1fec9662a152683e6fcae440d0ce20920b3b89dc62d1e3a52e73f92eba0969/data' } } +process { withName: 'MULTIQC' { container = 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/78/78b89e91d89e9cc99ad5ade5be311f347838cb2acbfb4f13bc343b170be09ce4/data' } } diff --git a/conf/containers_singularity_oras_amd64.config b/conf/containers_singularity_oras_amd64.config index 773f3698b..2d8d51fcf 100644 --- a/conf/containers_singularity_oras_amd64.config +++ b/conf/containers_singularity_oras_amd64.config @@ -1,2 +1 @@ -process { withName: 'FASTQC' { container = 'oras://community.wave.seqera.io/library/fastqc:0.12.1--5c4bd442468d75dd' } } -process { withName: 'MULTIQC' { container = 'oras://community.wave.seqera.io/library/multiqc:1.34--4fc8657c816047c0' } } +process { withName: 'MULTIQC' { container = 'oras://community.wave.seqera.io/library/multiqc:1.33--e3576ddf588fa00d' } } diff --git a/conf/containers_singularity_oras_arm64.config b/conf/containers_singularity_oras_arm64.config index 798cc638d..c3210dd0f 100644 --- a/conf/containers_singularity_oras_arm64.config +++ b/conf/containers_singularity_oras_arm64.config @@ -1,2 +1 @@ -process { withName: 'FASTQC' { container = 'oras://community.wave.seqera.io/library/fastqc:0.12.1--127a87fc06499035' } } -process { withName: 'MULTIQC' { container = 'oras://community.wave.seqera.io/library/multiqc:1.34--7fbd82d945c06726' } } +process { withName: 'MULTIQC' { container = 'oras://community.wave.seqera.io/library/multiqc:1.33--2537ca5f8445e3c2' } } diff --git a/modules.json b/modules.json index f72f65a4f..33111bb3d 100644 --- a/modules.json +++ b/modules.json @@ -296,6 +296,11 @@ "git_sha": "66391ca86ea6a081d288afe6a93d242fefcd8c2c", "installed_by": ["modules"] }, + "htslib/bgziptabix": { + "branch": "master", + "git_sha": "54e41f4ed3aead45054380a9befeb927612ffc91", + "installed_by": ["vcf_extract_relate_somalier"] + }, "last/lastdb": { "branch": "master", "git_sha": "a54953d05812af249d40ffaccb5b8ee371a75866", @@ -491,6 +496,16 @@ "git_sha": "d0afa49c7c23c9e99a47af9bafb4cee449853560", "installed_by": ["modules"] }, + "somalier/extract": { 
+ "branch": "master", + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", + "installed_by": ["modules", "vcf_extract_relate_somalier"] + }, + "somalier/relate": { + "branch": "master", + "git_sha": "6d46786420b4d7bc88eba026eb389c0c5535d120", + "installed_by": ["modules", "vcf_extract_relate_somalier"] + }, "spring/decompress": { "branch": "master", "git_sha": "d0afa49c7c23c9e99a47af9bafb4cee449853560", @@ -585,6 +600,11 @@ "git_sha": "fdc08b8b1ae74f56686ce21f7ea11ad11990ce57", "installed_by": ["subworkflows"] }, + "vcf_extract_relate_somalier": { + "branch": "master", + "git_sha": "54e41f4ed3aead45054380a9befeb927612ffc91", + "installed_by": ["subworkflows"] + }, "vcf_filter_bcftools_ensemblvep": { "branch": "master", "git_sha": "95518d261ec0561b3dffb332944bebc5ef85efcf", diff --git a/modules/nf-core/htslib/bgziptabix/environment.yml b/modules/nf-core/htslib/bgziptabix/environment.yml new file mode 100644 index 000000000..573601221 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::htslib=1.23.1 + - conda-forge::xz=5.8.3 diff --git a/modules/nf-core/htslib/bgziptabix/main.nf b/modules/nf-core/htslib/bgziptabix/main.nf new file mode 100644 index 000000000..9efe9689b --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/main.nf @@ -0,0 +1,88 @@ +process HTSLIB_BGZIPTABIX { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/33/33a1f2c7f36ec58339e41cbea096d121f606918778a91cfbef944b40ba7ce48b/data' + : 'community.wave.seqera.io/library/htslib_xz:49c8c84af5c4b3b9'}" + + input: + tuple val(meta), path(infile), path(infile_tbi), path(regions) + val action + val make_index + val out_ext + + output: + tuple val(meta), path("${outfile}"), emit: output + tuple val(meta), path("${outfile}.{tbi,csi}"), emit: index, optional: true + // all htslib tools have the same version, we use bgzip + tuple val("${task.process}"), val('htslib'), eval("bgzip --version | sed '1! d; s/bgzip (htslib) //'"), topic: versions, emit: versions_htslib + tuple val("${task.process}"), val('xz'), eval("xz --version | sed '1! d; s/xz (XZ Utils) //'"), topic: versions, emit: versions_xz + + when: + task.ext.when == null || task.ext.when + + script: + def allowed_actions = ["compress", "decompress"] + if (action !in allowed_actions) { + error("htslib/bgziptabix: Invalid action: ${action}. Allowed actions are: ${allowed_actions.join(', ')}") + } + + if (action == "decompress" && make_index) { + log.warn("htslib/bgziptabix: Cannot create index when decompressing. Ignoring make_index option.") + } + + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + outfile = action == "compress" ? (out_ext ? "${prefix}.${out_ext}.gz" : "${prefix}.gz") : (out_ext ? "${prefix}.${out_ext}" : "${prefix}") + + def compress_cmd = action == "compress" ? "bgzip -c ${args} -@ ${task.cpus}" : "cat" + def bgzip_cmd = action == "compress" ? "[ '\$(basename ${infile})' != '\$(basename ${outfile})' ] && ln -s ${infile} ${outfile}" : "bgzip -c -d ${args} -@ ${task.cpus} ${infile} > ${outfile}" + + def regions_arg = regions ? "-R ${regions}" : "" + def tabix_cmd = (make_index && !infile_tbi) ? "tabix -@ ${task.cpus} ${regions_arg} ${args2} -f ${outfile}" : "" + def link_tabix_cmd = make_index && infile_tbi ? 
"ln -s ${infile_tbi} ${outfile}.${infile_tbi.extension}" : "" + def uncompressed_cmd = action == "compress" ? "${compress_cmd} ${infile} > ${outfile}" : (infile.getName() == outfile ? "" : "ln -s ${infile} ${outfile}") + """ + ${link_tabix_cmd} + + FILE_TYPE=\$(htsfile ${infile}) + + case "\$FILE_TYPE" in + *BGZF-compressed*) + ${bgzip_cmd} ;; + *gzip-compressed*) + [ "\$(basename ${infile})" == "\$(basename ${outfile})" ] && echo "Input and output names cannot be the same" && exit 1 + zcat ${infile} | ${compress_cmd} > ${outfile} ;; + *bzip2-compressed*) + bzcat ${infile} | ${compress_cmd} > ${outfile} ;; + *XZ-compressed*) + xzcat ${infile} | ${compress_cmd} > ${outfile} ;; + *) + ${uncompressed_cmd} ;; + esac + + ${tabix_cmd} + """ + + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + outfile = action == "compress" ? (out_ext ? "${prefix}.${out_ext}.gz" : "${prefix}.gz") : (out_ext ? "${prefix}.${out_ext}" : "${prefix}") + + def touch_cmd = action == "compress" ? "echo | bgzip -c" : "echo" + def index_fmt = args2.contains('-C') ? 'csi' : 'tbi' + def tabix_cmd = make_index ? "touch ${outfile}.${index_fmt}" : "" + def link_tabix_cmd = make_index && infile_tbi ? "ln -s ${infile_tbi} ${outfile}.${infile_tbi.extension}" : "" + """ + echo ${args} + + ${touch_cmd} > ${outfile} + + ${tabix_cmd} + ${link_tabix_cmd} + """ +} diff --git a/modules/nf-core/htslib/bgziptabix/meta.yml b/modules/nf-core/htslib/bgziptabix/meta.yml new file mode 100644 index 000000000..4cdefd0e1 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/meta.yml @@ -0,0 +1,125 @@ +name: "htslib_bgziptabix" +description: "Multi-purpose module to compress, decompress and index files using bgzip + and tabix." +keywords: + - compress + - decompress + - index + - bgzip + - tabix + - gzip + - bzip + - xz +tools: + - "htslib": + description: "C library for high-throughput sequencing data formats." 
+ homepage: "http://www.htslib.org/" + documentation: "http://www.htslib.org/doc/" + tool_dev_url: "https://github.com/samtools/htslib" + doi: "10.1093/gigascience/giab007" + licence: + - "MIT" + identifier: biotools:htslib +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - infile: + type: file + description: Input file to compress or decompress + pattern: "*" + ontologies: [] + - infile_tbi: + type: file + description: Optional tabix index for the input file. + pattern: "*.{tbi,csi}" + ontologies: + - edam: http://edamontology.org/format_3616 # tabix + - regions: + type: file + description: Optional file of regions to extract (BED or chr:start-end format). + Only used when creating an index for the output file. + pattern: "*.{bed,txt,tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - edam: http://edamontology.org/format_3003 # BED + - action: + type: string + description: Action to perform, either `compress` or `decompress` + - make_index: + type: boolean + description: Whether to create a tabix index for the output file; only used + if `action` is `compress` + - out_ext: + type: string + description: Output file extension without `.gz` suffix (for example `vcf`) +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - ${outfile}: + type: file + description: Compressed or decompressed output file + pattern: "*" + ontologies: [] + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1' ] + - ${outfile}.{tbi,csi}: + type: file + description: Tabix index file for the compressed output file + pattern: "*.{tbi,csi}" + ontologies: + - edam: http://edamontology.org/format_3616 # tabix + versions_htslib: + - - ${task.process}: + type: string + description: The name of the process + - htslib: + type: string + description: The name of the tool + - bgzip --version | sed '1! d; s/bgzip (htslib) //': + type: eval + description: The expression to obtain the version of the tool + versions_xz: + - - ${task.process}: + type: string + description: The name of the process + - xz: + type: string + description: The name of the tool + - xz --version | sed '1! d; s/xz (XZ Utils) //': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - htslib: + type: string + description: The name of the tool + - bgzip --version | sed '1! d; s/bgzip (htslib) //': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - xz: + type: string + description: The name of the tool + - xz --version | sed '1! 
d; s/xz (XZ Utils) //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@itrujnara" +maintainers: + - "@itrujnara" diff --git a/modules/nf-core/htslib/bgziptabix/tests/main.nf.test b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test new file mode 100644 index 000000000..a73465066 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test @@ -0,0 +1,435 @@ +nextflow_process { + + name "Test Process HTSLIB_BGZIPTABIX" + script "../main.nf" + process "HTSLIB_BGZIPTABIX" + + tag "modules" + tag "modules_nfcore" + tag "htslib" + tag "htslib/bgziptabix" + + test("sarscov2 - vcf - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf') }, + { assert process.out.index.size() == 0 } + ) + } + + } + + test("sarscov2 - vcf - compress - index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf.gz') }, + { assert process.out.index.get(0).get(1).endsWith('.vcf.gz.tbi') } + ) + } + + } + + test("sarscov2 - vcf + regions - compress - index") { + when { + process { + """ + input[0] = [ + [ id:'example' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', 
checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file('https://raw.githubusercontent.com/luisas/test-datasets/refs/heads/add-bedgraph-subset-illumina/data/genomics/sarscov2/illumina/bed/test.bed', checkIfExists: true) + ] + input[1] = 'compress' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + sanitizeOutput(process.out), + path(process.out.output[0][1]).vcf.getVariantsMD5(), + ).match() } + ) + } + } + + test("sarscov2 - bgzip - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - bgzip - compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.vcf.gz') }, + { assert process.out.index.size() == 0 } + ) + } + + } + + test("sarscov2 - gzip - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [], + [] + 
] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'fastq' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.fastq') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - gzip - (re)compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'fastq' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.fastq.gz') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - gzip - name clash") { + + when { + process { + """ + input[0] = [ + [ id:'test_1' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'fastq' // out_ext + """ + } + } + + then { + assert process.failed + assertAll( + { assert process.errorReport.contains("Input and output names cannot be the same") } + ) + } + } + + test("metagenome - bz2 - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/rgi/card-data.tar.bz2', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'tar' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.tar') }, + { assert 
process.out.index.size() == 0 } + ) + } + } + + test("metagenome - bz2 - (re)compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/rgi/card-data.tar.bz2', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = 'tar' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.tar.gz') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("metagenome - xz - decompress") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/misc/taxa_sqlite.xz', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = '' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() }, + { assert process.out.output.get(0).get(1).endsWith('test') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("metagenome - xz - (re)compress - no index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/prokaryotes/metagenome/taxonomy/misc/taxa_sqlite.xz', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = false // make_index + input[3] = '' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() }, + { assert process.out.output.get(0).get(1).endsWith('.gz') }, + { assert process.out.index.size() == 0 } + ) + } + } + + test("sarscov2 - vcf - compress - index - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + 
[ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'compress' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } + + test("sarscov2 - vcf - decompress - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = 'decompress' // action + input[2] = false // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(sanitizeOutput(process.out)).match() } + ) + } + + } + + test("illegal action") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [], + [] + ] + input[1] = 'invalid_action' // action + input[2] = true // make_index + input[3] = 'vcf' // out_ext + """ + } + } + + then { + assert process.failed + assert process.errorReport.contains("Invalid action: invalid_action. 
Allowed actions are: compress, decompress") + } + + } + +} diff --git a/modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap new file mode 100644 index 000000000..52cfc9069 --- /dev/null +++ b/modules/nf-core/htslib/bgziptabix/tests/main.nf.test.snap @@ -0,0 +1,574 @@ +{ + "sarscov2 - gzip - (re)compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:33.710007592", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - xz - (re)compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.gz:md5,b8d852a2b1ee52ed64d83046dcdb9de2" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:22:42.445692755", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - bz2 - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.tar:md5,39e9e71fd16cfd09ceca12cd46e6abce" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:48.804507455", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - decompress - stub": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + 
"versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:23:14.567213835", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - gzip - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:27.607706101", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - compress - index - stub": { + "content": [ + { + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:22:57.854824265", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:20:56.277612816", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - bz2 - (re)compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.tar.gz:md5,39e9e71fd16cfd09ceca12cd46e6abce" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + 
] + ] + } + ], + "timestamp": "2026-05-19T10:21:57.137689117", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - compress - index": { + "content": [ + { + "index": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,7f005943c935f2b55ba3f9d4802aa09f" + ] + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:02.256241871", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "metagenome - xz - decompress": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test:md5,b8d852a2b1ee52ed64d83046dcdb9de2" + ] + ], + "1": [ + + ], + "2": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "3": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ], + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test:md5,b8d852a2b1ee52ed64d83046dcdb9de2" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + }, + { + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-12T11:31:30.587093278", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf - compress - no index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ] + } + ], + 
"timestamp": "2026-05-06T15:27:48.766118732", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - bgzip - compress - no index": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:21.172365408", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - vcf + regions - compress - index": { + "content": [ + { + "index": [ + [ + { + "id": "example" + }, + "example.vcf.gz.tbi:md5,d22e5b84e4fcd18792179f72e6da702e" + ] + ], + "output": [ + [ + { + "id": "example" + }, + "example.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + }, + "bc7bf3ee9e8430e064c539eb81e59bf9" + ], + "timestamp": "2026-05-19T10:34:19.00293386", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + }, + "sarscov2 - bgzip - decompress": { + "content": [ + { + "index": [ + + ], + "output": [ + [ + { + "id": "test" + }, + "test.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_htslib": [ + [ + "HTSLIB_BGZIPTABIX", + "htslib", + "1.23.1" + ] + ], + "versions_xz": [ + [ + "HTSLIB_BGZIPTABIX", + "xz", + "5.8.3" + ] + ] + } + ], + "timestamp": "2026-05-19T10:21:14.663326257", + "meta": { + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/somalier/extract/environment.yml b/modules/nf-core/somalier/extract/environment.yml new file mode 100644 index 000000000..80eb6a006 --- /dev/null +++ b/modules/nf-core/somalier/extract/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::somalier=0.2.19 diff --git a/modules/nf-core/somalier/extract/main.nf b/modules/nf-core/somalier/extract/main.nf new file mode 100644 index 000000000..3069a717e --- /dev/null +++ b/modules/nf-core/somalier/extract/main.nf @@ -0,0 +1,40 @@ + +process SOMALIER_EXTRACT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/somalier:0.2.19--h0c29559_0': + 'quay.io/biocontainers/somalier:0.2.19--h0c29559_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(sites) + + output: + tuple val(meta), path("*.somalier") , emit: extract + tuple val("${task.process}"), val('somalier'), eval('somalier 2>&1 | sed -n \'s/.*version: \\([0-9.]*\\).*/\\1/p\''), emit: versions_somalier, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + somalier extract \\ + --sites ${sites} \\ + -f ${fasta} \\ + ${input} \\ + ${args} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.somalier + """ +} diff --git a/modules/nf-core/somalier/extract/meta.yml b/modules/nf-core/somalier/extract/meta.yml new file mode 100644 index 000000000..40a7c9c33 --- /dev/null +++ b/modules/nf-core/somalier/extract/meta.yml @@ -0,0 +1,111 @@ +name: "somalier_extract" +description: Somalier can extract informative sites, evaluate relatedness, and + perform quality-control on BAM/CRAM/BCF/VCF/GVCF or from jointly-called VCFs +keywords: + - relatedness + - QC + - bam + - cram + - vcf + - gvcf + - ancestry + - identity + - kinship + - informative sites + - family 
+tools: + - "somalier": + description: "Somalier can extract informative sites, evaluate relatedness, and + perform quality-control on BAM/CRAM/BCF/VCF/GVCF or from jointly-called VCFs" + homepage: "https://github.com/brentp/somalier" + documentation: "https://github.com/brentp/somalier/blob/master/README.md" + tool_dev_url: "https://github.com/brentp/somalier" + doi: "10.1186/s13073-020-00761-2" + licence: + - "MIT" + identifier: biotools:somalier +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM/BCF/VCF/GVCF or jointly-called VCF file + ontologies: [] + - input_index: + type: file + description: index file of the input data, e.g., bam.bai, cram.crai + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'hg38' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.{fasta,fna,fas,fa}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'hg38' ] + - fai: + type: file + description: FASTA index file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing sites information + e.g. [ id:'hg38' ] + - sites: + type: file + description: sites file in VCF format which can be taken from + https://github.com/brentp/somalier + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 +output: + extract: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.somalier": + type: file + description: binary output file based on extracted sites + pattern: "*.{somalier}" + ontologies: [] + versions_somalier: + - - ${task.process}: + type: string + description: The name of the process + - somalier: + type: string + description: The name of the tool + - "somalier 2>&1 | sed -n 's/.*version: \\([0-9.]*\\).*/\\1/p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - somalier: + type: string + description: The name of the tool + - "somalier 2>&1 | sed -n 's/.*version: \\([0-9.]*\\).*/\\1/p'": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@ashotmarg" + - "@nvnieuwk" +maintainers: + - "@ashotmarg" + - "@nvnieuwk" diff --git a/modules/nf-core/somalier/extract/tests/main.nf.test b/modules/nf-core/somalier/extract/tests/main.nf.test new file mode 100644 index 000000000..351ee9e21 --- /dev/null +++ b/modules/nf-core/somalier/extract/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process SOMALIER_EXTRACT" + script "../main.nf" + process "SOMALIER_EXTRACT" + + tag "modules" + tag "modules_nfcore" + tag "somalier" + tag "somalier/extract" + + test("homo_sapiens - [ bam, bai ], fasta, fai, sites") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'delete_me/somalier/sites_chr21.hg38.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [ bam, bai ], fasta, fai, sites -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.markduplicates.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'delete_me/somalier/sites_chr21.hg38.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/somalier/extract/tests/main.nf.test.snap b/modules/nf-core/somalier/extract/tests/main.nf.test.snap new file mode 100644 index 000000000..d86e70740 --- /dev/null +++ b/modules/nf-core/somalier/extract/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "homo_sapiens - [ bam, bai ], fasta, fai, sites": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "normal.somalier:md5,21f0b980edd42ddaa8ab964959c1de02" + ] + ], + "1": [ + [ + 
"SOMALIER_EXTRACT", + "somalier", + "0.2.19" + ] + ], + "extract": [ + [ + { + "id": "test", + "single_end": false + }, + "normal.somalier:md5,21f0b980edd42ddaa8ab964959c1de02" + ] + ], + "versions_somalier": [ + [ + "SOMALIER_EXTRACT", + "somalier", + "0.2.19" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:10:03.312821223" + }, + "homo_sapiens - [ bam, bai ], fasta, fai, sites -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.somalier:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "SOMALIER_EXTRACT", + "somalier", + "0.2.19" + ] + ], + "extract": [ + [ + { + "id": "test", + "single_end": false + }, + "test.somalier:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_somalier": [ + [ + "SOMALIER_EXTRACT", + "somalier", + "0.2.19" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:10:15.032712665" + } +} \ No newline at end of file diff --git a/modules/nf-core/somalier/relate/environment.yml b/modules/nf-core/somalier/relate/environment.yml new file mode 100644 index 000000000..80eb6a006 --- /dev/null +++ b/modules/nf-core/somalier/relate/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::somalier=0.2.19 diff --git a/modules/nf-core/somalier/relate/main.nf b/modules/nf-core/somalier/relate/main.nf new file mode 100644 index 000000000..d0ced157f --- /dev/null +++ b/modules/nf-core/somalier/relate/main.nf @@ -0,0 +1,48 @@ + +process SOMALIER_RELATE { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/somalier:0.2.19--h0c29559_0': + 'quay.io/biocontainers/somalier:0.2.19--h0c29559_0' }" + + input: + tuple val(meta), path(extract), path(ped) + path(sample_groups) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.pairs.tsv"), emit: pairs_tsv + tuple val(meta), path("*.samples.tsv"), emit: samples_tsv + tuple val("${task.process}"), val('somalier'), eval('somalier 2>&1 | sed -n \'s/.*version: \\([0-9.]*\\).*/\\1/p\''), emit: versions_somalier, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_list = extract.join(' ') + def prefix = task.ext.prefix ?: "$meta.id" + def sample_groups_command = sample_groups ? "-g $sample_groups" : "" + def ped_command = ped ? "-p $ped" : "" + + """ + somalier relate \\ + -o ${prefix} \\ + ${input_list} \\ + ${args} \\ + ${sample_groups_command} \\ + ${ped_command} + """ + + stub: + def prefix = task.ext.prefix ?: "$meta.id" + + """ + touch ${prefix}.html + touch ${prefix}.pairs.tsv + touch ${prefix}.samples.tsv + """ +} diff --git a/modules/nf-core/somalier/relate/meta.yml b/modules/nf-core/somalier/relate/meta.yml new file mode 100644 index 000000000..2c6ab1441 --- /dev/null +++ b/modules/nf-core/somalier/relate/meta.yml @@ -0,0 +1,113 @@ +name: "somalier_relate" +description: Somalier can extract informative sites, evaluate relatedness, and + perform quality-control on BAM/CRAM/BCF/VCF/GVCF or from jointly-called VCFs +keywords: + - relatedness + - QC + - bam + - cram + - vcf + - gvcf + - ancestry + - identity + - kinship + - informative sites + - family +tools: + - "somalier": + description: "Somalier can extract informative sites, evaluate relatedness, and + perform quality-control on BAM/CRAM/BCF/VCF/GVCF or from jointly-called VCFs" + homepage: "https://github.com/brentp/somalier" + documentation: "https://github.com/brentp/somalier/blob/master/README.md" + tool_dev_url: 
"https://github.com/brentp/somalier" + doi: "10.1186/s13073-020-00761-2" + licence: + - "MIT" + identifier: biotools:somalier +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - extract: + type: file + description: extract file(s) from Somalier extract + pattern: "*.somalier" + ontologies: [] + - ped: + type: file + description: optional path to a ped or fam file indicating the expected + relationships among samples + pattern: "*.{ped,fam}" + ontologies: [] + - sample_groups: + type: file + description: optional path to expected groups of samples such as tumor + normal pairs specified as comma-separated groups per line + pattern: "*.{txt,csv}" + ontologies: + - edam: http://edamontology.org/format_3752 +output: + html: + - - meta: + type: file + description: html file + pattern: "*.html" + ontologies: [] + - "*.html": + type: file + description: html file + pattern: "*.html" + ontologies: [] + pairs_tsv: + - - meta: + type: file + description: html file + pattern: "*.html" + ontologies: [] + - "*.pairs.tsv": + type: file + description: tsv file with output stats for pairs of samples + pattern: "*.pairs.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 + samples_tsv: + - - meta: + type: file + description: html file + pattern: "*.html" + ontologies: [] + - "*.samples.tsv": + type: file + description: tsv file with sample-level information + pattern: "*.samples.tsv" + ontologies: + - edam: http://edamontology.org/format_3475 + versions_somalier: + - - ${task.process}: + type: string + description: The name of the process + - somalier: + type: string + description: The name of the tool + - "somalier 2>&1 | sed -n 's/.*version: \\([0-9.]*\\).*/\\1/p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - somalier: + type: string + description: The 
name of the tool + - "somalier 2>&1 | sed -n 's/.*version: \\([0-9.]*\\).*/\\1/p'": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@ashotmarg" + - "@nvnieuwk" +maintainers: + - "@ashotmarg" + - "@nvnieuwk" diff --git a/modules/nf-core/somalier/relate/tests/main.nf.test b/modules/nf-core/somalier/relate/tests/main.nf.test new file mode 100644 index 000000000..71ededf77 --- /dev/null +++ b/modules/nf-core/somalier/relate/tests/main.nf.test @@ -0,0 +1,124 @@ +nextflow_process { + + name "Test Process SOMALIER_RELATE" + script "../main.nf" + process "SOMALIER_RELATE" + + tag "modules" + tag "modules_nfcore" + tag "somalier" + tag "somalier/relate" + + test("[ delete_me, [] ], []") { + + when { + process { + """ + input[0] = [ + [ id:'cohort', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true), + file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true) + ], + [] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ delete_me, ped ], groups") { + + when { + process { + """ + input[0] = [ + [ id:'cohort', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true), + file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true) + ], + file(params.modules_testdata_base_path + 'delete_me/somalier/family.ped', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'delete_me/somalier/groups.txt', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ delete_me, [] ], [] -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'cohort', 
single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true), + file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true) + ], + [] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("[ delete_me, ped ], groups -stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'cohort', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'delete_me/somalier/normal.somalier', checkIfExists: true), + file(params.modules_testdata_base_path + 'delete_me/somalier/tumour.somalier', checkIfExists: true) + ], + file(params.modules_testdata_base_path + 'delete_me/somalier/family.ped', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'delete_me/somalier/groups.txt', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/somalier/relate/tests/main.nf.test.snap b/modules/nf-core/somalier/relate/tests/main.nf.test.snap new file mode 100644 index 000000000..c510ebf0c --- /dev/null +++ b/modules/nf-core/somalier/relate/tests/main.nf.test.snap @@ -0,0 +1,318 @@ +{ + "[ delete_me, [] ], []": { + "content": [ + { + "0": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,03cac9b2c67a8a06f63e07f83ee11e18" + ] + ], + "1": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,54d1e9fca1bf9d747d4254c6fa98edcf" + ] + ], + "2": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,97257d88886db1325c4d7d10cefa7169" + ] + ], + "3": [ + [ + "SOMALIER_RELATE", + "somalier", + "0.2.19" + ] + ], + "html": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,03cac9b2c67a8a06f63e07f83ee11e18" 
+ ] + ], + "pairs_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,54d1e9fca1bf9d747d4254c6fa98edcf" + ] + ], + "samples_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,97257d88886db1325c4d7d10cefa7169" + ] + ], + "versions_somalier": [ + [ + "SOMALIER_RELATE", + "somalier", + "0.2.19" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:12:11.50120294" + }, + "[ delete_me, [] ], [] -stub": { + "content": [ + { + "0": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "SOMALIER_RELATE", + "somalier", + "0.2.19" + ] + ], + "html": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pairs_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "samples_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_somalier": [ + [ + "SOMALIER_RELATE", + "somalier", + "0.2.19" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:12:26.334811909" + }, + "[ delete_me, ped ], groups -stub": { + "content": [ + { + "0": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "cohort", + "single_end": false + }, + 
"cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "SOMALIER_RELATE", + "somalier", + "0.2.19" + ] + ], + "html": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pairs_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "samples_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_somalier": [ + [ + "SOMALIER_RELATE", + "somalier", + "0.2.19" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:12:33.559842159" + }, + "[ delete_me, ped ], groups": { + "content": [ + { + "0": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,0d573016c9279ccdfdcfd4eb01d73b89" + ] + ], + "1": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,8655714f1e5359329188e9f501168131" + ] + ], + "2": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,38ef93340e55fbeef47640abda9e48b0" + ] + ], + "3": [ + [ + "SOMALIER_RELATE", + "somalier", + "0.2.19" + ] + ], + "html": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.html:md5,0d573016c9279ccdfdcfd4eb01d73b89" + ] + ], + "pairs_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.pairs.tsv:md5,8655714f1e5359329188e9f501168131" + ] + ], + "samples_tsv": [ + [ + { + "id": "cohort", + "single_end": false + }, + "cohort.samples.tsv:md5,38ef93340e55fbeef47640abda9e48b0" + ] + ], + "versions_somalier": [ + [ + "SOMALIER_RELATE", + "somalier", + "0.2.19" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:12:19.028367312" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 0b8d22571..694e66438 100644 --- a/nextflow.config 
+++ b/nextflow.config @@ -120,6 +120,9 @@ params { // Variant calling cnvnator_binsize = 1000 variant_caller = 'deepvariant' + // Somalier options + run_somalier = false + somalier_sites_vcf = null homoplasmy_af_threshold = 1 // variant annotation diff --git a/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf new file mode 100644 index 000000000..d15781aad --- /dev/null +++ b/subworkflows/nf-core/vcf_extract_relate_somalier/main.nf @@ -0,0 +1,76 @@ +include { SOMALIER_EXTRACT } from '../../../modules/nf-core/somalier/extract/main' +include { SOMALIER_RELATE } from '../../../modules/nf-core/somalier/relate/main' +include { HTSLIB_BGZIPTABIX } from '../../../modules/nf-core/htslib/bgziptabix/main' + +workflow VCF_EXTRACT_RELATE_SOMALIER { + take: + ch_vcfs // channel: [mandatory] [ val(meta), path(vcf), path(tbi), val(count) ] + ch_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_fasta_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_somalier_sites // channel: [mandatory] [ path(somalier_sites_vcf) ] + ch_peds // channel: [mandatory] [ val(meta), path(ped) ] + ch_sample_groups // channel: [optional] [ path(txt) ] + val_common_id // string: [optional] A common identifier for the samples that need to be related. Has to be given when using single sample VCFs + main: + + ch_input = ch_vcfs + .branch { meta, vcf, tbi, _count -> + tbi: tbi != [] + return [ meta, vcf, tbi ] + no_tbi: tbi == [] + return [ meta, vcf ] + } + + ch_for_index = ch_input.no_tbi + .multiMap { meta, vcf -> + files: [ meta, vcf ] + format: vcf.name.endsWith(".bcf.gz") ? 
"bcf" : "vcf" + } + + HTSLIB_BGZIPTABIX( + ch_for_index.files.map { meta, vcf -> [meta, vcf, [], []] }, + "compress", + true, + ch_for_index.format + ) + + ch_somalierextract_input = HTSLIB_BGZIPTABIX.out.output.join( + HTSLIB_BGZIPTABIX.out.index + ).mix( + ch_input.tbi + ) + + SOMALIER_EXTRACT( + ch_somalierextract_input, + ch_fasta, + ch_fasta_fai, + ch_somalier_sites + ) + + ch_somalierrelate_input = SOMALIER_EXTRACT.out.extract + .join(ch_vcfs, failOnDuplicate: true, failOnMismatch: true) + .map { meta, extract, _vcf, _tbi, count -> + def new_meta = val_common_id ? meta + [id:meta[val_common_id]] : meta + [ count ? groupKey(new_meta, count): new_meta, extract ] + } + .groupTuple() + .join(ch_peds, failOnDuplicate: true, failOnMismatch: true) + .map { meta, extract, ped -> + def extract2 = extract[0] instanceof ArrayList ? extract[0] : extract + def sorted_extract = extract2.sort { a, b -> file(a).name <=> file(b).name } + // Check if meta is a GroupKey by checking for 'target' property + def new_meta = meta.hasProperty('target') ? 
meta.target : meta + [ new_meta, sorted_extract, ped ] + } // Sort and flatten the extract list, remove the GroupKey wrapper if present + + SOMALIER_RELATE( + ch_somalierrelate_input, + ch_sample_groups + ) + + emit: + extract = SOMALIER_EXTRACT.out.extract // channel: [ val(meta), path(extract) ] + html = SOMALIER_RELATE.out.html // channel: [ val(meta), path(html) ] + pairs_tsv = SOMALIER_RELATE.out.pairs_tsv // channel: [ val(meta), path(tsv) ] + samples_tsv = SOMALIER_RELATE.out.samples_tsv // channel: [ val(meta), path(tsv) ] +} diff --git a/subworkflows/nf-core/vcf_extract_relate_somalier/meta.yml b/subworkflows/nf-core/vcf_extract_relate_somalier/meta.yml new file mode 100644 index 000000000..f31ac706f --- /dev/null +++ b/subworkflows/nf-core/vcf_extract_relate_somalier/meta.yml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "vcf_extract_relate_somalier" +description: Perform somalier extraction and relate stats on input VCFs +keywords: + - somalier + - stats + - vcf + - ped + - relatedness +components: + - htslib/bgziptabix + - somalier/extract + - somalier/relate +input: + - ch_vcfs: + description: | + The input VCFs to perform the stats on, optionally with their indices. + This channel can also contain the number of samples in the same family/group + to check relatedness. This is advised to add as it can improve the efficiency of your pipeline + Structure: [ val(meta), path(vcf), path(tbi), val(count) ] + - ch_fasta: + description: | + The reference FASTA used to create the VCF files + Structure: [ path(fasta) ] + - ch_fasta_fai: + description: | + The index of the reference FASTA + Structure: [ path(fasta_fai) ] + - ch_somalier_sites: + description: | + A VCF containing the common sites for Somalier + Structure: [ path(somalier_sites_vcf) ] + - ch_peds: + description: | + A channel containing an optional PED file for the corresponding families. 
This channel has to be given, but can be like `[meta, []]`. + When you don't want to use a PED file, you must supply a channel + containing the meta and an empty value (`[]`) instead of a PED + Structure: [ val(meta), path(ped) ] + - ch_sample_groups: + description: | + Optional - A text file describing how the samples should be grouped + Structure: [ path(txt) ] + - val_common_id: + description: | + Optional - A common identifier in the meta map. + This will be used to determine which VCFs should be used in somalier_relate. + This value should be given when using single sample VCFs +output: + - extract: + description: | + The extract file created with Somalier extract + Structure: [ val(meta), path(extract) ] + - html: + description: | + An HTML file containing an interactive graph on the relatedness of the samples + Structure: [ val(meta), path(html) ] + - pairs_tsv: + description: | + A TSV file detailing the relatedness between pairs of samples + Structure: [ val(meta), path(tsv) ] + - samples_tsv: + description: | + A TSV file detailing the relatedness between all samples with the same meta + Structure: [ val(meta), path(tsv) ] +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/nf-core/vcf_extract_relate_somalier/tests/main.nf.test b/subworkflows/nf-core/vcf_extract_relate_somalier/tests/main.nf.test new file mode 100644 index 000000000..f94df0bc7 --- /dev/null +++ b/subworkflows/nf-core/vcf_extract_relate_somalier/tests/main.nf.test @@ -0,0 +1,332 @@ +nextflow_workflow { + + name "Test Workflow VCF_EXTRACT_RELATE_SOMALIER" + + script "../main.nf" + workflow "VCF_EXTRACT_RELATE_SOMALIER" + + config "./nextflow.config" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/vcf_extract_relate_somalier" + tag "vcf_extract_relate_somalier" + + tag "somalier/extract" + tag "somalier/relate" + tag "somalier" + tag "htslib/bgziptabix" + tag "htslib" + + tag "bcftools/merge" + tag "bcftools" + + test("homo_sapiens minimal 
- vcf 1 - fasta - fai - sites - [] - [] - []") { + when { + workflow { + """ + input[0] = Channel.of([ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true), + [], + 1 + ]) + input[1] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)]) + input[2] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)]) + input[3] = Channel.of([ [id: "sites"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz', checkIfExists: true)]) + input[4] = Channel.of([ [id:"test"], [] ]) + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + test("homo_sapiens vcf_and_index - vcf index 1 - fasta - fai - sites - [] - [] - []") { + when { + workflow { + """ + input[0] = Channel.of([ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', checkIfExists: true), + 1 + ]) + input[1] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)]) + input[2] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)]) + input[3] = Channel.of([ [id: "sites"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz', checkIfExists: true)]) + input[4] = Channel.of([ [id:"test"], [] ]) + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert 
snapshot(workflow.out).match() } + ) + } + } + test("homo_sapiens with_ped - vcf 1 - fasta - fai - sites - ped - [] - []") { // one un-indexed VCF with count 1 and a ped file keyed by the same meta; inputs 5 and 6 are left empty + when { + workflow { + """ + ped_file = Channel.of( + "#family_id\tsample_id\tfather\tmother\tsex\tphenotype", + "family1\tnormal\tfatherID\tmotherID\t1\t1", + "family1\ttumour\tfatherID\tmotherID\t1\t2" + ) + .collectFile(name: 'family.ped', newLine: true) + ch_ped = Channel.of([[id: "test"]]) + .combine(ped_file) + + input[0] = Channel.of([ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true), + [], + 1 + ]) + input[1] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)]) + input[2] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)]) + input[3] = Channel.of([ [id: "sites"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz', checkIfExists: true)]) + input[4] = ch_ped + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + test("homo_sapiens mixed_combine - [vcf, vcf] [index, []] 2 - fasta - fai - sites - ped - samples - common") { // two VCFs (only the second indexed) sharing meta.common, each with count 2, plus ped, a sample-groups file and "common" as input 6 + when { + + workflow { + """ + ped_file = Channel.of( + "#family_id\tsample_id\tfather\tmother\tsex\tphenotype", + "family1\tnormal\tfatherID\tmotherID\t1\t1", + "family1\ttumour\tfatherID\tmotherID\t1\t2" + ) + .collectFile(name: 'family.ped', newLine: true) + ch_ped = Channel.of([[id: "test", common:"test"]]) + .combine(ped_file) + + input[0] = Channel.of( + [ + [id:"test", common:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true), + [], + 2 + ], + [ + [id:"test2", common:"test"], +
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi', checkIfExists: true), + 2 + ], + ) + input[1] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)]).collect() + input[2] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)]).collect() + input[3] = Channel.of([ [id: "sites"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz', checkIfExists: true)]).collect() + input[4] = ch_ped + input[5] = Channel.of("disease_103,testN").collectFile(name:"sample_groups.txt") + input[6] = "common" + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + test("homo_sapiens mixed_no_combine - [vcf, vcf] [index, []] 1 - fasta - fai - sites - [ped, ped] - [] - []") { // two VCFs with distinct ids and count 1 each; ch_ped pairs the ped file with both ids; inputs 5 and 6 are left empty + when { + workflow { + """ + ped_file = Channel.of( + "#family_id\tsample_id\tfather\tmother\tsex\tphenotype", + "family1\tnormal\tfatherID\tmotherID\t1\t1", + "family1\ttumour\tfatherID\tmotherID\t1\t2" + ) + .collectFile(name: 'family.ped', newLine: true) + ch_ped = Channel.of([id: "test"], [id: "test2"]) + .combine(ped_file) + + input[0] = Channel.of( + [ + [id:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true), + [], + 1 + ], + [ + [id:"test2"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi', checkIfExists: true), + 1 + ], + ) + input[1] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path +
'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)]).collect() + input[2] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)]).collect() + input[3] = Channel.of([ [id: "sites"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz', checkIfExists: true)]).collect() + input[4] = ch_ped + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + test("homo_sapiens joint_vcf - vcf index [] - fasta - fai - sites - [] - [] - []") { // feeds the VCF and index emitted by BCFTOOLS_MERGE in setup, with [] in the count slot and an empty ped entry + setup { // merges the two sample VCFs so the workflow under test receives a single multi-sample VCF + run ("BCFTOOLS_MERGE") { + script "../../../../modules/nf-core/bcftools/merge" + process { + """ + input[0] = Channel.of([ + [id:"test"], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz.csi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi', checkIfExists: true) + ], + [] + ]) + input[1] = [[], [], []] + """ + } + } + } + when { + workflow { + """ + input[0] = BCFTOOLS_MERGE.out.vcf + .join(BCFTOOLS_MERGE.out.index) + .combine(Channel.of([[]])) + input[1] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)]).collect() + input[2] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)]).collect() + input[3] = Channel.of([ [id: "sites"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz',
checkIfExists: true)]).collect() + input[4] = Channel.of([[id:"test"], []]) + input[5] = [] + input[6] = [] + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + test("homo_sapiens mixed_combine_no_count- [vcf, vcf] [index, []] [] - fasta - fai - sites - ped - samples - common") { // same VCFs, ped, sample groups and "common" key as the mixed_combine test, but the count slot of each tuple is [] + when { + workflow { + """ + ped_file = Channel.of( + "#family_id\tsample_id\tfather\tmother\tsex\tphenotype", + "family1\tnormal\tfatherID\tmotherID\t1\t1", + "family1\ttumour\tfatherID\tmotherID\t1\t2" + ) + .collectFile(name: 'family.ped', newLine: true) + + input[0] = Channel.of( + [ + [id:"test", common:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true), + [], + [] + ], + [ + [id:"test2", common:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi', checkIfExists: true), + [] + ], + ) + input[1] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)]).collect() + input[2] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)]).collect() + input[3] = Channel.of([ [id: "sites"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz', checkIfExists: true)]).collect() + input[4] = Channel.of([[id: "test", common:"test"]]) + .combine(ped_file) + input[5] = Channel.of("disease_103,testN").collectFile(name:"sample_groups.txt") + input[6] = "common" + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("homo_sapiens mixed_combine_no_count- [vcf, vcf] [index, []] [] - fasta - fai
- sites - ped - samples - common -- stub") { // stub variant of the no-count scenario above, with identical inputs + options "-stub" // passes -stub to the Nextflow run for this test only + when { + workflow { + """ + ped_file = Channel.of( + "#family_id\tsample_id\tfather\tmother\tsex\tphenotype", + "family1\tnormal\tfatherID\tmotherID\t1\t1", + "family1\ttumour\tfatherID\tmotherID\t1\t2" + ) + .collectFile(name: 'family.ped', newLine: true) + + input[0] = Channel.of( + [ + [id:"test", common:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr21_22.vcf.gz', checkIfExists: true), + [], + [] + ], + [ + [id:"test2", common:"test"], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi', checkIfExists: true), + [] + ], + ) + input[1] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists:true)]).collect() + input[2] = Channel.of([ [id: "Hg38"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists:true)]).collect() + input[3] = Channel.of([ [id: "sites"], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/popgen/1000GP.chr22.sites.vcf.gz', checkIfExists: true)]).collect() + input[4] = Channel.of([[id: "test", common:"test"]]) + .combine(ped_file) + input[5] = Channel.of("disease_103,testN").collectFile(name:"sample_groups.txt") + input[6] = "common" + """ + } + } + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out, + workflow.out.versions.collect{ path(it).yaml } // NOTE(review): the stored snapshot records an empty list for this entry; confirm the workflow emits a versions channel at all + ).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/vcf_extract_relate_somalier/tests/main.nf.test.snap b/subworkflows/nf-core/vcf_extract_relate_somalier/tests/main.nf.test.snap new file mode 100644 index 000000000..d06ac88ce --- /dev/null +++ b/subworkflows/nf-core/vcf_extract_relate_somalier/tests/main.nf.test.snap @@ -0,0 +1,725
@@ +{ + "homo_sapiens vcf_and_index - vcf index 1 - fasta - fai - sites - [] - [] - []": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.html:md5,637558080f6e4a6b5f992523b11a7367" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.samples.tsv:md5,50fa1bf9b6844b29b57e3b7cc9dcecc5" + ] + ], + "extract": [ + [ + { + "id": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ] + ], + "html": [ + [ + { + "id": "test" + }, + "test.html:md5,637558080f6e4a6b5f992523b11a7367" + ] + ], + "pairs_tsv": [ + [ + { + "id": "test" + }, + "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ] + ], + "samples_tsv": [ + [ + { + "id": "test" + }, + "test.samples.tsv:md5,50fa1bf9b6844b29b57e3b7cc9dcecc5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:31:53.46202097" + }, + "homo_sapiens joint_vcf - vcf index [] - fasta - fai - sites - [] - [] - []": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4", + "NA24385.somalier:md5,45ffea7301f126d886b9ba7ba729e8cc" + ] + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.html:md5,9c9d41a4531b30631643cdaee46b2fca" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.pairs.tsv:md5,f34e11f0c67615be91afca8379d03ca5" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.samples.tsv:md5,63938fbed12480ca48a05ee42315298b" + ] + ], + "extract": [ + [ + { + "id": "test" + }, + [ + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4", + "NA24385.somalier:md5,45ffea7301f126d886b9ba7ba729e8cc" + ] + ] + ], + "html": [ + [ + { + "id": "test" + }, + "test.html:md5,9c9d41a4531b30631643cdaee46b2fca" + ] + ], + "pairs_tsv": [ + [ + { + "id": "test" + }, + 
"test.pairs.tsv:md5,f34e11f0c67615be91afca8379d03ca5" + ] + ], + "samples_tsv": [ + [ + { + "id": "test" + }, + "test.samples.tsv:md5,63938fbed12480ca48a05ee42315298b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:32:28.912258155" + }, + "homo_sapiens minimal - vcf 1 - fasta - fai - sites - [] - [] - []": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.html:md5,637558080f6e4a6b5f992523b11a7367" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.samples.tsv:md5,50fa1bf9b6844b29b57e3b7cc9dcecc5" + ] + ], + "extract": [ + [ + { + "id": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ] + ], + "html": [ + [ + { + "id": "test" + }, + "test.html:md5,637558080f6e4a6b5f992523b11a7367" + ] + ], + "pairs_tsv": [ + [ + { + "id": "test" + }, + "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ] + ], + "samples_tsv": [ + [ + { + "id": "test" + }, + "test.samples.tsv:md5,50fa1bf9b6844b29b57e3b7cc9dcecc5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:31:45.554748747" + }, + "homo_sapiens mixed_combine_no_count- [vcf, vcf] [index, []] [] - fasta - fai - sites - ped - samples - common": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "common": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ], + [ + { + "id": "test2", + "common": "test" + }, + "NA24385.somalier:md5,45ffea7301f126d886b9ba7ba729e8cc" + ] + ], + "1": [ + [ + { + "id": "test", + "common": "test" + }, + "test.html:md5,9c9d41a4531b30631643cdaee46b2fca" + ] + ], + "2": [ + [ + { + "id": "test", + "common": "test" + }, + "test.pairs.tsv:md5,f34e11f0c67615be91afca8379d03ca5" + ] + ], + "3": [ + [ + { + "id": "test", 
+ "common": "test" + }, + "test.samples.tsv:md5,63938fbed12480ca48a05ee42315298b" + ] + ], + "extract": [ + [ + { + "id": "test", + "common": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ], + [ + { + "id": "test2", + "common": "test" + }, + "NA24385.somalier:md5,45ffea7301f126d886b9ba7ba729e8cc" + ] + ], + "html": [ + [ + { + "id": "test", + "common": "test" + }, + "test.html:md5,9c9d41a4531b30631643cdaee46b2fca" + ] + ], + "pairs_tsv": [ + [ + { + "id": "test", + "common": "test" + }, + "test.pairs.tsv:md5,f34e11f0c67615be91afca8379d03ca5" + ] + ], + "samples_tsv": [ + [ + { + "id": "test", + "common": "test" + }, + "test.samples.tsv:md5,63938fbed12480ca48a05ee42315298b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:32:38.393763261" + }, + "homo_sapiens mixed_combine - [vcf, vcf] [index, []] 2 - fasta - fai - sites - ped - samples - common": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "common": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ], + [ + { + "id": "test2", + "common": "test" + }, + "NA24385.somalier:md5,45ffea7301f126d886b9ba7ba729e8cc" + ] + ], + "1": [ + [ + { + "id": "test", + "common": "test" + }, + "test.html:md5,9c9d41a4531b30631643cdaee46b2fca" + ] + ], + "2": [ + [ + { + "id": "test", + "common": "test" + }, + "test.pairs.tsv:md5,f34e11f0c67615be91afca8379d03ca5" + ] + ], + "3": [ + [ + { + "id": "test", + "common": "test" + }, + "test.samples.tsv:md5,63938fbed12480ca48a05ee42315298b" + ] + ], + "extract": [ + [ + { + "id": "test", + "common": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ], + [ + { + "id": "test2", + "common": "test" + }, + "NA24385.somalier:md5,45ffea7301f126d886b9ba7ba729e8cc" + ] + ], + "html": [ + [ + { + "id": "test", + "common": "test" + }, + "test.html:md5,9c9d41a4531b30631643cdaee46b2fca" + ] + ], + "pairs_tsv": [ + [ + { + "id": "test", + "common": "test" + }, + 
"test.pairs.tsv:md5,f34e11f0c67615be91afca8379d03ca5" + ] + ], + "samples_tsv": [ + [ + { + "id": "test", + "common": "test" + }, + "test.samples.tsv:md5,63938fbed12480ca48a05ee42315298b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:32:12.112261901" + }, + "homo_sapiens mixed_no_combine - [vcf, vcf] [index, []] 1 - fasta - fai - sites - [ped, ped] - [] - []": { + "content": [ + { + "0": [ + [ + { + "id": "test2" + }, + "NA24385.somalier:md5,45ffea7301f126d886b9ba7ba729e8cc" + ], + [ + { + "id": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ] + ], + "1": [ + [ + { + "id": "test2" + }, + "test2.html:md5,66352f34ee7f06ac1427b8d0386ff909" + ], + [ + { + "id": "test" + }, + "test.html:md5,637558080f6e4a6b5f992523b11a7367" + ] + ], + "2": [ + [ + { + "id": "test2" + }, + "test2.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ], + [ + { + "id": "test" + }, + "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ] + ], + "3": [ + [ + { + "id": "test2" + }, + "test2.samples.tsv:md5,ff3aa68dffa5582de08779f513935960" + ], + [ + { + "id": "test" + }, + "test.samples.tsv:md5,50fa1bf9b6844b29b57e3b7cc9dcecc5" + ] + ], + "extract": [ + [ + { + "id": "test2" + }, + "NA24385.somalier:md5,45ffea7301f126d886b9ba7ba729e8cc" + ], + [ + { + "id": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ] + ], + "html": [ + [ + { + "id": "test2" + }, + "test2.html:md5,66352f34ee7f06ac1427b8d0386ff909" + ], + [ + { + "id": "test" + }, + "test.html:md5,637558080f6e4a6b5f992523b11a7367" + ] + ], + "pairs_tsv": [ + [ + { + "id": "test2" + }, + "test2.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ], + [ + { + "id": "test" + }, + "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ] + ], + "samples_tsv": [ + [ + { + "id": "test2" + }, + "test2.samples.tsv:md5,ff3aa68dffa5582de08779f513935960" + ], + [ + { + "id": "test" + }, + 
"test.samples.tsv:md5,50fa1bf9b6844b29b57e3b7cc9dcecc5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:32:20.706811494" + }, + "homo_sapiens with_ped - vcf 1 - fasta - fai - sites - ped - [] - []": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.html:md5,637558080f6e4a6b5f992523b11a7367" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test.samples.tsv:md5,50fa1bf9b6844b29b57e3b7cc9dcecc5" + ] + ], + "extract": [ + [ + { + "id": "test" + }, + "NA12878.somalier:md5,39256adac134a8f304383250f8e062a4" + ] + ], + "html": [ + [ + { + "id": "test" + }, + "test.html:md5,637558080f6e4a6b5f992523b11a7367" + ] + ], + "pairs_tsv": [ + [ + { + "id": "test" + }, + "test.pairs.tsv:md5,4a0988fc3c0fe5cfd5dd205fe6755595" + ] + ], + "samples_tsv": [ + [ + { + "id": "test" + }, + "test.samples.tsv:md5,50fa1bf9b6844b29b57e3b7cc9dcecc5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:32:02.835692749" + }, + "homo_sapiens mixed_combine_no_count- [vcf, vcf] [index, []] [] - fasta - fai - sites - ped - samples - common -- stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "common": "test" + }, + "test.somalier:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "common": "test" + }, + "test2.somalier:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "common": "test" + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "common": "test" + }, + "test.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "common": "test" + }, + "test.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "extract": [ 
+ [ + { + "id": "test", + "common": "test" + }, + "test.somalier:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + { + "id": "test2", + "common": "test" + }, + "test2.somalier:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "html": [ + [ + { + "id": "test", + "common": "test" + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "pairs_tsv": [ + [ + { + "id": "test", + "common": "test" + }, + "test.pairs.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "samples_tsv": [ + [ + { + "id": "test", + "common": "test" + }, + "test.samples.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + }, + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-02-18T17:32:46.952074177" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/vcf_extract_relate_somalier/tests/nextflow.config b/subworkflows/nf-core/vcf_extract_relate_somalier/tests/nextflow.config new file mode 100644 index 000000000..4acfdfeaf --- /dev/null +++ b/subworkflows/nf-core/vcf_extract_relate_somalier/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: "BCFTOOLS_MERGE" { + ext.args = "--output-type z --write-index=tbi" + } +} diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 689858a3b..a67750889 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -870,6 +870,34 @@ workflow RAREDISEASE { .map { meta, value -> ['peddy/', [meta, value]] } } + // Somalier extract + relate (requires params.run_somalier and params.somalier_sites_vcf) + if (params.run_somalier) { + // prepare VCF channel: [ meta, vcf, tbi, count ] as expected by the subworkflow + ch_vcfs_for_somalier = CALL_SNV.out.genome_vcf + .join(CALL_SNV.out.genome_tabix, failOnMismatch:true, failOnDuplicate:true) + .map { meta, vcf, tbi -> [ meta, vcf, tbi, [] ] } + + // somalier sites VCF supplied via params.somalier_sites_vcf, wrapped as [ meta, vcf ] the way the subworkflow tests pass it + ch_somalier_sites = channel.value( [ [id:'sites'], file(params.somalier_sites_vcf) ] ) + + // the subworkflow test suite supplies seven inputs; the last two (sample groups file, common meta key) are unused here and passed as [] + // NOTE(review): the subworkflow tests key the ped tuple with the same meta as the VCF tuple; confirm [id:'pedigree'] is matched as intended + VCF_EXTRACT_RELATE_SOMALIER( +
ch_vcfs_for_somalier, + ch_genome_fasta, + ch_genome_fai, + ch_somalier_sites, + ch_pedfile.map{ ped -> return[[id:'pedigree'], ped] }, + [], + [] + ) + + ch_somalier_publish = VCF_EXTRACT_RELATE_SOMALIER.out.html + .mix(VCF_EXTRACT_RELATE_SOMALIER.out.pairs_tsv) + .mix(VCF_EXTRACT_RELATE_SOMALIER.out.samples_tsv) + .map { meta, value -> ['somalier/', [meta, value]] } + } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Generate CGH files from sequencing data