diff --git a/CHANGELOG.md b/CHANGELOG.md
index f37c1d751..a3f8891a8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,18 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## 3.1.0 - XXXXX [2026-XX-XX]
+
+### `Added`
+
+- FastDup module from nf-core, integrated into the pipeline as an alternative to Picard MarkDuplicates. #XXX
+
+### Parameters
+
+| Old parameter | New parameter |
+| ------------------- | --------------------------- |
+| | duplicates_marker |
+
## 3.0.0 - Mario [2026-05-12]
### `Added`
diff --git a/conf/modules/align_bwa_bwamem2_bwameme.config b/conf/modules/align_bwa_bwamem2_bwameme.config
index da2cd83ff..52e022661 100644
--- a/conf/modules/align_bwa_bwamem2_bwameme.config
+++ b/conf/modules/align_bwa_bwamem2_bwameme.config
@@ -58,4 +58,9 @@ process {
ext.args = "--TMP_DIR ."
ext.prefix = { "${meta.id}_sorted_md" }
}
-}
+
+    withName: '.*ALIGN:ALIGN_BWA_BWAMEM2_BWAMEME:FASTDUP' {
+        ext.args = "--create-index" // FASTDUP's bai output is optional; this flag makes it write the index next to the BAM
+        ext.prefix = { "${meta.id}_sorted_md" } // same output prefix as the preceding selector in this file
+    }
+}
diff --git a/docs/usage.md b/docs/usage.md
index e5963d4d2..ce908c2ef 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -10,25 +10,25 @@ Table of contents:
- [Run nf-core/raredisease with test data](#run-nf-coreraredisease-with-test-data)
- [Updating the pipeline](#updating-the-pipeline)
- [Run nf-core/raredisease with your data](#run-nf-coreraredisease-with-your-data)
- - [Samplesheet](#samplesheet)
- - [Samplesheet for BAM file input](#samplesheet-for-bam-file-input)
- - [Reference files and parameters](#reference-files-and-parameters)
- - [1. Alignment](#1-alignment)
- - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files)
- - [3. Repeat expansions](#3-repeat-expansions)
- - [4. Variant calling - SNV](#4-variant-calling---snv)
- - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants)
- - [6. Copy number variant calling](#6-copy-number-variant-calling)
- - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking)
- - [8. SV annotation \& Ranking](#8-sv-annotation--ranking)
- - [9. Mitochondrial annotation](#9-mitochondrial-annotation)
- - [10. Mobile element calling](#10-mobile-element-calling)
- - [11. Mobile element annotation](#11-mobile-element-annotation)
- - [12. Variant evaluation](#12-variant-evaluation)
- - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens)
- - [Run the pipeline](#run-the-pipeline)
- - [Direct input in CLI](#direct-input-in-cli)
- - [Import from a config file (recommended)](#import-from-a-config-file-recommended)
+ - [Samplesheet](#samplesheet)
+ - [Samplesheet for BAM file input](#samplesheet-for-bam-file-input)
+ - [Reference files and parameters](#reference-files-and-parameters)
+ - [1. Alignment](#1-alignment)
+ - [2. QC stats from the alignment files](#2-qc-stats-from-the-alignment-files)
+ - [3. Repeat expansions](#3-repeat-expansions)
+ - [4. Variant calling - SNV](#4-variant-calling---snv)
+ - [5. Variant calling - Structural variants](#5-variant-calling---structural-variants)
+ - [6. Copy number variant calling](#6-copy-number-variant-calling)
+ - [7. SNV annotation \& Ranking](#7-snv-annotation--ranking)
+ - [8. SV annotation \& Ranking](#8-sv-annotation--ranking)
+ - [9. Mitochondrial annotation](#9-mitochondrial-annotation)
+ - [10. Mobile element calling](#10-mobile-element-calling)
+ - [11. Mobile element annotation](#11-mobile-element-annotation)
+ - [12. Variant evaluation](#12-variant-evaluation)
+ - [13. Prepare data for CNV visualisation in Gens](#13-prepare-data-for-cnv-visualisation-in-gens)
+ - [Run the pipeline](#run-the-pipeline)
+ - [Direct input in CLI](#direct-input-in-cli)
+ - [Import from a config file (recommended)](#import-from-a-config-file-recommended)
- [Best practices](#best-practices)
- [Core Nextflow arguments](#core-nextflow-arguments)
- [`-profile`](#-profile)
@@ -211,6 +211,7 @@ The mandatory and optional parameters for each category are tabulated below.
| | extract_alignments |
| | restrict_to_contigs7 |
| | exclude_alt8 |
+| | duplicates_marker9 |
1Default value is bwamem2. Other alternatives are bwa, bwameme and sentieon (requires valid Sentieon license ).
2Analysis set reference genome in fasta format, first 25 contigs need to be chromosome 1-22, X, Y and the mitochondria.
@@ -220,6 +221,7 @@ The mandatory and optional parameters for each category are tabulated below.
6Default value is 40. Used only by fastp.
7Used to limit your analysis to specific contigs. Can be used to remove alignments to unplaced contigs to minimize potential errors. This parameter should be used in conjunction with the `extract_alignments` parameter.
8When set to true, alignments to alt/unplaced contigs are removed after alignment using samtools view, retaining only primary chromosomes (GRCh37: 1-22,X,Y,MT / GRCh38: chr1-chr22,chrX,chrY,chrM). Note that this will affect all downstream variant calling, as variants will only be called on these primary chromosomes.
+9Default value is "markduplicates". Other alternative is "fastdup".
##### 2. QC stats from the alignment files
diff --git a/main.nf b/main.nf
index f847e20e6..683b10da6 100644
--- a/main.nf
+++ b/main.nf
@@ -52,6 +52,7 @@ workflow NFCORE_RAREDISEASE {
val_call_interval
val_concatenate_snv_calls
val_skip_split_multiallelics
+ val_duplicates_marker
val_exclude_alt
val_extract_alignments
val_fai
@@ -481,6 +482,7 @@ workflow NFCORE_RAREDISEASE {
val_cadd_resources,
val_concatenate_snv_calls,
val_skip_split_multiallelics,
+ val_duplicates_marker,
val_exclude_alt,
val_extract_alignments,
val_genome,
@@ -570,6 +572,7 @@ workflow {
params.call_interval,
params.concatenate_snv_calls,
params.skip_split_multiallelics,
+ params.duplicates_marker,
params.exclude_alt,
params.extract_alignments,
params.fai,
diff --git a/modules.json b/modules.json
index 4bb0c67b5..63e72f7f2 100644
--- a/modules.json
+++ b/modules.json
@@ -146,6 +146,11 @@
"git_sha": "2ad28db4a5a82972c1210dfa7c85f035bb80c4de",
"installed_by": ["modules"]
},
+ "fastdup": {
+ "branch": "master",
+ "git_sha": "4aab34a29f9ca1730e2f7d194261f5145f53d56d",
+ "installed_by": ["modules"]
+ },
"fastp": {
"branch": "master",
"git_sha": "a331ecfd1aa48b2b2298aab23bb4516c800e410b",
diff --git a/modules/nf-core/fastdup/environment.yml b/modules/nf-core/fastdup/environment.yml
new file mode 100644
index 000000000..ab55fff04
--- /dev/null
+++ b/modules/nf-core/fastdup/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+ - conda-forge
+ - bioconda
+dependencies:
+ - bioconda::fastdup=1.0.0
diff --git a/modules/nf-core/fastdup/main.nf b/modules/nf-core/fastdup/main.nf
new file mode 100644
index 000000000..f0cf9b977
--- /dev/null
+++ b/modules/nf-core/fastdup/main.nf
@@ -0,0 +1,65 @@
+// FASTDUP: mark duplicate reads in a SAM/BAM file with FastDup.
+//
+// Input:
+//   meta  - Groovy map with sample information; meta.id is the default output prefix
+//   reads - alignment file handed to `fastdup --input`
+// Output:
+//   bam              - `${prefix}.bam`, written through `--output`
+//   metrics          - `${prefix}.metrics.txt`, written through `--metrics`
+//   bai / csi        - optional index files; present when task.ext.args enables indexing (e.g. `--create-index`)
+//   versions_fastdup - [ process name, 'fastdup', output of `fastdup --version` ] on the `versions` topic
+process FASTDUP {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c5/c55070589353b3e1837ca3414c4f182d3674cbf55a64edee07e8bf75370762a9/data':
+        'community.wave.seqera.io/library/fastdup:1.0.0--a9b28abff06bb2bb' }"
+
+    input:
+    tuple val(meta), path(reads)
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    tuple val(meta), path("*.metrics.txt"), emit: metrics
+    tuple val(meta), path("*.bai"), emit: bai, optional: true
+    tuple val(meta), path("*.csi"), emit: csi, optional: true
+    tuple val("${task.process}"), val('fastdup'), eval("fastdup --version"), topic: versions, emit: versions_fastdup
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Refuse to run when the output BAM name would collide with the staged input file.
+    if ("${reads}" == "${prefix}.bam") {
+        error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!")
+    }
+    """
+    fastdup \\
+        $args \\
+        --input $reads \\
+        --metrics ${prefix}.metrics.txt \\
+        --output ${prefix}.bam \\
+        --num-threads $task.cpus
+
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Real runs name the index after the BAM (`<prefix>.bam.bai` / `<prefix>.bam.csi`, as recorded
+    // in the module's test snapshot); the stub creates the same names so stub runs mirror real ones.
+    def index_command = args.contains("--index-format CSI") ? "touch ${prefix}.bam.csi"
+        : args.contains("--create-index") ? "touch ${prefix}.bam.bai" : ""
+
+    """
+
+    touch ${prefix}.bam
+    ${index_command}
+    touch ${prefix}.metrics.txt
+
+    """
+}
diff --git a/modules/nf-core/fastdup/meta.yml b/modules/nf-core/fastdup/meta.yml
new file mode 100644
index 000000000..cdc4496ca
--- /dev/null
+++ b/modules/nf-core/fastdup/meta.yml
@@ -0,0 +1,101 @@
+name: "fastdup"
+description: "FastDup is a tool designed to locate and tag duplicate reads in a coordinate-sorted
+ SAM or BAM file, using the same core algorithm as Picard MarkDuplicates."
+keywords:
+ - duplicate
+ - BAM
+ - reads
+tools:
+ - "fastdup":
+ description: "FastDup is a tool designed to locate and tag duplicate reads in
+ a coordinate-sorted SAM or BAM file, using the same core algorithm as Picard
+ MarkDuplicates."
+ homepage: "https://github.com/zzhofict/FastDup"
+ documentation: "https://github.com/zzhofict/FastDup"
+ tool_dev_url: "https://github.com/zzhofict/FastDup"
+ doi: "10.1093/bioinformatics/btaf633"
+ licence:
+ - "MIT"
+ identifier: "biotools:fastdup"
+input:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - reads:
+ type: file
+ description: Sequence reads file, can be SAM/BAM format
+ pattern: "*.{bam,sam}"
+ ontologies: []
+output:
+ bam:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "*.bam":
+ type: file
+ description: BAM file with duplicate reads marked/removed.
+ pattern: "*.{bam}"
+ ontologies:
+ - edam: "http://edamontology.org/format_2572"
+ versions_fastdup:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - fastdup:
+ type: string
+ description: The name of the tool
+ - fastdup --version:
+ type: eval
+ description: The expression to obtain the version of the tool
+ metrics:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "*.metrics.txt":
+ type: file
+ description: Duplicate metrics file generated by fastdup.
+ pattern: "*.{metrics.txt}"
+ ontologies: []
+ bai:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "*.bai":
+ type: file
+ description: BAI index file generated by fastdup.
+ pattern: "*.{bai}"
+ ontologies: []
+ csi:
+ - - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - "*.csi":
+ type: file
+ description: CSI index file generated by fastdup.
+ pattern: "*.{csi}"
+ ontologies: []
+topics:
+ versions:
+ - - ${task.process}:
+ type: string
+ description: The name of the process
+ - fastdup:
+ type: string
+ description: The name of the tool
+ - fastdup --version:
+ type: eval
+ description: The expression to obtain the version of the tool
+authors:
+ - "@emmadizdarevic"
+maintainers:
+ - "@emmadizdarevic"
diff --git a/modules/nf-core/fastdup/tests/main.nf.test b/modules/nf-core/fastdup/tests/main.nf.test
new file mode 100644
index 000000000..5933c660b
--- /dev/null
+++ b/modules/nf-core/fastdup/tests/main.nf.test
@@ -0,0 +1,138 @@
+nextflow_process {
+
+ name "Test Process FASTDUP"
+ script "../main.nf"
+ process "FASTDUP"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fastdup"
+
+ test("sarscov2 - bam") {
+
+ when {
+ process {
+ """
+
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ file(process.out.metrics[0][1]).readLines()[0..2],
+ process.out.bai,
+ process.out.csi,
+ process.out.findAll { key, val -> key.startsWith("versions") }
+
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(
+ process.out
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam - create bai index") {
+
+ config "./nextflow.config"
+
+ when {
+ params{
+ module_args = "--create-index"
+ }
+ process {
+ """
+
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ file(process.out.metrics[0][1]).name,
+ process.out.bai,
+ process.out.csi,
+ process.out.findAll { key, val -> key.startsWith("versions") }
+
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - bam - create csi index") {
+
+ config "./nextflow.config"
+
+ when {
+ params{
+ module_args = "--create-index --index-format CSI"
+ }
+ process {
+ """
+
+ input[0] = [
+ [ id:'test' ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true),
+ ]
+ """
+ }
+ }
+
+ then {
+ assert process.success
+ assertAll(
+ { assert snapshot(
+ bam(process.out.bam[0][1]).getReadsMD5(),
+ file(process.out.metrics[0][1]).name,
+ process.out.bai,
+ process.out.csi,
+ process.out.findAll { key, val -> key.startsWith("versions") }
+
+ ).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/fastdup/tests/main.nf.test.snap b/modules/nf-core/fastdup/tests/main.nf.test.snap
new file mode 100644
index 000000000..2342099cf
--- /dev/null
+++ b/modules/nf-core/fastdup/tests/main.nf.test.snap
@@ -0,0 +1,163 @@
+{
+ "sarscov2 - bam - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+
+ ],
+ "4": [
+ [
+ "FASTDUP",
+ "fastdup",
+ "1.0.0"
+ ]
+ ],
+ "bai": [
+
+ ],
+ "bam": [
+ [
+ {
+ "id": "test"
+ },
+ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "csi": [
+
+ ],
+ "metrics": [
+ [
+ {
+ "id": "test"
+ },
+ "test.metrics.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions_fastdup": [
+ [
+ "FASTDUP",
+ "fastdup",
+ "1.0.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-06-02T13:15:04.742292",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "sarscov2 - bam - create csi index": {
+ "content": [
+ "894549ee3ced6b5ca2eed2563a985217",
+ "test.metrics.txt",
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.bam.csi:md5,afdc79c180dd73c1c8dadbf65af0788e"
+ ]
+ ],
+ {
+ "versions_fastdup": [
+ [
+ "FASTDUP",
+ "fastdup",
+ "1.0.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-06-03T10:08:41.493463",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "sarscov2 - bam": {
+ "content": [
+ "894549ee3ced6b5ca2eed2563a985217",
+ [
+ "## StringHeader",
+ "# fastdup --input test.paired_end.sorted.bam --metrics test.metrics.txt --output test.bam --num-threads 2",
+ "## StringHeader"
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ {
+ "versions_fastdup": [
+ [
+ "FASTDUP",
+ "fastdup",
+ "1.0.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-06-03T10:11:58.499113",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
+ "sarscov2 - bam - create bai index": {
+ "content": [
+ "894549ee3ced6b5ca2eed2563a985217",
+ "test.metrics.txt",
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.bam.bai:md5,412de50af8da0544bf151011ae739a2d"
+ ]
+ ],
+ [
+
+ ],
+ {
+ "versions_fastdup": [
+ [
+ "FASTDUP",
+ "fastdup",
+ "1.0.0"
+ ]
+ ]
+ }
+ ],
+ "timestamp": "2026-06-03T10:08:38.477602",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fastdup/tests/nextflow.config b/modules/nf-core/fastdup/tests/nextflow.config
new file mode 100644
index 000000000..7dafe3ea2
--- /dev/null
+++ b/modules/nf-core/fastdup/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: 'FASTDUP' {
+ ext.args = {params.module_args}
+ }
+}
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 4baa3cf23..14e760061 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -110,6 +110,7 @@ params {
// Alignment
aligner = 'bwamem2'
mt_aligner = 'bwamem2'
+ duplicates_marker = 'markduplicates'
mbuffer_mem = 3072
samtools_sort_threads = 4
min_trimmed_length = 40
diff --git a/nextflow_schema.json b/nextflow_schema.json
index a5b135431..7daa5813f 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -644,6 +644,13 @@
"fa_icon": "fas fa-align-center",
"enum": ["bwa", "bwamem2", "sentieon"]
},
+ "duplicates_marker": {
+ "type": "string",
+ "default": "markduplicates",
+                    "description": "Specifies the tool to use for marking duplicates. The default is 'markduplicates'; 'fastdup' is a faster alternative.",
+ "fa_icon": "fas fa-align-center",
+ "enum": ["markduplicates", "fastdup"]
+ },
"samtools_sort_threads": {
"type": "integer",
"default": 4,
diff --git a/subworkflows/local/align/main.nf b/subworkflows/local/align/main.nf
index 932ace4f8..6547b0741 100644
--- a/subworkflows/local/align/main.nf
+++ b/subworkflows/local/align/main.nf
@@ -35,6 +35,7 @@ workflow ALIGN {
skip_fastp // boolean
val_aligner // string: 'bwa', 'bwamem2', 'bwameme', or 'sentieon'
val_analysis_type // string: 'wgs', 'wes', or 'mito'
+ val_duplicates_marker // string: 'markduplicates' or 'fastdup', default: 'markduplicates'
val_exclude_alt // boolean
val_extract_alignments // boolean
val_mbuffer_mem // integer: [mandatory] memory in megabytes
@@ -100,6 +101,7 @@ workflow ALIGN {
ch_genome_fasta,
ch_input_reads,
val_aligner,
+ val_duplicates_marker,
val_extract_alignments,
val_mbuffer_mem,
val_platform,
diff --git a/subworkflows/local/align/tests/main.nf.test b/subworkflows/local/align/tests/main.nf.test
index 7a8e427ac..478002a04 100644
--- a/subworkflows/local/align/tests/main.nf.test
+++ b/subworkflows/local/align/tests/main.nf.test
@@ -113,15 +113,16 @@ nextflow_workflow {
input[18] = false
input[19] = "bwamem2"
input[20] = "wgs"
- input[21] = true
- input[22] = false
- input[23] = 3072
- input[24] = "bwamem2"
- input[25] = "illumina"
- input[26] = false
- input[27] = 4
- input[28] = true
+ input[21] = "markduplicates"
+ input[22] = true
+ input[23] = false
+ input[24] = 3072
+ input[25] = "bwamem2"
+ input[26] = "illumina"
+ input[27] = false
+ input[28] = 4
input[29] = true
+ input[30] = true
"""
}
}
@@ -220,15 +221,16 @@ nextflow_workflow {
input[18] = false
input[19] = "bwamem2"
input[20] = "wes"
- input[21] = false
+ input[21] = "markduplicates"
input[22] = false
- input[23] = 3072
- input[24] = "bwamem2"
- input[25] = "illumina"
- input[26] = false
- input[27] = 4
- input[28] = true
- input[29] = false
+ input[23] = false
+ input[24] = 3072
+ input[25] = "bwamem2"
+ input[26] = "illumina"
+ input[27] = false
+ input[28] = 4
+ input[29] = true
+ input[30] = false
"""
}
}
@@ -318,15 +320,16 @@ nextflow_workflow {
input[18] = true
input[19] = "bwameme"
input[20] = "wgs"
- input[21] = false
+ input[21] = "markduplicates"
input[22] = false
- input[23] = 3072
- input[24] = "bwamem2"
- input[25] = "illumina"
- input[26] = false
- input[27] = 4
- input[28] = true
- input[29] = false
+ input[23] = false
+ input[24] = 3072
+ input[25] = "bwamem2"
+ input[26] = "illumina"
+ input[27] = false
+ input[28] = 4
+ input[29] = true
+ input[30] = false
"""
}
}
diff --git a/subworkflows/local/align_bwa_bwamem2_bwameme/main.nf b/subworkflows/local/align_bwa_bwamem2_bwameme/main.nf
index 6ea0434de..69eaffb45 100644
--- a/subworkflows/local/align_bwa_bwamem2_bwameme/main.nf
+++ b/subworkflows/local/align_bwa_bwamem2_bwameme/main.nf
@@ -5,6 +5,7 @@
include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main'
include { BWAMEME_MEM } from '../../../modules/nf-core/bwameme/mem/main'
include { BWA_MEM as BWA } from '../../../modules/nf-core/bwa/mem/main'
+include { FASTDUP } from '../../../modules/nf-core/fastdup/main'
include { PICARD_MARKDUPLICATES as MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_ALIGN } from '../../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_EXTRACT } from '../../../modules/nf-core/samtools/index/main'
@@ -23,6 +24,7 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME {
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_input_reads // channel: [mandatory] [ val(meta), path(reads_input) ]
val_aligner // string: 'bwa', 'bwamem2', 'bwameme', or 'sentieon'
+ val_duplicates_marker // string: 'markduplicates' or 'fastdup', default: 'markduplicates'
val_extract_alignments // boolean
val_mbuffer_mem // integer: [mandatory] default: 3072
val_platform // string: [mandatory] default: illumina
@@ -74,19 +76,35 @@ workflow ALIGN_BWA_BWAMEM2_BWAMEME {
}
// Marking duplicates
- MARKDUPLICATES ( prepared_bam , ch_genome_fasta, ch_genome_fai )
- SAMTOOLS_INDEX_MARKDUP ( MARKDUPLICATES.out.bam )
+    if (val_duplicates_marker == "markduplicates") { // MARKDUPLICATES, then a separate SAMTOOLS_INDEX_MARKDUP step
+        MARKDUPLICATES ( prepared_bam, ch_genome_fasta, ch_genome_fai )
+        SAMTOOLS_INDEX_MARKDUP (MARKDUPLICATES.out.bam)
+
+        ch_marked_bam = MARKDUPLICATES.out.bam
+        ch_marked_bai = SAMTOOLS_INDEX_MARKDUP.out.bai
+        ch_marked_csi = SAMTOOLS_INDEX_MARKDUP.out.csi
+        ch_metrics = MARKDUPLICATES.out.metrics
+    } else { // every other value falls through to FASTDUP
+        FASTDUP ( prepared_bam )
+        ch_marked_bam = FASTDUP.out.bam
+        ch_marked_bai = FASTDUP.out.bai // optional FASTDUP output: empty unless ext.args asks for an index
+        ch_marked_csi = FASTDUP.out.csi // optional FASTDUP output: empty unless ext.args asks for a CSI index
+        ch_metrics = FASTDUP.out.metrics
+    }
+
+ ch_publish = ch_marked_bam
+ .mix(ch_metrics)
+ .mix(ch_marked_bai)
+ .mix(ch_marked_csi)
+ .map {meta, value -> ['alignment/', [meta, value]] }
- ch_publish = MARKDUPLICATES.out.bam
- .mix(MARKDUPLICATES.out.metrics)
- .mix(SAMTOOLS_INDEX_MARKDUP.out.bai)
- .mix(SAMTOOLS_INDEX_MARKDUP.out.csi)
- .map { meta, value -> ['alignment/', [meta, value]] }
emit:
- marked_bai = SAMTOOLS_INDEX_MARKDUP.out.bai // channel: [ val(meta), path(bai) ]
- marked_bam = MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ]
- metrics = MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ]
- stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ]
- publish = ch_publish // channel: [ val(destination), val(value) ]
+
+    marked_bam = ch_marked_bam // channel: [ val(meta), path(bam) ], from MARKDUPLICATES or FASTDUP
+    marked_bai = ch_marked_bai // channel: [ val(meta), path(bai) ], from SAMTOOLS_INDEX_MARKDUP or FASTDUP (optional output there)
+    marked_csi = ch_marked_csi // channel: [ val(meta), path(csi) ], from SAMTOOLS_INDEX_MARKDUP or FASTDUP (optional output there)
+    metrics = ch_metrics // channel: [ val(meta), path(metrics) ], from MARKDUPLICATES or FASTDUP
+    stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ]
+    publish = ch_publish // channel: [ val(destination), val(value) ]
}
diff --git a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test
index 7e64d0355..fcc437987 100644
--- a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test
+++ b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test
@@ -10,6 +10,7 @@ nextflow_workflow {
tag "bwa/mem"
tag "bwamem2/mem"
tag "bwameme/mem"
+ tag "fastdup"
tag "samtools/index"
tag "samtools/stats"
tag "samtools/merge"
@@ -54,10 +55,12 @@ nextflow_workflow {
]
])
input[6] = "bwamem2"
- input[7] = true
- input[8] = 3072
- input[9] = "illumina"
- input[10] = 4
+ input[7] = "markduplicates"
+ input[8] = true
+ input[9] = 3072
+ input[10] = "illumina"
+ input[11] = 4
+
"""
}
}
@@ -114,10 +117,137 @@ nextflow_workflow {
]
])
input[6] = "bwameme"
- input[7] = true
- input[8] = 3072
- input[9] = "illumina"
- input[10] = 4
+ input[7] = "markduplicates"
+ input[8] = true
+ input[9] = 3072
+ input[10] = "illumina"
+ input[11] = 4
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.publish.flatten()
+ .findAll { it instanceof String && (it.startsWith('/') || it.contains('.')) }
+ .collect { new File(it).name }
+ .sort(),
+ workflow.out.marked_bam.collect { meta, bamfile -> [ meta, bam(bamfile).getHeaderMD5() ] },
+ workflow.out.marked_bam.collect { meta, bamfile -> [ meta, bam(bamfile).getReadsMD5() ] }
+ ).match()
+ }
+ )
+ }
+ }
+
+
+ test("align bwameme, FASTDUP with bai index") {
+
+ setup {
+ run("BWAMEME_INDEX") {
+ script "modules/nf-core/bwameme/index/main.nf"
+ process {
+ """
+ input[0] = channel.of([
+ [id:'sarscov2'],
+ file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+ }
+
+ when {
+ params {
+ sarscov_testdata_base_path= 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
+ restrict_to_contigs = "MT192765.1"
+ fastdup_args = "--create-index"
+ }
+ workflow {
+ """
+ input[0] = [[:],[]]
+ input[1] = [[:],[]]
+ input[2] = BWAMEME_INDEX.out.index
+ input[3] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]])
+ input[4] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
+ input[5] = channel.of([
+ [ id:'test', sample:'test', single_end:false, num_lanes:1, read_group:"\'@RG\\\\tID:test\\\\tPL:illumina\\\\tSM:test\'" ], // meta map
+ [
+ file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[6] = "bwameme"
+ input[7] = "fastdup"
+ input[8] = true
+ input[9] = 3072
+ input[10] = "illumina"
+ input[11] = 4
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.publish.flatten()
+ .findAll { it instanceof String && (it.startsWith('/') || it.contains('.')) }
+ .collect { new File(it).name }
+ .sort(),
+ workflow.out.marked_bam.collect { meta, bamfile -> [ meta, bam(bamfile).getHeaderMD5() ] },
+ workflow.out.marked_bam.collect { meta, bamfile -> [ meta, bam(bamfile).getReadsMD5() ] }
+ ).match()
+ }
+ )
+ }
+ }
+
+
+ test("align bwameme, FASTDUP with csi index") {
+
+ setup {
+ run("BWAMEME_INDEX") {
+ script "modules/nf-core/bwameme/index/main.nf"
+ process {
+ """
+ input[0] = channel.of([
+ [id:'sarscov2'],
+ file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ])
+ """
+ }
+ }
+ }
+
+ when {
+ params {
+ sarscov_testdata_base_path= 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/'
+ restrict_to_contigs = "MT192765.1"
+ fastdup_args = "--create-index --index-format CSI"
+ }
+ workflow {
+ """
+ input[0] = [[:],[]]
+ input[1] = [[:],[]]
+ input[2] = BWAMEME_INDEX.out.index
+ input[3] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]])
+ input[4] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
+ input[5] = channel.of([
+ [ id:'test', sample:'test', single_end:false, num_lanes:1, read_group:"\'@RG\\\\tID:test\\\\tPL:illumina\\\\tSM:test\'" ], // meta map
+ [
+ file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[6] = "bwameme"
+ input[7] = "fastdup"
+ input[8] = true
+ input[9] = 3072
+ input[10] = "illumina"
+ input[11] = 4
"""
}
}
diff --git a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test.snap b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test.snap
index 7632f79b8..920d49646 100644
--- a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test.snap
+++ b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test.snap
@@ -43,6 +43,50 @@
"nextflow": "25.10.4"
}
},
+ "align bwameme, FASTDUP with bai index": {
+ "content": [
+ [
+ "test_sorted_md.bam",
+ "test_sorted_md.bam.bai",
+ "test_sorted_md.metrics.txt"
+ ],
+ [
+ [
+ {
+ "groupSize": 1,
+ "groupTarget": {
+ "id": "test",
+ "sample": "test",
+ "single_end": false,
+ "num_lanes": 1,
+ "read_group": "'@RG\\tID:test\\tPL:illumina\\tSM:test'"
+ }
+ },
+ "785f016d043339de0df488fa1bb3a16"
+ ]
+ ],
+ [
+ [
+ {
+ "groupSize": 1,
+ "groupTarget": {
+ "id": "test",
+ "sample": "test",
+ "single_end": false,
+ "num_lanes": 1,
+ "read_group": "'@RG\\tID:test\\tPL:illumina\\tSM:test'"
+ }
+ },
+ "af8628d9df18b2d3d4f6fd47ef2bb872"
+ ]
+ ]
+ ],
+ "timestamp": "2026-06-09T13:46:05.133199",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
+ },
"align bwameme": {
"content": [
[
@@ -86,5 +130,49 @@
"nf-test": "0.9.4",
"nextflow": "25.10.4"
}
+ },
+ "align bwameme, FASTDUP with csi index": {
+ "content": [
+ [
+ "test_sorted_md.bam",
+ "test_sorted_md.bam.csi",
+ "test_sorted_md.metrics.txt"
+ ],
+ [
+ [
+ {
+ "groupSize": 1,
+ "groupTarget": {
+ "id": "test",
+ "sample": "test",
+ "single_end": false,
+ "num_lanes": 1,
+ "read_group": "'@RG\\tID:test\\tPL:illumina\\tSM:test'"
+ }
+ },
+ "6fbed72e21c5b2bcb981e1dded632829"
+ ]
+ ],
+ [
+ [
+ {
+ "groupSize": 1,
+ "groupTarget": {
+ "id": "test",
+ "sample": "test",
+ "single_end": false,
+ "num_lanes": 1,
+ "read_group": "'@RG\\tID:test\\tPL:illumina\\tSM:test'"
+ }
+ },
+ "af8628d9df18b2d3d4f6fd47ef2bb872"
+ ]
+ ]
+ ],
+ "timestamp": "2026-06-09T13:46:33.608928",
+ "meta": {
+ "nf-test": "0.9.4",
+ "nextflow": "25.10.4"
+ }
}
}
\ No newline at end of file
diff --git a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/nextflow.config b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/nextflow.config
index 4563b2d78..c2ecd4a69 100644
--- a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/nextflow.config
+++ b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/nextflow.config
@@ -34,4 +34,9 @@ process {
ext.prefix = { "${meta.id}_sorted_md" }
}
+ withName: 'FASTDUP' {
+ ext.args = {params.fastdup_args}
+ ext.prefix = { "${meta.id}_sorted_md" }
+
+ }
}
diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf
index b3b1c1470..5ebd39031 100644
--- a/workflows/raredisease.nf
+++ b/workflows/raredisease.nf
@@ -183,6 +183,7 @@ workflow RAREDISEASE {
val_cadd_resources
val_concatenate_snv_calls
val_skip_split_multiallelics
+ val_duplicates_marker
val_exclude_alt
val_extract_alignments
val_genome
@@ -310,6 +311,7 @@ workflow RAREDISEASE {
skip_fastp,
val_aligner,
val_analysis_type,
+ val_duplicates_marker,
val_exclude_alt,
val_extract_alignments,
val_mbuffer_mem,