diff --git a/CHANGELOG.md b/CHANGELOG.md index f37c1d751..87dad6ba3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,41 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## 3.0.1 - Mario-patch [2026-05-25] + +### `Added` + +- Parameter `cadd_prescored` to pass a directory of pre-scored CADD indel annotations to the CADD process in genome and mitochondrial SNV annotation subworkflows [#866](https://github.com/nf-core/raredisease/pull/866) +- Parameter `manta_call_regions` to restrict Manta SV calling to specified regions (e.g. primary chromosomes) via a bgzipped, tabix-indexed BED file, reducing runtime without affecting other callers [#867](https://github.com/nf-core/raredisease/pull/867) +- Local `FILTERVEP` module using a Python reimplementation of Ensembl's `filter_vep`, replacing the `ENSEMBLVEP_FILTERVEP` module with a lighter cyvcf2-based alternative [#870](https://github.com/nf-core/raredisease/pull/870) +- `bwafastalign/index` nf-core module and `bwafastalign` parameter to support index preparation for the bwa-fastalign genome aligner [#877](https://github.com/nf-core/raredisease/pull/877) +- `bwafastalign/mem` nf-core module to support genome alignment with bwa-fastalign when `--aligner bwafastalign` is set [#880](https://github.com/nf-core/raredisease/pull/880) + +### `Changed` + +- Replace `ENSEMBLVEP_FILTERVEP` with local `FILTERVEP` in the clinical set subworkflow, renamed from `VCF_FILTER_BCFTOOLS_ENSEMBLVEP` to `VCF_FILTER_BCFTOOLS_FILTERVEP` [#870](https://github.com/nf-core/raredisease/pull/870) +- Increase default mbuffer memory value from 3GB to 8GB [#880](https://github.com/nf-core/raredisease/pull/880) +- Update `bwameme/mem` to new nf-core module signature: `val mbuffer` and `val samtools_threads` replaced by `ext.args2` and `ext.args3` [#881](https://github.com/nf-core/raredisease/pull/881) + +### `Fixed` + +- Add a 
bcftools norm split-multiallelics step after merging standard and shifted MT calls to handle new multiallelic sites introduced by bcftools merge [#855](https://github.com/nf-core/raredisease/pull/855) + +### Parameters + +| Old parameter | New parameter | +| ------------- | ---------------------- | +| | bwafastalign | +| | cadd_prescored | +| | manta_call_regions | +| | manta_call_regions_tbi | + +### Tool updates + +| Tool | Old version | New version | +| ------------- | ----------- | ----------- | +| bwa-fastalign | | 1.0.0 | + ## 3.0.0 - Mario [2026-05-12] ### `Added` @@ -121,6 +156,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 | picard | 3.3.0 | 3.4.0 | | pigz | 2.3.4 | 2.8 | | qualimap | 2.3 | | +| saltshaker | 1.0.0 | 1.1.1 | | sambamba | | 1.0.1 | | samtools | 1.21 | 1.22.1 | | sentieon | 202503 | 202503.02 | diff --git a/bin/filter_vep b/bin/filter_vep new file mode 100755 index 000000000..32b072399 --- /dev/null +++ b/bin/filter_vep @@ -0,0 +1,885 @@ +#!/usr/bin/env python3 +""" +filter_vep - Fast Python reimplementation of Ensembl's filter_vep. + +Fast Python reimplementation of the Perl filter_vep script distributed with +Ensembl VEP (EMBL-EBI). Validated against VEP 110 and 115.2, which are +functionally identical for VCF filtering. +Uses cyvcf2 for VCF parsing when available (significant speedup for large +VCF / bgzipped files via htslib). + +All filter operators are supported except is_child, which requires a live +Ensembl Ontology DB. Use the original filter_vep --ontology for +ontology-aware filtering, or replace is_child with an explicit 'in' list +of consequence terms. 
class FilterSet:
    """Parse and evaluate VEP filter expressions.

    Every filter string given to the constructor is parsed once into a small
    tuple-based AST; :meth:`evaluate` then checks a record (a ``dict`` mapping
    field name to raw string value) against all of them.

    Multiple filter strings are ANDed (all must pass).
    Within a string: 'and' / 'or' / 'not' and parentheses are supported;
    'not' may be repeated ("not not X").

    Operators (with documented synonyms):
      is / = / eq                    case-insensitive equality
      is not / != / ne               inequality
      > / gt                         greater than
      < / lt                         less than
      >= / gte                       greater than or equal
      <= / lte                       less than or equal
      match / matches / re / regex   regex search (case-insensitive)
      in                             list or file membership
      exists / ex / defined          field has a non-null value
      is_child                       SO ontology child (not supported; see --ontology)

    Special value syntax:
      #FieldName      compare against another field's value
      bare field name equivalent to 'exists'

    Numeric extraction:
      Mixed-content values like "deleterious(0.05)" have the number
      extracted automatically when a numeric operator is used.

    Source-prefix stripping:
      "HGNC:28706" matches "28706" for is/in comparisons.

    Limitations visible in the parser: tokens left over after a complete
    expression and a missing closing parenthesis are tolerated silently.
    """

    # Canonical operator names after synonym normalisation
    _OP_SYNONYMS = {
        'eq': 'is',
        'ne': '!=',
        'lt': '<',
        'gt': '>',
        'lte': '<=',
        'gte': '>=',
        'matches': 'match',
        're': 'match',
        'regex': 'match',
        'ex': 'exists',
        'defined': 'exists',
    }
    # Words that should be tokenised as OP rather than field names
    _OP_WORDS = frozenset(
        ['is', 'match', 'in', 'exists', 'is_child']
        + list(_OP_SYNONYMS.keys())
    )

    # Cache of value sets read from files by the 'in' operator, keyed by path.
    _file_cache: dict = {}  # shared across all instances

    # Regex for VEP mixed-type field values: e.g. "HGNC:28706", "deleterious(0.05)"
    _MIXED_RE = re.compile(r'^([\w.\-]+)?:?\(?([-\d.e]*)\)?', re.IGNORECASE)
    _NUM_RE = re.compile(r'^-?\d+\.?\d*(e-?\d+)?$', re.IGNORECASE)
    _PURE_NUM = re.compile(r'^[-\d.e]+$', re.IGNORECASE)

    def __init__(self, *filter_strings):
        """Parse each filter expression; raises SyntaxError on a malformed one."""
        self._parsed = [self._parse(filter_string) for filter_string in filter_strings]

    # -- tokeniser -----------------------------------------------------------

    def _tokenize(self, expr):
        """Split *expr* into ``(kind, text)`` tokens, terminated by ``('EOF', None)``.

        Kinds: LP, RP, AND, OR, NOT, OP (symbolic or word operator, with
        synonyms already normalised) and W (any other word).
        """
        tokens = []
        char_idx = 0
        expr_length = len(expr)
        while char_idx < expr_length:
            char = expr[char_idx]
            if char.isspace():
                char_idx += 1
                continue
            if char == '(':
                tokens.append(('LP', None))
                char_idx += 1
                continue
            if char == ')':
                tokens.append(('RP', None))
                char_idx += 1
                continue
            if char in ('>', '<', '!', '='):
                # Prefer the two-character operators over their one-character prefix.
                two_char = expr[char_idx:char_idx + 2]
                if two_char in ('>=', '<=', '!='):
                    tokens.append(('OP', two_char))
                    char_idx += 2
                else:
                    tokens.append(('OP', char))
                    char_idx += 1
                continue
            word_match = re.match(r'[^\s()]+', expr[char_idx:])
            if word_match:
                word = word_match.group()
                word_lower = word.lower()
                if word_lower == 'and':
                    tokens.append(('AND', None))
                elif word_lower == 'or':
                    tokens.append(('OR', None))
                elif word_lower == 'not':
                    tokens.append(('NOT', None))
                elif word_lower in self._OP_WORDS:
                    tokens.append(('OP', self._OP_SYNONYMS.get(word_lower, word_lower)))
                else:
                    tokens.append(('W', word))
                char_idx += len(word)
                continue
            char_idx += 1
        tokens.append(('EOF', None))
        return tokens

    # -- recursive-descent parser -> AST -------------------------------------

    def _parse(self, expr):
        """Parse *expr* into a nested tuple AST.

        Nodes are ``('and'|'or', left, right)``, ``('not', child)`` or
        ``(operator, field, value)``. Precedence, loosest first: or, and, not.

        Raises:
            SyntaxError: when a field name is expected but another token is found.
        """
        tokens = self._tokenize(expr)
        token_pos = [0]

        def peek():
            return tokens[token_pos[0]]

        def eat():
            token = tokens[token_pos[0]]
            token_pos[0] += 1
            return token

        def parse_or():
            left_node = parse_and()
            while peek()[0] == 'OR':
                eat()
                right_node = parse_and()
                left_node = ('or', left_node, right_node)
            return left_node

        def parse_and():
            left_node = parse_not()
            while peek()[0] == 'AND':
                eat()
                right_node = parse_not()
                left_node = ('and', left_node, right_node)
            return left_node

        def parse_not():
            if peek()[0] == 'NOT':
                eat()
                # Recurse so that stacked negations ("not not X") parse
                # instead of failing with "Expected field name".
                return ('not', parse_not())
            return parse_atom()

        def parse_atom():
            if peek()[0] == 'LP':
                eat()
                node = parse_or()
                if peek()[0] == 'RP':
                    eat()
                return node
            if peek()[0] != 'W':
                raise SyntaxError(f"Expected field name, got {peek()!r} in: {expr!r}")
            field = eat()[1]
            next_token = peek()
            # A bare field name (anything not followed by an operator) means 'exists'.
            if next_token[0] in ('EOF', 'RP', 'AND', 'OR'):
                return ('exists', field, None)
            if next_token[0] != 'OP':
                return ('exists', field, None)
            operator = eat()[1].lower()
            if operator == 'is' and peek()[0] == 'NOT':
                eat()
                operator = 'is not'
            if operator == 'exists':
                return ('exists', field, None)
            if peek()[0] in ('EOF', 'AND', 'OR', 'RP'):
                value = None
            else:
                value = eat()[1]
            return (operator, field, value)

        return parse_or()

    # -- evaluator -----------------------------------------------------------

    def evaluate(self, data):
        """Return True when *data* satisfies every parsed filter expression."""
        return all(self._eval(ast_node, data) for ast_node in self._parsed)

    def _fetch_field_value(self, data, field):
        """Fetch field value with case-insensitive fallback."""
        value = data.get(field)
        if value is not None:
            return value
        field_lower = field.lower()
        for existing_key, existing_value in data.items():
            if existing_key is not None and existing_key.lower() == field_lower:
                return existing_value
        return None

    @staticmethod
    def _normalize_missing(field, value):
        """Treat '' and '-' (except for Allele) as missing."""
        if value is None or value == '':
            return None
        if field != 'Allele' and value == '-':
            return None
        return value

    def _get_input(self, field_value, representative_value):
        """Replicate VEP 115 FilterSet.get_input() value-extraction logic.

        Given a raw field value and a representative comparison value,
        returns the part of field_value that should be compared.

        - If field_value has the form TEXT:NUM or TEXT(NUM):
            - and representative_value is purely numeric → return the NUM part
              (or the TEXT part when TEXT is itself a number)
            - otherwise → return field_value unchanged
        - If field_value is plain (no numeric part) → return field_value unchanged
        """
        if field_value is None:
            return None
        field_str = str(field_value)
        mixed_match = self._MIXED_RE.match(field_str)
        if not mixed_match:
            return field_str
        text_part = mixed_match.group(1) or ''
        numeric_part = mixed_match.group(2) or ''
        if not numeric_part:
            return field_str
        representative_str = str(representative_value) if representative_value is not None else ''
        if self._PURE_NUM.match(representative_str):
            if not self._NUM_RE.match(text_part):
                return numeric_part
            return text_part
        return field_str

    def _resolve_value(self, value, data):
        """If value starts with '#', resolve it to the named field's value."""
        if value and value.startswith('#'):
            field_name = value[1:]
            return self._normalize_missing(field_name, self._fetch_field_value(data, field_name))
        return value

    def _eval(self, node, data):
        """Evaluate one AST *node* against the record *data*.

        Raises:
            NotImplementedError: for the unsupported ``is_child`` operator.
        """
        operator = node[0]
        if operator == 'and':
            return self._eval(node[1], data) and self._eval(node[2], data)
        if operator == 'or':
            return self._eval(node[1], data) or self._eval(node[2], data)
        if operator == 'not':
            return not self._eval(node[1], data)

        field = node[1]
        value = self._resolve_value(node[2] if len(node) > 2 else None, data)
        field_value = self._normalize_missing(field, self._fetch_field_value(data, field))

        if operator == 'exists':
            return field_value is not None

        if operator in ('is', '='):
            if value is None:
                return field_value is None
            if field_value is None:
                return False
            processed_input = self._get_input(field_value, value)
            return processed_input.lower() == str(value).lower()

        if operator in ('is not', '!='):
            if value is None:
                return field_value is not None
            if field_value is None:
                return True
            processed_input = self._get_input(field_value, value)
            return processed_input.lower() != str(value).lower()

        # Every remaining operator fails on a missing field.
        if field_value is None:
            return False

        if operator == 'match':
            return bool(re.search(str(value or ''), str(field_value), re.IGNORECASE))

        if operator == 'in':
            if value is None:
                return False
            in_set = self._in_set(value)
            # Test the value as written first, then its prefix-stripped
            # number. Choosing one arbitrary set member as the
            # "representative" made the outcome depend on set iteration
            # order whenever the list mixed numeric and non-numeric entries.
            raw_input = str(field_value).lower()
            if raw_input in in_set:
                return True
            stripped_input = self._get_input(field_value, '0').lower()
            return stripped_input != raw_input and stripped_input in in_set

        if operator == 'is_child':
            raise NotImplementedError(
                "is_child requires a live Ensembl Ontology DB (--ontology). "
                "Use the original filter_vep with --ontology, or replace "
                "is_child with an explicit 'in' list of the consequence terms you need."
            )

        # Ordered comparisons — use _get_input for numeric extraction
        processed_input = self._get_input(field_value, value)
        str_field_val = processed_input
        str_target_val = str(value or '')
        try:
            field_num = float(str_field_val)
            target_num = float(str_target_val)
            if operator == '>':
                return field_num > target_num
            if operator == '<':
                return field_num < target_num
            if operator == '>=':
                return field_num >= target_num
            if operator == '<=':
                return field_num <= target_num
        except (ValueError, TypeError):
            # Not both numeric: fall back to plain string ordering below.
            pass
        if operator == '>':
            return str_field_val > str_target_val
        if operator == '<':
            return str_field_val < str_target_val
        if operator == '>=':
            return str_field_val >= str_target_val
        if operator == '<=':
            return str_field_val <= str_target_val
        return False

    def limit_synonym_search(self, _value=True):
        """No-op shim (kept for API parity with the Perl module)."""

    def _in_set(self, value):
        """Return a set of lowercase values for the 'in' operator.

        If value is the path of an existing file, read one value per line
        (blank lines skipped) and cache the result per path.
        Otherwise split on commas.
        """
        if value in self._file_cache:
            return self._file_cache[value]
        if os.path.isfile(value):
            with open(value) as file_handle:
                value_set = {line.strip().lower() for line in file_handle if line.strip()}
            self._file_cache[value] = value_set
            return value_set
        value_set = {item.strip().lower() for item in value.split(',')}
        return value_set
regex_match: + vep_headers = regex_match.group(1).split('|') + break + + col_headers = None + for header_line in reversed(raw_header_lines): + if header_line.startswith('#') and not header_line.startswith('##'): + col_headers = header_line.lstrip('#').split('\t') + break + + all_fields = set() + if vep_headers: all_fields.update(vep_headers) + if col_headers: all_fields.update(col_headers) + for header_line in raw_header_lines: + regex_match = re.match(r'##INFO=') + output_header_lines.insert(idx, '##FILTER=') + break + + if not args.count: + output_fh.write('\n'.join(output_header_lines) + '\n') + + column_names = col_headers or ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO'] + count = 0 + line_number = 0 + missing_csq = 0 + + for variant in vcf: + line_number += 1 + if args.test and line_number > args.test: + break + + vcf_line = str(variant).rstrip('\n') + vcf_parts = vcf_line.split('\t') + + main_data = {column_names[idx]: (vcf_parts[idx] if idx < len(vcf_parts) else None) + for idx in range(len(column_names))} + if len(vcf_parts) > 7: + for info_field in vcf_parts[7].split(';'): + if '=' in info_field: + info_key, info_value = info_field.split('=', 1) + if info_key != vcf_info_field: + main_data[info_key] = info_value + elif info_field: + main_data[info_field] = None # Flag: Perl sets to undef, so exists-check fails + + csq_value = variant.INFO.get(vcf_info_field) + chunks = [] + data_list = [] + + if csq_value: + chunks = csq_value.split(',') + data_list = [parse_csq_chunk(chunk, vep_headers or [], main_data) for chunk in chunks] + else: + missing_csq += 1 + data_list = [main_data] + + line_pass = 0 + new_chunks = [] + for blob_index, parsed_blob in enumerate(data_list): + if filter_set.evaluate(parsed_blob): + line_pass += 1 + if blob_index < len(chunks): + new_chunks.append(chunks[blob_index]) + + count += bool(line_pass) + + if not args.soft_filter and count < args.start: + continue + + if args.soft_filter: + filter_tag = 'filter_vep_pass' 
# FILTER declarations added to the header in --soft_filter mode so that the
# tags written to the FILTER column are declared, as the VCF specification
# requires (##FILTER=<ID=...,Description="...">).
# NOTE(review): the Description wording is chosen here — confirm against the
# header text emitted by the upstream Perl filter_vep.
_SOFT_FILTER_HEADERS = (
    '##FILTER=<ID=filter_vep_pass,Description="Passed filter_vep filters">',
    '##FILTER=<ID=filter_vep_fail,Description="Failed filter_vep filters">',
)


def _collect_header_fields(raw_headers, vcf_info_field):
    """Return (vep_headers, col_headers, all_field_names) parsed from the header lines."""
    vep_headers, col_headers, extra_allowed = parse_headers(raw_headers, vcf_info_field)
    all_fields = set()
    if vep_headers:
        all_fields.update(vep_headers)
    if col_headers:
        all_fields.update(col_headers)
    all_fields.update(extra_allowed)
    return vep_headers, col_headers, all_fields


def _print_available_fields(all_fields):
    """Print the sorted field names for --list (always to standard output)."""
    print("Available fields:\n")
    for field_name in sorted(all_fields):
        print(field_name)


def _filter_generic_stream(args, input_fh, output_fh):
    """Filter the already-opened *input_fh*; see process_generic for the contract."""
    vcf_info_field = args.vcf_info_field
    filter_set = args.filter_set

    raw_headers = []
    vep_headers = None
    col_headers = None
    file_format = args.format
    count = 0
    line_number = 0
    missing_csq = 0
    headers_initialised = False

    info_key = re.escape(vcf_info_field)
    # Anchor the key at the start of the INFO column or after ';' so that a
    # different key merely ending in the same letters (e.g. BCSQ= when looking
    # for CSQ=) is not mistaken for the VEP annotation.
    csq_re = re.compile(r'(?:^|[\t;])' + info_key + r'=(.+?)(?:;|\s|$)')
    csq_rewrite_re = re.compile(r'(^|;)' + info_key + r'=[^;]*')

    for raw_line in input_fh:
        line = raw_line.rstrip('\n')

        if line.startswith('#'):
            raw_headers.append(line)
            continue

        line_number += 1
        if args.test and line_number > args.test:
            break

        # -- initialise on first data line --
        if not headers_initialised:
            if not raw_headers:
                sys.exit("ERROR: No headers found in input file")

            if args.soft_filter:
                # Keep the last header line (expected to be the column
                # header) last, after the added FILTER declarations.
                chrom_header = raw_headers.pop()
                raw_headers.extend(_SOFT_FILTER_HEADERS)
                raw_headers.append(chrom_header)

            if not args.count and not args.list:
                output_fh.write('\n'.join(raw_headers) + '\n')

            vep_headers, col_headers, all_fields = _collect_header_fields(raw_headers, vcf_info_field)

            if args.list:
                _print_available_fields(all_fields)
                return

            headers_initialised = True

        # -- format detection --
        if not file_format:
            file_format = detect_format(line)
        if file_format not in ('vcf', 'tab'):
            sys.exit(f"ERROR: Unable to parse data in format {file_format}")
        if file_format != 'vcf' and args.only_matched:
            sys.exit("ERROR: --only_matched is compatible only with VCF files")
        if file_format != 'vcf' and args.soft_filter:
            sys.exit("ERROR: --soft_filter is compatible only with VCF files")

        if args.soft_filter:
            args.start = 0

        chunks = []
        data_list = []

        if file_format == 'tab':
            tab_headers = col_headers or vep_headers or []
            data_list.append(parse_tab_line(line, tab_headers))
            chunks.append(line)
            if not any(header == 'Extra' for header in tab_headers):
                filter_set.limit_synonym_search(True)

        else:  # vcf
            vcf_parts = line.split('\t')
            column_headers = col_headers or ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO']
            main_data = {column_headers[idx]: (vcf_parts[idx] if idx < len(vcf_parts) else None)
                         for idx in range(len(column_headers))}

            if len(vcf_parts) > 7:
                for info_field in vcf_parts[7].split(';'):
                    if '=' in info_field:
                        info_key_name, info_value = info_field.split('=', 1)
                        if info_key_name != vcf_info_field:
                            main_data[info_key_name] = info_value
                    elif info_field:
                        main_data[info_field] = None  # Flag: Perl sets to undef, so exists-check fails

            regex_match = csq_re.search(line)
            if regex_match:
                raw_csq_value = regex_match.group(1)
                chunks = raw_csq_value.split(',')
                data_list = [parse_csq_chunk(chunk, vep_headers or [], main_data) for chunk in chunks]
            else:
                missing_csq += 1
                data_list = [main_data]

            filter_set.limit_synonym_search(True)

        # -- evaluate --
        line_pass = 0
        new_chunks = []
        for blob_index, parsed_blob in enumerate(data_list):
            if filter_set.evaluate(parsed_blob):
                line_pass += 1
                if blob_index < len(chunks):
                    new_chunks.append(chunks[blob_index])

        count += bool(line_pass)

        if not args.soft_filter and count < args.start:
            continue

        # -- output --
        if args.soft_filter:
            split_parts = line.split('\t')
            filter_tag = 'filter_vep_pass' if line_pass else 'filter_vep_fail'
            existing_filter = split_parts[6] if len(split_parts) > 6 else '.'
            split_parts[6] = f"{existing_filter};{filter_tag}" if existing_filter and existing_filter != '.' else filter_tag
            output_fh.write('\t'.join(split_parts) + '\n')

        elif line_pass and not args.count:
            out_line = line
            if args.only_matched and new_chunks and len(new_chunks) != len(chunks):
                new_csq = ','.join(new_chunks)
                vcf_parts_rw = out_line.split('\t')
                # A callable replacement inserts the annotation text
                # literally; a template string would have its backslashes
                # interpreted as escapes or group references.
                vcf_parts_rw[7] = csq_rewrite_re.sub(
                    lambda found: found.group(1) + vcf_info_field + '=' + new_csq,
                    vcf_parts_rw[7], count=1)
                out_line = '\t'.join(vcf_parts_rw)
            output_fh.write(out_line + '\n')

        if not args.soft_filter and count >= args.limit + args.start - 1:
            break

    # -- empty file --
    if not line_number:
        if not args.count and not args.list:
            output_fh.write('\n'.join(raw_headers) + '\n')
        if args.list:
            _, _, all_fields = _collect_header_fields(raw_headers, vcf_info_field)
            _print_available_fields(all_fields)
            return

    if args.count:
        output_fh.write(f"{count}\n")

    if missing_csq:
        sys.stderr.write(
            f"WARNING: filter_vep couldn't find VEP annotations field "
            f"{vcf_info_field} in {missing_csq} line(s) of the input file\n"
        )


def process_generic(args, output_fh):
    """Filter a tab-delimited or VCF input line by line, without cyvcf2.

    Reads ``args.input_file`` (standard input when it is None, gzip when
    detected or forced with ``args.gz``), evaluates ``args.filter_set`` on
    every data line and writes the result to *output_fh*.

    Attributes of *args* that are read: vcf_info_field, filter_set,
    input_file, gz, format, test, soft_filter, count, list, only_matched,
    start, limit. ``args.start`` is set to 0 while soft-filtering.

    Output depends on the mode:
      - default: header lines, then every line with at least one passing
        annotation (with --only_matched the annotation field is reduced to
        the passing blobs);
      - --soft_filter: every line, tagged filter_vep_pass / filter_vep_fail
        in the FILTER column;
      - --count: only the number of passing lines;
      - --list: the available field names, printed to standard output.

    A warning is written to standard error when VCF lines lack the
    annotation field. Exits via ``sys.exit`` when no header precedes the
    first data line, when the format is unsupported, or when
    --only_matched / --soft_filter is used on tab input. The input handle is
    closed on every exit path unless it is standard input.
    """
    input_fh = open_input(args.input_file, args.gz)
    try:
        _filter_generic_stream(args, input_fh, output_fh)
    finally:
        if input_fh is not sys.stdin:
            input_fh.close()
+--format [vcf|tab] Input format (tab = any tab-delimited format, + including default VEP output format) + +--output_file [file] -o Output file. Defaults to STDOUT. +--force_overwrite Overwrite existing output file. + +--filter [filters] -f Filter expression. Multiple --filter flags are + treated as logical ANDs, e.g.: + -f "Consequence is missense_variant" + -f "AF < 0.01 or not AF" + -f "HGNC_ID in gene_panels.txt" + -f "(AFR_AF gt #EUR_AF) and SIFT lt 0.05" + + Operators: + is / = / eq case-insensitive equality + != / ne inequality + > / gt greater than + < / lt less than + >= / gte greater than or equal + <= / lte less than or equal + match/matches/re/regex regex (case-insensitive) + in list or file (one value/line) + exists/ex/defined field is present and non-null + is_child SO ontology child (not supported here) + Logical: and or not ( ) + Value can be #Field to compare two fields. + Mixed content (e.g. "deleterious(0.05)") has its + number extracted for numeric comparisons. + Source prefixes (e.g. "HGNC:34") are stripped + automatically in is/in comparisons. + +--list -l List available fields from the input file. +--count -c Print only a count of matched lines. + +--only_matched In VCF files, rewrite the CSQ field to contain + only annotation blobs that passed the filters. + +--vcf_info_field [key] VCF INFO key for VEP annotations (default: CSQ). + +--soft_filter Add filter_vep_pass / filter_vep_fail to the VCF + FILTER column instead of excluding variants. + +--ontology -y Accepted for compatibility but not implemented. + is_child requires a live Ensembl Ontology DB; use + the original filter_vep --ontology for that feature, + or replace is_child with an explicit 'in' list. +--host / --user / --pass / --port / --version / --registry + Database connection options (accepted, not used). + +--start [N] -s Skip first N passing results (1-based, default 1) +--limit [N] Return at most N passing results. +--test [N] Process only the first N non-header lines. 
def main():
    """Command-line entry point: parse options, build the filters, run a reader.

    Prints the usage text and exits when --help is given (status 0) or when
    neither --filter nor --list is supplied (error exit). Refuses to
    overwrite an existing output file unless --force_overwrite is set. The
    cyvcf2 reader is chosen when cyvcf2 was imported, the format is not
    forced to 'tab', and either the format is 'vcf' or the input file name
    ends in .vcf, .vcf.gz or .bcf; otherwise the generic reader is used.
    """
    # (flags, keyword arguments) for every accepted option, in declaration order.
    option_table = [
        (('--help', '-h'), {'action': 'store_true'}),
        (('--test',), {'type': int}),
        (('--count', '-c'), {'action': 'store_true'}),
        (('--list', '-l'), {'action': 'store_true'}),
        (('--input_file', '-i'), {}),
        (('--output_file', '-o'), {'default': 'stdout'}),
        (('--force_overwrite',), {'action': 'store_true'}),
        (('--format',), {'choices': ['vcf', 'tab']}),
        (('--gz',), {'action': 'store_true'}),
        (('--only_matched',), {'action': 'store_true'}),
        (('--vcf_info_field',), {'default': 'CSQ'}),
        (('--soft_filter',), {'action': 'store_true'}),
        (('--ontology', '-y'), {'action': 'store_true'}),
        (('--host',), {'default': 'ensembldb.ensembl.org'}),
        (('--user',), {'default': 'anonymous'}),
        (('--pass',), {'dest': 'password', 'default': None}),
        (('--port',), {'type': int, 'default': 3306}),
        (('--version',), {'type': int}),
        (('--registry',), {}),
        (('--start', '-s'), {'type': int, 'default': 1}),
        (('--limit',), {'type': int, 'default': int(1e12)}),
        (('--filter', '-f'), {'action': 'append'}),
    ]
    cli = argparse.ArgumentParser(add_help=False)
    for flags, options in option_table:
        cli.add_argument(*flags, **options)

    args = cli.parse_args()

    if args.help:
        print(USAGE)
        sys.exit(0)
    if not args.filter and not args.list:
        print(USAGE)
        sys.exit("ERROR: No valid filters given")

    if args.ontology:
        sys.stderr.write(
            "WARNING: --ontology is accepted for compatibility but is_child is not "
            "implemented. Use the original filter_vep --ontology for ontology-aware "
            "filtering, or replace is_child with an explicit 'in' list.\n"
        )

    args.filter_set = FilterSet(*(args.filter or []))

    # The literal name 'stdout' (any case) selects standard output.
    writes_to_file = args.output_file.lower() != 'stdout'
    if writes_to_file:
        if os.path.exists(args.output_file) and not args.force_overwrite:
            sys.exit(
                f"ERROR: Output file {args.output_file} already exists. "
                "Use --force_overwrite to overwrite."
            )
        sink = open(args.output_file, 'w')
    else:
        sink = sys.stdout

    try:
        use_cyvcf2 = (
            HAS_CYVCF2
            and args.format != 'tab'
            and (args.format == 'vcf'
                 or (args.input_file and
                     re.search(r'\.vcf(\.gz)?$|\.bcf$', args.input_file, re.IGNORECASE)))
        )
        reader = process_cyvcf2 if use_cyvcf2 else process_generic
        reader(args, sink)
    finally:
        if sink is not sys.stdout:
            sink.close()


if __name__ == '__main__':
    main()
./samtools_sort_tmp -@ ${params.samtools_sort_threads} -m ${(params.mbuffer_mem / params.samtools_sort_threads).intValue()}M" } ext.prefix = { "${meta.id}_sorted" } } diff --git a/conf/modules/postprocess_MT_calls.config b/conf/modules/postprocess_MT_calls.config index f54c383e6..4bd28abf3 100644 --- a/conf/modules/postprocess_MT_calls.config +++ b/conf/modules/postprocess_MT_calls.config @@ -35,8 +35,13 @@ process { ext.prefix = { "${meta.id}_split_rmdup" } } + withName: '.*POSTPROCESS_MT_CALLS:SPLIT_MULTIALLELICS_POSTMERGE_MT' { + ext.args = '--output-type z --multiallelics -both' + ext.prefix = { "${meta.id}_split_rmdup_merged_split" } + } + withName: '.*POSTPROCESS_MT_CALLS:BCFTOOLS_MERGE_MT' { - ext.args = '--output-type z' + ext.args = '--output-type z --write-index=tbi' ext.prefix = { "${meta.id}_split_rmdup_merged" } } diff --git a/conf/modules/vcf_filter_bcftools_ensemblvep.config b/conf/modules/vcf_filter_bcftools_filtervep.config similarity index 90% rename from conf/modules/vcf_filter_bcftools_ensemblvep.config rename to conf/modules/vcf_filter_bcftools_filtervep.config index 83b282b21..6a4a927af 100644 --- a/conf/modules/vcf_filter_bcftools_ensemblvep.config +++ b/conf/modules/vcf_filter_bcftools_filtervep.config @@ -18,7 +18,7 @@ // SNVs process { - withName: '.*:GENERATE_CLINICAL_SET_SNV:ENSEMBLVEP_FILTERVEP' { + withName: '.*:GENERATE_CLINICAL_SET_SNV:FILTERVEP' { ext.prefix = { "${meta.id}_snv_${meta.set}" } ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } } @@ -31,7 +31,7 @@ process { // SVs process { - withName: '.*:GENERATE_CLINICAL_SET_SV:ENSEMBLVEP_FILTERVEP' { + withName: '.*:GENERATE_CLINICAL_SET_SV:FILTERVEP' { ext.prefix = { "${meta.id}_sv_${meta.set}" } ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } } @@ -52,7 +52,7 @@ process { // ME variants process { - withName: '.*:GENERATE_CLINICAL_SET_ME:ENSEMBLVEP_FILTERVEP' { + withName: '.*:GENERATE_CLINICAL_SET_ME:FILTERVEP' { ext.prefix = { "${meta.id}_me_${meta.set}" } 
ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } } diff --git a/docs/usage.md b/docs/usage.md index e5963d4d2..3007cbdf6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -263,10 +263,14 @@ The mandatory and optional parameters for each category are tabulated below. ##### 5. Variant calling - Structural variants -| Mandatory | Optional | -| --------- | ---------- | -| | target_bed | -| | bwa | +| Mandatory | Optional | +| --------- | ---------------------------------- | +| | target_bed | +| | bwa | +| | manta_call_regions<sup>1</sup> | +| | manta_call_regions_tbi<sup>1</sup> | + +<sup>1</sup> A bgzipped BED file (`.bed.gz`) and its tabix index (`.bed.gz.tbi`) restricting Manta's SV calling to specific regions. Both parameters must be supplied together. Only applied for WGS; for WES, Manta always uses `target_bed` and these parameters have no effect. Useful for reducing runtime on references with many short contigs such as GRCh38 by limiting analysis to primary chromosomes. ##### 6. Copy number variant calling diff --git a/main.nf b/main.nf index f847e20e6..dba61fb61 100644 --- a/main.nf +++ b/main.nf @@ -46,8 +46,10 @@ workflow NFCORE_RAREDISEASE { val_aligner val_analysis_type val_bwa + val_bwafastalign val_bwamem2 val_bwameme + val_cadd_prescored val_cadd_resources val_call_interval val_concatenate_snv_calls @@ -73,7 +75,8 @@ workflow NFCORE_RAREDISEASE { val_known_dbsnp_tbi val_light_strand_origin_end val_light_strand_origin_start - val_mbuffer_mem + val_manta_call_regions + val_manta_call_regions_tbi val_mito_length val_mito_name val_mitosalt_breakspan @@ -111,7 +114,6 @@ workflow NFCORE_RAREDISEASE { val_run_vcfanno_db_sanity_check val_sambamba_regions val_sample_id_map - val_samtools_sort_threads val_save_all_mapped_as_cram val_save_noalt_mapped_as_cram val_save_reference @@ -157,6 +159,7 @@ workflow NFCORE_RAREDISEASE { val_aligner, val_analysis_type, val_bwa, + val_bwafastalign, val_bwamem2, val_bwameme, val_fai, @@ -181,6 +184,7 @@ workflow NFCORE_RAREDISEASE { ch_bait_intervals = 
ch_references.bait_intervals ch_dbsnp = ch_references.dbsnp ch_dbsnp_tbi = ch_references.dbsnp_tbi + ch_genome_bwafastalignindex = ch_references.genome_bwafastalign_index ch_genome_bwaindex = ch_references.genome_bwa_index ch_genome_bwamem2index = ch_references.genome_bwamem2_index ch_genome_bwamemeindex = ch_references.genome_bwameme_index @@ -233,6 +237,7 @@ workflow NFCORE_RAREDISEASE { ch_svd_ud = channelFromPath(val_verifybamid_svd_ud) // Using channelFromPathWithMeta helper (with simpleName). If filepath is null, returns, [[:],[]] + ch_cadd_prescored = channelFromPathWithMeta(val_cadd_prescored, true) ch_cadd_resources = channelFromPathWithMeta(val_cadd_resources, true) ch_call_interval = channelFromPathWithMeta(val_call_interval, true) ch_ml_model = channelFromPathWithMeta(val_ml_model, true) @@ -260,6 +265,11 @@ workflow NFCORE_RAREDISEASE { ch_cadd_header = channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() ch_foundin_header = channel.fromPath("$projectDir/assets/foundin.hdr", checkIfExists: true).collect() + ch_manta_regions = val_analysis_type.equals("wgs") + ? (val_manta_call_regions + ? 
channel.value([file(val_manta_call_regions), file(val_manta_call_regions_tbi)]) + : channel.value([[], []])) + : ch_target_bed.map { _meta, bed, tbi -> [bed, tbi] } ch_ngsbits_method = channel.value(val_ngsbits_samplegender_method) ch_sentieon_pcr_indel_model = channel.value(val_sentieon_dnascope_pcr_indel_model) ch_subdepth = channel.value(val_subdepth) @@ -378,6 +388,7 @@ workflow NFCORE_RAREDISEASE { ch_alignments, ch_bait_intervals, ch_cadd_header, + ch_cadd_prescored, ch_cadd_resources, ch_call_interval, ch_case_info, @@ -385,6 +396,7 @@ workflow NFCORE_RAREDISEASE { ch_dbsnp_tbi, ch_foundin_header, ch_gcnvcaller_model, + ch_genome_bwafastalignindex, ch_genome_bwaindex, ch_genome_bwamem2index, ch_genome_bwamemeindex, @@ -401,6 +413,7 @@ workflow NFCORE_RAREDISEASE { ch_hgnc_ids, ch_intervals_wgs, ch_intervals_y, + ch_manta_regions, ch_me_references, ch_me_svdb_resources, ch_ml_model, @@ -489,7 +502,6 @@ workflow NFCORE_RAREDISEASE { val_homoplasmy_af_threshold, val_light_strand_origin_end, val_light_strand_origin_start, - val_mbuffer_mem, val_mito_length, val_mito_name, val_mitosalt_breakspan, @@ -515,7 +527,6 @@ workflow NFCORE_RAREDISEASE { val_run_rtgvcfeval, val_run_vcfanno_db_sanity_check, val_sample_id_map, - val_samtools_sort_threads, val_save_all_mapped_as_cram, val_save_noalt_mapped_as_cram, val_svdb_query_bedpedbs, @@ -564,8 +575,10 @@ workflow { params.aligner, params.analysis_type, params.bwa, + params.bwafastalign, params.bwamem2, params.bwameme, + params.cadd_prescored, params.cadd_resources, params.call_interval, params.concatenate_snv_calls, @@ -591,7 +604,8 @@ workflow { params.known_dbsnp_tbi, params.light_strand_origin_end, params.light_strand_origin_start, - params.mbuffer_mem, + params.manta_call_regions, + params.manta_call_regions_tbi, params.mito_length, params.mito_name, params.mitosalt_breakspan, @@ -629,7 +643,6 @@ workflow { params.run_vcfanno_db_sanity_check, params.sambamba_regions, params.sample_id_map, - 
params.samtools_sort_threads, params.save_all_mapped_as_cram, params.save_noalt_mapped_as_cram, params.save_reference, diff --git a/modules.json b/modules.json index 4bb0c67b5..bcb716ff7 100644 --- a/modules.json +++ b/modules.json @@ -75,6 +75,16 @@ "git_sha": "707241c72951f24fd89982c4c80c5983a4c437ef", "installed_by": ["modules"] }, + "bwafastalign/index": { + "branch": "master", + "git_sha": "4ddd2d28518c7475bc76f0dd0e0451dd0e19df2a", + "installed_by": ["modules"] + }, + "bwafastalign/mem": { + "branch": "master", + "git_sha": "d6379deecb794ad5cc6234f3208085aaee27b327", + "installed_by": ["modules"] + }, "bwamem2/index": { "branch": "master", "git_sha": "5dd46a36fca68d6ad1a6b22ec47adc8c6863717d", @@ -92,7 +102,7 @@ }, "bwameme/mem": { "branch": "master", - "git_sha": "7a41710e25fdcdf8e4d5b324f2eb74022ffc77ff", + "git_sha": "ead2c49d7abda0172e1a0abf585b305b690fb8a8", "installed_by": ["modules"] }, "cadd": { @@ -130,11 +140,6 @@ "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", "installed_by": ["modules"] }, - "ensemblvep/filtervep": { - "branch": "master", - "git_sha": "2e751c119b5e6e9c7ecda7a682e157aabb57a812", - "installed_by": ["modules", "vcf_filter_bcftools_ensemblvep"] - }, "ensemblvep/vep": { "branch": "master", "git_sha": "34505e1fc5e9f4fd641210ca440acff6bd33b842", @@ -383,17 +388,17 @@ }, "saltshaker/call": { "branch": "master", - "git_sha": "9d051963759dda1a424374e23f4f22aaa2b0bd60", + "git_sha": "a61c82ad6d255d555aa6c7f1c3c0e90c3735c830", "installed_by": ["modules"] }, "saltshaker/classify": { "branch": "master", - "git_sha": "ff5f2ad4481a4a1e1769a1fad922681e7f7fd176", + "git_sha": "a61c82ad6d255d555aa6c7f1c3c0e90c3735c830", "installed_by": ["modules"] }, "saltshaker/plot": { "branch": "master", - "git_sha": "ff5f2ad4481a4a1e1769a1fad922681e7f7fd176", + "git_sha": "a61c82ad6d255d555aa6c7f1c3c0e90c3735c830", "installed_by": ["modules"] }, "sambamba/depth": { @@ -579,11 +584,6 @@ "branch": "master", "git_sha": 
"4b406a74dc0449c0401ed87d5bfff4252fd277fd", "installed_by": ["subworkflows"] - }, - "vcf_filter_bcftools_ensemblvep": { - "branch": "master", - "git_sha": "95518d261ec0561b3dffb332944bebc5ef85efcf", - "installed_by": ["subworkflows"] } } } diff --git a/modules/local/filtervep/environment.yml b/modules/local/filtervep/environment.yml new file mode 100644 index 000000000..0ffb35d7c --- /dev/null +++ b/modules/local/filtervep/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cyvcf2=0.32.1 diff --git a/modules/local/filtervep/main.nf b/modules/local/filtervep/main.nf new file mode 100644 index 000000000..6e05dace3 --- /dev/null +++ b/modules/local/filtervep/main.nf @@ -0,0 +1,40 @@ +process FILTERVEP { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/48/481e0a88c723eb6abe5a6c36e50bc2119b460f3c6f71aff2111a22e8b704c5d9/data' : + 'community.wave.seqera.io/library/cyvcf2:0.32.1--569b36b775b7f1e5' }" + + input: + tuple val(meta), path(input) + path feature_file + + output: + tuple val(meta), path("*.${extension}"), emit: output + tuple val("${task.process}"), val('filtervep'), val('1.0.0'), topic: versions, emit: versions_filtervep + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--format tab") ? "txt" : "vcf" + """ + filter_vep \\ + ${args} \\ + --input_file ${input} \\ + --output_file ${prefix}.${extension} \\ + --only_matched + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--format tab") ? 
"txt" : "vcf" + """ + touch ${prefix}.${extension} + """ +} diff --git a/modules/nf-core/ensemblvep/filtervep/environment.yml b/modules/nf-core/bwafastalign/index/environment.yml similarity index 65% rename from modules/nf-core/ensemblvep/filtervep/environment.yml rename to modules/nf-core/bwafastalign/index/environment.yml index 7e60f7f95..a89afa0a9 100644 --- a/modules/nf-core/ensemblvep/filtervep/environment.yml +++ b/modules/nf-core/bwafastalign/index/environment.yml @@ -4,5 +4,6 @@ channels: - conda-forge - bioconda dependencies: - - bioconda::ensembl-vep=115.2 - - bioconda::perl-math-cdf=0.1 + - bioconda::bwa-fastalign=1.0.0 + - bioconda::mbuffer=20160228 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/bwafastalign/index/main.nf b/modules/nf-core/bwafastalign/index/main.nf new file mode 100644 index 000000000..8bf8d7eb3 --- /dev/null +++ b/modules/nf-core/bwafastalign/index/main.nf @@ -0,0 +1,48 @@ +process BWAFASTALIGN_INDEX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f8/f8c975324a12014c8a817c2c1ad0cd68b077cf09c4370717589177b262dcd1dc/data': + 'community.wave.seqera.io/library/bwa-fastalign_mbuffer_samtools:35f24ce8addcd26b'}" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("bwafastalign"), emit: index + tuple val("${task.process}"), val('bwafastalign'), val('1.0.0'), topic: versions, emit: versions_bwafastalign + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
+ tuple val("${task.process}"), val('samtools'), eval("samtools --version 2>&1 | sed '1!d;s/.* //'") , topic: versions, emit: versions_samtools + tuple val("${task.process}"), val('mbuffer'), eval("mbuffer --version 2>&1 | sed -n 's/mbuffer //p'") , topic: versions, emit: versions_mbuffer + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${fasta}" + """ + mkdir bwafastalign + + bwa-fastalign index \\ + $args \\ + -p bwafastalign/$prefix \\ + $fasta + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta}" + """ + mkdir bwafastalign + touch bwafastalign/${prefix}.amb + touch bwafastalign/${prefix}.ann + touch bwafastalign/${prefix}.bwt + touch bwafastalign/${prefix}.bytesa + touch bwafastalign/${prefix}.fmt + touch bwafastalign/${prefix}.kmer + touch bwafastalign/${prefix}.pac + touch bwafastalign/${prefix}.sa + """ +} diff --git a/modules/nf-core/bwafastalign/index/meta.yml b/modules/nf-core/bwafastalign/index/meta.yml new file mode 100644 index 000000000..5c2fdbca4 --- /dev/null +++ b/modules/nf-core/bwafastalign/index/meta.yml @@ -0,0 +1,106 @@ +name: "bwafastalign_index" +description: Create BWA-FASTALIGN index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - "bwafastalign": + description: "Faster and Cheaper Sequence Alignment on Commercial CPUs (A faster + version of bwa-mem with identical outputs)." + homepage: "https://github.com/zzhofict/BWA-FastAlign" + documentation: "https://github.com/zzhofict/BWA-FastAlign" + doi: "10.1145/3774934.3786421" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - fasta: + type: file + description: Input genome fasta file + ontologies: [] +output: + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - bwafastalign: + type: string + description: BWA-FASTALIGN genome index files + pattern: "*.{0123,amb,ann,pac,pos_packed,suffixarray_uint64,suffixarray_uint64_L0_PARAMETERS,suffixarray_uint64_L1_PARAMETERS,suffixarray_uint64_L2_PARAMETERS}" + ontologies: [] + versions_bwafastalign: + - - ${task.process}: + type: string + description: The name of the process + - bwafastalign: + type: string + description: BWA-FASTALIGN genome index files + pattern: "*.{0123,amb,ann,pac,pos_packed,suffixarray_uint64,suffixarray_uint64_L0_PARAMETERS,suffixarray_uint64_L1_PARAMETERS,suffixarray_uint64_L2_PARAMETERS}" + ontologies: [] + - 1.0.0: + type: string + description: The expression to obtain the version of the tool + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools --version 2>&1 | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool + versions_mbuffer: + - - ${task.process}: + type: string + description: The name of the process + - mbuffer: + type: string + description: The name of the tool + - mbuffer --version 2>&1 | sed -n 's/mbuffer //p': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bwafastalign: + type: string + description: BWA-FASTALIGN genome index files + pattern: "*.{0123,amb,ann,pac,pos_packed,suffixarray_uint64,suffixarray_uint64_L0_PARAMETERS,suffixarray_uint64_L1_PARAMETERS,suffixarray_uint64_L2_PARAMETERS}" + ontologies: [] + - 1.0.0: + type: string + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: The name of the tool + - samtools --version 2>&1 | sed '1!d;s/.* //': + type: eval + description: 
The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - mbuffer: + type: string + description: The name of the tool + - mbuffer --version 2>&1 | sed -n 's/mbuffer //p': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@peterpru" +maintainers: + - "@peterpru" diff --git a/modules/nf-core/bwafastalign/index/tests/main.nf.test b/modules/nf-core/bwafastalign/index/tests/main.nf.test new file mode 100644 index 000000000..3173287b8 --- /dev/null +++ b/modules/nf-core/bwafastalign/index/tests/main.nf.test @@ -0,0 +1,64 @@ +nextflow_process { + + name "Test Process BWAFASTALIGN_INDEX" + script "../main.nf" + process "BWAFASTALIGN_INDEX" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "bwafastalign" + tag "bwafastalign/index" + + test("BWAFASTALIGN index") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + // genome.fasta.kmer is a fixed-size ~4.8 GB hash table — snapshot by name only, MD5 all others + { assert snapshot( + new File(process.out.index[0][1].toString()).listFiles() + .sort { it.name } + .collect { f -> f.name.endsWith('.kmer') ? 
f.name : file(f.absolutePath) }, + process.out.findAll { key, val -> key.startsWith('versions') } + ).match() } + ) + } + } + + test("BWAFASTALIGN index - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwafastalign/index/tests/main.nf.test.snap b/modules/nf-core/bwafastalign/index/tests/main.nf.test.snap new file mode 100644 index 000000000..38524381b --- /dev/null +++ b/modules/nf-core/bwafastalign/index/tests/main.nf.test.snap @@ -0,0 +1,131 @@ +{ + "BWAFASTALIGN index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.bwt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.bytesa:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.fmt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.kmer:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pac:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.sa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BWAFASTALIGN_INDEX", + "bwafastalign", + "1.0.0" + ] + ], + "2": [ + [ + "BWAFASTALIGN_INDEX", + "samtools", + "1.21" + ] + ], + "3": [ + [ + "BWAFASTALIGN_INDEX", + "mbuffer", + "version 20160228" + ] + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.fasta.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.bwt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.bytesa:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.fmt:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.kmer:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"genome.fasta.pac:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.sa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bwafastalign": [ + [ + "BWAFASTALIGN_INDEX", + "bwafastalign", + "1.0.0" + ] + ], + "versions_mbuffer": [ + [ + "BWAFASTALIGN_INDEX", + "mbuffer", + "version 20160228" + ] + ], + "versions_samtools": [ + [ + "BWAFASTALIGN_INDEX", + "samtools", + "1.21" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.0" + }, + "timestamp": "2026-06-08T12:26:53.94075" + }, + "BWAFASTALIGN index": { + "content": [ + [ + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.fasta.bwt:md5,0469c30a1e239dd08f68afe66fde99da", + "genome.fasta.bytesa:md5,9e4ae311370960b31268a5ef0bde324a", + "genome.fasta.fmt:md5,f993b15dc5f10271f3fa0a68720219b6", + "genome.fasta.kmer", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66", + "genome.fasta.sa:md5,ab3952cabf026b48cd3eb5bccbb636d1" + ], + { + "versions_bwafastalign": [ + [ + "BWAFASTALIGN_INDEX", + "bwafastalign", + "1.0.0" + ] + ], + "versions_mbuffer": [ + [ + "BWAFASTALIGN_INDEX", + "mbuffer", + "version 20160228" + ] + ], + "versions_samtools": [ + [ + "BWAFASTALIGN_INDEX", + "samtools", + "1.21" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.0" + }, + "timestamp": "2026-06-08T12:26:41.54578" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwafastalign/index/tests/nextflow.config b/modules/nf-core/bwafastalign/index/tests/nextflow.config new file mode 100644 index 000000000..ffc65676f --- /dev/null +++ b/modules/nf-core/bwafastalign/index/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BWAFASTALIGN_INDEX { + memory = 15.GB + } +} diff --git a/modules/nf-core/bwafastalign/mem/README.md b/modules/nf-core/bwafastalign/mem/README.md new file mode 100644 index 000000000..f991034a2 --- /dev/null +++ b/modules/nf-core/bwafastalign/mem/README.md @@ -0,0 +1,51 
@@ +# bwafastalign/mem options + +bwa-fastalign is a high-throughput aligner that produces alignments faster than samtools can consume them. To prevent bwa-fastalign from stalling while `samtools sort` processes data, mbuffer is placed between the two tools (only when `sort_bam` is true) to absorb alignments until samtools is ready. The mbuffer and samtools options are configurable via `ext.args2` and `ext.args3` respectively. + +## Configuring mbuffer (`ext.args2`) + +`ext.args2` is passed to mbuffer and is therefore ignored when `sort_bam` is false. The default buffer size is 3GB (`-m 3072M`). If `-m` is not present in `ext.args2`, the default is injected automatically. + +The mbuffer size should match the total memory allocated to `samtools sort` (`-m` × `-@`) so it can absorb bwa-fastalign output while samtools is flushing its sort buffer to disk. + +``` +withName: 'BWAFASTALIGN_MEM' { + ext.args2 = '-m 20480M' // 20GB mbuffer to match samtools sort total (e.g. -m 1024M x -@ 20) +} +``` + +## Configuring samtools (`ext.args3`) + +`ext.args3` is passed to `samtools sort` or `samtools view` depending on the `sort_bam` input. Defaults are injected if not supplied: + +- `-@ 3` (threads) — always injected if `-@` is absent +- `-m 1024M` (memory per thread) — injected if `-m` is absent and `sort_bam` is true + +``` +withName: 'BWAFASTALIGN_MEM' { + ext.args3 = '-@ 20 -m 1024M' // 20 threads, 1GB per thread = 20GB total +} +``` + +## Example: tuning for human genome alignment + +For a large reference (e.g. human genome) on a well-resourced machine, you may want to increase both values. The mbuffer size should equal the total samtools sort memory: + +``` +withName: 'BWAFASTALIGN_MEM' { + ext.args2 = '-m 20480M' // mbuffer = samtools total (20 threads x 1GB) + ext.args3 = '-@ 20 -m 1024M' +} +``` + +## CRAM output + +To produce CRAM output, pass `--output-fmt` via `ext.args3`: + +``` +withName: 'BWAFASTALIGN_MEM' { + ext.args3 = '--output-fmt cram' +} +``` + +A FASTA reference must be provided as `input[2]` when using CRAM output. 
diff --git a/modules/nf-core/bwafastalign/mem/environment.yml b/modules/nf-core/bwafastalign/mem/environment.yml new file mode 100644 index 000000000..a89afa0a9 --- /dev/null +++ b/modules/nf-core/bwafastalign/mem/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bwa-fastalign=1.0.0 + - bioconda::mbuffer=20160228 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/bwafastalign/mem/main.nf b/modules/nf-core/bwafastalign/mem/main.nf new file mode 100644 index 000000000..36bca6fed --- /dev/null +++ b/modules/nf-core/bwafastalign/mem/main.nf @@ -0,0 +1,73 @@ +process BWAFASTALIGN_MEM { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f8/f8c975324a12014c8a817c2c1ad0cd68b077cf09c4370717589177b262dcd1dc/data': + 'community.wave.seqera.io/library/bwa-fastalign_mbuffer_samtools:35f24ce8addcd26b'}" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val sort_bam + + output: + tuple val(meta), path("${prefix}.{sam,bam,cram}"), emit: output + tuple val(meta), path("${prefix}.{csi,crai}") , emit: index , optional: true + tuple val("${task.process}"), val('bwafastalign'), val('1.0.0'), topic: versions, emit: versions_bwafastalign + tuple val("${task.process}"), val('samtools'), eval("samtools --version 2>&1 | sed '1!d;s/.* //'") , topic: versions, emit: versions_samtools + tuple val("${task.process}"), val('mbuffer'), eval("mbuffer --version 2>&1 | sed -n 's/mbuffer version//p'") , topic: versions, emit: versions_mbuffer + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def 
args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def samtools_command = sort_bam ? 'sort' : 'view' + // ext.args2 holds mbuffer options; '-m 3072M' is prepended when args2 contains no '-m' (plain substring match). mbuffer is only piped in when sort_bam is true, so args2 is unused otherwise + def mbuffer_args = args2.contains('-m') ? args2 : "-m 3072M ${args2}" + def mbuffer_command = sort_bam ? "| mbuffer ${mbuffer_args}" : "" + // ext.args3 holds samtools options; '-@ 3' is added when args3 has no '-@', and '-m 1024M' when sorting and args3 has no '-m' (both plain substring matches). The output extension is read from --output-fmt/-O in args3 and defaults to bam + def samtools_threads_arg = args3.contains('-@') ? '' : '-@ 3' + def samtools_mem_arg = (sort_bam && !args3.contains('-m')) ? '-m 1024M' : '' + def samtools_args = "${samtools_mem_arg} ${samtools_threads_arg} ${args3}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args3 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa-fastalign \\ + mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + $mbuffer_command \\ + | samtools $samtools_command $samtools_args ${reference} -o ${prefix}.${extension} - + """ + + stub: + + def args3 = task.ext.args3 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args3 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = extension == "cram" ? "touch ${prefix}.crai" : + extension == "bam" ? 
"touch ${prefix}.csi" : "" + """ + touch ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bwafastalign/mem/meta.yml b/modules/nf-core/bwafastalign/mem/meta.yml new file mode 100644 index 000000000..0a6dfe042 --- /dev/null +++ b/modules/nf-core/bwafastalign/mem/meta.yml @@ -0,0 +1,144 @@ +name: "bwafastalign_mem" +description: Performs fastq alignment to a fasta reference using BWA-FastAlign. +keywords: + - mem + - bwa + - bwafastalign + - alignment + - map + - fastq + - bam + - sam + - cram +tools: + - "bwafastalign": + description: "Faster and Cheaper Sequence Alignment on Commercial CPUs (A faster + version of bwa-mem with identical outputs)." + homepage: "https://github.com/zzhofict/BWA-FastAlign" + documentation: "https://github.com/zzhofict/BWA-FastAlign" + doi: "10.1145/3774934.3786421" + licence: + - "MIT" + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference/index information + e.g. [ id:'test' ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fa,fasta,fna}" + ontologies: [] + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.{sam,bam,cram}: + type: file + description: Output alignment file (SAM, BAM, or CRAM) + pattern: "*.{sam,bam,cram}" + ontologies: [] + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.{csi,crai}: + type: file + description: Index file for BAM (.csi) or CRAM (.crai) output + pattern: "*.{csi,crai}" + ontologies: [] + versions_bwafastalign: + - - ${task.process}: + type: string + description: The name of the process + - bwafastalign: + type: string + description: The name of the tool + - 1.0.0: + type: string + description: The expression to obtain the version of the tool + versions_samtools: + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: Name of the tool + - samtools --version 2>&1 | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool + versions_mbuffer: + - - ${task.process}: + type: string + description: The name of the process + - mbuffer: + type: string + description: The name of the tool + - mbuffer --version 2>&1 | sed -n 's/mbuffer version//p': + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bwafastalign: + type: string + description: The name of the tool + - 1.0.0: + type: string + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - samtools: + type: string + description: Name of the tool + - samtools --version 2>&1 | sed '1!d;s/.* //': + type: eval + description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - mbuffer: + type: string + description: The name of the tool + - mbuffer --version 2>&1 | sed -n 
's/mbuffer version//p': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@peterpru" +maintainers: + - "@peterpru" diff --git a/modules/nf-core/bwafastalign/mem/tests/main.nf.test b/modules/nf-core/bwafastalign/mem/tests/main.nf.test new file mode 100644 index 000000000..83fef0386 --- /dev/null +++ b/modules/nf-core/bwafastalign/mem/tests/main.nf.test @@ -0,0 +1,204 @@ +nextflow_process { + + name "Test Process BWAFASTALIGN_MEM" + script "../main.nf" + process "BWAFASTALIGN_MEM" + + tag "modules" + tag "modules_nfcore" + tag "bwafastalign" + tag "bwafastalign/mem" + tag "bwafastalign/index" + config "./nextflow.config" + + setup { + run("BWAFASTALIGN_INDEX") { + script "../../index/main.nf" + config "./nextflow.config" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + test("sarscov2 - fastq, index, fasta, false") { + + when { + params { + module_args2 = "" + module_args3 = "" + } + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAFASTALIGN_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.output[0][1]).getHeaderMD5(), + bam(process.out.output[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true") { + + when { + params { + module_args2 = "-m 2048M" + module_args3 = "-@ 4" + } + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + 
[file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAFASTALIGN_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.output[0][1]).getHeaderMD5(), + bam(process.out.output[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false") { + + when { + params { + module_args2 = "" + module_args3 = "" + } + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAFASTALIGN_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.output[0][1]).getHeaderMD5(), + bam(process.out.output[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true") { + + when { + params { + module_args2 = "-m 2048M" + module_args3 = "-@ 4" + } + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: 
true) + ] + ]) + input[1] = BWAFASTALIGN_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.output[0][1]).getHeaderMD5(), + bam(process.out.output[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") { + + options "-stub" + when { + params { + module_args2 = "-m 2048M" + module_args3 = "-@ 4" + } + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAFASTALIGN_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/bwafastalign/mem/tests/main.nf.test.snap b/modules/nf-core/bwafastalign/mem/tests/main.nf.test.snap new file mode 100644 index 000000000..c3064b6ff --- /dev/null +++ b/modules/nf-core/bwafastalign/mem/tests/main.nf.test.snap @@ -0,0 +1,227 @@ +{ + "sarscov2 - [fastq1, fastq2], index, fasta, false": { + "content": [ + "49bb35727c1eec2fc59473c0cf26c09c", + "57aeef88ed701a8ebc8e2f0a381b2a6", + { + "versions_bwafastalign": [ + [ + "BWAFASTALIGN_MEM", + "bwafastalign", + "1.0.0" + ] + ], + "versions_mbuffer": [ + [ + "BWAFASTALIGN_MEM", + "mbuffer", + " 20160228" + ] + ], + "versions_samtools": [ + [ + "BWAFASTALIGN_MEM", + "samtools", + "1.21" + ] + 
] + } + ], + "timestamp": "2026-06-12T08:29:18.833924106", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "BWAFASTALIGN_MEM", + "bwafastalign", + "1.0.0" + ] + ], + "3": [ + [ + "BWAFASTALIGN_MEM", + "samtools", + "1.21" + ] + ], + "4": [ + [ + "BWAFASTALIGN_MEM", + "mbuffer", + " 20160228" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bwafastalign": [ + [ + "BWAFASTALIGN_MEM", + "bwafastalign", + "1.0.0" + ] + ], + "versions_mbuffer": [ + [ + "BWAFASTALIGN_MEM", + "mbuffer", + " 20160228" + ] + ], + "versions_samtools": [ + [ + "BWAFASTALIGN_MEM", + "samtools", + "1.21" + ] + ] + } + ], + "timestamp": "2026-06-12T08:32:15.456124051", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "content": [ + "ff9384834da1473da24b9e75718695f8", + "af8628d9df18b2d3d4f6fd47ef2bb872", + { + "versions_bwafastalign": [ + [ + "BWAFASTALIGN_MEM", + "bwafastalign", + "1.0.0" + ] + ], + "versions_mbuffer": [ + [ + "BWAFASTALIGN_MEM", + "mbuffer", + " 20160228" + ] + ], + "versions_samtools": [ + [ + "BWAFASTALIGN_MEM", + "samtools", + "1.21" + ] + ] + } + ], + "timestamp": "2026-06-12T08:32:05.037964846", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "sarscov2 - fastq, index, fasta, false": { + "content": [ + "8f2ce115e2c6aabbd963c5375dc4637a", + "798439cbd7fd81cbcc5078022dc5479d", + { + "versions_bwafastalign": [ + [ + 
"BWAFASTALIGN_MEM", + "bwafastalign", + "1.0.0" + ] + ], + "versions_mbuffer": [ + [ + "BWAFASTALIGN_MEM", + "mbuffer", + " 20160228" + ] + ], + "versions_samtools": [ + [ + "BWAFASTALIGN_MEM", + "samtools", + "1.21" + ] + ] + } + ], + "timestamp": "2026-06-12T08:23:56.737918563", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "sarscov2 - fastq, index, fasta, true": { + "content": [ + "bf990ba932129f47990a9a9f98c30038", + "94fcf617f5b994584c4e8d4044e16b4f", + { + "versions_bwafastalign": [ + [ + "BWAFASTALIGN_MEM", + "bwafastalign", + "1.0.0" + ] + ], + "versions_mbuffer": [ + [ + "BWAFASTALIGN_MEM", + "mbuffer", + " 20160228" + ] + ], + "versions_samtools": [ + [ + "BWAFASTALIGN_MEM", + "samtools", + "1.21" + ] + ] + } + ], + "timestamp": "2026-06-12T08:26:37.839861629", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/modules/nf-core/bwafastalign/mem/tests/nextflow.config b/modules/nf-core/bwafastalign/mem/tests/nextflow.config new file mode 100644 index 000000000..fc560b4f6 --- /dev/null +++ b/modules/nf-core/bwafastalign/mem/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + withName: 'BWAFASTALIGN.*' { + memory = 15.GB + } + withName: 'BWAFASTALIGN_MEM' { + ext.args2 = { params.module_args2 } + ext.args3 = { params.module_args3 } + } +} diff --git a/modules/nf-core/bwameme/mem/README.md b/modules/nf-core/bwameme/mem/README.md new file mode 100644 index 000000000..b681f9a02 --- /dev/null +++ b/modules/nf-core/bwameme/mem/README.md @@ -0,0 +1,51 @@ +# bwameme/mem options + +bwa-meme is a high-throughput aligner that produces alignments faster than samtools can consume them. To prevent bwa-meme from stalling while samtools processes data, mbuffer is used as an intermediary to absorb alignments until samtools is ready. The mbuffer and samtools options are configurable via `ext.args2` and `ext.args3` respectively. 
+ +## Configuring mbuffer (`ext.args2`) + +`ext.args2` is passed to mbuffer. The default buffer size is 3GB (`-m 3072M`). If `-m` is not present in `ext.args2`, the default is injected automatically. + +The mbuffer size should match the total memory allocated to `samtools sort` (`-m` × `-@`) so it can absorb bwa-meme output while samtools is flushing its sort buffer to disk. + +``` +withName: 'BWAMEME_MEM' { + ext.args2 = '-m 20480M' // 20GB mbuffer to match samtools sort total (e.g. -m 1024M x -@ 20) +} +``` + +## Configuring samtools (`ext.args3`) + +`ext.args3` is passed to `samtools sort` or `samtools view` depending on the `sort_bam` input. Defaults are injected if not supplied: + +- `-@ 3` (threads) — always injected if `-@` is absent +- `-m 1024M` (memory per thread) — injected if `-m` is absent and `sort_bam` is true + +``` +withName: 'BWAMEME_MEM' { + ext.args3 = '-@ 20 -m 1024M' // 20 threads, 1GB per thread = 20GB total +} +``` + +## Example: tuning for human genome alignment + +For a large reference (e.g. human genome) on a well-resourced machine, you may want to increase both values. The mbuffer size should equal the total samtools sort memory: + +``` +withName: 'BWAMEME_MEM' { + ext.args2 = '-m 20480M' // mbuffer = samtools total (20 threads x 1GB) + ext.args3 = '-@ 20 -m 1024M' +} +``` + +## CRAM output + +To produce CRAM output, pass `--output-fmt` via `ext.args3`: + +``` +withName: 'BWAMEME_MEM' { + ext.args3 = '--output-fmt cram' +} +``` + +A FASTA reference must be provided as `input[2]` when using CRAM output. diff --git a/modules/nf-core/bwameme/mem/main.nf b/modules/nf-core/bwameme/mem/main.nf index 14421453b..6b356e302 100644 --- a/modules/nf-core/bwameme/mem/main.nf +++ b/modules/nf-core/bwameme/mem/main.nf @@ -3,7 +3,7 @@ process BWAMEME_MEM { label 'process_high' conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9d/9ddd41b93c5e182db9d643ca266dd1677e59593a9cb49904b982ff45ad5aa8c3/data': 'community.wave.seqera.io/library/bwa-meme_mbuffer_samtools:03f3f60b6c289776' }" @@ -12,42 +12,32 @@ process BWAMEME_MEM { tuple val(meta2), path(index) tuple val(meta3), path(fasta) val sort_bam - val mbuffer - val samtools_threads output: - tuple val(meta), path("*.sam") , emit: sam , optional:true - tuple val(meta), path("*.bam") , emit: bam , optional:true - tuple val(meta), path("*.cram") , emit: cram, optional:true - tuple val(meta), path("*.crai") , emit: crai, optional:true - tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("${prefix}.{sam,bam,cram}"), emit: output + tuple val(meta), path("${prefix}.{csi,crai}") , emit: index , optional: true tuple val("${task.process}"), val('bwameme'), val('1.0.6'), topic: versions, emit: versions_bwameme tuple val("${task.process}"), val('samtools'), eval('samtools version | sed "1!d;s/.* //"'), topic: versions, emit: versions_samtools // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + tuple val("${task.process}"), val('mbuffer'), eval("mbuffer --version 2>&1 | sed -n 's/mbuffer version //p'") , topic: versions, emit: versions_mbuffer when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args3 = task.ext.args3 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" def samtools_command = sort_bam ? 'sort' : 'view' - if (!mbuffer) { - log.info '[bwameme-mbuffer] Memory for mbuffer is not set - defaulting to 3GB for mbuffer.' 
- mbuffer_mem = 3072 - } else { - mbuffer_mem = mbuffer - } - if (!samtools_threads) { - log.info 'Number of threads for samtools is not set - defaulting to 2 threads.' - threads = 2 - } else { - threads = samtools_threads - } - mbuffer_command = sort_bam ? "| mbuffer -m ${mbuffer_mem}M" : "" - mem_per_thread = sort_bam ? "-m "+ (mbuffer_mem/threads).intValue()+"M" : "" + // ext.args2 controls mbuffer options; inject default -m if not supplied + def mbuffer_args = args2.contains('-m') ? args2 : "-m 3072M ${args2}".trim() + def mbuffer_command = sort_bam ? "| mbuffer ${mbuffer_args}" : "" + // ext.args3 controls samtools options; inject defaults for -@ and -m (sort only) if not supplied + def samtools_threads_arg = args3.contains('-@') ? '' : '-@ 3' + def samtools_mem_arg = (sort_bam && !args3.contains('-m')) ? '-m 1024M' : '' + def samtools_args = "${samtools_mem_arg} ${samtools_threads_arg} ${args3}".trim() def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ - def extension_matcher = (args2 =~ extension_pattern) + def extension_matcher = (args3 =~ extension_pattern) def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" @@ -61,24 +51,20 @@ process BWAMEME_MEM { \$INDEX \\ $reads \\ $mbuffer_command \\ - | samtools $samtools_command $args2 $mem_per_thread -@ $threads ${reference} -o ${prefix}.${extension} - + | samtools $samtools_command $samtools_args ${reference} -o ${prefix}.${extension} - """ stub: - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def args3 = task.ext.args3 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ - def extension_matcher = (args2 =~ extension_pattern) + def extension_matcher = (args3 =~ extension_pattern) def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" - def create_index = "" - if (extension == "cram") { - create_index = "touch ${prefix}.crai" - } else if (extension == "bam") { - create_index = "touch ${prefix}.csi" - } + def create_index = extension == "cram" ? "touch ${prefix}.crai" : + extension == "bam" ? 
"touch ${prefix}.csi" : "" """ touch ${prefix}.${extension} ${create_index} diff --git a/modules/nf-core/bwameme/mem/meta.yml b/modules/nf-core/bwameme/mem/meta.yml index 324d96245..32620223e 100644 --- a/modules/nf-core/bwameme/mem/meta.yml +++ b/modules/nf-core/bwameme/mem/meta.yml @@ -17,9 +17,9 @@ tools: homepage: https://github.com/kaist-ina/BWA-MEME documentation: https://github.com/kaist-ina/BWA-MEME#getting-started doi: "10.1093/bioinformatics/btac137" - licence: ["MIT"] + licence: + - "MIT" identifier: "" - input: - - meta: type: map @@ -56,67 +56,28 @@ input: type: boolean description: use samtools sort (true) or samtools view (false) pattern: "true or false" - - mbuffer: - type: integer - description: memory for mbuffer in megabytes (default 3072) - - samtools_threads: - type: integer - description: number of threads for samtools (default 2) output: - sam: + output: - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.sam": + - ${prefix}.{sam,bam,cram}: type: file - description: Output SAM file containing read alignments - pattern: "*.{sam}" + description: Output alignment file (SAM, BAM, or CRAM) + pattern: "*.{sam,bam,cram}" ontologies: [] - bam: + index: - - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.bam": + - ${prefix}.{csi,crai}: type: file - description: Output BAM file containing read alignments - pattern: "*.{bam}" - ontologies: [] - cram: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.cram": - type: file - description: Output CRAM file containing read alignments - pattern: "*.{cram}" - ontologies: [] - crai: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - "*.crai": - type: file - description: Index file for CRAM file - pattern: "*.{crai}" - ontologies: [] - csi: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - "*.csi": - type: file - description: Index file for BAM file - pattern: "*.{csi}" + description: Index file for BAM (.csi) or CRAM (.crai) output + pattern: "*.{csi,crai}" ontologies: [] versions_bwameme: - - ${task.process}: @@ -131,13 +92,23 @@ output: versions_samtools: - - ${task.process}: type: string - description: Name of the process + description: The name of the process - samtools: type: string description: Name of the tool - samtools version | sed "1!d;s/.* //": type: eval description: The expression to obtain the version of the tool + versions_mbuffer: + - - ${task.process}: + type: string + description: The name of the process + - mbuffer: + type: string + description: The name of the tool + - mbuffer --version 2>&1 | sed -n 's/mbuffer version //p': + type: eval + description: The expression to obtain the version of the tool topics: versions: - - ${task.process}: @@ -151,13 +122,22 @@ topics: description: The expression to obtain the version of the tool - - ${task.process}: type: string - description: Name of the process + description: The name of the process - samtools: type: string description: Name of the tool - samtools version | sed "1!d;s/.* //": type: eval description: The expression to obtain the version of the tool + - - ${task.process}: + type: string + description: The name of the process + - mbuffer: + type: string + description: The name of the tool + - mbuffer --version 2>&1 | sed -n 's/mbuffer version //p': + type: eval + description: The expression to obtain the version of the tool authors: - "@ramprasadn" maintainers: diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test b/modules/nf-core/bwameme/mem/tests/main.nf.test index fd378c0b3..31911da25 100644 --- 
a/modules/nf-core/bwameme/mem/tests/main.nf.test +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test @@ -26,9 +26,14 @@ nextflow_process { } } - test("sarscov2 - fastq, index, fasta, false, 0, 4") { + test("sarscov2 - fastq, index, fasta, false") { when { + params { + module_args2 = "" + module_args3 = "" + } + process { """ input[0] = Channel.of([ @@ -38,8 +43,6 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = false - input[4] = 0 - input[5] = 4 """ } } @@ -48,18 +51,23 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - bam(process.out.bam[0][1]).getHeaderMD5(), - bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + bam(process.out.output[0][1]).getHeaderMD5(), + bam(process.out.output[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} ).match() } ) } } - test("sarscov2 - fastq, index, fasta, true, 2048, 4") { + test("sarscov2 - fastq, index, fasta, true") { when { + params { + module_args2 = "-m 2048M" + module_args3 = "-@ 4" + } + process { """ input[0] = Channel.of([ @@ -69,8 +77,6 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true - input[4] = 2048 - input[5] = 4 """ } } @@ -79,18 +85,23 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - bam(process.out.bam[0][1]).getHeaderMD5(), - bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + bam(process.out.output[0][1]).getHeaderMD5(), + bam(process.out.output[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} ).match() } ) } } - test("sarscov2 - [fastq1, fastq2], index, fasta, false, 0, 4") { + test("sarscov2 - [fastq1, fastq2], index, fasta, false") { 
when { + params { + module_args2 = "" + module_args3 = "" + } + process { """ input[0] = Channel.of([ @@ -103,8 +114,6 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = false - input[4] = 0 - input[5] = 4 """ } } @@ -113,18 +122,23 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - bam(process.out.bam[0][1]).getHeaderMD5(), - bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + bam(process.out.output[0][1]).getHeaderMD5(), + bam(process.out.output[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} ).match() } ) } } - test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, ''") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true") { when { + params { + module_args2 = "-m 2048M" + module_args3 = "-@ 4" + } + process { """ input[0] = Channel.of([ @@ -137,8 +151,6 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true - input[4] = 2048 - input[5] = "" """ } } @@ -147,20 +159,25 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - bam(process.out.bam[0][1]).getHeaderMD5(), - bam(process.out.bam[0][1]).getReadsMD5(), - process.out.versions + bam(process.out.output[0][1]).getHeaderMD5(), + bam(process.out.output[0][1]).getReadsMD5(), + process.out.findAll { key, val -> key.startsWith("versions")} ).match() } ) } } - test("sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4 - stub") { + test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") { options "-stub" when { + params { + module_args2 = "-m 2048M" + module_args3 = "-@ 4" + } + process { """ input[0] = Channel.of([ @@ -173,8 +190,6 @@ nextflow_process { input[1] = BWAMEME_INDEX.out.index 
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) input[3] = true - input[4] = 2048 - input[5] = 4 """ } } diff --git a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap index 5c6487e75..5d3180a46 100644 --- a/modules/nf-core/bwameme/mem/tests/main.nf.test.snap +++ b/modules/nf-core/bwameme/mem/tests/main.nf.test.snap @@ -1,59 +1,42 @@ { - "sarscov2 - [fastq1, fastq2], index, fasta, false, 0, 4": { + "sarscov2 - [fastq1, fastq2], index, fasta, false": { "content": [ - "b537f2936bcb8c120127251ab44d917b", + "2dd7c6542f553040eebaa572c10fe035", "57aeef88ed701a8ebc8e2f0a381b2a6", - null - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.3" - }, - "timestamp": "2026-02-13T19:51:39.510097" - }, - "sarscov2 - fastq, index, fasta, true, 2048, 4": { - "content": [ - "91c0b36a7aa238cca2acbce1997d94e8", - "94fcf617f5b994584c4e8d4044e16b4f", - null - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.3" - }, - "timestamp": "2026-02-13T19:51:19.196468" - }, - "sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, ''": { - "content": [ - "ffc18373a0992612ccb5df41a245947b", - "af8628d9df18b2d3d4f6fd47ef2bb872", - null - ], - "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.3" - }, - "timestamp": "2026-02-13T19:52:00.628046" - }, - "sarscov2 - fastq, index, fasta, false, 0, 4": { - "content": [ - "585cc3d6cdff0dce2e30b6e8c1542669", - "798439cbd7fd81cbcc5078022dc5479d", - null + { + "versions_bwameme": [ + [ + "BWAMEME_MEM", + "bwameme", + "1.0.6" + ] + ], + "versions_mbuffer": [ + [ + "BWAMEME_MEM", + "mbuffer", + "20160228" + ] + ], + "versions_samtools": [ + [ + "BWAMEME_MEM", + "samtools", + "1.21" + ] + ] + } ], + "timestamp": "2026-06-12T08:19:39.496429529", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.3" - }, - "timestamp": "2026-02-13T19:50:58.198221" + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } }, - 
"sarscov2 - [fastq1, fastq2], index, fasta, true, 2048, 4 - stub": { + "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { "content": [ { "0": [ - - ], - "1": [ [ { "id": "test", @@ -62,13 +45,7 @@ "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "2": [ - - ], - "3": [ - - ], - "4": [ + "1": [ [ { "id": "test", @@ -77,47 +54,113 @@ "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "5": [ + "2": [ [ "BWAMEME_MEM", "bwameme", "1.0.6" ] ], - "6": [ + "3": [ [ "BWAMEME_MEM", "samtools", "1.21" ] ], - "bam": [ + "4": [ + [ + "BWAMEME_MEM", + "mbuffer", + "20160228" + ] + ], + "index": [ [ { "id": "test", "single_end": false }, - "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "crai": [ - - ], - "cram": [ - - ], - "csi": [ + "output": [ [ { "id": "test", "single_end": false }, - "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bwameme": [ + [ + "BWAMEME_MEM", + "bwameme", + "1.0.6" + ] + ], + "versions_mbuffer": [ + [ + "BWAMEME_MEM", + "mbuffer", + "20160228" ] ], - "sam": [ - + "versions_samtools": [ + [ + "BWAMEME_MEM", + "samtools", + "1.21" + ] + ] + } + ], + "timestamp": "2026-06-12T08:20:13.023631084", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "content": [ + "8c45fe3cdc7021ffab9c5eea83fc5850", + "af8628d9df18b2d3d4f6fd47ef2bb872", + { + "versions_bwameme": [ + [ + "BWAMEME_MEM", + "bwameme", + "1.0.6" + ] ], + "versions_mbuffer": [ + [ + "BWAMEME_MEM", + "mbuffer", + "20160228" + ] + ], + "versions_samtools": [ + [ + "BWAMEME_MEM", + "samtools", + "1.21" + ] + ] + } + ], + "timestamp": "2026-06-12T08:20:02.737739229", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "sarscov2 - fastq, index, fasta, false": { + "content": [ + "9661f4c03c6b2abdc93204890aa61673", + "798439cbd7fd81cbcc5078022dc5479d", + { "versions_bwameme": 
[ [ "BWAMEME_MEM", @@ -125,6 +168,47 @@ "1.0.6" ] ], + "versions_mbuffer": [ + [ + "BWAMEME_MEM", + "mbuffer", + "20160228" + ] + ], + "versions_samtools": [ + [ + "BWAMEME_MEM", + "samtools", + "1.21" + ] + ] + } + ], + "timestamp": "2026-06-12T08:18:55.571422607", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, + "sarscov2 - fastq, index, fasta, true": { + "content": [ + "30aae1770b553c055553c48dcad73384", + "94fcf617f5b994584c4e8d4044e16b4f", + { + "versions_bwameme": [ + [ + "BWAMEME_MEM", + "bwameme", + "1.0.6" + ] + ], + "versions_mbuffer": [ + [ + "BWAMEME_MEM", + "mbuffer", + "20160228" + ] + ], "versions_samtools": [ [ "BWAMEME_MEM", @@ -134,10 +218,10 @@ ] } ], + "timestamp": "2026-06-12T08:19:17.606378245", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.3" - }, - "timestamp": "2026-02-13T19:52:05.913267" + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } } } \ No newline at end of file diff --git a/modules/nf-core/bwameme/mem/tests/nextflow.config b/modules/nf-core/bwameme/mem/tests/nextflow.config index e7dd707ea..835b858dc 100644 --- a/modules/nf-core/bwameme/mem/tests/nextflow.config +++ b/modules/nf-core/bwameme/mem/tests/nextflow.config @@ -1,9 +1,10 @@ process { - withName: BWAMEME_INDEX { ext.args = '-a meme' } withName: BWAMEME_MEM { - ext.args = '-7' + ext.args = '-7' + ext.args2 = { params.module_args2 } + ext.args3 = { params.module_args3 } } } diff --git a/modules/nf-core/ensemblvep/filtervep/main.nf b/modules/nf-core/ensemblvep/filtervep/main.nf deleted file mode 100644 index d9deff488..000000000 --- a/modules/nf-core/ensemblvep/filtervep/main.nf +++ /dev/null @@ -1,40 +0,0 @@ -process ENSEMBLVEP_FILTERVEP { - tag "${meta.id}" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/3d/3da6e21cbf9803529421d7e136d1ebec5ff71ec50e0d996eda2ce11ec2c19bf9/data' - : 'community.wave.seqera.io/library/ensembl-vep_perl-math-cdf:1e13f65f931a6954'}" - - input: - tuple val(meta), path(input) - path feature_file - - output: - tuple val(meta), path("*.${extension}"), emit: output - tuple val("${task.process}"), val('ensemblvep'), eval("vep --help | sed -n '/ensembl-vep/s/.*: //p'"), topic: versions, emit: versions_ensemblvep - tuple val("${task.process}"), val('perl-math-cdf'), eval("perl -MMath::CDF -e 'print \\\$Math::CDF::VERSION'"), topic: versions, emit: versions_perlmathcdf - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - extension = task.ext.suffix ?: "vcf" - """ - filter_vep \\ - ${args} \\ - --input_file ${input} \\ - --output_file ${prefix}.${extension} \\ - --only_matched - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - extension = task.ext.suffix ?: "vcf" - """ - touch ${prefix}.${extension} - """ -} diff --git a/modules/nf-core/ensemblvep/filtervep/meta.yml b/modules/nf-core/ensemblvep/filtervep/meta.yml deleted file mode 100644 index 20239ba91..000000000 --- a/modules/nf-core/ensemblvep/filtervep/meta.yml +++ /dev/null @@ -1,92 +0,0 @@ -name: ensemblvep_filtervep -description: Filter variants based on Ensembl Variant Effect Predictor (VEP) - annotations. -keywords: - - annotation - - vcf - - tab - - filter -tools: - - ensemblvep: - description: | - VEP determines the effect of your variants (SNPs, insertions, deletions, CNVs - or structural variants) on genes, transcripts, and protein sequence, as well as regulatory regions. 
- homepage: https://www.ensembl.org/info/docs/tools/vep/index.html - documentation: https://www.ensembl.org/info/docs/tools/vep/script/index.html - licence: - - "Apache-2.0" - identifier: "" -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - input: - type: file - description: VCF/TAB file annotated with vep - pattern: "*.{vcf,tab,tsv,txt}" - ontologies: - - edam: http://edamontology.org/format_3475 - - feature_file: - type: file - description: File containing features on separate lines. To be used with - --filter option. - ontologies: [] -output: - output: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'test', single_end:false ]` - - "*.${extension}": - type: file - description: VCF/TAB file - pattern: "*.{vcf,tab,txt,tsv}" - ontologies: - - edam: http://edamontology.org/format_3475 - versions_ensemblvep: - - - ${task.process}: - type: string - description: The process the versions were collected from - - ensemblvep: - type: string - description: The tool name - - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": - type: eval - description: The command used to generate the version of the tool - versions_perlmathcdf: - - - ${task.process}: - type: string - description: The process the versions were collected from - - perl-math-cdf: - type: string - description: The tool name - - perl -MMath::CDF -e 'print \\$Math::CDF::VERSION': - type: eval - description: The expression to obtain the version of the tool -topics: - versions: - - - ${task.process}: - type: string - description: The process the versions were collected from - - ensemblvep: - type: string - description: The tool name - - "vep --help | sed -n '/ensembl-vep/s/.*: //p'": - type: eval - description: The command used to generate the version of the tool - - - ${task.process}: - type: string - description: The process the versions were collected from - - perl-math-cdf: - type: 
string - description: The tool name - - perl -MMath::CDF -e 'print \\$Math::CDF::VERSION': - type: eval - description: The expression to obtain the version of the tool -authors: - - "@ramprasadn" -maintainers: - - "@ramprasadn" diff --git a/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test deleted file mode 100644 index 044934579..000000000 --- a/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test +++ /dev/null @@ -1,109 +0,0 @@ -nextflow_process { - - name "Test Process ENSEMBLVEP_FILTERVEP" - script "../main.nf" - process "ENSEMBLVEP_FILTERVEP" - config "./nextflow.config" - - tag "modules" - tag "modules_nfcore" - tag "ensemblvep" - tag "ensemblvep/vep" - tag "ensemblvep/filtervep" - - test("test_ensemblvep_filtervep_vcf") { - config "./vcf.config" - - setup { - run("ENSEMBLVEP_VEP") { - script "../../vep/main.nf" - - process { - """ - vep_cache = Channel.of(file('s3://annotation-cache/vep_cache/115_WBcel235/')).collect() - - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), - [] - ]) - input[1] = params.vep_genome - input[2] = params.vep_species - input[3] = params.vep_cache_version - input[4] = vep_cache - input[5] = Channel.value([ - [id:"fasta"], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[6] = [] - """ - } - } - } - - when { - process { - """ - input[0] = ENSEMBLVEP_VEP.out.vcf - input[1] = [] - """ - } - } - - then { - assert process.success - assertAll( - { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() }, - { assert path(process.out.output.get(0).get(1)).readLines().first().contains("##fileformat=VCFv4.2") } - ) - } - } - - test("test_ensemblvep_filtervep_tab_gz") { - config "./tab.gz.config" - - setup { - run("ENSEMBLVEP_VEP") { - script "../../vep/main.nf" - - 
process { - """ - vep_cache = Channel.of(file('s3://annotation-cache/vep_cache/115_WBcel235/')).collect() - - input[0] = Channel.of([ - [ id:'test' ], // meta map - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), - [] - ]) - input[1] = params.vep_genome - input[2] = params.vep_species - input[3] = params.vep_cache_version - input[4] = vep_cache - input[5] = Channel.value([ - [id:"fasta"], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[6] = [] - """ - } - } - } - - when { - process { - """ - input[0] = ENSEMBLVEP_VEP.out.tab - input[1] = [] - """ - } - } - - then { - assert process.success - assertAll( - { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() }, - { assert path(process.out.output.get(0).get(1)).readLines().first().contains("## ENSEMBL VARIANT EFFECT PREDICTOR v115.2") } - ) - } - } -} diff --git a/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap b/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap deleted file mode 100644 index 6ed6da27b..000000000 --- a/modules/nf-core/ensemblvep/filtervep/tests/main.nf.test.snap +++ /dev/null @@ -1,52 +0,0 @@ -{ - "test_ensemblvep_filtervep_vcf": { - "content": [ - { - "versions_ensemblvep": [ - [ - "ENSEMBLVEP_FILTERVEP", - "ensemblvep", - "115.2" - ] - ], - "versions_perlmathcdf": [ - [ - "ENSEMBLVEP_FILTERVEP", - "perl-math-cdf", - "0.1" - ] - ] - } - ], - "timestamp": "2026-02-25T10:48:08.943902", - "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.4" - } - }, - "test_ensemblvep_filtervep_tab_gz": { - "content": [ - { - "versions_ensemblvep": [ - [ - "ENSEMBLVEP_FILTERVEP", - "ensemblvep", - "115.2" - ] - ], - "versions_perlmathcdf": [ - [ - "ENSEMBLVEP_FILTERVEP", - "perl-math-cdf", - "0.1" - ] - ] - } - ], - "timestamp": "2026-02-25T10:48:20.24913", - "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.4" - } - } -} \ No 
newline at end of file diff --git a/modules/nf-core/ensemblvep/filtervep/tests/nextflow.config b/modules/nf-core/ensemblvep/filtervep/tests/nextflow.config deleted file mode 100644 index 32ff2cb6f..000000000 --- a/modules/nf-core/ensemblvep/filtervep/tests/nextflow.config +++ /dev/null @@ -1,10 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -params { - vep_cache_version = "115" - vep_genome = "WBcel235" - vep_species = "caenorhabditis_elegans" -} diff --git a/modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config b/modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config deleted file mode 100644 index ace089baa..000000000 --- a/modules/nf-core/ensemblvep/filtervep/tests/tab.gz.config +++ /dev/null @@ -1,18 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -process { - - withName: ENSEMBLVEP_VEP { - ext.args = '--tab --offline' - ext.prefix = { "${meta.id}_vep" } - } - - withName: ENSEMBLVEP_FILTERVEP { - ext.args = '--filter "Feature_type is Transcript"' - ext.suffix = "tab" - } -} diff --git a/modules/nf-core/ensemblvep/filtervep/tests/vcf.config b/modules/nf-core/ensemblvep/filtervep/tests/vcf.config deleted file mode 100644 index 9f34e2a90..000000000 --- a/modules/nf-core/ensemblvep/filtervep/tests/vcf.config +++ /dev/null @@ -1,17 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for running tests -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -process { - - withName: ENSEMBLVEP_VEP { - ext.args = '--vcf --offline' - ext.prefix = 
{ "${meta.id}_vep" } - } - - withName: ENSEMBLVEP_FILTERVEP { - ext.args = '--filter "Feature_type is Transcript"' - } -} diff --git a/modules/nf-core/saltshaker/call/environment.yml b/modules/nf-core/saltshaker/call/environment.yml index f76c4bf5a..ba1172042 100644 --- a/modules/nf-core/saltshaker/call/environment.yml +++ b/modules/nf-core/saltshaker/call/environment.yml @@ -4,4 +4,4 @@ channels: dependencies: - pip==26.0.1 - pip: - - saltshaker==1.0.0 + - saltshaker==1.1.1 diff --git a/modules/nf-core/saltshaker/call/main.nf b/modules/nf-core/saltshaker/call/main.nf index 7565ce603..4f16b1f4e 100644 --- a/modules/nf-core/saltshaker/call/main.nf +++ b/modules/nf-core/saltshaker/call/main.nf @@ -2,10 +2,11 @@ process SALTSHAKER_CALL { tag "$meta.id" label 'process_single' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e9/e93d703b195dd27cd920cee46669d3f51043216c12fd05168c937e93adf170e8/data': - 'community.wave.seqera.io/library/pip_saltshaker:e08e38a6d45f8f32' }" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5a902cc9f161d602fde9c268a509be2f593cfac7ed4cdc2219f630e02e43b2ec/data': + 'community.wave.seqera.io/library/pip_saltshaker:be40ca61bbf77cf2' }" input: tuple val(meta), path(breakpoint), path(cluster) @@ -20,7 +21,7 @@ process SALTSHAKER_CALL { output: tuple val(meta), path("*_call_metadata.tsv"), emit: call - tuple val("${task.process}"), val('saltshaker'), val("1.0.0"), topic: versions, emit: versions_saltshaker + tuple val("${task.process}"), val('saltshaker'), val("1.1.1"), topic: versions, emit: versions_saltshaker when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/saltshaker/call/meta.yml b/modules/nf-core/saltshaker/call/meta.yml index 51be6c452..773034b7b 100644 --- a/modules/nf-core/saltshaker/call/meta.yml +++ b/modules/nf-core/saltshaker/call/meta.yml @@ -7,10 +7,11 @@ keywords: - structural-variant calling tools: - "saltshaker": - description: "A Python package for classifying and visualizing mitochondrial structural variants from MitoSAlt pipeline output." - homepage: "https://github.com/aksenia/saltshaker" - documentation: "https://github.com/aksenia/saltshaker/tree/main/saltshaker/docs" - + description: "A Python package for classifying and visualizing mitochondrial structural + variants from MitoSAlt pipeline output." + homepage: "https://pypi.org/project/saltshaker" + documentation: "https://pypi.org/project/saltshaker/" + identifier: "" input: - - meta: type: map @@ -58,7 +59,6 @@ input: - light_strand_origin_end: type: integer description: End position of the light strand origin of replication - output: call: - - meta: @@ -68,20 +68,21 @@ output: e.g. 
`[ id:'sample1' ]` - "*_call_metadata.tsv": type: file - description: tsv with variant call metadata to be used in saltshaker_classify + description: tsv with variant call metadata to be used in + saltshaker_classify pattern: "*_call_metadata.tsv" - ontologies: [] + ontologies: + - edam: http://edamontology.org/format_3475 versions_saltshaker: - - - "${task.process}": + - - ${task.process}: type: string description: The name of the process - - "saltshaker": + - saltshaker: type: string description: The name of the tool - - "1.0.0": + - 1.1.1: type: string - description: Hardcoded version of saltshaker used in the module - + description: The expression to obtain the version of the tool topics: versions: - - ${task.process}: @@ -90,9 +91,9 @@ topics: - saltshaker: type: string description: The name of the tool - - 1.0.0: + - 1.1.1: type: string - description: Hardcoded version of saltshaker used in the module + description: The expression to obtain the version of the tool authors: - "@ieduba" maintainers: diff --git a/modules/nf-core/saltshaker/call/tests/main.nf.test.snap b/modules/nf-core/saltshaker/call/tests/main.nf.test.snap index d377c6626..fa9ff6b47 100644 --- a/modules/nf-core/saltshaker/call/tests/main.nf.test.snap +++ b/modules/nf-core/saltshaker/call/tests/main.nf.test.snap @@ -7,22 +7,22 @@ { "id": "test" }, - "test.saltshaker_call_metadata.tsv:md5,f0e21ea8c12afecbae37cd9b55f8f8c2" + "test.saltshaker_call_metadata.tsv:md5,5d0d87c5e2a29a07afa0b60cdb9c72d2" ] ], "versions_saltshaker": [ [ "SALTSHAKER_CALL", "saltshaker", - "1.0.0" + "1.1.1" ] ] } ], - "timestamp": "2026-03-06T15:18:19.555305", + "timestamp": "2026-06-01T13:42:19.667355", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.0" + "nf-test": "0.9.5", + "nextflow": "26.04.3" } }, "call - tsv - stub": { @@ -40,15 +40,15 @@ [ "SALTSHAKER_CALL", "saltshaker", - "1.0.0" + "1.1.1" ] ] } ], - "timestamp": "2026-03-05T09:54:13.739643", + "timestamp": "2026-06-01T13:41:08.005824", "meta": { - 
"nf-test": "0.9.4", - "nextflow": "25.10.0" + "nf-test": "0.9.5", + "nextflow": "26.04.3" } } } \ No newline at end of file diff --git a/modules/nf-core/saltshaker/classify/environment.yml b/modules/nf-core/saltshaker/classify/environment.yml index 3672c7873..ba1172042 100644 --- a/modules/nf-core/saltshaker/classify/environment.yml +++ b/modules/nf-core/saltshaker/classify/environment.yml @@ -4,4 +4,4 @@ channels: dependencies: - pip==26.0.1 - pip: - - saltshaker==1.0.1 + - saltshaker==1.1.1 diff --git a/modules/nf-core/saltshaker/classify/main.nf b/modules/nf-core/saltshaker/classify/main.nf index 72fa27c16..4a685c5b6 100644 --- a/modules/nf-core/saltshaker/classify/main.nf +++ b/modules/nf-core/saltshaker/classify/main.nf @@ -2,10 +2,11 @@ process SALTSHAKER_CLASSIFY { tag "$meta.id" label 'process_single' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/0c/0c955cc086622ef50876a10e58a1e6711e42b70a0e4cbbc377142b62b0ad4f47/data': - 'community.wave.seqera.io/library/pip_saltshaker:ef543ea5ca09afbe' }" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5a902cc9f161d602fde9c268a509be2f593cfac7ed4cdc2219f630e02e43b2ec/data': + 'community.wave.seqera.io/library/pip_saltshaker:be40ca61bbf77cf2' }" input: tuple val(meta), path(call) @@ -15,7 +16,7 @@ process SALTSHAKER_CLASSIFY { tuple val(meta), path("*_classify_metadata.tsv"), emit: classify tuple val(meta), path("*_classify.txt") , emit: txt tuple val(meta), path("*saltshaker.vcf") , emit: vcf, optional: true - tuple val("${task.process}"), val('saltshaker'), val("1.0.1"), topic: versions, emit: versions_saltshaker + tuple val("${task.process}"), val('saltshaker'), val("1.1.1"), topic: versions, emit: versions_saltshaker when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/saltshaker/classify/meta.yml b/modules/nf-core/saltshaker/classify/meta.yml index be07a8946..c02c10c59 100644 --- a/modules/nf-core/saltshaker/classify/meta.yml +++ b/modules/nf-core/saltshaker/classify/meta.yml @@ -8,8 +8,8 @@ keywords: tools: - "saltshaker": description: "A Python package for classifying and visualizing mitochondrial structural variants from MitoSAlt pipeline output." 
- homepage: "https://github.com/aksenia/saltshaker" - documentation: "https://github.com/aksenia/saltshaker/tree/main/saltshaker/docs" + homepage: "https://pypi.org/project/saltshaker" + documentation: "https://pypi.org/project/saltshaker" input: - - meta: @@ -74,7 +74,7 @@ output: - saltshaker: type: string description: The name of the tool - - 1.0.1: + - 1.1.1: type: string description: Hardcoded version of saltshaker used in the module topics: @@ -85,7 +85,7 @@ topics: - saltshaker: type: string description: The name of the tool - - 1.0.1: + - 1.1.1: type: string description: Hardcoded version of saltshaker used in the module authors: diff --git a/modules/nf-core/saltshaker/classify/tests/main.nf.test.snap b/modules/nf-core/saltshaker/classify/tests/main.nf.test.snap index 507105e8c..c7e9d7868 100644 --- a/modules/nf-core/saltshaker/classify/tests/main.nf.test.snap +++ b/modules/nf-core/saltshaker/classify/tests/main.nf.test.snap @@ -25,15 +25,15 @@ [ "SALTSHAKER_CLASSIFY", "saltshaker", - "1.0.1" + "1.1.1" ] ] } ], - "timestamp": "2026-03-20T12:29:31.591954", + "timestamp": "2026-06-01T13:52:59.878729", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.0" + "nf-test": "0.9.5", + "nextflow": "26.04.3" } }, "classify - defaults": { @@ -44,7 +44,7 @@ { "id": "test" }, - "test.saltshaker_classify_metadata.tsv:md5,0c86c16ae38a5dc7070ad76e764aee6d" + "test.saltshaker_classify_metadata.tsv:md5,2ed89e393a16ab2b7602ed06e807e187" ] ], "txt": [ @@ -52,7 +52,7 @@ { "id": "test" }, - "test.saltshaker_classify.txt:md5,8e11ad30539d0b5e0ab9a42b4d68b208" + "test.saltshaker_classify.txt:md5,21b5114cd8e2d1cd720de1bfa18abd3e" ] ], "vcf": [ @@ -62,15 +62,15 @@ [ "SALTSHAKER_CLASSIFY", "saltshaker", - "1.0.1" + "1.1.1" ] ] } ], - "timestamp": "2026-03-20T12:29:26.57794", + "timestamp": "2026-06-01T15:59:15.738107", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.0" + "nf-test": "0.9.5", + "nextflow": "26.04.3" } }, "classify - vcf - stub": { @@ -104,15 +104,15 @@ [ 
"SALTSHAKER_CLASSIFY", "saltshaker", - "1.0.1" + "1.1.1" ] ] } ], - "timestamp": "2026-03-20T11:05:52.842015", + "timestamp": "2026-06-01T13:52:46.708731", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.0" + "nf-test": "0.9.5", + "nextflow": "26.04.3" } }, "classify - vcf": { @@ -123,7 +123,7 @@ { "id": "test" }, - "test.saltshaker_classify_metadata.tsv:md5,0c86c16ae38a5dc7070ad76e764aee6d" + "test.saltshaker_classify_metadata.tsv:md5,2ed89e393a16ab2b7602ed06e807e187" ] ], "txt": [ @@ -131,7 +131,7 @@ { "id": "test" }, - "test.saltshaker_classify.txt:md5,8e11ad30539d0b5e0ab9a42b4d68b208" + "test.saltshaker_classify.txt:md5,21b5114cd8e2d1cd720de1bfa18abd3e" ] ], "vcf": [ @@ -139,22 +139,24 @@ { "id": "test" }, - "test.saltshaker.vcf:md5,0c59728289ee8bcb19d44aea4d3155ab" + + "test.saltshaker.vcf:md5,e8e3c00ef62888bb217983a4bf5559a5" ] ], "versions_saltshaker": [ [ "SALTSHAKER_CLASSIFY", "saltshaker", - "1.0.1" + "1.1.1" ] ] } ], - "timestamp": "2026-03-20T11:05:45.058275", + + "timestamp": "2026-06-05T09:21:05.922028", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.0" + "nf-test": "0.9.5", + "nextflow": "26.04.3" } } } \ No newline at end of file diff --git a/modules/nf-core/saltshaker/plot/environment.yml b/modules/nf-core/saltshaker/plot/environment.yml index f76c4bf5a..ba1172042 100644 --- a/modules/nf-core/saltshaker/plot/environment.yml +++ b/modules/nf-core/saltshaker/plot/environment.yml @@ -4,4 +4,4 @@ channels: dependencies: - pip==26.0.1 - pip: - - saltshaker==1.0.0 + - saltshaker==1.1.1 diff --git a/modules/nf-core/saltshaker/plot/main.nf b/modules/nf-core/saltshaker/plot/main.nf index 56e7b9a3e..75679dfde 100644 --- a/modules/nf-core/saltshaker/plot/main.nf +++ b/modules/nf-core/saltshaker/plot/main.nf @@ -2,17 +2,18 @@ process SALTSHAKER_PLOT { tag "$meta.id" label 'process_single' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. 
conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e9/e93d703b195dd27cd920cee46669d3f51043216c12fd05168c937e93adf170e8/data': - 'community.wave.seqera.io/library/pip_saltshaker:e08e38a6d45f8f32' }" + container "${ workflow.containerEngine in ['singularity', 'apptainer'] && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/5a/5a902cc9f161d602fde9c268a509be2f593cfac7ed4cdc2219f630e02e43b2ec/data': + 'community.wave.seqera.io/library/pip_saltshaker:be40ca61bbf77cf2' }" input: tuple val(meta), path(classify) output: tuple val(meta), path("*saltshaker.png"), emit: plot - tuple val("${task.process}"), val('saltshaker'), val("1.0.0"), topic: versions, emit: versions_saltshaker + tuple val("${task.process}"), val('saltshaker'), val("1.1.1"), topic: versions, emit: versions_saltshaker when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/saltshaker/plot/meta.yml b/modules/nf-core/saltshaker/plot/meta.yml index b95deb3f5..200700538 100644 --- a/modules/nf-core/saltshaker/plot/meta.yml +++ b/modules/nf-core/saltshaker/plot/meta.yml @@ -7,10 +7,11 @@ keywords: - structural-variant calling tools: - "saltshaker": - description: "A Python package for classifying and visualizing mitochondrial structural variants from MitoSAlt pipeline output." - homepage: "https://github.com/aksenia/saltshaker" - documentation: "https://github.com/aksenia/saltshaker/tree/main/saltshaker/docs" - + description: "A Python package for classifying and visualizing mitochondrial structural + variants from MitoSAlt pipeline output." 
+ homepage: "https://pypi.org/project/saltshaker" + documentation: "https://pypi.org/project/saltshaker" + identifier: "" input: - - meta: type: map @@ -24,7 +25,6 @@ input: ontologies: - edam: http://edamontology.org/operation_3225 #classification - edam: http://edamontology.org/format_3475 #tsv - output: plot: - - meta: @@ -46,19 +46,18 @@ output: - "saltshaker": type: string description: The name of the tool - - "1.0.0": + - "1.1.1": type: string description: Hardcoded version of saltshaker used in the module - topics: versions: - - - ${task.process}: + - - "${task.process}": type: string description: The name of the process - - saltshaker: + - "saltshaker": type: string description: The name of the tool - - 1.0.0: + - "1.1.1": type: string description: Hardcoded version of saltshaker used in the module authors: diff --git a/modules/nf-core/saltshaker/plot/tests/main.nf.test b/modules/nf-core/saltshaker/plot/tests/main.nf.test index ec18cc241..f40353dcb 100644 --- a/modules/nf-core/saltshaker/plot/tests/main.nf.test +++ b/modules/nf-core/saltshaker/plot/tests/main.nf.test @@ -68,8 +68,10 @@ nextflow_process { assert process.success assertAll( { assert snapshot( - sanitizeOutput(process.out), - ).match() } + process.out.findAll { key, val -> key.startsWith('versions') }, + process.out.plot.collect { meta, plot -> file(plot).name } + ).match() + } ) } diff --git a/modules/nf-core/saltshaker/plot/tests/main.nf.test.snap b/modules/nf-core/saltshaker/plot/tests/main.nf.test.snap index ea7f4d94c..ebde03458 100644 --- a/modules/nf-core/saltshaker/plot/tests/main.nf.test.snap +++ b/modules/nf-core/saltshaker/plot/tests/main.nf.test.snap @@ -14,41 +14,36 @@ [ "SALTSHAKER_PLOT", "saltshaker", - "1.0.0" + "1.1.1" ] ] } ], - "timestamp": "2026-03-17T14:57:52.937691", + "timestamp": "2026-06-03T10:34:26.252966", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.0" + "nf-test": "0.9.5", + "nextflow": "26.04.3" } }, "plot - png": { "content": [ { - "plot": [ - [ - { - "id": 
"test" - }, - "test.saltshaker.png:md5,c5c8f7b96daab112b53dfe42194855af" - ] - ], "versions_saltshaker": [ [ "SALTSHAKER_PLOT", "saltshaker", - "1.0.0" + "1.1.1" ] ] - } + }, + [ + "test.saltshaker.png" + ] ], - "timestamp": "2026-03-17T14:57:46.77575", + "timestamp": "2026-06-05T09:43:08.53978", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.0" + "nf-test": "0.9.5", + "nextflow": "26.04.3" } } } \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 4baa3cf23..e03f5347e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,10 +50,14 @@ params { fasta = null fai = null bwa = null + bwafastalign = null bwamem2 = null bwameme = null call_interval = null + cadd_prescored = null cadd_resources = null + manta_call_regions = null + manta_call_regions_tbi = null gcnvcaller_model = null gens_interval_list = null gens_pon_female = null @@ -110,7 +114,7 @@ params { // Alignment aligner = 'bwamem2' mt_aligner = 'bwamem2' - mbuffer_mem = 3072 + mbuffer_mem = 8192 samtools_sort_threads = 4 min_trimmed_length = 40 mt_subsample_rd = 150 @@ -518,7 +522,7 @@ includeConfig 'conf/modules/call_sv_tiddit.config' includeConfig 'conf/modules/postprocess_MT_calls.config' includeConfig 'conf/modules/call_mobile_elements.config' includeConfig 'conf/modules/annotate_mobile_elements.config' -includeConfig 'conf/modules/vcf_filter_bcftools_ensemblvep.config' +includeConfig 'conf/modules/vcf_filter_bcftools_filtervep.config' includeConfig 'conf/modules/variant_evaluation.config' includeConfig 'conf/modules/subsample_mt_frac.config' includeConfig 'conf/modules/subsample_mt_reads.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index a5b135431..e8d3c34c6 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -65,6 +65,14 @@ "help_text": "If none provided, will be generated automatically from the FASTA reference.", "fa_icon": "fas fa-folder-open" }, + "bwafastalign": { + "type": "string", + "exists": true, + "format": "directory-path", + 
"description": "Directory for pre-built bwa-fastalign index.", + "help_text": "If none provided, will be generated automatically from the FASTA reference.", + "fa_icon": "fas fa-folder-open" + }, "bwamem2": { "type": "string", "exists": true, @@ -81,6 +89,14 @@ "help_text": "If none provided, will be generated automatically from the FASTA reference.", "fa_icon": "fas fa-folder-open" }, + "cadd_prescored": { + "type": "string", + "exists": true, + "format": "directory-path", + "fa_icon": "fas fa-folder-open", + "description": "Path to the directory containing pre-scored CADD indel annotations.", + "help_text": "This folder contains the pre-scored indel files that would otherwise be in data/prescored folder as described in https://github.com/kircherlab/CADD-scripts/#manual-installation." + }, "cadd_resources": { "type": "string", "exists": true, @@ -235,6 +251,23 @@ "description": "Local directory base for genome references that map to the config.", "help_text": "This folder is a flat structure with file names that map to the config." }, + "manta_call_regions": { + "type": "string", + "exists": true, + "format": "file-path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed\\.gz$", + "description": "Path to a bgzipped BED file restricting Manta SV calling to specific regions (e.g. primary chromosomes). Only applied for WGS; for WES, Manta always uses target_bed instead.", + "help_text": "Must be supplied together with --manta_call_regions_tbi. Only used when analysis_type is wgs — for wes, Manta uses the target_bed supplied via --target_bed and this parameter has no effect. Useful for reducing runtime on references with many short contigs such as GRCh38." + }, + "manta_call_regions_tbi": { + "type": "string", + "exists": true, + "format": "file-path", + "fa_icon": "fas fa-file", + "pattern": "^\\S+\\.bed\\.gz\\.tbi$", + "description": "Tabix index for the file supplied via --manta_call_regions." 
+ }, "mito_name": { "type": "string", "description": "Name of the mitochondrial contig in the reference fasta file", @@ -635,7 +668,7 @@ "default": "bwamem2", "description": "Specifies the alignment algorithm to use - available options are 'bwamem2', 'bwa', 'bwameme' and 'sentieon'.", "fa_icon": "fas fa-align-center", - "enum": ["bwa", "bwamem2", "bwameme", "sentieon"] + "enum": ["bwa", "bwafastalign", "bwamem2", "bwameme", "sentieon"] }, "mt_aligner": { "type": "string", @@ -647,14 +680,14 @@ "samtools_sort_threads": { "type": "integer", "default": 4, - "description": "Number of threads allocated for sorting alignment files (used only by bwameme)", + "description": "Number of threads allocated for sorting alignment files (used by bwameme and bwafastalign)", "help_text": "To know more about this parameter check [bwameme](https://github.com/kaist-ina/BWA-MEME?tab=readme-ov-file#building-pipeline-with-samtools) documentation.", "fa_icon": "fas fa-less-than" }, "mbuffer_mem": { "type": "integer", - "default": 3072, - "description": "Memory allocated for mbuffer in megabytes (used only by bwameme)", + "default": 8192, + "description": "Memory allocated for mbuffer in megabytes (MB) (used by bwameme and bwafastalign)", "help_text": "To know more about this parameter check [bwameme](https://github.com/kaist-ina/BWA-MEME?tab=readme-ov-file#building-pipeline-with-samtools) documentation.", "fa_icon": "fas fa-less-than" }, diff --git a/subworkflows/local/align/main.nf b/subworkflows/local/align/main.nf index 932ace4f8..aedfbf0e3 100644 --- a/subworkflows/local/align/main.nf +++ b/subworkflows/local/align/main.nf @@ -14,36 +14,35 @@ include { SAMTOOLS_VIEW as SAMTOOLS_VIEW_EXCLUDE_ALT } from '../../../modules/nf workflow ALIGN { take: - ch_alignments // channel: [optional] [ val(meta), [path(bam),path(bai)] ] - ch_genome_bwaindex // channel: [mandatory] [ val(meta), path(index) ] - ch_genome_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] - 
ch_genome_bwamemeindex // channel: [mandatory] [ val(meta), path(index) ] - ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_input_reads // channel: [optional] [ val(meta), [path(reads)] ] - ch_mt_bwaindex // channel: [mandatory] [ val(meta), path(index) ] - ch_mt_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] - ch_mt_dictionary // channel: [mandatory] [ val(meta), path(dict) ] - ch_mt_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_mt_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ] - ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] - ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ] - ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - skip_fastp // boolean - val_aligner // string: 'bwa', 'bwamem2', 'bwameme', or 'sentieon' - val_analysis_type // string: 'wgs', 'wes', or 'mito' - val_exclude_alt // boolean - val_extract_alignments // boolean - val_mbuffer_mem // integer: [mandatory] memory in megabytes - val_mt_aligner // string: 'bwa', 'bwamem2', or 'sentieon' - val_platform // string: [mandatory] illumina or a different technology - val_run_mt_for_wes // boolean - val_samtools_sort_threads // integer: [mandatory] number of sorting threads - val_save_all_mapped_as_cram // boolean - val_save_noalt_mapped_as_cram // boolean + ch_alignments // channel: [optional] [ val(meta), [path(bam),path(bai)] ] + ch_genome_bwafastalignindex // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_bwaindex // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_genome_bwamemeindex // channel: [mandatory] 
[ val(meta), path(index) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_input_reads // channel: [optional] [ val(meta), [path(reads)] ] + ch_mt_bwaindex // channel: [mandatory] [ val(meta), path(index) ] + ch_mt_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_mt_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_mt_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_mt_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_mtshift_bwaindex // channel: [mandatory] [ val(meta), path(index) ] + ch_mtshift_bwamem2index // channel: [mandatory] [ val(meta), path(index) ] + ch_mtshift_dictionary // channel: [mandatory] [ val(meta), path(dict) ] + ch_mtshift_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_mtshift_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + skip_fastp // boolean + val_aligner // string: 'bwa', 'bwafastalign', 'bwamem2', 'bwameme', or 'sentieon' + val_analysis_type // string: 'wgs', 'wes', or 'mito' + val_exclude_alt // boolean + val_extract_alignments // boolean + val_mt_aligner // string: 'bwa', 'bwamem2', or 'sentieon' + val_platform // string: [mandatory] illumina or a different technology + val_run_mt_for_wes // boolean + val_save_all_mapped_as_cram // boolean + val_save_noalt_mapped_as_cram // boolean main: ch_bwamem2_bam = channel.empty() @@ -91,9 +90,10 @@ workflow ALIGN { ch_input_bam = ch_input_aligned.bam ch_input_bai = ch_input_aligned.bai - if (val_aligner.matches("bwamem2|bwa|bwameme")) { + if (val_aligner.matches("bwamem2|bwa|bwameme|bwafastalign")) { ALIGN_BWA_BWAMEM2_BWAMEME ( ch_genome_bwaindex, + ch_genome_bwafastalignindex, ch_genome_bwamem2index, ch_genome_bwamemeindex, ch_genome_fai, @@ -101,9 +101,7 @@ workflow ALIGN { ch_input_reads, val_aligner, val_extract_alignments, - val_mbuffer_mem, - 
val_platform, - val_samtools_sort_threads + val_platform ) ch_bwamem2_bam = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bam ch_bwamem2_bai = ALIGN_BWA_BWAMEM2_BWAMEME.out.marked_bai @@ -200,7 +198,7 @@ workflow ALIGN { ch_bam_publish = channel.empty() if (!val_save_noalt_mapped_as_cram && !val_save_all_mapped_as_cram) { - if (val_aligner.matches("bwamem2|bwa|bwameme")) { + if (val_aligner.matches("bwamem2|bwa|bwameme|bwafastalign")) { ch_bam_publish = ALIGN_BWA_BWAMEM2_BWAMEME.out.publish } else if (val_aligner.equals("sentieon")) { ch_bam_publish = ALIGN_SENTIEON.out.publish diff --git a/subworkflows/local/align/tests/main.nf.test b/subworkflows/local/align/tests/main.nf.test index 7a8e427ac..c7f41cd5b 100644 --- a/subworkflows/local/align/tests/main.nf.test +++ b/subworkflows/local/align/tests/main.nf.test @@ -7,6 +7,7 @@ nextflow_workflow { tag "subworkflows" tag "align" tag "align_bwa_bwamem2_bwameme" + tag "bwafastalign/mem" tag "align_sentieon" tag "align_MT" tag "convert_mt_bam_to_fastq" @@ -72,12 +73,13 @@ nextflow_workflow { """ input[0] = channel.empty() input[1] = channel.empty() - input[2] = GENOME_MEM2.out.index.collect() - input[3] = channel.empty() - input[4] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.dict', checkIfExists: true)).collect() - input[5] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)).collect() - input[6] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() - input[7] = channel.of( + input[2] = channel.empty() + input[3] = GENOME_MEM2.out.index.collect() + input[4] = channel.empty() + input[5] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.dict', checkIfExists: true)).collect() + input[6] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', 
checkIfExists: true)).collect() + input[7] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() + input[8] = channel.of( [ [id:'earlycasualcaiman', sample:'earlycasualcaiman', single_end:false, num_lanes:1, read_group: "'@RG\\\\tID:earlycasualcaiman\\\\tPL:illumina\\\\tSM:earlycasualcaiman'", lane:1, sex:1, phenotype:1, paternal:0, maternal:0, case_id:'justhusky'], [ @@ -100,28 +102,26 @@ nextflow_workflow { ] ] ) - input[8] = channel.empty() - input[9] = MT_MEM2.out.index.collect() - input[10] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() - input[11] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() - input[12] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() - input[13] = channel.empty() - input[14] = SHIFTMT_MEM2.out.index.collect() - input[15] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() - input[16] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() - input[17] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() - input[18] = false - input[19] = "bwamem2" - input[20] = "wgs" - input[21] = true - input[22] = false - input[23] = 3072 + input[9] = channel.empty() + input[10] = MT_MEM2.out.index.collect() + input[11] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() + input[12] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() + 
input[13] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() + input[14] = channel.empty() + input[15] = SHIFTMT_MEM2.out.index.collect() + input[16] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() + input[17] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() + input[18] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() + input[19] = false + input[20] = "bwamem2" + input[21] = "wgs" + input[22] = true + input[23] = false input[24] = "bwamem2" input[25] = "illumina" input[26] = false - input[27] = 4 + input[27] = true input[28] = true - input[29] = true """ } } @@ -179,12 +179,13 @@ nextflow_workflow { """ input[0] = channel.empty() input[1] = channel.empty() - input[2] = GENOME_MEM2.out.index.collect() - input[3] = channel.empty() - input[4] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.dict', checkIfExists: true)).collect() - input[5] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)).collect() - input[6] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() - input[7] = channel.of( + input[2] = channel.empty() + input[3] = GENOME_MEM2.out.index.collect() + input[4] = channel.empty() + input[5] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.dict', checkIfExists: true)).collect() + input[6] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)).collect() + input[7] = channel.of([id:'genome'], 
file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() + input[8] = channel.of( [ [id:'earlycasualcaiman', sample:'earlycasualcaiman', single_end:false, num_lanes:1, read_group: "'@RG\\\\tID:earlycasualcaiman\\\\tPL:illumina\\\\tSM:earlycasualcaiman'", lane:1, sex:1, phenotype:1, paternal:0, maternal:0, case_id:'justhusky'], [ @@ -207,28 +208,26 @@ nextflow_workflow { ] ] ) - input[8] = channel.empty() - input[9] = MT_MEM2.out.index.collect() - input[10] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() - input[11] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() - input[12] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() - input[13] = channel.empty() - input[14] = SHIFTMT_MEM2.out.index.collect() - input[15] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() - input[16] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() - input[17] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() - input[18] = false - input[19] = "bwamem2" - input[20] = "wes" - input[21] = false + input[9] = channel.empty() + input[10] = MT_MEM2.out.index.collect() + input[11] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() + input[12] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() + input[13] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', 
checkIfExists: true)).collect() + input[14] = channel.empty() + input[15] = SHIFTMT_MEM2.out.index.collect() + input[16] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() + input[17] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() + input[18] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() + input[19] = false + input[20] = "bwamem2" + input[21] = "wes" input[22] = false - input[23] = 3072 + input[23] = false input[24] = "bwamem2" input[25] = "illumina" input[26] = false - input[27] = 4 - input[28] = true - input[29] = false + input[27] = true + input[28] = false """ } } @@ -284,11 +283,12 @@ nextflow_workflow { input[0] = channel.empty() input[1] = channel.empty() input[2] = channel.empty() - input[3] = GENOME_MEME.out.index.collect() - input[4] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.dict', checkIfExists: true)).collect() - input[5] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)).collect() - input[6] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() - input[7] = channel.fromList([ + input[3] = channel.empty() + input[4] = GENOME_MEME.out.index.collect() + input[5] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.dict', checkIfExists: true)).collect() + input[6] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)).collect() + input[7] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() + input[8] = 
channel.fromList([ [[ id:'earlycasualcaiman', sample:'earlycasualcaiman', single_end:false, num_lanes:1, read_group: "'@RG\\\\tID:earlycasualcaiman\\\\tPL:illumina\\\\tSM:earlycasualcaiman'", 'lane':1, 'sex':1, 'phenotype':1, 'paternal':0, 'maternal':0, 'case_id':'justhusky' ], // meta map [ file(params.pipelines_testdata_base_path + '/testdata/1_171015_HHT5NDSXX_earlycasualcaiman_XXXXXX_1.fastq.gz', checkIfExists: true), @@ -305,28 +305,26 @@ nextflow_workflow { file(params.pipelines_testdata_base_path + '/testdata/1_171015_HHT5NDSXX_slowlycivilbuck_XXXXXX_2.fastq.gz', checkIfExists: true) ]] ]) - input[8] = channel.empty() - input[9] = MT_MEM2.out.index.collect() - input[10] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() - input[11] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() - input[12] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() - input[13] = channel.empty() - input[14] = SHIFTMT_MEM2.out.index.collect() - input[15] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() - input[16] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() - input[17] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() - input[18] = true - input[19] = "bwameme" - input[20] = "wgs" - input[21] = false + input[9] = channel.empty() + input[10] = MT_MEM2.out.index.collect() + input[11] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() + input[12] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 
'reference/reference_mt.fa.fai', checkIfExists: true)).collect() + input[13] = channel.of([id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() + input[14] = channel.empty() + input[15] = SHIFTMT_MEM2.out.index.collect() + input[16] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.dict', checkIfExists: true)).collect() + input[17] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)).collect() + input[18] = channel.of([id:'shiftmt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)).collect() + input[19] = true + input[20] = "bwameme" + input[21] = "wgs" input[22] = false - input[23] = 3072 + input[23] = false input[24] = "bwamem2" input[25] = "illumina" input[26] = false - input[27] = 4 - input[28] = true - input[29] = false + input[27] = true + input[28] = false """ } } diff --git a/subworkflows/local/align_bwa_bwamem2_bwameme/main.nf b/subworkflows/local/align_bwa_bwamem2_bwameme/main.nf index 6ea0434de..99af7cd75 100644 --- a/subworkflows/local/align_bwa_bwamem2_bwameme/main.nf +++ b/subworkflows/local/align_bwa_bwamem2_bwameme/main.nf @@ -2,6 +2,7 @@ // Map to reference, fetch stats for each demultiplexed read pair, merge, mark duplicates, and index. 
// +include { BWAFASTALIGN_MEM } from '../../../modules/nf-core/bwafastalign/mem/main' include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' include { BWAMEME_MEM } from '../../../modules/nf-core/bwameme/mem/main' include { BWA_MEM as BWA } from '../../../modules/nf-core/bwa/mem/main' @@ -17,25 +18,27 @@ include { SAMTOOLS_VIEW as EXTRACT_ALIGNMENTS } from '../../../modules/nf-c workflow ALIGN_BWA_BWAMEM2_BWAMEME { take: ch_bwa_index // channel: [mandatory] [ val(meta), path(bwa_index) ] + ch_bwafastalign_index // channel: [mandatory] [ val(meta), path(bwafastalign_index) ] ch_bwamem2_index // channel: [mandatory] [ val(meta), path(bwamem2_index) ] ch_bwameme_index // channel: [mandatory] [ val(meta), path(bwameme_index) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_input_reads // channel: [mandatory] [ val(meta), path(reads_input) ] - val_aligner // string: 'bwa', 'bwamem2', 'bwameme', or 'sentieon' + val_aligner // string: 'bwa', 'bwafastalign', 'bwamem2', or 'bwameme' val_extract_alignments // boolean - val_mbuffer_mem // integer: [mandatory] default: 3072 val_platform // string: [mandatory] default: illumina - val_sort_threads // integer: [mandatory] default: 4 main: // Map, sort, and index if (val_aligner.equals("bwa")) { BWA ( ch_input_reads, ch_bwa_index, ch_genome_fasta, true ) ch_align = BWA.out.bam + } else if (val_aligner.equals("bwafastalign")) { + BWAFASTALIGN_MEM ( ch_input_reads, ch_bwafastalign_index, ch_genome_fasta, true ) + ch_align = BWAFASTALIGN_MEM.out.output } else if (val_aligner.equals("bwameme")) { - BWAMEME_MEM ( ch_input_reads, ch_bwameme_index, ch_genome_fasta, true, val_mbuffer_mem, val_sort_threads ) - ch_align = BWAMEME_MEM.out.bam + BWAMEME_MEM ( ch_input_reads, ch_bwameme_index, ch_genome_fasta, true ) + ch_align = BWAMEME_MEM.out.output } else { BWAMEM2_MEM ( ch_input_reads, ch_bwamem2_index, ch_genome_fasta, true ) ch_align 
= BWAMEM2_MEM.out.bam diff --git a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test index 7e64d0355..803c63ff5 100644 --- a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test +++ b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test @@ -8,6 +8,7 @@ nextflow_workflow { tag "align_bwa_bwamem2_bwameme" tag "bwa/mem" + tag "bwafastalign/mem" tag "bwamem2/mem" tag "bwameme/mem" tag "samtools/index" @@ -42,22 +43,80 @@ nextflow_workflow { workflow { """ input[0] = [[:],[]] - input[1] = BWAMEM2_INDEX.out.index + input[1] = [[:],[]] + input[2] = BWAMEM2_INDEX.out.index + input[3] = [[:],[]] + input[4] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]]) + input[5] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[6] = channel.of([ + [ id:'test', sample:'test', single_end:false, num_lanes:1, read_group: "'@RG\\\\tID:test\\\\tPL:illumina\\\\tSM:test'" ], // meta map + [ + file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[7] = "bwamem2" + input[8] = true + input[9] = "illumina" + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.publish.flatten() + .findAll { it instanceof String && (it.startsWith('/') || it.contains('.')) } + .collect { new File(it).name } + .sort(), + workflow.out.marked_bam.collect { meta, bamfile -> [ meta, bam(bamfile).getHeaderMD5() ] }, + workflow.out.marked_bam.collect { meta, bamfile -> [ meta, bam(bamfile).getReadsMD5() ] } + ).match() + } + ) + } + } + + test("align bwafastalign") { + + setup { + 
run("BWAFASTALIGN_INDEX") { + script "modules/nf-core/bwafastalign/index/main.nf" + process { + """ + input[0] = channel.of([ + [id:'sarscov2'], + file('https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + } + + when { + params { + sarscov_testdata_base_path= 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + restrict_to_contigs = "MT192765.1" + } + workflow { + """ + input[0] = [[:],[]] + input[1] = BWAFASTALIGN_INDEX.out.index input[2] = [[:],[]] - input[3] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]]) - input[4] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) - input[5] = channel.of([ + input[3] = [[:],[]] + input[4] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]]) + input[5] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[6] = channel.of([ [ id:'test', sample:'test', single_end:false, num_lanes:1, read_group: "'@RG\\\\tID:test\\\\tPL:illumina\\\\tSM:test'" ], // meta map [ file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ]) - input[6] = "bwamem2" - input[7] = true - input[8] = 3072 + input[7] = "bwafastalign" + input[8] = true input[9] = "illumina" - input[10] = 4 """ } } @@ -103,21 +162,20 @@ nextflow_workflow { """ input[0] = [[:],[]] input[1] = [[:],[]] - input[2] = BWAMEME_INDEX.out.index - input[3] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]]) - input[4] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) - input[5] = channel.of([ + input[2] = [[:],[]] + input[3] = BWAMEME_INDEX.out.index + input[4] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true)]]) + input[5] = channel.of([[id:'sarscov2'], [file(params.sarscov_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[6] = channel.of([ [ id:'test', sample:'test', single_end:false, num_lanes:1, read_group:"\'@RG\\\\tID:test\\\\tPL:illumina\\\\tSM:test\'" ], // meta map [ file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.sarscov_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ]) - input[6] = "bwameme" - input[7] = true - input[8] = 3072 + input[7] = "bwameme" + input[8] = true input[9] = "illumina" - input[10] = 4 """ } } diff --git a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test.snap b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test.snap index 7632f79b8..109453536 100644 --- a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test.snap +++ b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/main.nf.test.snap @@ -1,4 +1,48 @@ { + "align bwafastalign": { + "content": [ + [ + "test_sorted_md.bam", + "test_sorted_md.bam.bai", + "test_sorted_md.metrics.txt" + ], + [ + [ + { + "groupSize": 1, + "groupTarget": { + "id": "test", + "sample": "test", + "single_end": false, + "num_lanes": 1, + "read_group": "'@RG\\tID:test\\tPL:illumina\\tSM:test'" + } + }, + "b88d51cb3010d98935b39ef6a3c58499" + ] + ], + [ + [ + { + "groupSize": 1, + "groupTarget": { + "id": "test", + "sample": "test", + "single_end": false, + "num_lanes": 
1, + "read_group": "'@RG\\tID:test\\tPL:illumina\\tSM:test'" + } + }, + "af8628d9df18b2d3d4f6fd47ef2bb872" + ] + ] + ], + "timestamp": "2026-06-12T15:46:43.756159921", + "meta": { + "nf-test": "0.9.5", + "nextflow": "25.10.4" + } + }, "align bwamem2": { "content": [ [ @@ -37,9 +81,9 @@ ] ] ], - "timestamp": "2026-03-13T15:04:33.885203481", + "timestamp": "2026-06-12T15:43:33.682165862", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.5", "nextflow": "25.10.4" } }, @@ -62,7 +106,7 @@ "read_group": "'@RG\\tID:test\\tPL:illumina\\tSM:test'" } }, - "40408ed51d46ab3f1524be98d6b31441" + "11413e81e47d32ac386d29b693693a2e" ] ], [ @@ -81,9 +125,9 @@ ] ] ], - "timestamp": "2026-03-13T15:05:20.75721248", + "timestamp": "2026-06-12T15:47:31.561305633", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.5", "nextflow": "25.10.4" } } diff --git a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/nextflow.config b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/nextflow.config index 4563b2d78..99347febb 100644 --- a/subworkflows/local/align_bwa_bwamem2_bwameme/tests/nextflow.config +++ b/subworkflows/local/align_bwa_bwamem2_bwameme/tests/nextflow.config @@ -1,12 +1,24 @@ process { + withName: 'BWAFASTALIGN_INDEX' { + memory = 15.GB + } + + withName: 'BWAFASTALIGN_MEM' { + memory = 15.GB + ext.args = { "-M -K 100000000 -R ${meta.read_group}" } + ext.args3 = { "-T ./samtools_sort_tmp" } + ext.prefix = { "${meta.id}_sorted" } + } + withName: 'BWAMEME_INDEX' { ext.args = '-a meme' } withName: 'BWAMEME_MEM' { - ext.args = { "-7 -M -K 100000000 -R ${meta.read_group}" } - ext.args2 = { "-T ./samtools_sort_tmp" } + memory = 15.GB + ext.args = { "-7 -M -K 100000000 -R ${meta.read_group}" } + ext.args3 = { "-T ./samtools_sort_tmp" } ext.prefix = { "${meta.id}_sorted" } } diff --git a/subworkflows/local/annotate_cadd/main.nf b/subworkflows/local/annotate_cadd/main.nf index 5b8d3369a..92f0b6d5d 100644 --- a/subworkflows/local/annotate_cadd/main.nf +++ 
b/subworkflows/local/annotate_cadd/main.nf @@ -13,11 +13,12 @@ include { TABIX_TABIX as TABIX_CADD } from '../../../modules/nf-core/ workflow ANNOTATE_CADD { take: - ch_cadd_resources // channel: [mandatory] [ path(dir) ] - ch_fai // channel: [optional] [ path(fai) ] - ch_header // channel: [mandatory] [ path(txt) ] - ch_vcf // channel: [mandatory] [ val(meta), path(vcfs), path(idx) ] - val_genome // string: GRCh37 or GRCh38 + ch_cadd_resources // channel: [mandatory] [ val(meta), path(dir) ] + ch_cadd_prescored // channel: [optional] [ val(meta), path(prescored) ] + ch_fai // channel: [optional] [ val(meta), path(fai) ] + ch_header // channel: [mandatory] [ path(txt) ] + ch_vcf // channel: [mandatory] [ val(meta), path(vcfs), path(idx) ] + val_genome // string: GRCh37 or GRCh38 main: ch_rename_chrs = channel.value([[]]) @@ -45,7 +46,7 @@ workflow ANNOTATE_CADD { BCFTOOLS_VIEW(ch_vcf, [], [], []) - CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_resources, [[:], []]) + CADD(BCFTOOLS_VIEW.out.vcf, ch_cadd_resources, ch_cadd_prescored) TABIX_CADD(CADD.out.tsv) diff --git a/subworkflows/local/annotate_cadd/tests/main.nf.test b/subworkflows/local/annotate_cadd/tests/main.nf.test index de40e5fe4..d5afb3bb3 100644 --- a/subworkflows/local/annotate_cadd/tests/main.nf.test +++ b/subworkflows/local/annotate_cadd/tests/main.nf.test @@ -26,14 +26,15 @@ nextflow_workflow { workflow { """ input[0] = Channel.from("\$PWD").map { dir -> [ [ id: 'cadd_resources' ], dir ] } - input[1] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect() - input[2] = channel.of(file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)).collect() - input[3] = channel.of([ + input[1] = channel.value([[:], []]) + input[2] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: 
true)]).collect() + input[3] = channel.of(file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)).collect() + input[4] = channel.of([ [id:'test', single_end: false], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ]) - input[4] = 'GRCh37' + input[5] = 'GRCh37' """ } } @@ -57,14 +58,15 @@ nextflow_workflow { workflow { """ input[0] = Channel.from("\$PWD").map { dir -> [ [ id: 'cadd_resources' ], dir ] } - input[1] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect() - input[2] = channel.of(file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)).collect() - input[3] = channel.of([ + input[1] = channel.value([[:], []]) + input[2] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect() + input[3] = channel.of(file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)).collect() + input[4] = channel.of([ [id:'test', single_end: false], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ]) - input[4] = 'GRCh38' + input[5] = 'GRCh38' """ } } diff --git a/subworkflows/local/annotate_genome_snvs/main.nf b/subworkflows/local/annotate_genome_snvs/main.nf index 64de7e73a..f0e3af434 100644 --- a/subworkflows/local/annotate_genome_snvs/main.nf +++ b/subworkflows/local/annotate_genome_snvs/main.nf @@ -21,7 +21,8 @@ workflow 
ANNOTATE_GENOME_SNVS { take: ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] + ch_cadd_prescored // channel: [optional] [ val(meta), path(prescored) ] + ch_cadd_resources // channel: [mandatory] [ val(meta), path(annotation) ] ch_genome_chrsizes // channel: [mandatory] [ path(sizes) ] ch_genome_fai // channel: [mandatory] [ path(fai) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] @@ -121,6 +122,7 @@ workflow ANNOTATE_GENOME_SNVS { ANNOTATE_CADD ( ch_cadd_resources, + ch_cadd_prescored, ch_genome_fai, ch_cadd_header, ch_cadd_in, diff --git a/subworkflows/local/annotate_genome_snvs/tests/main.nf.test b/subworkflows/local/annotate_genome_snvs/tests/main.nf.test index 0b6b95c53..cdb8ca0e7 100644 --- a/subworkflows/local/annotate_genome_snvs/tests/main.nf.test +++ b/subworkflows/local/annotate_genome_snvs/tests/main.nf.test @@ -30,28 +30,29 @@ nextflow_workflow { workflow { """ input[0] = channel.of([[id:'resources'], file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)]).collect() - input[1] = Channel.from(env('PWD')).map { dir -> [ [ id: 'cadd_resources' ], dir ] } - input[2] = channel.of(file(params.pipelines_testdata_base_path + 'reference/reference_chr.sizes', checkIfExists: true)).collect() - input[3] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect() - input[4] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect() - input[5] = channel.value([[],[]]) - input[6] = channel.of([id:'earlycasualcaiman', sample:'earlycasualcaiman', sex:1, phenotype:2, paternal:0, maternal:0, case_id:'justhusky']) - input[7] = channel.of(file(params.pipelines_testdata_base_path + 'reference/target_wgs.interval_list', checkIfExists: true)) - input[8] = 
channel.of([ + input[1] = channel.value([[:], []]) + input[2] = Channel.from(env('PWD')).map { dir -> [ [ id: 'cadd_resources' ], dir ] } + input[3] = channel.of(file(params.pipelines_testdata_base_path + 'reference/reference_chr.sizes', checkIfExists: true)).collect() + input[4] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect() + input[5] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect() + input[6] = channel.value([[],[]]) + input[7] = channel.of([id:'earlycasualcaiman', sample:'earlycasualcaiman', sex:1, phenotype:2, paternal:0, maternal:0, case_id:'justhusky']) + input[8] = channel.of(file(params.pipelines_testdata_base_path + 'reference/target_wgs.interval_list', checkIfExists: true)) + input[9] = channel.of([ [id:'justhusky', probands:['earlycasualcaiman'], upd_children:['earlycasualcaiman'], mother:'', father:''], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ]) - input[9] = channel.value([[]]) - input[10] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect() - input[11] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true)) - input[12] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect() - input[13] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect() - input[14] = channel.value([]) - input[15] = 'wgs' - input[16] = null - input[17] = 'GRCh37' - input[18] = 107 + input[10] = channel.value([[]]) + input[11] = 
channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect() + input[12] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true)) + input[13] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect() + input[14] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect() + input[15] = channel.value([]) + input[16] = 'wgs' + input[17] = null + input[18] = 'GRCh37' + input[19] = 107 """ } } @@ -86,28 +87,29 @@ nextflow_workflow { workflow { """ input[0] = channel.of([[id:'resources'], file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)]).collect() - input[1] = Channel.from(env('PWD')).map { dir -> [ [ id: 'cadd_resources' ], dir ] } - input[2] = channel.of(file(params.pipelines_testdata_base_path + 'reference/reference_chr.sizes', checkIfExists: true)).collect() - input[3] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect() - input[4] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect() - input[5] = channel.value([[],[]]) - input[6] = channel.of([id:'earlycasualcaiman', sample:'earlycasualcaiman', sex:1, phenotype:2, paternal:0, maternal:0, case_id:'justhusky']) - input[7] = channel.of(file(params.pipelines_testdata_base_path + 'reference/target_wgs.interval_list', checkIfExists: true)) - input[8] = channel.of([ + input[1] = channel.value([[:], []]) + input[2] = Channel.from(env('PWD')).map { dir -> [ [ id: 'cadd_resources' ], dir ] } + input[3] = channel.of(file(params.pipelines_testdata_base_path + 'reference/reference_chr.sizes', checkIfExists: 
true)).collect() + input[4] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect() + input[5] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect() + input[6] = channel.value([[],[]]) + input[7] = channel.of([id:'earlycasualcaiman', sample:'earlycasualcaiman', sex:1, phenotype:2, paternal:0, maternal:0, case_id:'justhusky']) + input[8] = channel.of(file(params.pipelines_testdata_base_path + 'reference/target_wgs.interval_list', checkIfExists: true)) + input[9] = channel.of([ [id:'justhusky', probands:['earlycasualcaiman'], upd_children:['earlycasualcaiman'], mother:'', father:''], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) ]) - input[9] = channel.value([[]]) - input[10] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect() - input[11] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true)) - input[12] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect() - input[13] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect() - input[14] = channel.value([]) - input[15] = 'wes' - input[16] = null - input[17] = 'GRCh37' - input[18] = 107 + input[10] = channel.value([[]]) + input[11] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect() + input[12] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true)) + 
input[13] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect() + input[14] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect() + input[15] = channel.value([]) + input[16] = 'wes' + input[17] = null + input[18] = 'GRCh37' + input[19] = 107 """ } } diff --git a/subworkflows/local/annotate_mt_snvs/main.nf b/subworkflows/local/annotate_mt_snvs/main.nf index 5b80e5132..c95545993 100644 --- a/subworkflows/local/annotate_mt_snvs/main.nf +++ b/subworkflows/local/annotate_mt_snvs/main.nf @@ -12,9 +12,10 @@ include { VCFANNO as VCFANNO_MT } from '../../../modules/nf-core/vcf workflow ANNOTATE_MT_SNVS { take: ch_cadd_header // channel: [mandatory] [ path(txt) ] - ch_cadd_resources // channel: [mandatory] [ path(annotation) ] + ch_cadd_prescored // channel: [optional] [ val(meta), path(prescored) ] + ch_cadd_resources // channel: [mandatory] [ val(meta), path(annotation) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_fai // channel: [mandatory] [ path(fai) ] + ch_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_mt_vcf_tbi // channel: [mandatory] [ val(meta), path(vcf), path(tbi) ] ch_vcfanno_extra // channel: [mandatory] [ [path(vcf),path(index)] ] ch_vcfanno_lua // channel: [mandatory] [ path(lua) ] @@ -40,6 +41,7 @@ workflow ANNOTATE_MT_SNVS { if (!val_cadd_resources.equals(null)) { ANNOTATE_CADD ( ch_cadd_resources, + ch_cadd_prescored, ch_fai, ch_cadd_header, VCFANNO_MT.out.vcf.join(VCFANNO_MT.out.tbi, failOnMismatch:true, failOnDuplicate:true), diff --git a/subworkflows/local/annotate_mt_snvs/tests/main.nf.test b/subworkflows/local/annotate_mt_snvs/tests/main.nf.test index 63910dd93..7935ee805 100644 --- a/subworkflows/local/annotate_mt_snvs/tests/main.nf.test +++ b/subworkflows/local/annotate_mt_snvs/tests/main.nf.test @@ -28,24 +28,25 @@ nextflow_workflow { workflow { """ input[0] = 
channel.of([[id:'resources'], file('https://raw.githubusercontent.com/nf-core/raredisease/refs/heads/master/assets/cadd_to_vcf_header_-1.0-.txt', checkIfExists: true)]).collect() - input[1] = Channel.from(env("PWD")).map { dir -> [ [ id: 'cadd_resources' ], dir ] } - input[2] = channel.of([[id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)]).collect() - input[3] = channel.of([[id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)]).collect() - input[4] = channel.of([ + input[1] = channel.value([[:], []]) + input[2] = Channel.from(env("PWD")).map { dir -> [ [ id: 'cadd_resources' ], dir ] } + input[3] = channel.of([[id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)]).collect() + input[4] = channel.of([[id:'mt'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)]).collect() + input[5] = channel.of([ [id:'justhusky', case_id:'justhusky'], file(params.pipelines_testdata_base_path + 'reference/vcfanno_gnomad_mt.vcf.gz', checkIfExists: true), file(params.pipelines_testdata_base_path + 'reference/vcfanno_gnomad_mt.vcf.gz.tbi', checkIfExists: true), ]) - input[5] = channel.value([[]]) - input[6] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect() - input[7] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true)) - input[8] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect() - input[9] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect() - input[10] = channel.value([]) - input[11] = null - input[12] = 'GRCh37' - input[13] = 1.0 - input[14] = 107 + input[6] = channel.value([[]]) + 
input[7] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_functions.lua', checkIfExists: true)).collect() + input[8] = channel.value(file(params.pipelines_testdata_base_path + 'reference/grch38_gnomad_reformated_-r3.1.1-.vcf.gz', checkIfExists: true)) + input[9] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vcfanno_config.toml', checkIfExists: true)).collect() + input[10] = channel.of(file(params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz', checkIfExists: true)).collect() + input[11] = channel.value([]) + input[12] = null + input[13] = 'GRCh37' + input[14] = 1.0 + input[15] = 107 """ } } diff --git a/subworkflows/local/call_structural_variants/main.nf b/subworkflows/local/call_structural_variants/main.nf index b5407b11e..871914722 100644 --- a/subworkflows/local/call_structural_variants/main.nf +++ b/subworkflows/local/call_structural_variants/main.nf @@ -24,6 +24,7 @@ workflow CALL_STRUCTURAL_VARIANTS { ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_hisat2index // channel: [mandatory] [ val(meta), path(hisat2index) ] + ch_manta_regions // channel: [mandatory] [ path(bed), path(tbi) ] ch_mitosalt_config // channel: [mandatory] [val(mitosalt_breakspan),val(mitosalt_breakthreshold),...,val(mitosalt_split_length)] ch_mt_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_mt_fai // channel: [mandatory] [ val(meta), path(mtfai) ] @@ -34,7 +35,6 @@ workflow CALL_STRUCTURAL_VARIANTS { ch_reads // channel: [mandatory] [ val(meta), [path(reads)] ] ch_subdepth // channel: [mandatory] [ val(mitosalt_depth) ] ch_svcaller_priority // channel: [mandatory] [ val(["var caller tag 1", ...]) ] - ch_target_bed // channel: [mandatory for WES] [ val(meta), path(bed), path(tbi) ] skip_germlinecnvcaller // boolean skip_mitosalt // boolean val_analysis_type // string: "wes", "wgs", or "mito" @@ -59,7 
+59,7 @@ workflow CALL_STRUCTURAL_VARIANTS { ch_tiddit_vcf = channel.empty() if (!val_analysis_type.equals("mito")) { - CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_target_bed, val_analysis_type) + CALL_SV_MANTA (ch_genome_bam, ch_genome_bai, ch_genome_fasta, ch_genome_fai, ch_case_info, ch_manta_regions) .filtered_diploid_sv_vcf .collect{ _meta, vcf -> vcf } .set{ ch_manta_vcf } diff --git a/subworkflows/local/call_structural_variants/tests/main.nf.test b/subworkflows/local/call_structural_variants/tests/main.nf.test index f2d69db9d..f4287b53b 100644 --- a/subworkflows/local/call_structural_variants/tests/main.nf.test +++ b/subworkflows/local/call_structural_variants/tests/main.nf.test @@ -42,27 +42,27 @@ nextflow_workflow { input[8] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)]).collect() input[9] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)]).collect() input[10] = Channel.from("\$PWD").map { dir -> [[id:'genome'], file(dir)] } - input[11] = channel.of([15, 2, 5, 30000, 30, 0.00001, 5, 1000, 80, 10000, 5, 15]) - input[12] = channel.of([ + input[11] = channel.value([[], []]) + input[12] = channel.of([15, 2, 5, 30000, 30, 0.00001, 5, 1000, 80, 10000, 5, 15]) + input[13] = channel.of([ [id:'earlycasualcaiman', sample:'earlycasualcaiman', single_end:false, num_lanes:1, read_group: "'@RG\\\\tID:earlycasualcaiman\\\\tPL:illumina\\\\tSM:earlycasualcaiman'", lane:1, sex:1, phenotype:1, paternal:0, maternal:0, case_id:'justhusky'], file(params.pipelines_testdata_base_path + 'testdata/earlycasualcaiman_sorted_md.bam', checkIfExists: true), file(params.pipelines_testdata_base_path + 'testdata/earlycasualcaiman_sorted_md.bam.bai', checkIfExists: true) ]) - input[13] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', 
checkIfExists: true)]).collect() - input[14] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)]).collect() - input[15] = Channel.from("${projectDir}/subworkflows/local/variant_evaluation/tests").map { dir -> [[id:'mt'], file(dir)] } - input[16] = channel.of([[id:'ploidy'], []]) - input[17] = channel.of([[]]) - input[18] = channel.of([ + input[14] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa.fai', checkIfExists: true)]).collect() + input[15] = channel.of([[id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference_mt.fa', checkIfExists: true)]).collect() + input[16] = Channel.from("${projectDir}/subworkflows/local/variant_evaluation/tests").map { dir -> [[id:'mt'], file(dir)] } + input[17] = channel.of([[id:'ploidy'], []]) + input[18] = channel.of([[]]) + input[19] = channel.of([ [id:'earlycasualcaiman', sample:'earlycasualcaiman', single_end:false, num_lanes:1, read_group: "'@RG\\\\tID:earlycasualcaiman\\\\tPL:illumina\\\\tSM:earlycasualcaiman'", lane:1, sex:1, phenotype:1, paternal:0, maternal:0, case_id:'justhusky'], [ file(params.pipelines_testdata_base_path + 'testdata/earlycasualcaiman_mt_1.fastq.gz', checkIfExists: true), file(params.pipelines_testdata_base_path + 'testdata/earlycasualcaiman_mt_2.fastq.gz', checkIfExists: true) ] ]) - input[19] = channel.value(10000000) - input[20] = channel.value(['manta', 'tiddit', 'cnvnator']) - input[21] = channel.of([[id:'target'], [], []]) + input[20] = channel.value(10000000) + input[21] = channel.value(['manta', 'tiddit', 'cnvnator']) input[22] = true input[23] = false input[24] = 'wgs' diff --git a/subworkflows/local/call_structural_variants/tests/main.nf.test.snap b/subworkflows/local/call_structural_variants/tests/main.nf.test.snap index d3b9e9234..3b8a5f36c 100644 --- a/subworkflows/local/call_structural_variants/tests/main.nf.test.snap +++ 
b/subworkflows/local/call_structural_variants/tests/main.nf.test.snap @@ -16,10 +16,10 @@ "justhusky_sv.vcf.gz.tbi" ] ], + "timestamp": "2026-04-14T15:51:47.077800376", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.4" - }, - "timestamp": "2026-04-14T15:51:47.077800376" + } } } \ No newline at end of file diff --git a/subworkflows/local/call_sv_manta/main.nf b/subworkflows/local/call_sv_manta/main.nf index 3a514c3ec..2c01d251f 100644 --- a/subworkflows/local/call_sv_manta/main.nf +++ b/subworkflows/local/call_sv_manta/main.nf @@ -7,13 +7,12 @@ include { MANTA_GERMLINE as MANTA } from '../../../modules/nf-core/ workflow CALL_SV_MANTA { take: - ch_bam // channel: [mandatory] [ val(meta), path(bam) ] - ch_bai // channel: [mandatory] [ val(meta), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_case_info // channel: [mandatory] [ val(case_info) ] - ch_bed // channel: [mandatory for WES] [ val(meta), path(bed), path(tbi) ] - val_analysis_type // string: "wes", "wgs", or "mito" + ch_bam // channel: [mandatory] [ val(meta), path(bam) ] + ch_bai // channel: [mandatory] [ val(meta), path(bai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_case_info // channel: [mandatory] [ val(case_info) ] + ch_regions // channel: [mandatory] [ path(bed), path(tbi) ] main: ch_bam.map{ _meta, bam -> bam } @@ -26,24 +25,11 @@ workflow CALL_SV_MANTA { .toList() .set { bai_file_list } - ch_bed.map { - _id, bed_file, index -> - return [bed_file, index]} - .set { bed_input } - - if (val_analysis_type.equals("wgs")) { - ch_case_info.combine(bam_file_list) - .combine(bai_file_list) - .map { meta, input, index -> [meta, input, index] + [ [], [] ] } - .set { manta_input } - MANTA ( manta_input, ch_genome_fasta, ch_genome_fai, [] ) - } else { - ch_case_info.combine(bam_file_list) - .combine(bai_file_list) 
- .combine(bed_input) - .set { manta_input } - MANTA ( manta_input, ch_genome_fasta, ch_genome_fai, [] ) - } + ch_case_info.combine(bam_file_list) + .combine(bai_file_list) + .combine(ch_regions) + .set { manta_input } + MANTA ( manta_input, ch_genome_fasta, ch_genome_fai, [] ) MANTA.out.diploid_sv_vcf .join(MANTA.out.diploid_sv_vcf_tbi) diff --git a/subworkflows/local/call_sv_manta/tests/main.nf.test b/subworkflows/local/call_sv_manta/tests/main.nf.test index fd43a3cda..1757244d9 100644 --- a/subworkflows/local/call_sv_manta/tests/main.nf.test +++ b/subworkflows/local/call_sv_manta/tests/main.nf.test @@ -12,7 +12,7 @@ nextflow_workflow { config "./nextflow.config" - test("CALL_SV_MANTA - wgs") { + test("CALL_SV_MANTA") { when { workflow { @@ -36,8 +36,7 @@ nextflow_workflow { input[2] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() input[3] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)).collect() input[4] = channel.of([id:'justhusky']) - input[5] = channel.of([[id:'target'], [], []]) - input[6] = 'wgs' + input[5] = channel.value([[], []]) """ } } @@ -52,7 +51,7 @@ nextflow_workflow { ) } } - test("CALL_SV_MANTA - wgs, stub") { + test("CALL_SV_MANTA - stub") { options "-stub" @@ -78,8 +77,7 @@ nextflow_workflow { input[2] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta', checkIfExists: true)).collect() input[3] = channel.of([id:'genome'], file(params.pipelines_testdata_base_path + 'reference/reference.fasta.fai', checkIfExists: true)).collect() input[4] = channel.of([id:'justhusky']) - input[5] = channel.of([[id:'target'], [], []]) - input[6] = 'wgs' + input[5] = channel.value([[], []]) """ } } diff --git a/subworkflows/local/call_sv_manta/tests/main.nf.test.snap b/subworkflows/local/call_sv_manta/tests/main.nf.test.snap index cbc481726..b2b600081 100644 
--- a/subworkflows/local/call_sv_manta/tests/main.nf.test.snap +++ b/subworkflows/local/call_sv_manta/tests/main.nf.test.snap @@ -1,16 +1,16 @@ { - "CALL_SV_MANTA - wgs": { + "CALL_SV_MANTA": { "content": [ "9f37331609347a1685ba4862d8583b2f", "2111e88e54fdd01f0492901606bcea6f" ], - "timestamp": "2026-03-01T20:55:33.556714847", + "timestamp": "2026-06-03T11:38:41.791162924", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.5", "nextflow": "25.10.4" } }, - "CALL_SV_MANTA - wgs, stub": { + "CALL_SV_MANTA - stub": { "content": [ { "0": [ @@ -127,9 +127,9 @@ ] } ], - "timestamp": "2026-03-05T12:58:54.578278212", + "timestamp": "2026-06-03T11:39:05.440330127", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.5", "nextflow": "25.10.4" } } diff --git a/subworkflows/local/postprocess_MT_calls/main.nf b/subworkflows/local/postprocess_MT_calls/main.nf index 758e9ec8d..de88e029a 100644 --- a/subworkflows/local/postprocess_MT_calls/main.nf +++ b/subworkflows/local/postprocess_MT_calls/main.nf @@ -7,6 +7,7 @@ include { BCFTOOLS_ANNOTATE } from '../../.. 
include { BCFTOOLS_MERGE as BCFTOOLS_MERGE_MT } from '../../../modules/nf-core/bcftools/merge/main' include { BCFTOOLS_NORM as REMOVE_DUPLICATES_MT } from '../../../modules/nf-core/bcftools/norm/main' include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_MT } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_POSTMERGE_MT } from '../../../modules/nf-core/bcftools/norm/main' include { GATK4_MERGEVCFS as GATK4_MERGEVCFS_LIFT_UNLIFT_MT } from '../../../modules/nf-core/gatk4/mergevcfs/main' include { GATK4_VARIANTFILTRATION as GATK4_VARIANTFILTRATION_MT } from '../../../modules/nf-core/gatk4/variantfiltration/main' include { PICARD_LIFTOVERVCF } from '../../../modules/nf-core/picard/liftovervcf/main' @@ -94,7 +95,12 @@ workflow POSTPROCESS_MT_CALLS { ch_genome_fasta.join(ch_genome_fai, failOnMismatch:true, failOnDuplicate:true).collect() ) - BCFTOOLS_MERGE_MT.out.vcf + SPLIT_MULTIALLELICS_POSTMERGE_MT( + BCFTOOLS_MERGE_MT.out.vcf.join(BCFTOOLS_MERGE_MT.out.index, failOnMismatch:true, failOnDuplicate:true).collect(), + ch_genome_fasta + ) + + SPLIT_MULTIALLELICS_POSTMERGE_MT.out.vcf .mix(ch_case_vcf.single) .set { ch_addfoundintag_in } diff --git a/subworkflows/local/prepare_references/main.nf b/subworkflows/local/prepare_references/main.nf index b513bb4e6..fda4f87f8 100644 --- a/subworkflows/local/prepare_references/main.nf +++ b/subworkflows/local/prepare_references/main.nf @@ -4,6 +4,7 @@ // include { BEDTOOLS_SLOP as BEDTOOLS_PAD_TARGET_BED } from '../../../modules/nf-core/bedtools/slop/main' +include { BWAFASTALIGN_INDEX as BWAFASTALIGN_INDEX_GENOME } from '../../../modules/nf-core/bwafastalign/index/main' include { BWAMEM2_INDEX as BWAMEM2_INDEX_GENOME } from '../../../modules/nf-core/bwamem2/index/main' include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT } from '../../../modules/nf-core/bwamem2/index/main' include { BWAMEM2_INDEX as BWAMEM2_INDEX_MT_SHIFT } from '../../../modules/nf-core/bwamem2/index/main' @@ -36,9 +37,10 @@ 
include { UNTAR as UNTAR_VEP_CACHE } from '../../../mod workflow PREPARE_REFERENCES { take: - val_aligner // String: "bwa", "bwamem2", "sentieon" or "bwameme" + val_aligner // String: "bwa", "bwafastalign", "bwamem2", "sentieon" or "bwameme" val_analysis_type // String: "wgs", "wes", or "mito" val_bwa // String: path to bwa index + val_bwafastalign // String: path to bwafastalign index val_bwamem2 // String: path to bwamem2 index val_bwameme // String: path to bwameme index val_fai // String: path to genome fasta index @@ -62,6 +64,7 @@ workflow PREPARE_REFERENCES { main: ch_bait_intervals = channel.empty() ch_genome_bwa_index = channel.empty() + ch_genome_bwafastalign_index = channel.empty() ch_genome_bwameme_index = channel.empty() ch_genome_bwamem2_index = channel.empty() ch_genome_hisat2_index = channel.empty() @@ -132,6 +135,12 @@ workflow PREPARE_REFERENCES { } else if (val_bwameme) { ch_genome_bwameme_index = channel.fromPath(val_bwameme).map {it -> [[id:it.simpleName], it]}.collect() } + + if (!val_bwafastalign && val_aligner.equals("bwafastalign")) { + ch_genome_bwafastalign_index = BWAFASTALIGN_INDEX_GENOME(ch_genome_fasta).index.collect() + } else if (val_bwafastalign) { + ch_genome_bwafastalign_index = channel.fromPath(val_bwafastalign).map {it -> [[id:it.simpleName], it]}.collect() + } // // MT genome indices // @@ -278,6 +287,7 @@ workflow PREPARE_REFERENCES { ch_publish = ch_dbsnp .mix(ch_dbsnp_tbi) .mix(ch_genome_bwa_index) + .mix(ch_genome_bwafastalign_index) .mix(ch_genome_bwamem2_index) .mix(ch_genome_bwameme_index) .mix(ch_genome_fai) @@ -316,36 +326,37 @@ workflow PREPARE_REFERENCES { } emit: - bait_intervals = ch_bait_intervals // channel:[ path(intervals) ] - dbsnp = ch_dbsnp // channel:[ val(meta), path(dbsnp) ] - dbsnp_tbi = ch_dbsnp_tbi // channel:[ val(meta), path(dbsnp_idx) ] - genome_bwa_index = ch_genome_bwa_index // channel:[ val(meta), path(index) ] - genome_bwamem2_index = ch_genome_bwamem2_index // channel:[ val(meta), path(index) 
] - genome_bwameme_index = ch_genome_bwameme_index // channel:[ val(meta), path(index) ] - genome_chrom_sizes = ch_chrom_sizes // channel:[ path(sizes) ] - genome_fai = ch_genome_fai // channel:[ val(meta), path(fai) ] - genome_fasta = ch_genome_fasta // channel:[ val(meta), path(fasta) ] - genome_hisat2_index = ch_genome_hisat2_index // channel: [ val(meta), path(index) ] - genome_dict = ch_genome_dict // channel:[ val(meta), path(dict) ] - gnomad_af_idx = ch_gnomad_af_idx // channel:[ val(gnomad), path(idx) ] - mt_bwa_index = ch_mt_bwa_index // channel:[ val(meta), path(index) ] - mt_bwamem2_index = ch_mt_bwamem2_index // channel:[ val(meta), path(index) ] - mt_dict = ch_mt_dict // channel:[ val(meta), path(dict) ] - mt_fai = ch_mt_fai // channel:[ val(meta), path(fai) ] - mt_fasta = ch_mt_fasta // channel:[ val(meta), path(fasta) ] - mt_intervals = ch_shiftfasta_mtintervals // channel:[ path(intervals) ] - mt_last_index = ch_mt_last_index // channel: [ val(meta), path(index) ] - mtshift_backchain = ch_mtshift_backchain // channel:[ val(meta), path(backchain) ] - mtshift_bwa_index = ch_mtshift_bwa_index // channel:[ val(meta), path(index) ] - mtshift_bwamem2_index = ch_mtshift_bwamem2_index // channel:[ val(meta), path(index) ] - mtshift_dict = ch_mtshift_dict // channel:[ val(meta), path(dict) ] - mtshift_fai = ch_mtshift_fai // channel:[ val(meta), path(fai) ] - mtshift_fasta = ch_mtshift_fasta // channel:[ val(meta), path(fasta) ] - mtshift_intervals = ch_shiftfasta_mtshiftintervals // channel:[ path(intervals) ] - sdf = ch_sdf // channel:[ val (meta), path(sdf) ] - target_bed = ch_target_bed_gz_tbi.collect() // channel:[ val(meta), path(bed), path(tbi) ] - target_intervals = ch_target_intervals // channel:[ path(interval_list) ] - vcfanno_extra = ch_vcfanno_extra // channel:[ [path(vcf), path(tbi)] ] - vep_resources = ch_vep_resources // channel:[ path(cache) ] - publish = ch_publish // channel: [ val(destination), val(value) ] + bait_intervals = 
ch_bait_intervals // channel:[ path(intervals) ] + dbsnp = ch_dbsnp // channel:[ val(meta), path(dbsnp) ] + dbsnp_tbi = ch_dbsnp_tbi // channel:[ val(meta), path(dbsnp_idx) ] + genome_bwa_index = ch_genome_bwa_index // channel:[ val(meta), path(index) ] + genome_bwafastalign_index = ch_genome_bwafastalign_index // channel:[ val(meta), path(index) ] + genome_bwamem2_index = ch_genome_bwamem2_index // channel:[ val(meta), path(index) ] + genome_bwameme_index = ch_genome_bwameme_index // channel:[ val(meta), path(index) ] + genome_chrom_sizes = ch_chrom_sizes // channel:[ path(sizes) ] + genome_fai = ch_genome_fai // channel:[ val(meta), path(fai) ] + genome_fasta = ch_genome_fasta // channel:[ val(meta), path(fasta) ] + genome_hisat2_index = ch_genome_hisat2_index // channel: [ val(meta), path(index) ] + genome_dict = ch_genome_dict // channel:[ val(meta), path(dict) ] + gnomad_af_idx = ch_gnomad_af_idx // channel:[ val(gnomad), path(idx) ] + mt_bwa_index = ch_mt_bwa_index // channel:[ val(meta), path(index) ] + mt_bwamem2_index = ch_mt_bwamem2_index // channel:[ val(meta), path(index) ] + mt_dict = ch_mt_dict // channel:[ val(meta), path(dict) ] + mt_fai = ch_mt_fai // channel:[ val(meta), path(fai) ] + mt_fasta = ch_mt_fasta // channel:[ val(meta), path(fasta) ] + mt_intervals = ch_shiftfasta_mtintervals // channel:[ path(intervals) ] + mt_last_index = ch_mt_last_index // channel: [ val(meta), path(index) ] + mtshift_backchain = ch_mtshift_backchain // channel:[ val(meta), path(backchain) ] + mtshift_bwa_index = ch_mtshift_bwa_index // channel:[ val(meta), path(index) ] + mtshift_bwamem2_index = ch_mtshift_bwamem2_index // channel:[ val(meta), path(index) ] + mtshift_dict = ch_mtshift_dict // channel:[ val(meta), path(dict) ] + mtshift_fai = ch_mtshift_fai // channel:[ val(meta), path(fai) ] + mtshift_fasta = ch_mtshift_fasta // channel:[ val(meta), path(fasta) ] + mtshift_intervals = ch_shiftfasta_mtshiftintervals // channel:[ path(intervals) ] + sdf = ch_sdf // 
channel:[ val (meta), path(sdf) ] + target_bed = ch_target_bed_gz_tbi.collect() // channel:[ val(meta), path(bed), path(tbi) ] + target_intervals = ch_target_intervals // channel:[ path(interval_list) ] + vcfanno_extra = ch_vcfanno_extra // channel:[ [path(vcf), path(tbi)] ] + vep_resources = ch_vep_resources // channel:[ path(cache) ] + publish = ch_publish // channel: [ val(destination), val(value) ] } diff --git a/subworkflows/local/prepare_references/tests/main.nf.test b/subworkflows/local/prepare_references/tests/main.nf.test index 56e9d6831..6784060cc 100644 --- a/subworkflows/local/prepare_references/tests/main.nf.test +++ b/subworkflows/local/prepare_references/tests/main.nf.test @@ -9,6 +9,7 @@ nextflow_workflow { tag "bedtools/slop" tag "bwa/index" + tag "bwafastalign/index" tag "bwamem2/index" tag "bwameme/index" tag "cat/cat" @@ -44,21 +45,22 @@ nextflow_workflow { input[3] = null input[4] = null input[5] = null - input[6] = params.pipelines_testdata_base_path + 'reference/reference.fasta' - input[7] = params.pipelines_testdata_base_path + 'reference/gnomad_reformated.tab.gz' - input[8] = null - input[9] = params.pipelines_testdata_base_path + 'reference/dbsnp_-138-.vcf.gz' - input[10] = null - input[11] = "bwa" - input[12] = null - input[13] = false + input[6] = null + input[7] = params.pipelines_testdata_base_path + 'reference/reference.fasta' + input[8] = params.pipelines_testdata_base_path + 'reference/gnomad_reformated.tab.gz' + input[9] = null + input[10] = params.pipelines_testdata_base_path + 'reference/dbsnp_-138-.vcf.gz' + input[11] = null + input[12] = "bwa" + input[13] = null input[14] = false - input[15] = null + input[15] = false input[16] = null - input[17] = params.pipelines_testdata_base_path + 'reference/target.bed' - input[18] = null - input[19] = params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz' - input[20] = true + input[17] = null + input[18] = params.pipelines_testdata_base_path + 
'reference/target.bed' + input[19] = null + input[20] = params.pipelines_testdata_base_path + 'reference/vep_cache_and_plugins.tar.gz' + input[21] = true """ } } diff --git a/subworkflows/local/prepare_references/tests/main.nf.test.snap b/subworkflows/local/prepare_references/tests/main.nf.test.snap index ee140eaa3..74215cf20 100644 --- a/subworkflows/local/prepare_references/tests/main.nf.test.snap +++ b/subworkflows/local/prepare_references/tests/main.nf.test.snap @@ -28,12 +28,32 @@ { "id": "reference" }, - "reference.dict:md5,81abd0fc051c084405024e0927f7b436" + [ + "reference.1.ht2:md5,f1f1779341ba9a0ab7078e358f6efdde", + "reference.2.ht2:md5,09f0021a3a29a0e4aee96083ea4d4020", + "reference.3.ht2:md5,3b641d6168aa70fe864844bb02ef39f2", + "reference.4.ht2:md5,2dd52cfb372404a3619c0f9367fc4012", + "reference.5.ht2:md5,999891dff8303f7c576f5a1ad4be5036", + "reference.6.ht2:md5,e1a25310dabfaeb7f0fa200785bad49f", + "reference.7.ht2:md5,9013eccd91ad614d7893c739275a394f", + "reference.8.ht2:md5,33cdeccccebe80329f1fdbee7f5874cb" + ] ] ] ], [ "11", + [ + [ + { + "id": "reference" + }, + "reference.dict:md5,81abd0fc051c084405024e0927f7b436" + ] + ] + ], + [ + "12", [ [ "/nf-core/test-datasets/raredisease/reference/gnomad_reformated.tab.gz", @@ -42,7 +62,7 @@ ] ], [ - "12", + "13", [ [ { @@ -59,13 +79,13 @@ ] ], [ - "13", + "14", [ ] ], [ - "14", + "15", [ [ { @@ -76,7 +96,7 @@ ] ], [ - "15", + "16", [ [ { @@ -87,7 +107,7 @@ ] ], [ - "16", + "17", [ [ { @@ -98,7 +118,7 @@ ] ], [ - "17", + "18", [ [ "reference_mt.intervals:md5,0930dafc5645246542112ae3d62573a3" @@ -106,7 +126,7 @@ ] ], [ - "18", + "19", [ [ { @@ -125,29 +145,29 @@ ] ], [ - "19", + "2", [ [ { - "id": "reference" + "id": "dbsnp_-138-" }, - "reference_shift.back_chain:md5,25d682d73a9bae009f4a38b3ede5dbc8" + "dbsnp_-138-.vcf.gz.tbi:md5,6695b71d9819806d50cd908051cf4cc0" ] ] ], [ - "2", + "20", [ [ { - "id": "dbsnp_-138-" + "id": "reference" }, - "dbsnp_-138-.vcf.gz.tbi:md5,6695b71d9819806d50cd908051cf4cc0" + 
"reference_shift.back_chain:md5,25d682d73a9bae009f4a38b3ede5dbc8" ] ] ], [ - "20", + "21", [ [ { @@ -164,13 +184,13 @@ ] ], [ - "21", + "22", [ ] ], [ - "22", + "23", [ [ { @@ -181,7 +201,7 @@ ] ], [ - "23", + "24", [ [ { @@ -192,7 +212,7 @@ ] ], [ - "24", + "25", [ [ { @@ -203,7 +223,7 @@ ] ], [ - "25", + "26", [ [ "reference_mt.shifted.intervals:md5,083d8f782fd4228dd9f6948ddbc64fd0" @@ -211,13 +231,13 @@ ] ], [ - "26", + "27", [ ] ], [ - "27", + "28", [ [ { @@ -228,21 +248,11 @@ ] ] ], - [ - "28", - [ - [ - "target_target.interval_list:md5,2959b7b0e1b298fe84ad708f447d6703" - ] - ] - ], [ "29", [ [ - [ - - ] + "target_target.interval_list:md5,2959b7b0e1b298fe84ad708f447d6703" ] ] ], @@ -265,6 +275,16 @@ ], [ "30", + [ + [ + [ + + ] + ] + ] + ], + [ + "31", [ [ [ @@ -498,7 +518,7 @@ ] ], [ - "31", + "32", [ [ "references/", @@ -1010,6 +1030,12 @@ ], [ "4", + [ + + ] + ], + [ + "5", [ [ { @@ -1025,28 +1051,17 @@ ] ] ], - [ - "5", - [ - - ] - ], [ "6", [ - [ - "reference.fasta.sizes:md5,b7bb58fb4d5cb50796e722222a13a2e6" - ] + ] ], [ "7", [ [ - { - "id": "reference" - }, - "reference.fasta.fai:md5,bf4047e05f95deb372eb8ffd601cbc29" + "reference.fasta.sizes:md5,b7bb58fb4d5cb50796e722222a13a2e6" ] ] ], @@ -1057,7 +1072,7 @@ { "id": "reference" }, - "/nf-core/test-datasets/raredisease/reference/reference.fasta" + "reference.fasta.fai:md5,bf4047e05f95deb372eb8ffd601cbc29" ] ] ], @@ -1068,16 +1083,7 @@ { "id": "reference" }, - [ - "reference.1.ht2:md5,f1f1779341ba9a0ab7078e358f6efdde", - "reference.2.ht2:md5,09f0021a3a29a0e4aee96083ea4d4020", - "reference.3.ht2:md5,3b641d6168aa70fe864844bb02ef39f2", - "reference.4.ht2:md5,2dd52cfb372404a3619c0f9367fc4012", - "reference.5.ht2:md5,999891dff8303f7c576f5a1ad4be5036", - "reference.6.ht2:md5,e1a25310dabfaeb7f0fa200785bad49f", - "reference.7.ht2:md5,9013eccd91ad614d7893c739275a394f", - "reference.8.ht2:md5,33cdeccccebe80329f1fdbee7f5874cb" - ] + "/nf-core/test-datasets/raredisease/reference/reference.fasta" ] ] ], @@ -1128,6 
+1134,12 @@ ] ] ], + [ + "genome_bwafastalign_index", + [ + + ] + ], [ "genome_bwamem2_index", [ @@ -2162,9 +2174,9 @@ ] ] ], - "timestamp": "2026-03-18T21:59:15.358100588", + "timestamp": "2026-06-10T17:23:36.97002628", "meta": { - "nf-test": "0.9.4", + "nf-test": "0.9.5", "nextflow": "25.10.4" } } diff --git a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/main.nf b/subworkflows/local/vcf_filter_bcftools_filtervep/main.nf similarity index 55% rename from subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/main.nf rename to subworkflows/local/vcf_filter_bcftools_filtervep/main.nf index 00c752502..b355fb026 100644 --- a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/main.nf +++ b/subworkflows/local/vcf_filter_bcftools_filtervep/main.nf @@ -1,20 +1,20 @@ -include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' -include { ENSEMBLVEP_FILTERVEP } from '../../../modules/nf-core/ensemblvep/filtervep' -include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view' +include { FILTERVEP } from '../../../modules/local/filtervep' +include { TABIX_BGZIPTABIX } from '../../../modules/nf-core/tabix/bgziptabix' // Please note this subworkflow requires the options for bcftools_view that are included in the nextflow.config -workflow VCF_FILTER_BCFTOOLS_ENSEMBLVEP { +workflow VCF_FILTER_BCFTOOLS_FILTERVEP { take: - ch_vcf // channel: [ val(meta), path(vcf) ] + ch_vcf // channel: [ val(meta), path(vcf) ] ch_filter_vep_feature_file // channel: [ val(meta), path(txt) ] - filter_with_bcftools // bool: should bcftools view be run - filter_with_filter_vep // bool: should filter_vep be run + filter_with_bcftools // bool: should bcftools view be run + filter_with_filter_vep // bool: should filtervep be run main: ch_tbi = channel.empty() - // Since bcftools is likely much faster than filter_vep, - // we run it first to reduce the number of variants that filter_vep has to process. 
+ // Since bcftools is likely much faster than filtervep, + // we run it first to reduce the number of variants that filtervep has to process. if (filter_with_bcftools) { BCFTOOLS_VIEW( @@ -30,13 +30,13 @@ workflow VCF_FILTER_BCFTOOLS_ENSEMBLVEP { if (filter_with_filter_vep) { - ENSEMBLVEP_FILTERVEP( + FILTERVEP( ch_vcf, ch_filter_vep_feature_file.map { _meta, file -> file }, ) TABIX_BGZIPTABIX( - ENSEMBLVEP_FILTERVEP.out.output + FILTERVEP.out.output ) ch_vcf = TABIX_BGZIPTABIX.out.gz_index.map { meta, vcf, _tbi -> [meta, vcf] } @@ -44,6 +44,6 @@ workflow VCF_FILTER_BCFTOOLS_ENSEMBLVEP { } emit: - vcf = ch_vcf // channel: [ val(meta), path(vcf) ] - tbi = ch_tbi // channel: [ val(meta), path(tbi) ] + vcf = ch_vcf // channel: [ val(meta), path(vcf) ] + tbi = ch_tbi // channel: [ val(meta), path(tbi) ] } diff --git a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/meta.yml b/subworkflows/local/vcf_filter_bcftools_filtervep/meta.yml similarity index 100% rename from subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/meta.yml rename to subworkflows/local/vcf_filter_bcftools_filtervep/meta.yml diff --git a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/nextflow.config b/subworkflows/local/vcf_filter_bcftools_filtervep/nextflow.config similarity index 100% rename from subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/nextflow.config rename to subworkflows/local/vcf_filter_bcftools_filtervep/nextflow.config diff --git a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/tests/main.nf.test b/subworkflows/local/vcf_filter_bcftools_filtervep/tests/main.nf.test similarity index 94% rename from subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/tests/main.nf.test rename to subworkflows/local/vcf_filter_bcftools_filtervep/tests/main.nf.test index fb000d16a..622d385d8 100644 --- a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/tests/main.nf.test +++ b/subworkflows/local/vcf_filter_bcftools_filtervep/tests/main.nf.test @@ -1,15 +1,15 @@ nextflow_workflow { - 
name "Test Subworkflow VCF_FILTER_BCFTOOLS_ENSEMBLVEP" + name "Test Subworkflow VCF_FILTER_BCFTOOLS_FILTERVEP" script "../main.nf" - workflow "VCF_FILTER_BCFTOOLS_ENSEMBLVEP" + workflow "VCF_FILTER_BCFTOOLS_FILTERVEP" config "./nextflow.config" tag "subworkflows" - tag "subworkflows_nfcore" - tag "subworkflows/vcf_filter_bcftools_ensemblvep" + tag "subworkflows_local" + tag "subworkflows/vcf_filter_bcftools_filtervep" tag "bcftools/view" - tag "ensemblvep/filtervep" + tag "filtervep" tag "tabix/bgziptabix" test("vcf, hgnc_ids - filter on expression") { diff --git a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/tests/main.nf.test.snap b/subworkflows/local/vcf_filter_bcftools_filtervep/tests/main.nf.test.snap similarity index 90% rename from subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/tests/main.nf.test.snap rename to subworkflows/local/vcf_filter_bcftools_filtervep/tests/main.nf.test.snap index 03ac1e7b0..9954d80cf 100644 --- a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/tests/main.nf.test.snap +++ b/subworkflows/local/vcf_filter_bcftools_filtervep/tests/main.nf.test.snap @@ -8,11 +8,11 @@ "18e010447bd23a05a202d44daf66bcd2", "VcfFile [chromosomes=[1], sampleCount=3, variantCount=1, phased=false, phasedAutodetect=false]" ], + "timestamp": "2026-01-27T19:43:16.754963", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-01-27T19:43:16.754963" + } }, "vcf, hgnc_ids - filter on hgnc ids and expression": { "content": [ @@ -23,11 +23,11 @@ "18e010447bd23a05a202d44daf66bcd2", "VcfFile [chromosomes=[1], sampleCount=3, variantCount=1, phased=false, phasedAutodetect=false]" ], + "timestamp": "2026-01-27T19:43:25.968654", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-01-27T19:43:25.968654" + } }, "vcf, hgnc_ids - filter on expression": { "content": [ @@ -38,11 +38,11 @@ "9236236d782191442193a20355604927", "VcfFile [chromosomes=[1], sampleCount=3, variantCount=37, phased=false, 
phasedAutodetect=false]" ], + "timestamp": "2026-01-20T12:44:40.698598144", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.2" - }, - "timestamp": "2026-01-20T12:44:40.698598144" + } }, "vcf, hgnc_ids - filter on hgnc ids and expression - stub": { "content": [ @@ -81,10 +81,10 @@ ] } ], + "timestamp": "2026-01-27T19:43:35.16804", "meta": { "nf-test": "0.9.3", "nextflow": "25.10.3" - }, - "timestamp": "2026-01-27T19:43:35.16804" + } } } \ No newline at end of file diff --git a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/tests/nextflow.config b/subworkflows/local/vcf_filter_bcftools_filtervep/tests/nextflow.config similarity index 86% rename from subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/tests/nextflow.config rename to subworkflows/local/vcf_filter_bcftools_filtervep/tests/nextflow.config index 49d12aa8f..670e5d97e 100644 --- a/subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/tests/nextflow.config +++ b/subworkflows/local/vcf_filter_bcftools_filtervep/tests/nextflow.config @@ -1,5 +1,5 @@ process { - withName: 'ENSEMBLVEP_FILTERVEP' { + withName: 'FILTERVEP' { ext.args = { "--filter \"HGNC_ID in ${feature_file}\"" } } diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index ea6fc5d95..1cf8b8e62 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -131,10 +131,6 @@ "DEEPVARIANT": { "deepvariant": "1.9.0" }, - "ENSEMBLVEP_FILTERVEP": { - "ensemblvep": 115.2, - "perl-math-cdf": 0.1 - }, "ENSEMBLVEP_ME": { "ensemblvep": 110.1, "perl-math-cdf": 0.1, @@ -165,6 +161,9 @@ "FASTQC": { "fastqc": "0.12.1" }, + "FILTERVEP": { + "filtervep": "1.0.0" + }, "GATK4_FILTERMUTECTCALLS_MT": { "gatk4": "4.6.2.0" }, @@ -322,13 +321,13 @@ "rhocall": "0.5.1" }, "SALTSHAKER_CALL": { - "saltshaker": "1.0.0" + "saltshaker": "1.1.1" }, "SALTSHAKER_CLASSIFY": { - "saltshaker": "1.0.1" + "saltshaker": "1.1.1" }, "SALTSHAKER_PLOT": { - "saltshaker": "1.0.0" + "saltshaker": "1.1.1" }, "SAMTOOLS_COLLATE": { "samtools": "1.22.1" @@ -384,6 
+383,9 @@ "SPLIT_MULTIALLELICS_MT": { "bcftools": 1.22 }, + "SPLIT_MULTIALLELICS_POSTMERGE_MT": { + "bcftools": 1.22 + }, "SPRING_DECOMPRESS_TO_FQ_PAIR": { "spring": "1.1.1" }, @@ -928,10 +930,10 @@ "vcf2cytosure/slowlycivilbuck.cgh" ] ], - "timestamp": "2026-05-08T16:30:33.887641119", + "timestamp": "2026-06-11T10:20:14.951812109", "meta": { "nf-test": "0.9.5", - "nextflow": "25.10.4" + "nextflow": "26.04.3" } } -} \ No newline at end of file +} diff --git a/tests/test_bam.nf.test.snap b/tests/test_bam.nf.test.snap index e26145db1..5952f98d0 100644 --- a/tests/test_bam.nf.test.snap +++ b/tests/test_bam.nf.test.snap @@ -1,7 +1,7 @@ { "-profile test_bam": { "content": [ - 773, + 774, { "ADD_VARCALLER_TO_BED": { "bgzip": 1.21, @@ -125,10 +125,6 @@ "DEEPVARIANT": { "deepvariant": "1.9.0" }, - "ENSEMBLVEP_FILTERVEP": { - "ensemblvep": 115.2, - "perl-math-cdf": 0.1 - }, "ENSEMBLVEP_ME": { "ensemblvep": 110.1, "perl-math-cdf": 0.1, @@ -153,6 +149,9 @@ "bgzip": "1.22.1", "expansionhunter": "5.0.0" }, + "FILTERVEP": { + "filtervep": "1.0.0" + }, "GATK4_FILTERMUTECTCALLS_MT": { "gatk4": "4.6.2.0" }, @@ -345,6 +344,9 @@ "SPLIT_MULTIALLELICS_MT": { "bcftools": 1.22 }, + "SPLIT_MULTIALLELICS_POSTMERGE_MT": { + "bcftools": 1.22 + }, "STRANGER": { "stranger": "0.10.0", "tabix": 1.23 @@ -844,7 +846,7 @@ "vcf2cytosure/slowlycivilbuck.cgh" ] ], - "timestamp": "2026-04-29T22:26:48.082490207", + "timestamp": "2026-06-05T00:25:53.862854239", "meta": { "nf-test": "0.9.5", "nextflow": "25.10.4" diff --git a/tests/test_singleton.nf.test.snap b/tests/test_singleton.nf.test.snap index 66f438cc4..1e7680598 100644 --- a/tests/test_singleton.nf.test.snap +++ b/tests/test_singleton.nf.test.snap @@ -123,10 +123,6 @@ "DEEPVARIANT": { "deepvariant": "1.9.0" }, - "ENSEMBLVEP_FILTERVEP": { - "ensemblvep": 115.2, - "perl-math-cdf": 0.1 - }, "ENSEMBLVEP_ME": { "ensemblvep": 110.1, "perl-math-cdf": 0.1, @@ -157,6 +153,9 @@ "FASTQC": { "fastqc": "0.12.1" }, + "FILTERVEP": { + "filtervep": "1.0.0" + }, 
"GATK4_FILTERMUTECTCALLS_MT": { "gatk4": "4.6.2.0" }, @@ -314,13 +313,13 @@ "rhocall": "0.5.1" }, "SALTSHAKER_CALL": { - "saltshaker": "1.0.0" + "saltshaker": "1.1.1" }, "SALTSHAKER_CLASSIFY": { - "saltshaker": "1.0.1" + "saltshaker": "1.1.1" }, "SALTSHAKER_PLOT": { - "saltshaker": "1.0.0" + "saltshaker": "1.1.1" }, "SAMTOOLS_COLLATE": { "samtools": "1.22.1" @@ -655,10 +654,10 @@ "vcf2cytosure/hugelymodelbat.cgh" ] ], - "timestamp": "2026-04-29T22:30:32.111448817", + "timestamp": "2026-06-05T00:29:37.176160755", "meta": { "nf-test": "0.9.5", - "nextflow": "25.10.4" + "nextflow": "26.04.3" } } -} \ No newline at end of file +} diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index b3b1c1470..7835eb454 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -63,10 +63,10 @@ include { RANK_VARIANTS as RANK_VARIANTS_SV } from '.. include { SUBSAMPLE_MT_FRAC } from '../subworkflows/local/subsample_mt_frac' include { SUBSAMPLE_MT_READS } from '../subworkflows/local/subsample_mt_reads' include { VARIANT_EVALUATION } from '../subworkflows/local/variant_evaluation' -include { VCF_FILTER_BCFTOOLS_ENSEMBLVEP as GENERATE_CLINICAL_SET_ME } from '../subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/main' -include { VCF_FILTER_BCFTOOLS_ENSEMBLVEP as GENERATE_CLINICAL_SET_MT } from '../subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/main' -include { VCF_FILTER_BCFTOOLS_ENSEMBLVEP as GENERATE_CLINICAL_SET_SNV } from '../subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/main' -include { VCF_FILTER_BCFTOOLS_ENSEMBLVEP as GENERATE_CLINICAL_SET_SV } from '../subworkflows/nf-core/vcf_filter_bcftools_ensemblvep/main' +include { VCF_FILTER_BCFTOOLS_FILTERVEP as GENERATE_CLINICAL_SET_ME } from '../subworkflows/local/vcf_filter_bcftools_filtervep' +include { VCF_FILTER_BCFTOOLS_FILTERVEP as GENERATE_CLINICAL_SET_MT } from '../subworkflows/local/vcf_filter_bcftools_filtervep' +include { VCF_FILTER_BCFTOOLS_FILTERVEP as GENERATE_CLINICAL_SET_SNV } from 
'../subworkflows/local/vcf_filter_bcftools_filtervep' +include { VCF_FILTER_BCFTOOLS_FILTERVEP as GENERATE_CLINICAL_SET_SV } from '../subworkflows/local/vcf_filter_bcftools_filtervep' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -80,6 +80,7 @@ workflow RAREDISEASE { ch_alignments ch_bait_intervals ch_cadd_header + ch_cadd_prescored ch_cadd_resources ch_call_interval ch_case_info @@ -87,6 +88,7 @@ workflow RAREDISEASE { ch_dbsnp_tbi ch_foundin_header ch_gcnvcaller_model + ch_genome_bwafastalignindex ch_genome_bwaindex ch_genome_bwamem2index ch_genome_bwamemeindex @@ -103,6 +105,7 @@ workflow RAREDISEASE { ch_hgnc_ids ch_intervals_wgs ch_intervals_y + ch_manta_regions ch_me_references ch_me_svdb_resources ch_ml_model @@ -191,7 +194,6 @@ workflow RAREDISEASE { val_homoplasmy_af_threshold val_light_strand_origin_end val_light_strand_origin_start - val_mbuffer_mem val_mito_length val_mito_name val_mitosalt_breakspan @@ -217,7 +219,6 @@ workflow RAREDISEASE { val_run_rtgvcfeval val_run_vcfanno_db_sanity_check val_sample_id_map - val_samtools_sort_threads val_save_all_mapped_as_cram val_save_noalt_mapped_as_cram val_svdb_query_bedpedbs @@ -290,6 +291,7 @@ workflow RAREDISEASE { ALIGN ( ch_alignments, + ch_genome_bwafastalignindex, ch_genome_bwaindex, ch_genome_bwamem2index, ch_genome_bwamemeindex, @@ -312,11 +314,9 @@ workflow RAREDISEASE { val_analysis_type, val_exclude_alt, val_extract_alignments, - val_mbuffer_mem, val_mt_aligner, val_platform, val_run_mt_for_wes, - val_samtools_sort_threads, val_save_all_mapped_as_cram, val_save_noalt_mapped_as_cram ) @@ -460,6 +460,7 @@ workflow RAREDISEASE { ANNOTATE_GENOME_SNVS ( ch_cadd_header, + ch_cadd_prescored, ch_cadd_resources, ch_genome_chrsizes, ch_genome_fai, @@ -535,6 +536,7 @@ workflow RAREDISEASE { ANNOTATE_MT_SNVS ( ch_cadd_header, + ch_cadd_prescored, ch_cadd_resources, ch_genome_fasta, ch_genome_fai, @@ -633,6 +635,7 @@ workflow RAREDISEASE { ch_genome_fai, 
ch_genome_fasta, ch_genome_hisat2index, + ch_manta_regions, ch_mitosalt_config, ch_mapped.mt_bam_bai, ch_mt_fai, @@ -643,7 +646,6 @@ workflow RAREDISEASE { ch_input_fastqs, ch_subdepth, ch_svcaller_priority, - ch_target_bed, skip_germlinecnvcaller, skip_mitosalt, val_analysis_type,