From ad3480d8c21080947eff1e5dca345b549b8772a0 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 28 May 2026 11:20:03 +1000 Subject: [PATCH 01/17] Fix #619 extract_metrics shebang for python3-only containers --- bin/extract_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/extract_metrics.py b/bin/extract_metrics.py index 40e4c8fa8..4abf14d64 100755 --- a/bin/extract_metrics.py +++ b/bin/extract_metrics.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import pickle import os From 44dd5c42c0fc455cb5a71f81547203e3ff0cfab0 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 28 May 2026 11:20:16 +1000 Subject: [PATCH 02/17] Fix #209 disable ColabFold templates in offline local mode --- conf/modules_colabfold.config | 3 ++- docs/usage/colabfold.md | 3 ++- nextflow.config | 1 + nextflow_schema.json | 8 +++++++- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index 15c0c90d9..20d09aeb8 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -33,7 +33,8 @@ process { ext.args = [ params.colabfold_use_gpu_relax ? '--use-gpu-relax' : '', params.colabfold_use_amber ? '--amber' : '', - params.colabfold_use_templates ? '--templates' : '', + params.colabfold_use_templates && (params.use_msa_server || params.colabfold_template_path) ? '--templates' : '', + params.colabfold_template_path ? "--custom-template-path ${params.colabfold_template_path}" : '', params.random_seed != null ? "--random-seed ${params.random_seed}" : '', params.use_msa_server && params.msa_server_url ? 
"--host-url ${params.msa_server_url}" : '' ].join(' ').trim() diff --git a/docs/usage/colabfold.md b/docs/usage/colabfold.md index e2a6ec36c..c8b16c874 100644 --- a/docs/usage/colabfold.md +++ b/docs/usage/colabfold.md @@ -108,7 +108,8 @@ See the [ColabFold](https://github.com/sokrypton/ColabFold) documentation for a | `--colabfold_use_amber` | `true` | ColabFold outputs will sometimes contain phsyical violations such as steric clashes. These clashes can be resolved by post-processing the outputs with a short relaxation using the Amber Force Field. Non-clashing atoms are pinned to starting coordinates such that the relaxation has a minimal impact on final structures. | | `--colabfold_db_load_mode` | `0` | Specify the way that MMSeqs2 will load the required databases in memory | | `--colabfold_alphafold2_params_prefix` | `alphafold_params_2022-12-06` | Specify the alphafold2 params used for prediction. | -| `--colabfold_use_templates` | `false` | Use PDB templates to support predictions. The ColabFold notebooks do not use templates by default. | +| `--colabfold_use_templates` | `false` | Use PDB templates to support predictions. When `--use_msa_server` is disabled, this only takes effect if `--colabfold_template_path` is also set so ColabFold can use local templates without contacting the MMSeqs API. The ColabFold notebooks do not use templates by default. | +| `--colabfold_template_path` | `null` | Path to a local ColabFold template directory. Set this together with `--colabfold_use_templates` to enable template use in local ColabFold mode without remote template lookups. | | `--colabfold_create_index` | `false` | Create index for ColabFold databases during setup. On network filesystems it can be more performant to re-compute the index on the fly | > You can override any of these parameters via the command line or a params file. 
diff --git a/nextflow.config b/nextflow.config index 75f1dbba6..f599079b7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -109,6 +109,7 @@ params { colabfold_db = null colabfold_db_load_mode = 0 colabfold_use_templates = false + colabfold_template_path = null colabfold_create_index = false // Colabfold links diff --git a/nextflow_schema.json b/nextflow_schema.json index 0a27d0788..bc70a24bf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -189,7 +189,13 @@ "colabfold_use_templates": { "type": "boolean", "default": false, - "description": "Use PDB templates", + "description": "Use PDB templates. Local ColabFold runs require colabfold_template_path to avoid remote template lookups.", + "fa_icon": "fas fa-paste" + }, + "colabfold_template_path": { + "type": "string", + "format": "directory-path", + "description": "Path to a local ColabFold template directory. Required to use templates without --use_msa_server.", "fa_icon": "fas fa-paste" }, "colabfold_create_index": { From 18466ea5d0ebe4390c4434509f7472dd96f428dc Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 28 May 2026 11:21:29 +1000 Subject: [PATCH 03/17] Fix #456 derive metric ordering from ranked structure filenames --- bin/extract_metrics.py | 70 ++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 20 deletions(-) diff --git a/bin/extract_metrics.py b/bin/extract_metrics.py index 4abf14d64..e47445778 100755 --- a/bin/extract_metrics.py +++ b/bin/extract_metrics.py @@ -11,6 +11,7 @@ import numpy as np import csv import string +import re from utils import plddt_from_struct_b_factor, get_chain_ids # TODO: Issue #309, make into a proper separate process, it its own module so that dependencies can be managed better @@ -128,6 +129,41 @@ def write_tsv(file_path, rows): writer.writerows(rows) +def infer_model_rank(file_path): + basename = os.path.basename(file_path) + rank_patterns = [ + r"ranked_(\d+)", + r"_rank_(\d+)", + r"_model_(\d+)", + r"-rank(\d+)", + ] + + for 
pattern in rank_patterns: + match = re.search(pattern, basename) + if match: + return int(match.group(1)) + + return None + + +def sort_paths_by_rank(paths): + def sort_key(path): + rank = infer_model_rank(path) + if rank is None: + return (1, os.path.basename(path)) + return (0, rank, os.path.basename(path)) + + return sorted(paths, key=sort_key) + + +def build_struct_map(struct_files): + struct_map = {} + for idx, struct_file in enumerate(sort_paths_by_rank(struct_files)): + rank = infer_model_rank(struct_file) + struct_map[rank if rank is not None else idx] = struct_file + return struct_map + + def resolve_struct_for_model(struct_map, model_id): if model_id in struct_map: return struct_map[model_id] @@ -135,7 +171,7 @@ def resolve_struct_for_model(struct_map, model_id): numeric_model_id = int(model_id) except (TypeError, ValueError): return None - return struct_map.get(numeric_model_id, struct_map.get(numeric_model_id - 1)) + return struct_map.get(numeric_model_id) def parse_ipsae_text_report(report_path): @@ -222,13 +258,17 @@ def extract_structs_plddt_to_tsv(name, structures): Write out a tsv file contain pLDDTs for reading by MultiQC in nf-core/proteinfold Uses utils function with BioPython PDB package to extract residue pLDDT values from the b-factor column. 
""" - plddt_cols = [plddt_from_struct_b_factor(structure) for structure in structures] + sorted_structures = sort_paths_by_rank(structures) + plddt_cols = [plddt_from_struct_b_factor(structure) for structure in sorted_structures] res_counts = [len(plddt_col) for plddt_col in plddt_cols] if len(set(res_counts)) != 1: raise ValueError("Not all structures have the same number of residues!") - rank_names = [f"rank_{i}" for i in range(len(structures))] + rank_names = [] + for idx, structure in enumerate(sorted_structures): + rank = infer_model_rank(structure) + rank_names.append(f"rank_{rank}" if rank is not None else f"rank_{idx}") # Create header as the first row plddt_rows = [["Positions"] + rank_names] res_id_col = list(range(len(plddt_cols[0]))) @@ -244,10 +284,7 @@ def read_pkl(name, pkl_files, struct_files=None): ipsae_data = {} chainwise_iptm = {} chainwise_ipsae = {} - struct_map = {} - if struct_files: - for idx, struct_file in enumerate(sorted(struct_files)): - struct_map[idx] = struct_file + struct_map = build_struct_map(struct_files) if struct_files else {} for pkl_file in pkl_files: print(f"Processing {pkl_file}") data = pickle.load(open(pkl_file, "rb")) @@ -357,10 +394,7 @@ def read_a3m(name, a3m_files): def read_npz(name, npz_files, struct_files=None): ipsae_rows = [] chainwise_ipsae = {} - struct_map = {} - if struct_files: - for idx, struct_file in enumerate(sorted(struct_files)): - struct_map[idx] = struct_file + struct_map = build_struct_map(struct_files) if struct_files else {} for idx, npz_file in enumerate(npz_files): data = np.load(npz_file) #Boltz PAE files if --write_full_pae is used @@ -467,10 +501,7 @@ def read_json(name, json_files, struct_files=None): chain_pair_entries = {} chainwise_ptms = {} chain_ids = [] - struct_map = {} - if struct_files: - for idx, struct_file in enumerate(sorted(struct_files)): - struct_map[idx] = struct_file + struct_map = build_struct_map(struct_files) if struct_files else {} for idx, json_file in 
enumerate(json_files): with open(json_file, 'r') as f: @@ -632,14 +663,13 @@ def read_colabfold_metrics(name, colabfold_metrics_fns, struct_files=None): ipsae_rows = [] chainwise_iptm = {} chainwise_ipsae = {} - struct_map = {} - if struct_files: - for idx, struct_file in enumerate(sorted(struct_files)): - struct_map[idx] = struct_file + struct_map = build_struct_map(struct_files) if struct_files else {} for fn in colabfold_metrics_fns: with open(fn) as f: data = json.load(f) - rank_id = int(fn.split("rank_")[1].split("_")[0])-1 + rank_id = infer_model_rank(fn) + if rank_id is None: + raise ValueError(f"Unable to infer ColabFold rank from metrics filename: {fn}") if "pae" in data: write_tsv(f"{name}_{rank_id}_pae.tsv", format_pae_rows(data["pae"])) if "ptm" in data: From 803ac74f0d5b7c23d006a4e6126f5d0941792f74 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 28 May 2026 11:22:59 +1000 Subject: [PATCH 04/17] Fix #489 use explicit Boltz output directory names --- docs/output.md | 2 +- modules/local/run_boltz/main.nf | 24 ++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/output.md b/docs/output.md index bd9e2a73a..cb4ae4df5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -224,7 +224,7 @@ Examples include: - `alphafold2///raw/` - `colabfold//raw/` -- `boltz//boltz_results_*/` +- `boltz//boltz_results_/` - `rosettafold_all_atom//raw/` - `alphafold3//raw/` - `helixfold3//raw/` diff --git a/modules/local/run_boltz/main.nf b/modules/local/run_boltz/main.nf index fd9eb99cd..d4678b3b7 100644 --- a/modules/local/run_boltz/main.nf +++ b/modules/local/run_boltz/main.nf @@ -18,14 +18,14 @@ process RUN_BOLTZ { output: tuple val(meta), path ("boltz_results_${meta.id}") , optional: true, emit: intermediates - tuple val(meta), path ("boltz_results_*/processed/msa/*.npz") , emit: msa - tuple val(meta), path ("boltz_results_*/processed/structures/*.npz") , emit: structures - tuple val(meta), path 
("boltz_results_*/predictions/*/confidence*.json") , emit: confidence + tuple val(meta), path ("boltz_results_${meta.id}/processed/msa/*.npz") , emit: msa + tuple val(meta), path ("boltz_results_${meta.id}/processed/structures/*.npz") , emit: structures + tuple val(meta), path ("boltz_results_${meta.id}/predictions/${meta.id}/confidence*.json") , emit: confidence tuple val(meta), path ("${meta.id}_plddt_mqc.tsv") , emit: multiqc tuple val(meta), path ("${meta.id}_boltz.pdb") , emit: top_ranked_pdb - tuple val(meta), path ("boltz_results_*/predictions/*/*.pdb") , emit: pdb - tuple val(meta), path ("boltz_results_*/predictions/*/plddt_*model_0.npz"), emit: plddt - tuple val(meta), path ("boltz_results_*/predictions/*/pae_*model_0.npz") , emit: pae + tuple val(meta), path ("boltz_results_${meta.id}/predictions/${meta.id}/*.pdb") , emit: pdb + tuple val(meta), path ("boltz_results_${meta.id}/predictions/${meta.id}/plddt_*model_0.npz"), emit: plddt + tuple val(meta), path ("boltz_results_${meta.id}/predictions/${meta.id}/pae_*model_0.npz") , emit: pae tuple val(meta), path ("${meta.id}_plddt_mqc.tsv") , emit: plddt_raw tuple val(meta), path ("${meta.id}_boltz_msa.tsv") , emit: msa_raw tuple val(meta), path ("${meta.id}_*_pae.tsv") , emit: pae_raw @@ -63,17 +63,17 @@ process RUN_BOLTZ { boltz predict "${meta.id}.yaml" --output_format "pdb" ${args} --cache ./ fi - cp boltz_results_*/predictions/${meta.id}/*_0.pdb ./${meta.id}_boltz.pdb + cp boltz_results_${meta.id}/predictions/${meta.id}/*_0.pdb ./${meta.id}_boltz.pdb # For consistency between server and local - if compgen -G "boltz_results_*/msa/${meta.id}*.csv" > /dev/null; then - cp boltz_results_*/msa/${meta.id}_*.csv ./ + if compgen -G "boltz_results_${meta.id}/msa/${meta.id}*.csv" > /dev/null; then + cp boltz_results_${meta.id}/msa/${meta.id}_*.csv ./ fi extract_metrics.py --name ${meta.id} \\ - --structs boltz_results_*/predictions/${meta.id}/*.pdb \\ - --jsons 
boltz_results_*/predictions/${meta.id}/confidence_*_model_*.json \\ - --npzs boltz_results_*/predictions/${meta.id}/pae_*_model_*.npz \\ + --structs boltz_results_${meta.id}/predictions/${meta.id}/*.pdb \\ + --jsons boltz_results_${meta.id}/predictions/${meta.id}/confidence_*_model_*.json \\ + --npzs boltz_results_${meta.id}/predictions/${meta.id}/pae_*_model_*.npz \\ --csvs ${meta.id}_*.csv touch "${meta.id}_iptm.tsv" "${meta.id}_ipsae.tsv" "${meta.id}_chainwise_iptm.tsv" "${meta.id}_chainwise_ipsae.tsv" From 366a8e6d2c5fd9e0dbc4ad51587ae5f6f23c529c Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 28 May 2026 12:03:10 +1000 Subject: [PATCH 05/17] Updated CHANGELOG --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44bf0a211..45041814b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#586](https://github.com/nf-core/proteinfold/pull/586)] - Allow local msa for Boltz with non-protein entities. - [[#618](https://github.com/nf-core/proteinfold/pull/618)] - Resolve boltz `ext.args` in closure. - [[PR #626](https://github.com/nf-core/proteinfold/pull/618)] - Move scientific validation tests and BioPython setup to manual workflow. +- [[#619](https://github.com/nf-core/proteinfold/issues/619)] - Fix `extract_metrics.py` shebang to use `python3` for compatibility with minimal containers. +- [[#209](https://github.com/nf-core/proteinfold/issues/209)] - In local ColabFold runs (without `--use_msa_server`), only pass `--templates` when `--colabfold_template_path` is provided, so that no remote template lookups are attempted. +- [[#456](https://github.com/nf-core/proteinfold/issues/456)] - Derive ranked metric ordering from structure filenames when generating TSV outputs. +- [[#365](https://github.com/nf-core/proteinfold/issues/365)] - Run nf-tests with module containers so captured versions match. 
+- [[#489](https://github.com/nf-core/proteinfold/issues/489)] - Use explicit `boltz_results_<id>/` paths for Boltz outputs instead of `boltz_results_*/` wildcards. | Old parameter | New parameter | | -------------------------- | --------------- | From 4a4f6e98875c294afc2552f3e49df1448418ec0e Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 May 2026 10:29:24 +1000 Subject: [PATCH 06/17] Add closure and filter to modules_colabfold.config ext.args --- conf/modules_colabfold.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index 20d09aeb8..e2366c471 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -30,14 +30,14 @@ process { process { withName: 'COLABFOLD_BATCH' { accelerator = { params.use_gpu ? 1 : 0 } - ext.args = [ + ext.args = {[ params.colabfold_use_gpu_relax ? '--use-gpu-relax' : '', params.colabfold_use_amber ? '--amber' : '', params.colabfold_use_templates && (params.use_msa_server || params.colabfold_template_path) ? '--templates' : '', params.colabfold_template_path ? "--custom-template-path ${params.colabfold_template_path}" : '', params.random_seed != null ? "--random-seed ${params.random_seed}" : '', params.use_msa_server && params.msa_server_url ? 
"--host-url ${params.msa_server_url}" : '' - ].join(' ').trim() + ].findAll { arg -> arg }.join(' ').trim()} publishDir = [ [ path: { "${params.outdir}/colabfold/${meta.id}/" }, From 97e41216d2cc6958fae40c233268405e8e92e893 Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 May 2026 11:27:50 +1000 Subject: [PATCH 07/17] Fix #576 improve metric rank ordering in extract_metrics.py --- bin/extract_metrics.py | 21 ++++++++++++++------- docs/output.md | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/bin/extract_metrics.py b/bin/extract_metrics.py index e47445778..2a2be8d72 100755 --- a/bin/extract_metrics.py +++ b/bin/extract_metrics.py @@ -86,13 +86,15 @@ def idx_to_letter(idx): break return result + sorted_entries = sorted(chain_pair_entries.items(), key=lambda item: sort_model_label(item[0])) + if chain_ids: #would be better with some model_id sorting - iptm_rows = [[""]+[f"{chain_ids[idx[0]]}:{chain_ids[idx[1]]}" for idx, val in next(iter(chain_pair_entries.values()))]] + iptm_rows = [[""]+[f"{chain_ids[idx[0]]}:{chain_ids[idx[1]]}" for idx, val in sorted_entries[0][1]]] else: - iptm_rows = [[""]+[f"{idx_to_letter(idx[0])}:{idx_to_letter(idx[1])}" for idx, val in next(iter(chain_pair_entries.values()))]] + iptm_rows = [[""]+[f"{idx_to_letter(idx[0])}:{idx_to_letter(idx[1])}" for idx, val in sorted_entries[0][1]]] - for model_idx, chain_pair_entries_values in chain_pair_entries.items(): + for model_idx, chain_pair_entries_values in sorted_entries: iptm_rows.append([model_idx]+[f"{val:.4f}" for idx, val in chain_pair_entries_values]) return [list(row) for row in zip(*iptm_rows)] @@ -103,7 +105,7 @@ def format_pair_score_rows(pair_score_entries, pair_labels=None): pair_labels = sorted({label for score_values in pair_score_entries.values() for label, _ in score_values}) rows = [[""] + pair_labels] - for model_idx, score_values in pair_score_entries.items(): + for model_idx, score_values in sorted(pair_score_entries.items(), key=lambda item: 
sort_model_label(item[0])): score_map = {label: value for label, value in score_values} rows.append([model_idx] + [f"{score_map[label]:.4f}" if label in score_map else "n/a" for label in pair_labels]) @@ -128,18 +130,23 @@ def write_tsv(file_path, rows): writer = csv.writer(out_f, delimiter='\t') writer.writerows(rows) +def sort_model_label(label): + try: + return (0, int(label)) + except (TypeError, ValueError): + return (1, str(label)) def infer_model_rank(file_path): - basename = os.path.basename(file_path) + normalized_path = file_path.replace(os.sep, "/") rank_patterns = [ r"ranked_(\d+)", r"_rank_(\d+)", + r"-rank(\d+)(?:/|$)", r"_model_(\d+)", - r"-rank(\d+)", ] for pattern in rank_patterns: - match = re.search(pattern, basename) + match = re.search(pattern, normalized_path) if match: return int(match.group(1)) diff --git a/docs/output.md b/docs/output.md index cb4ae4df5..e6384a772 100644 --- a/docs/output.md +++ b/docs/output.md @@ -124,7 +124,7 @@ In the HTML reports, chainwise iPTM and ipSAE are displayed as chain-by-chain ma Predicted alignment error of residues `j` aligned by residue `i`, rounded to 4 decimal places. The row number gives you the index of residue `i` and the column value within the row gives the index of residue `j` for the 2D PAE matrix. -Each model prediction generates a separate file containing the rank number. The `_0_pae.tsv` file corresponds to the top ranked model, other ranked results are stored within the `paes/` folder. +Each model prediction generates a separate file containing the rank number. Rank numbering follows the native convention of the underlying tool, so top-ranked models may appear as either `_0_pae.tsv` or `_1_pae.tsv` depending on the mode. Additional ranked results are stored within the `paes/` folder. 
``` 0.2500 1.5710 3.9037 6.2177 8.4471 11.4583 12.9679 15.1237 18.0263 18.3868 18.9381 20.5747 19.3314 20.1825 21.6145 23.2190 From 2ac436818e616ef8232be16cfb10d3206b126034 Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 May 2026 12:07:23 +1000 Subject: [PATCH 08/17] Updated snapshots --- ro-crate-metadata.json | 2 +- tests/alphafold2_download.nf.test.snap | 8 ++++---- tests/alphafold2_split.nf.test.snap | 8 ++++---- tests/alphafold3.nf.test.snap | 8 ++++---- tests/boltz.nf.test.snap | 8 ++++---- tests/colabfold_download.nf.test.snap | 8 ++++---- tests/colabfold_local.nf.test.snap | 8 ++++---- tests/colabfold_webserver.nf.test.snap | 8 ++++---- tests/default.nf.test.snap | 8 ++++---- tests/esmfold.nf.test.snap | 8 ++++---- tests/helixfold3.nf.test.snap | 8 ++++---- tests/rosettafold2na.nf.test.snap | 8 ++++---- tests/rosettafold_all_atom.nf.test.snap | 8 ++++---- tests/scientific_validation.nf.test.snap | 16 +++++++++++----- tests/split_fasta.nf.test.snap | 8 ++++---- 15 files changed, 64 insertions(+), 58 deletions(-) diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index a2132852a..e70f465bf 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2026-04-30T13:33:25+00:00", - "description": "

\n \n \n \"nf-core/proteinfold\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinfold)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinfold/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13135393-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13135393)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.4-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-4.0.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/4.0.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinfold)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinfold-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinfold)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinfold** is a bioinformatics best-practice analysis pipeline for Protein 3D structure prediction.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/proteinfold/results).\n\n## Pipeline summary\n\n![Alt text](docs/images/nf-core-proteinfold_metro_map_2.0.0.png?raw=true \"nf-core-proteinfold 2.0.0 metro map\")\n\n| Mode | Protein | RNA | Small-molecule | PTM | Constraints | pLM | MSA server | Split MSA |\n| :------------------------------------------------------------------------------------------------- | :-----: | :-: | :------------: | :-: | :---------: | :-: | :--------: | :-------: |\n| [AlphaFold2](https://github.com/deepmind/alphafold) | \u2705 | \u274c | \u274c | \u274c | \u274c | \u274c | \u274c | \u2705 |\n| [ESMFold](https://github.com/facebookresearch/esm) | \u2705 | \u274c | \u274c | \u274c | \u274c | \u2705 | \u274c | \u274c |\n| [ColabFold](https://github.com/sokrypton/ColabFold) | \u2705 | \u274c | \u274c | \u274c | \u274c | \u274c | \u2705 | \u2705 |\n| [RoseTTAFold2NA](https://github.com/uw-ipd/RoseTTAFold2NA) | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c | \u274c | \u274c |\n| [RoseTTAFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom/) | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c |\n| [AlphaFold3](https://github.com/google-deepmind/alphafold3) | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u2705 |\n| [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c |\n| [Boltz](https://github.com/jwohlwend/boltz/) | \u2705 | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u2705 | \u2705 |\n\n**nf-core/proteinfold** supports multiple tools for general molecular structure prediction. Each of the methods have overlapping functionality which can be utilized within the pipeline. All tools support predicting protein structure from an input amino acid sequence. The pipeline is composed of the following steps:\n\n1. 
Split input fasta file (Optional): The pipeline can split large batches of monomeric sequences (eg an entire genome) from a multi-entry fasta input using the `--split_fasta` flag.\n\n2. Prepare databases for chosen methods: The pipeline downloads any required reference data.\n\n3. Structure prediction:\n\n i. Combined: MSA Search + Model Inference: Structures are predicted from MSAs generated using built-in homolog search pipelines.\n\n ii. Split: AlphaFold2 MSA Search + Model Inference: The AlphaFold2 MSA generation pipeline is executed independently and then provided as input for AlphaFold2 structure prediction.\n\n iii. Split: ColabFold MSA Search + Model Inference: The ColabFold MSA generation pipeline is used to produce input MSAs which can be used by ColabFold and Boltz.\n\n iv. pLM: Protein Language Model: The ESMFold model is used to predict structures without generating an MSA.\n\n4. Generate Report: The pipeline produces an interactive HTML report to visualize structure prediction outputs.\n\n5. Comparison Report: The structures predicted by parallel modes are combined in an interactive HTML report.\n\n6. MultiQC: The overall QC statistics are summarized.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/get_started/environment_setup/overview) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/get_started/run-your-first-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nid,fasta\nT1024,T1024.fasta\nT1026,T1026.fasta\n```\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinfold \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode \n```\n\nThe pipeline takes care of downloading the databases and parameters required by each of the modes. 
In case you have already downloaded the required files, you can skip this step by providing the path to the databases using the `--db` parameter.\n\n```bash\nnextflow run nf-core/proteinfold \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode \\\n --db \n```\n\n> [!WARNING]\n> The reference data for most methods is extremely large and may exceed individual user disk allocations on shared HPC systems.\n\nIn order to run multiple methods simultaneously where reference data is stored at different locations, the `--db` flag can be overwritten for each specific mode (e.g. `--alphafold2_db`, `--colabfold_db`, `--esmfold_db` and `--rosettafold_all_atom_db`). Please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) to check the directory structure you must provide for each database.\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/running/run-pipelines#using-parameter-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) and the [parameter documentation](https://nf-co.re/proteinfold/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinfold/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinfold/output).\n\n## Adding new modes to the pipeline\n\nFor details on how to contribute new modes to the pipeline please refer to the [Howto contribute new modes](https://nf-co.re/proteinfold/usage/HOWTO_CONTRIBUTE_NEW_MODES).\n\n## Credits\n\nnf-core/proteinfold was originally written by Athanasios Baltzis 
([@athbaltzis](https://github.com/athbaltzis)), Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)), Luisa Santus ([@luisas](https://github.com/luisas)) and Leila Mansouri ([@l-mansouri](https://github.com/l-mansouri)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/) under the umbrella of the [BovReg project](https://www.bovreg.eu/) and Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/).\n\nMany thanks to others who have helped out and contributed along the way too, including (but not limited to): Norman Goodacre and Waleed Osman from Interline Therapeutics ([@interlinetx](https://github.com/interlinetx)), Martin Steinegger ([@martin-steinegger](https://github.com/martin-steinegger)) and Raoul J.P. Bonnal ([@rjpbonnal](https://github.com/rjpbonnal))\n\nThe pipeline has been further developed and maintained through contributions from the [Structural Biology Facility (SBF) at UNSW Sydney](https://www.unsw.edu.au/research/facilities-and-infrastructure/find-a-facility/sbf), including Joshua Caley ([@jscgh](https://github.com/jscgh)), Keiran Rowell ([@keiran-rowell-unsw](https://github.com/keiran-rowell-unsw)), Thomas Litfin ([@tlitfin](https://github.com/tlitfin)) and Nathan Glades ([@nbtm-sh](https://github.com/nbtm-sh)).\n\nWe would also like to thanks to the AWS Open Data Sponsorship Program for generously providing the resources necessary to host the data utilized in the testing, development, and deployment of nf-core proteinfold.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](docs/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinfold` channel](https://nfcore.slack.com/channels/proteinfold) (you can join with [this 
invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinfold for your analysis, please cite it using the following doi: [10.5281/zenodo.7437038](https://doi.org/10.5281/zenodo.7437038)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "

\n \n \n \"nf-core/proteinfold\"\n \n

\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/nf-core/proteinfold)\n[![GitHub Actions CI Status](https://github.com/nf-core/proteinfold/actions/workflows/nf-test.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/proteinfold/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/proteinfold/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.13135393-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.13135393)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.10.4-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-4.0.2-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/4.0.2)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/proteinfold)\n\n[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23proteinfold-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/proteinfold)[![Follow on Bluesky](https://img.shields.io/badge/bluesky-%40nf__core-1185fe?labelColor=000000&logo=bluesky)](https://bsky.app/profile/nf-co.re)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/proteinfold** is a bioinformatics best-practice analysis pipeline for Protein 3D structure prediction.\n\nThe pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community!\n\nOn release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. 
The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/proteinfold/results).\n\n## Pipeline summary\n\n![Alt text](docs/images/nf-core-proteinfold_metro_map_2.1.0.png?raw=true \"nf-core-proteinfold 2.1.0 metro map\")\n\n| Mode | Protein | RNA | Small-molecule | PTM | Constraints | pLM | MSA server | Split MSA |\n| :------------------------------------------------------------------------------------------------- | :-----: | :-: | :------------: | :-: | :---------: | :-: | :--------: | :-------: |\n| [AlphaFold2](https://github.com/deepmind/alphafold) | \u2705 | \u274c | \u274c | \u274c | \u274c | \u274c | \u274c | \u2705 |\n| [ESMFold](https://github.com/facebookresearch/esm) | \u2705 | \u274c | \u274c | \u274c | \u274c | \u2705 | \u274c | \u274c |\n| [ColabFold](https://github.com/sokrypton/ColabFold) | \u2705 | \u274c | \u274c | \u274c | \u274c | \u274c | \u2705 | \u2705 |\n| [RoseTTAFold2NA](https://github.com/uw-ipd/RoseTTAFold2NA) | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c | \u274c | \u274c |\n| [RoseTTAFold-All-Atom](https://github.com/baker-laboratory/RoseTTAFold-All-Atom/) | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c |\n| [AlphaFold3](https://github.com/google-deepmind/alphafold3) | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u2705 |\n| [HelixFold3](https://github.com/PaddlePaddle/PaddleHelix/tree/dev/apps/protein_folding/helixfold3) | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u274c | \u274c | \u274c |\n| [Boltz](https://github.com/jwohlwend/boltz/) | \u2705 | \u2705 | \u2705 | \u2705 | \u2705 | \u274c | \u2705 | \u2705 |\n\n**nf-core/proteinfold** supports multiple tools for general molecular structure prediction. Each of the methods have overlapping functionality which can be utilized within the pipeline. All tools support predicting protein structure from an input amino acid sequence. The pipeline is composed of the following steps:\n\n1. 
Split input fasta file (Optional): The pipeline can split large batches of monomeric sequences (eg an entire genome) from a multi-entry fasta input using the `--split_fasta` flag.\n\n2. Prepare databases for chosen methods: The pipeline downloads any required reference data.\n\n3. Structure prediction:\n\n i. Combined: MSA Search + Model Inference: Structures are predicted from MSAs generated using built-in homolog search pipelines.\n\n ii. Split: AlphaFold2 MSA Search + Model Inference: The AlphaFold2 MSA generation pipeline is executed independently and then provided as input for AlphaFold2 structure prediction.\n\n iii. Split: ColabFold MSA Search + Model Inference: The ColabFold MSA generation pipeline is used to produce input MSAs which can be used by ColabFold and Boltz.\n\n iv. pLM: Protein Language Model: The ESMFold model is used to predict structures without generating an MSA.\n\n4. Generate Report: The pipeline produces an interactive HTML report to visualize structure prediction outputs.\n\n5. Comparison Report: The structures predicted by parallel modes are combined in an interactive HTML report.\n\n6. MultiQC: The overall QC statistics are summarized.\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/get_started/environment_setup/overview) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/get_started/run-your-first-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nid,fasta\nT1024,T1024.fasta\nT1026,T1026.fasta\n```\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/proteinfold \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode \n```\n\nThe pipeline takes care of downloading the databases and parameters required by each of the modes. 
In case you have already downloaded the required files, you can skip this step by providing the path to the databases using the `--db` parameter.\n\n```bash\nnextflow run nf-core/proteinfold \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \\\n --mode \\\n --db \n```\n\n> [!WARNING]\n> The reference data for most methods is extremely large and may exceed individual user disk allocations on shared HPC systems.\n\nIn order to run multiple methods simultaneously where reference data is stored at different locations, the `--db` flag can be overwritten for each specific mode (e.g. `--alphafold2_db`, `--colabfold_db`, `--esmfold_db` and `--rosettafold_all_atom_db`). Please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) to check the directory structure you must provide for each database.\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/running/run-pipelines#using-parameter-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/proteinfold/usage) and the [parameter documentation](https://nf-co.re/proteinfold/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/proteinfold/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/proteinfold/output).\n\n## Adding new modes to the pipeline\n\nFor details on how to contribute new modes to the pipeline please refer to the [Howto contribute new modes](https://nf-co.re/proteinfold/usage/HOWTO_CONTRIBUTE_NEW_MODES).\n\n## Credits\n\nnf-core/proteinfold was originally written by Athanasios Baltzis 
([@athbaltzis](https://github.com/athbaltzis)), Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)), Luisa Santus ([@luisas](https://github.com/luisas)) and Leila Mansouri ([@l-mansouri](https://github.com/l-mansouri)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/) under the umbrella of the [BovReg project](https://www.bovreg.eu/) and Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/).\n\nMany thanks to others who have helped out and contributed along the way too, including (but not limited to): Norman Goodacre and Waleed Osman from Interline Therapeutics ([@interlinetx](https://github.com/interlinetx)), Martin Steinegger ([@martin-steinegger](https://github.com/martin-steinegger)) and Raoul J.P. Bonnal ([@rjpbonnal](https://github.com/rjpbonnal))\n\nThe pipeline has been further developed and maintained through contributions from the [Structural Biology Facility (SBF) at UNSW Sydney](https://www.unsw.edu.au/research/facilities-and-infrastructure/find-a-facility/sbf), including Joshua Caley ([@jscgh](https://github.com/jscgh)), Keiran Rowell ([@keiran-rowell-unsw](https://github.com/keiran-rowell-unsw)), Thomas Litfin ([@tlitfin](https://github.com/tlitfin)) and Nathan Glades ([@nbtm-sh](https://github.com/nbtm-sh)).\n\nWe would also like to thanks to the AWS Open Data Sponsorship Program for generously providing the resources necessary to host the data utilized in the testing, development, and deployment of nf-core proteinfold.\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](docs/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#proteinfold` channel](https://nfcore.slack.com/channels/proteinfold) (you can join with [this 
invite](https://nf-co.re/join/slack)).\n\n## Citations\n\nIf you use nf-core/proteinfold for your analysis, please cite it using the following doi: [10.5281/zenodo.7437038](https://doi.org/10.5281/zenodo.7437038)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/tests/alphafold2_download.nf.test.snap b/tests/alphafold2_download.nf.test.snap index 82124400c..0c39f2806 100644 --- a/tests/alphafold2_download.nf.test.snap +++ b/tests/alphafold2_download.nf.test.snap @@ -134,10 +134,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T11:58:00.077126331", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-24T10:38:04.23112396" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/alphafold2_split.nf.test.snap b/tests/alphafold2_split.nf.test.snap index 70c30c653..ee321f420 100644 --- a/tests/alphafold2_split.nf.test.snap +++ b/tests/alphafold2_split.nf.test.snap @@ -92,10 +92,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T11:58:14.638567413", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-24T10:38:24.179333033" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/alphafold3.nf.test.snap b/tests/alphafold3.nf.test.snap index 4c99ede27..6d03593dc 100644 --- 
a/tests/alphafold3.nf.test.snap +++ b/tests/alphafold3.nf.test.snap @@ -128,10 +128,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T11:58:30.358487564", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.4" - }, - "timestamp": "2026-05-16T00:02:51.199631231" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/boltz.nf.test.snap b/tests/boltz.nf.test.snap index b28aa6385..eca36cf16 100644 --- a/tests/boltz.nf.test.snap +++ b/tests/boltz.nf.test.snap @@ -119,10 +119,10 @@ "T1026.yaml:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T11:58:46.633453978", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-05-25T22:28:36.13892942" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/colabfold_download.nf.test.snap b/tests/colabfold_download.nf.test.snap index a031fd7e1..2043a1675 100644 --- a/tests/colabfold_download.nf.test.snap +++ b/tests/colabfold_download.nf.test.snap @@ -91,10 +91,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T11:59:02.057607138", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-24T10:39:23.534361445" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/colabfold_local.nf.test.snap b/tests/colabfold_local.nf.test.snap index 341f9fff7..6f1792af3 100644 --- a/tests/colabfold_local.nf.test.snap +++ b/tests/colabfold_local.nf.test.snap @@ -85,10 +85,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T11:59:16.850232481", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-24T10:39:42.637766168" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git 
a/tests/colabfold_webserver.nf.test.snap b/tests/colabfold_webserver.nf.test.snap index 496580e80..a53456b33 100644 --- a/tests/colabfold_webserver.nf.test.snap +++ b/tests/colabfold_webserver.nf.test.snap @@ -81,10 +81,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T11:59:31.097404212", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-24T10:40:00.665170193" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 6544e1e09..0ce80da84 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -79,10 +79,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T11:59:44.891085767", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-24T10:40:17.952575615" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/esmfold.nf.test.snap b/tests/esmfold.nf.test.snap index 81c80ee5c..f8b9138ae 100644 --- a/tests/esmfold.nf.test.snap +++ b/tests/esmfold.nf.test.snap @@ -49,10 +49,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T11:59:58.125532552", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.4" - }, - "timestamp": "2026-05-07T17:22:32.343559139" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/helixfold3.nf.test.snap b/tests/helixfold3.nf.test.snap index 82f029ae7..08d62807e 100644 --- a/tests/helixfold3.nf.test.snap +++ b/tests/helixfold3.nf.test.snap @@ -99,10 +99,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T12:00:12.95097286", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-24T10:40:53.31097046" + "nf-test": "0.9.5", + "nextflow": 
"26.04.0" + } } } \ No newline at end of file diff --git a/tests/rosettafold2na.nf.test.snap b/tests/rosettafold2na.nf.test.snap index 98e54c7c7..e1bbdacb2 100644 --- a/tests/rosettafold2na.nf.test.snap +++ b/tests/rosettafold2na.nf.test.snap @@ -51,10 +51,10 @@ "rna_complex.pdb:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T12:00:28.18549971", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.4" - }, - "timestamp": "2026-05-07T17:25:05.15820443" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/rosettafold_all_atom.nf.test.snap b/tests/rosettafold_all_atom.nf.test.snap index be8c7ca88..2fbe2db11 100644 --- a/tests/rosettafold_all_atom.nf.test.snap +++ b/tests/rosettafold_all_atom.nf.test.snap @@ -65,10 +65,10 @@ "T1026.pdb:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T12:00:44.038745396", "meta": { - "nf-test": "0.9.3", - "nextflow": "25.10.4" - }, - "timestamp": "2026-05-07T17:26:21.935502056" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file diff --git a/tests/scientific_validation.nf.test.snap b/tests/scientific_validation.nf.test.snap index 2a570e904..206024199 100644 --- a/tests/scientific_validation.nf.test.snap +++ b/tests/scientific_validation.nf.test.snap @@ -49,6 +49,12 @@ "Boltz: BioPython structure validation": { "content": [ { + "BOLTZ_FASTA": { + "python": "3.8.3" + }, + "BOLTZ_YAML_TO_COLABFOLD_FASTA": { + "python": "3.14.0" + }, "GENERATE_REPORT": { "python": "3.12.7", "generate_report.py": "Python 3.12.7" @@ -61,17 +67,17 @@ "boltz": "2.2.1" }, "SPLIT_MSA": { - "python": "3.8.3" + "python": "3.14.0" }, "Workflow": { "nf-core/proteinfold": "v2.1.0dev" } } ], - "timestamp": "2026-05-22T15:48:52.149573252", + "timestamp": "2026-05-28T15:45:28.059583051", "meta": { "nf-test": "0.9.5", - "nextflow": "26.04.2" + "nextflow": "26.04.1" } }, "HelixFold3: BioPython structure validation": { @@ -147,10 +153,10 @@ } } ], - 
"timestamp": "2026-05-22T15:19:53.579103078", + "timestamp": "2026-05-28T15:40:56.83446726", "meta": { "nf-test": "0.9.5", - "nextflow": "26.04.2" + "nextflow": "26.04.1" } }, "AlphaFold2: BioPython structure validation": { diff --git a/tests/split_fasta.nf.test.snap b/tests/split_fasta.nf.test.snap index 27e033593..2bd8c7b7a 100644 --- a/tests/split_fasta.nf.test.snap +++ b/tests/split_fasta.nf.test.snap @@ -85,10 +85,10 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], + "timestamp": "2026-05-29T12:06:47.424380551", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.4" - }, - "timestamp": "2026-04-24T10:41:52.280585558" + "nf-test": "0.9.5", + "nextflow": "26.04.0" + } } } \ No newline at end of file From 1228ff1ac652d13559a1d6f91e49a6435a89afbc Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 May 2026 12:10:23 +1000 Subject: [PATCH 09/17] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 45041814b..d8759d67e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,8 +24,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#619](https://github.com/nf-core/proteinfold/issues/619)] - Fix `extract_metrics.py` shebang to use `python3` for compatibility with minimal containers. - [[#209](https://github.com/nf-core/proteinfold/issues/209)] - Prevent local ColabFold runs from enabling remote template lookups unless `--colabfold_template_path` is provided. - [[#456](https://github.com/nf-core/proteinfold/issues/456)] - Derive ranked metric ordering from structure filenames when generating TSV outputs. -- [[#365](https://github.com/nf-core/proteinfold/issues/365)] - Run nf-tests with module containers so captured versions match. - [[#489](https://github.com/nf-core/proteinfold/issues/489)] - Specified Boltz output paths on `boltz_results_/`. 
+- [[#576](https://github.com/nf-core/proteinfold/issues/576)] - Preserve native metric rank numbering and sort rank-derived outputs numerically. | Old parameter | New parameter | | -------------------------- | --------------- | From e38174abfda01fca188fd38aac63e057da99c5db Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 May 2026 13:26:01 +1000 Subject: [PATCH 10/17] Updated schema --- nextflow_schema.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index bc70a24bf..9f6d28229 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -175,7 +175,6 @@ }, "colabfold_use_gpu_relax": { "type": "boolean", - "default": false, "description": "Use GPU for Amber relaxation in ColabFold", "fa_icon": "fas fa-microchip" }, @@ -188,7 +187,6 @@ }, "colabfold_use_templates": { "type": "boolean", - "default": false, "description": "Use PDB templates. Local ColabFold runs require colabfold_template_path to avoid remote template lookups.", "fa_icon": "fas fa-paste" }, From 4c8f90ce7a26c5a0c160f1338af5ca2d461ddac4 Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 May 2026 14:51:55 +1000 Subject: [PATCH 11/17] Fix output file naming and add additional stubfiles in COLABFOLD_BATCH process --- modules/local/colabfold_batch/main.nf | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/modules/local/colabfold_batch/main.nf b/modules/local/colabfold_batch/main.nf index 2901e61bc..0042ae894 100644 --- a/modules/local/colabfold_batch/main.nf +++ b/modules/local/colabfold_batch/main.nf @@ -16,7 +16,7 @@ process COLABFOLD_BATCH { tuple val(meta), path ("${meta.id}_colabfold_msa.tsv") , emit: msa tuple val(meta), path ("${meta.id}_plddt_mqc.tsv") , emit: multiqc tuple val(meta), path ("${meta.id}_*_pae.tsv") , optional: true, emit: paes - tuple val(meta), path ("${meta.id}_0_pae.tsv") , optional: true, emit: pae + tuple val(meta), path ("${meta.id}_1_pae.tsv") , optional: true, emit: pae tuple val(meta), 
path ("${meta.id}_ptm.tsv") , optional: true, emit: ptms tuple val(meta), path ("${meta.id}_iptm.tsv") , optional: true, emit: iptms tuple val(meta), path ("${meta.id}_ipsae.tsv") , optional: true, emit: ipsaes @@ -83,9 +83,19 @@ process COLABFOLD_BATCH { touch ./raw/${meta.id}_relaxed_rank_001_model_1_seed_000.pdb touch ./raw/${meta.id}_relaxed_rank_002_model_2_seed_000.pdb touch ./raw/${meta.id}_relaxed_rank_003_model_3_seed_000.pdb + touch ./raw/${meta.id}_relaxed_rank_004_model_4_seed_000.pdb + touch ./raw/${meta.id}_relaxed_rank_005_model_5_seed_000.pdb touch ./${meta.id}_seq_coverage.png - touch ./raw/${meta.id}_scores_rank.json - touch ./${meta.id}_0_pae.tsv + touch ./raw/${meta.id}_scores_rank_001_model_1_seed_000.json + touch ./raw/${meta.id}_scores_rank_002_model_2_seed_000.json + touch ./raw/${meta.id}_scores_rank_003_model_3_seed_000.json + touch ./raw/${meta.id}_scores_rank_004_model_4_seed_000.json + touch ./raw/${meta.id}_scores_rank_005_model_5_seed_000.json + touch ./${meta.id}_1_pae.tsv + touch ./${meta.id}_2_pae.tsv + touch ./${meta.id}_3_pae.tsv + touch ./${meta.id}_4_pae.tsv + touch ./${meta.id}_5_pae.tsv touch ./${meta.id}_ptm.tsv touch ./${meta.id}_iptm.tsv touch ./${meta.id}_ipsae.tsv From 8551cf8c79535dfbd4a5831a19954dd264b4eb6a Mon Sep 17 00:00:00 2001 From: jscgh Date: Fri, 29 May 2026 15:03:57 +1000 Subject: [PATCH 12/17] Updated snapshots --- tests/colabfold_download.nf.test.snap | 26 +++++++++++++++++++++----- tests/colabfold_local.nf.test.snap | 26 +++++++++++++++++++++----- tests/colabfold_webserver.nf.test.snap | 26 +++++++++++++++++++++----- tests/split_fasta.nf.test.snap | 26 +++++++++++++++++++++----- 4 files changed, 84 insertions(+), 20 deletions(-) diff --git a/tests/colabfold_download.nf.test.snap b/tests/colabfold_download.nf.test.snap index 2043a1675..954a8c0dc 100644 --- a/tests/colabfold_download.nf.test.snap +++ b/tests/colabfold_download.nf.test.snap @@ -36,7 +36,11 @@ "colabfold/T1024/T1024_plddt.tsv", 
"colabfold/T1024/T1024_ptm.tsv", "colabfold/T1024/paes", - "colabfold/T1024/paes/T1024_0_pae.tsv", + "colabfold/T1024/paes/T1024_1_pae.tsv", + "colabfold/T1024/paes/T1024_2_pae.tsv", + "colabfold/T1024/paes/T1024_3_pae.tsv", + "colabfold/T1024/paes/T1024_4_pae.tsv", + "colabfold/T1024/paes/T1024_5_pae.tsv", "colabfold/T1026", "colabfold/T1026/T1026_chainwise_ipsae.tsv", "colabfold/T1026/T1026_chainwise_iptm.tsv", @@ -46,7 +50,11 @@ "colabfold/T1026/T1026_plddt.tsv", "colabfold/T1026/T1026_ptm.tsv", "colabfold/T1026/paes", - "colabfold/T1026/paes/T1026_0_pae.tsv", + "colabfold/T1026/paes/T1026_1_pae.tsv", + "colabfold/T1026/paes/T1026_2_pae.tsv", + "colabfold/T1026/paes/T1026_3_pae.tsv", + "colabfold/T1026/paes/T1026_4_pae.tsv", + "colabfold/T1026/paes/T1026_5_pae.tsv", "colabfold/top_ranked_structures", "colabfold/top_ranked_structures/T1024.pdb", "colabfold/top_ranked_structures/T1026.pdb", @@ -74,7 +82,11 @@ "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_1_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_2_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_3_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_4_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_5_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_chainwise_ipsae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_chainwise_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -82,7 +94,11 @@ "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_1_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_2_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"T1026_3_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_4_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_5_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "input.csv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -91,7 +107,7 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2026-05-29T11:59:02.057607138", + "timestamp": "2026-05-29T15:01:54.756116028", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/tests/colabfold_local.nf.test.snap b/tests/colabfold_local.nf.test.snap index 6f1792af3..9ad861771 100644 --- a/tests/colabfold_local.nf.test.snap +++ b/tests/colabfold_local.nf.test.snap @@ -33,7 +33,11 @@ "colabfold/T1024/T1024_plddt.tsv", "colabfold/T1024/T1024_ptm.tsv", "colabfold/T1024/paes", - "colabfold/T1024/paes/T1024_0_pae.tsv", + "colabfold/T1024/paes/T1024_1_pae.tsv", + "colabfold/T1024/paes/T1024_2_pae.tsv", + "colabfold/T1024/paes/T1024_3_pae.tsv", + "colabfold/T1024/paes/T1024_4_pae.tsv", + "colabfold/T1024/paes/T1024_5_pae.tsv", "colabfold/T1026", "colabfold/T1026/T1026_chainwise_ipsae.tsv", "colabfold/T1026/T1026_chainwise_iptm.tsv", @@ -43,7 +47,11 @@ "colabfold/T1026/T1026_plddt.tsv", "colabfold/T1026/T1026_ptm.tsv", "colabfold/T1026/paes", - "colabfold/T1026/paes/T1026_0_pae.tsv", + "colabfold/T1026/paes/T1026_1_pae.tsv", + "colabfold/T1026/paes/T1026_2_pae.tsv", + "colabfold/T1026/paes/T1026_3_pae.tsv", + "colabfold/T1026/paes/T1026_4_pae.tsv", + "colabfold/T1026/paes/T1026_5_pae.tsv", "colabfold/top_ranked_structures", "colabfold/top_ranked_structures/T1024.pdb", "colabfold/top_ranked_structures/T1026.pdb", @@ -68,7 +76,11 @@ "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + 
"T1024_1_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_2_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_3_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_4_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_5_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_chainwise_ipsae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_chainwise_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -76,7 +88,11 @@ "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_1_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_2_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_3_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_4_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_5_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "input.csv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -85,7 +101,7 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2026-05-29T11:59:16.850232481", + "timestamp": "2026-05-29T15:02:09.999749672", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/tests/colabfold_webserver.nf.test.snap b/tests/colabfold_webserver.nf.test.snap index a53456b33..036ec6a80 100644 --- a/tests/colabfold_webserver.nf.test.snap +++ b/tests/colabfold_webserver.nf.test.snap @@ -29,7 +29,11 @@ "colabfold/T1024/T1024_plddt.tsv", "colabfold/T1024/T1024_ptm.tsv", "colabfold/T1024/paes", - "colabfold/T1024/paes/T1024_0_pae.tsv", + "colabfold/T1024/paes/T1024_1_pae.tsv", + "colabfold/T1024/paes/T1024_2_pae.tsv", + "colabfold/T1024/paes/T1024_3_pae.tsv", + "colabfold/T1024/paes/T1024_4_pae.tsv", + "colabfold/T1024/paes/T1024_5_pae.tsv", 
"colabfold/T1026", "colabfold/T1026/T1026_chainwise_ipsae.tsv", "colabfold/T1026/T1026_chainwise_iptm.tsv", @@ -39,7 +43,11 @@ "colabfold/T1026/T1026_plddt.tsv", "colabfold/T1026/T1026_ptm.tsv", "colabfold/T1026/paes", - "colabfold/T1026/paes/T1026_0_pae.tsv", + "colabfold/T1026/paes/T1026_1_pae.tsv", + "colabfold/T1026/paes/T1026_2_pae.tsv", + "colabfold/T1026/paes/T1026_3_pae.tsv", + "colabfold/T1026/paes/T1026_4_pae.tsv", + "colabfold/T1026/paes/T1026_5_pae.tsv", "colabfold/top_ranked_structures", "colabfold/top_ranked_structures/T1024.pdb", "colabfold/top_ranked_structures/T1026.pdb", @@ -64,7 +72,11 @@ "T1024_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1024_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_1_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_2_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_3_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_4_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1024_5_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_chainwise_ipsae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_chainwise_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -72,7 +84,11 @@ "T1026_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "T1026_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_1_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_2_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_3_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_4_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "T1026_5_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "T1024.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "T1026.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", 
"input.csv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -81,7 +97,7 @@ "test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2026-05-29T11:59:31.097404212", + "timestamp": "2026-05-29T15:02:24.412363191", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" diff --git a/tests/split_fasta.nf.test.snap b/tests/split_fasta.nf.test.snap index 2bd8c7b7a..fa2956f3a 100644 --- a/tests/split_fasta.nf.test.snap +++ b/tests/split_fasta.nf.test.snap @@ -33,7 +33,11 @@ "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_plddt.tsv", "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_ptm.tsv", "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes", - "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_0_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_1_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_2_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_3_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_4_pae.tsv", + 
"colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_5_pae.tsv", "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues", "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_chainwise_ipsae.tsv", "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_chainwise_iptm.tsv", @@ -43,7 +47,11 @@ "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_plddt.tsv", "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_ptm.tsv", "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes", - "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_0_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_1_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_2_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_3_pae.tsv", + 
"colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_4_pae.tsv", + "colabfold/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues/paes/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_5_pae.tsv", "colabfold/top_ranked_structures", "colabfold/top_ranked_structures/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues.pdb", "colabfold/top_ranked_structures/H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues.pdb", @@ -68,7 +76,11 @@ "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_1_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_2_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_3_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_4_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues_5_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", 
"H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_chainwise_ipsae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_chainwise_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_colabfold_msa.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -76,7 +88,11 @@ "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_iptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_plddt.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_ptm.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", - "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_0_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_1_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_2_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_3_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_4_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", + "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues_5_pae.tsv:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_1_127_residues.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "H1065_H1065_N4-Cytosine_Methyltransferase_Serratia_marcescens_subunit_2_98_residues.pdb:md5,d41d8cd98f00b204e9800998ecf8427e", "input.csv:md5,d41d8cd98f00b204e9800998ecf8427e", @@ -85,7 +101,7 @@ 
"test_alphafold2_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2026-05-29T12:06:47.424380551", + "timestamp": "2026-05-29T15:03:57.105744253", "meta": { "nf-test": "0.9.5", "nextflow": "26.04.0" From f5979038f0570a795e605cc7267d069707b7ebf5 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 9 Jun 2026 15:51:53 +1000 Subject: [PATCH 13/17] Fixes handling uneven AF3 paired MSA depth --- bin/extract_metrics.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/extract_metrics.py b/bin/extract_metrics.py index 2a2be8d72..3ea0bd4c5 100755 --- a/bin/extract_metrics.py +++ b/bin/extract_metrics.py @@ -528,7 +528,8 @@ def read_json(name, json_files, struct_files=None): chains = len(data['sequences']) final_rows = [] # Paired - for i in range(len(paired_msa_rows[0])): #The number of paired lines is common to all MSAs + paired_row_count = min((len(rows) for rows in paired_msa_rows), default=0) + for i in range(paired_row_count): #The number of paired lines is common to all MSAs temp_row = [] #This needs to be fixed if inference is batched in future. 
for j in range(chains): @@ -536,7 +537,7 @@ def read_json(name, json_files, struct_files=None): final_rows.append(temp_row) # Un-paired - msa_widths = [len(paired_msa_rows[chain][0]) for chain in range(chains)] + msa_widths = [len(paired_msa_rows[chain][0]) if paired_msa_rows[chain] else (len(unpaired_msa_rows[chain][0]) if unpaired_msa_rows[chain] else 0) for chain in range(chains)] msa_heights = [len(unpaired_msa_rows[chain]) for chain in range(chains)] cum_total_rows = np.cumsum(msa_heights) From 1a69d409c1f0e6ae434c453786a09cd7e639cfb1 Mon Sep 17 00:00:00 2001 From: jscgh Date: Tue, 9 Jun 2026 16:30:23 +1000 Subject: [PATCH 14/17] Address PR comment: temporarily exclude AF3 paired MSAs --- bin/extract_metrics.py | 17 ++--------------- modules/local/run_rosettafold2na/main.nf | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/bin/extract_metrics.py b/bin/extract_metrics.py index 3ea0bd4c5..31ecf8761 100755 --- a/bin/extract_metrics.py +++ b/bin/extract_metrics.py @@ -515,29 +515,16 @@ def read_json(name, json_files, struct_files=None): data = json.load(f) if json_file.endswith("_data.json"): #AF3 output with MSA info # Can't just used format_msa_rows since there's FASTA headers in the json content - paired_msa_rows = [] unpaired_msa_rows = [] for chain in data['sequences']: unpaired_MSA = chain['protein']['unpairedMsa'] unpaired_msa_lines = [''.join(c for c in line if not c.islower()) for line in unpaired_MSA.split("\n") if line.strip() and not line.startswith(">")] unpaired_msa_rows.append([[str(AA_to_int.get(residue, 20)) for residue in line] for line in unpaired_msa_lines]) - paired_MSA = chain['protein']['pairedMsa'] - paired_msa_lines = [''.join(c for c in line if not c.islower()) for line in paired_MSA.split("\n") if line.strip() and not line.startswith(">")] - paired_msa_rows.append([[str(AA_to_int.get(residue, 20)) for residue in line] for line in paired_msa_lines]) chains = len(data['sequences']) final_rows = 
[] - # Paired - paired_row_count = min((len(rows) for rows in paired_msa_rows), default=0) - for i in range(paired_row_count): #The number of paired lines is common to all MSAs - temp_row = [] - #This needs to be fixed if inference is batched in future. - for j in range(chains): - temp_row.extend(paired_msa_rows[j][i]) - final_rows.append(temp_row) - - # Un-paired - msa_widths = [len(paired_msa_rows[chain][0]) if paired_msa_rows[chain] else (len(unpaired_msa_rows[chain][0]) if unpaired_msa_rows[chain] else 0) for chain in range(chains)] + # Exclude the paired block for now; use the unpaired MSA only. + msa_widths = [len(unpaired_msa_rows[chain][0]) if unpaired_msa_rows[chain] else 0 for chain in range(chains)] msa_heights = [len(unpaired_msa_rows[chain]) for chain in range(chains)] cum_total_rows = np.cumsum(msa_heights) diff --git a/modules/local/run_rosettafold2na/main.nf b/modules/local/run_rosettafold2na/main.nf index f2418f78b..1a500f316 100644 --- a/modules/local/run_rosettafold2na/main.nf +++ b/modules/local/run_rosettafold2na/main.nf @@ -44,6 +44,30 @@ process RUN_ROSETTAFOLD2NA { ln -s /app/RoseTTAFold2NA/network/* ./network fi + # RF2NA hard-codes the UniRef30_2020_06 database prefix. Allow a staged + # UniRef30 hhsuite database from another release by adding local aliases. + expected_uniref30_prefix="UniRef30_2020_06/UniRef30_2020_06" + if [ ! -s "\${expected_uniref30_prefix}_cs219.ffdata" ]; then + detected_uniref30_cs219="\$(find UniRef30_2020_06 -maxdepth 1 -name 'UniRef30_*_cs219.ffdata' -print -quit)" + if [ -z "\$detected_uniref30_cs219" ]; then + echo "[ROSETTAFOLD2NA] Could not find a staged UniRef30 *_cs219.ffdata file in UniRef30_2020_06/." >&2 + exit 1 + fi + + detected_uniref30_prefix="\${detected_uniref30_cs219%_cs219.ffdata}" + for ext in a3m.ffdata a3m.ffindex cs219.ffdata cs219.ffindex hhm.ffdata hhm.ffindex; do + src="\${detected_uniref30_prefix}_\${ext}" + dst="\${expected_uniref30_prefix}_\${ext}" + if [ ! 
-s "\$src" ]; then + echo "[ROSETTAFOLD2NA] Missing staged UniRef30 database file: \$src" >&2 + exit 1 + fi + if [ ! -e "\$dst" ]; then + ln -s "\$(basename "\$src")" "\$dst" + fi + done + fi + rf2na_input_dir="\${rf2na_input:-rf2na_input}" chain_map="\${rf2na_input_dir}/chain_map.tsv" From 1aad0fae04e39b20ee75412139ef2f5b8a524cd5 Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 11 Jun 2026 15:13:16 +1000 Subject: [PATCH 15/17] Add warnings to extract_metrics.py to prevent silent fallback --- bin/extract_metrics.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/bin/extract_metrics.py b/bin/extract_metrics.py index 31ecf8761..c8fa352b9 100755 --- a/bin/extract_metrics.py +++ b/bin/extract_metrics.py @@ -7,6 +7,7 @@ import subprocess import sys import tempfile +import warnings #import torch moved to a conditional import since too bulky import if not used import numpy as np import csv @@ -157,6 +158,7 @@ def sort_paths_by_rank(paths): def sort_key(path): rank = infer_model_rank(path) if rank is None: + warnings.warn(f"Unable to infer model rank from path: {path}; falling back to basename sort") return (1, os.path.basename(path)) return (0, rank, os.path.basename(path)) @@ -167,6 +169,8 @@ def build_struct_map(struct_files): struct_map = {} for idx, struct_file in enumerate(sort_paths_by_rank(struct_files)): rank = infer_model_rank(struct_file) + if rank is None: + warnings.warn(f"Unable to infer model rank from path: {struct_file}; falling back to index {idx}") struct_map[rank if rank is not None else idx] = struct_file return struct_map @@ -275,6 +279,8 @@ def extract_structs_plddt_to_tsv(name, structures): rank_names = [] for idx, structure in enumerate(sorted_structures): rank = infer_model_rank(structure) + if rank is None: + warnings.warn(f"Unable to infer model rank from path: {structure}; falling back to index {idx}") rank_names.append(f"rank_{rank}" if rank is not None else f"rank_{idx}") # Create header as the first row plddt_rows = [["Positions"] + 
rank_names] From 10bc5325cf32a76e384ae925b47654cbfe88432f Mon Sep 17 00:00:00 2001 From: jscgh Date: Thu, 11 Jun 2026 15:17:29 +1000 Subject: [PATCH 16/17] Renamed colabfold_metrics_files for clarity --- bin/extract_metrics.py | 10 +++++----- modules/local/colabfold_batch/main.nf | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bin/extract_metrics.py b/bin/extract_metrics.py index c8fa352b9..9514342fe 100755 --- a/bin/extract_metrics.py +++ b/bin/extract_metrics.py @@ -658,14 +658,14 @@ def read_pt(name, pt_files): write_tsv(f"{name}_0_pae.tsv", format_pae_rows(np.squeeze(data["pae"].numpy()))) break -def read_colabfold_metrics(name, colabfold_metrics_fns, struct_files=None): +def read_colabfold_metrics(name, colabfold_metrics_files, struct_files=None): ptm_rows = [] iptm_rows = [] ipsae_rows = [] chainwise_iptm = {} chainwise_ipsae = {} struct_map = build_struct_map(struct_files) if struct_files else {} - for fn in colabfold_metrics_fns: + for fn in colabfold_metrics_files: with open(fn) as f: data = json.load(f) rank_id = infer_model_rank(fn) @@ -708,7 +708,7 @@ def main(): parser.add_argument("--paired_a3m", dest="paired_a3m", required=False) # For reading the ColabFold MSA format parser.add_argument("--csvs", dest="csvs", required=False, nargs="+") # For reading boltz csvs parser.add_argument("--jsons", dest="jsons", required=False, nargs="+") # For reading the AF3 MSA & PAE, HF3 PAE - parser.add_argument("--colabfold_metrics_fns", required=False, nargs="+") + parser.add_argument("--colabfold_metrics_files", required=False, nargs="+") parser.add_argument("--pts", dest="pts", required=False, nargs="+") # For read RFAA pytorch model to get PAE data parser.add_argument("--structs", dest="structs", required=False, nargs="+") parser.add_argument("--name", default="untitled", dest="name") # might need a --name $meta.id @@ -730,8 +730,8 @@ def main(): read_pt(args.name, args.pts) if args.structs: extract_structs_plddt_to_tsv(args.name,
args.structs) - if args.colabfold_metrics_fns: - read_colabfold_metrics(args.name, args.colabfold_metrics_fns, args.structs) + if args.colabfold_metrics_files: + read_colabfold_metrics(args.name, args.colabfold_metrics_files, args.structs) if __name__ == "__main__": main() diff --git a/modules/local/colabfold_batch/main.nf b/modules/local/colabfold_batch/main.nf index 0042ae894..68bd81ef2 100644 --- a/modules/local/colabfold_batch/main.nf +++ b/modules/local/colabfold_batch/main.nf @@ -60,7 +60,7 @@ process COLABFOLD_BATCH { fi extract_metrics.py --name ${meta.id} \\ - --colabfold_metrics_fns raw/*scores_rank*.json \\ + --colabfold_metrics_files raw/*scores_rank*.json \\ --structs raw/*_\${prefix}_rank*.pdb \\ --paired_a3m raw/${meta.id}.a3m From 02b886b638ae8f8eddbd551af78356749a579f02 Mon Sep 17 00:00:00 2001 From: jscgh Date: Mon, 15 Jun 2026 16:09:25 +1000 Subject: [PATCH 17/17] Removed local colabfold template option --- CHANGELOG.md | 2 +- conf/modules_colabfold.config | 3 +-- docs/usage/colabfold.md | 3 +-- main.nf | 4 ++++ nextflow.config | 1 - nextflow_schema.json | 9 ++------- 6 files changed, 9 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d8759d67e..3e7cf51f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#618](https://github.com/nf-core/proteinfold/pull/618)] - Resolve boltz `ext.args` in closure. - [[PR #626](https://github.com/nf-core/proteinfold/pull/618)] - Move scientific validation tests and BioPython setup to manual workflow. - [[#619](https://github.com/nf-core/proteinfold/issues/619)] - Fix `extract_metrics.py` shebang to use `python3` for compatibility with minimal containers. -- [[#209](https://github.com/nf-core/proteinfold/issues/209)] - Prevent local ColabFold runs from enabling remote template lookups unless `--colabfold_template_path` is provided. 
+- [[#209](https://github.com/nf-core/proteinfold/issues/209)] - Prevent ColabFold template runs unless `--use_msa_server` is enabled. - [[#456](https://github.com/nf-core/proteinfold/issues/456)] - Derive ranked metric ordering from structure filenames when generating TSV outputs. - [[#489](https://github.com/nf-core/proteinfold/issues/489)] - Specified Boltz output paths on `boltz_results_/`. - [[#576](https://github.com/nf-core/proteinfold/issues/576)] - Preserve native metric rank numbering and sort rank-derived outputs numerically. diff --git a/conf/modules_colabfold.config b/conf/modules_colabfold.config index e2366c471..2382cb975 100644 --- a/conf/modules_colabfold.config +++ b/conf/modules_colabfold.config @@ -33,8 +33,7 @@ process { ext.args = {[ params.colabfold_use_gpu_relax ? '--use-gpu-relax' : '', params.colabfold_use_amber ? '--amber' : '', - params.colabfold_use_templates && (params.use_msa_server || params.colabfold_template_path) ? '--templates' : '', - params.colabfold_template_path ? "--custom-template-path ${params.colabfold_template_path}" : '', + params.colabfold_use_templates ? '--templates' : '', params.random_seed != null ? "--random-seed ${params.random_seed}" : '', params.use_msa_server && params.msa_server_url ? "--host-url ${params.msa_server_url}" : '' ].findAll { arg -> arg }.join(' ').trim()} diff --git a/docs/usage/colabfold.md b/docs/usage/colabfold.md index c8b16c874..11be454ee 100644 --- a/docs/usage/colabfold.md +++ b/docs/usage/colabfold.md @@ -108,8 +108,7 @@ See the [ColabFold](https://github.com/sokrypton/ColabFold) documentation for a | `--colabfold_use_amber` | `true` | ColabFold outputs will sometimes contain phsyical violations such as steric clashes. These clashes can be resolved by post-processing the outputs with a short relaxation using the Amber Force Field. Non-clashing atoms are pinned to starting coordinates such that the relaxation has a minimal impact on final structures. 
| | `--colabfold_db_load_mode` | `0` | Specify the way that MMSeqs2 will load the required databases in memory | | `--colabfold_alphafold2_params_prefix` | `alphafold_params_2022-12-06` | Specify the alphafold2 params used for prediction. | -| `--colabfold_use_templates` | `false` | Use PDB templates to support predictions. When `--use_msa_server` is disabled, this only takes effect if `--colabfold_template_path` is also set so ColabFold can use local templates without contacting the MMSeqs API. The ColabFold notebooks do not use templates by default. | -| `--colabfold_template_path` | `null` | Path to a local ColabFold template directory. Set this together with `--colabfold_use_templates` to enable template use in local ColabFold mode without remote template lookups. | +| `--colabfold_use_templates` | `false` | Use PDB templates to support predictions. This option is only supported when `--use_msa_server` is enabled. | | `--colabfold_create_index` | `false` | Create index for ColabFold databases during setup. On network filesystems it can be more performant to re-compute the index on the fly | > You can override any of these parameters via the command line or a params file. 
diff --git a/main.nf b/main.nf index 661bcd6ab..eba7f76bd 100644 --- a/main.nf +++ b/main.nf @@ -63,6 +63,10 @@ workflow NFCORE_PROTEINFOLD { requested_modes = params.mode.toLowerCase().split(",") requested_modes_size = requested_modes.size() + if (requested_modes.contains("colabfold") && params.colabfold_use_templates && !params.use_msa_server) { + error("`--colabfold_use_templates` requires `--use_msa_server` in ColabFold mode.") + } + ch_dummy_file = channel.fromPath("$projectDir/assets/NO_FILE") ch_dummy_file_pae = channel.fromPath("$projectDir/assets/NO_FILE_PAE") diff --git a/nextflow.config b/nextflow.config index f599079b7..75f1dbba6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -109,7 +109,6 @@ params { colabfold_db = null colabfold_db_load_mode = 0 colabfold_use_templates = false - colabfold_template_path = null colabfold_create_index = false // Colabfold links diff --git a/nextflow_schema.json b/nextflow_schema.json index 9f6d28229..66fd8cbfa 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -187,13 +187,8 @@ }, "colabfold_use_templates": { "type": "boolean", - "description": "Use PDB templates. Local ColabFold runs require colabfold_template_path to avoid remote template lookups.", - "fa_icon": "fas fa-paste" - }, - "colabfold_template_path": { - "type": "string", - "format": "directory-path", - "description": "Path to a local ColabFold template directory. Required to use templates without --use_msa_server.", + "default": false, + "description": "Use PDB templates", "fa_icon": "fas fa-paste" }, "colabfold_create_index": {