diff --git a/bin/msa_manager.py b/bin/msa_manager.py index 2f248017c..284fae4ae 100755 --- a/bin/msa_manager.py +++ b/bin/msa_manager.py @@ -249,21 +249,31 @@ def pop_match(match_fn): if template_yaml: template_order, template_entities = parse_template_yaml(template_yaml) for key in template_order: - # Prefer exact id/type match first + template_entity = template_entities.get(key) + template_type = None + template_payload = None + + if template_entity is not None: + template_type = str(template_entity["seq_type"]).strip().lower() + template_payload = template_entity.get("seq_value") + if template_payload is not None: + template_payload = str(template_payload).strip() + + # Prefer exact id/type match first, but reject reshuffled ids whose sequence payload does not match the template + def payload_matches_template(entry): + return template_payload is None or entry["payload"] == template_payload + matched = pop_match( lambda entry: make_entity_key(entry["seq_type"], entry["seq_details"].get("id")) == key + and payload_matches_template(entry) ) # Fallback: MMseqs may reshuffle/swap ids; match by type + sequence payload - if matched is None and key in template_entities: - template_type = str(template_entities[key]["seq_type"]).strip().lower() - template_payload = template_entities[key].get("seq_value") - if template_payload is not None: - template_payload = str(template_payload).strip() - matched = pop_match( - lambda entry: str(entry["seq_type"]).strip().lower() == template_type - and entry["payload"] == template_payload - ) + if matched is None and template_payload is not None: + matched = pop_match( + lambda entry: str(entry["seq_type"]).strip().lower() == template_type + and entry["payload"] == template_payload + ) # Keep template entries even if no MMseqs match, so metadata is retained. ordered_entries.append(matched) diff --git a/conf/test_nostub.config b/conf/test_nostub.config new file mode 100644 index 000000000..45fd2a5c1 --- /dev/null +++ b/conf/test_nostub.config @@ -0,0 +1,9 @@ +stubRun = false + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} diff --git a/modules/local/boltz_fasta/tests/main.nf.test b/modules/local/boltz_fasta/tests/main.nf.test new file mode 100644 index 000000000..3dac08100 --- /dev/null +++ b/modules/local/boltz_fasta/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process BOLTZ_FASTA" + script "../main.nf" + process "BOLTZ_FASTA" + + tag "modules" + tag "modules_local" + tag "boltz_fasta" + profile "test_nostub" + + test("converts mixed fasta to boltz yaml") { + + when { + process { + """ + input[0] = [ + [ id: 'mixed_sample' ], + file(params.pipelines_testdata_base_path + 'testdata/sequences/untagged_complex.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/local/boltz_fasta/tests/main.nf.test.snap b/modules/local/boltz_fasta/tests/main.nf.test.snap new file mode 100644 index 000000000..3f379017c --- /dev/null +++ b/modules/local/boltz_fasta/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "converts mixed fasta to boltz yaml": { + "content": [ + { + "0": [ + [ + { + "id": "mixed_sample" + }, + "mixed_sample.yaml:md5,52bdd0470032c2e6c93b886b50906c09" + ] + ], + "1": [ + "versions.yml:md5,0d82efa297b86ccb1ae14852c66f16ab" + ], + "boltz_yaml": [ + [ + { + "id": "mixed_sample" + }, + "mixed_sample.yaml:md5,52bdd0470032c2e6c93b886b50906c09" + ] + ], + "versions": [ + "versions.yml:md5,0d82efa297b86ccb1ae14852c66f16ab" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-27T18:09:17.307872399" + } +} \ No newline at end of file diff --git a/modules/local/boltz_yaml_to_colabfold_fasta/tests/main.nf.test b/modules/local/boltz_yaml_to_colabfold_fasta/tests/main.nf.test new file mode 100644 index 000000000..c09ca7b28 --- /dev/null +++ b/modules/local/boltz_yaml_to_colabfold_fasta/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process BOLTZ_YAML_TO_COLABFOLD_FASTA" + script "../main.nf" + process "BOLTZ_YAML_TO_COLABFOLD_FASTA" + + tag "modules" + tag "modules_local" + tag "boltz_yaml_to_colabfold_fasta" + profile "test_nostub" + + test("converts boltz yaml to colabfold fasta") { + + when { + process { + """ + input[0] = [ + [ id: 'mixed_sample' ], + file(params.pipelines_testdata_base_path + 'testdata/sequences/boltz/mixed.yaml', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/local/boltz_yaml_to_colabfold_fasta/tests/main.nf.test.snap b/modules/local/boltz_yaml_to_colabfold_fasta/tests/main.nf.test.snap new file mode 100644 index 000000000..0c6c4a6bc --- /dev/null +++ b/modules/local/boltz_yaml_to_colabfold_fasta/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "converts boltz yaml to colabfold fasta": { + "content": [ + { + "0": [ + [ + { + "id": "mixed_sample" + }, + "mixed_sample.fasta:md5,475da65322d4589b4bfe1c6374a5b1a2" + ] + ], + "1": [ + "versions.yml:md5,7f72de6c3c7727358d4638fd60c2a674" + ], + "query_fasta": [ + [ + { + "id": "mixed_sample" + }, + "mixed_sample.fasta:md5,475da65322d4589b4bfe1c6374a5b1a2" + ] + ], + "versions": [ + "versions.yml:md5,7f72de6c3c7727358d4638fd60c2a674" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-27T18:44:11.575259411" + } +} \ No newline at end of file diff --git a/modules/local/mmseqs_colabfoldsearch/tests/main.nf.test b/modules/local/mmseqs_colabfoldsearch/tests/main.nf.test new file mode 100644 index 000000000..80b9aa97d --- /dev/null +++ b/modules/local/mmseqs_colabfoldsearch/tests/main.nf.test @@ -0,0 +1,42 @@ +nextflow_process { + + name "Test Process MMSEQS_COLABFOLDSEARCH" + script "../main.nf" + process "MMSEQS_COLABFOLDSEARCH" + config "./nextflow.config" + + tag "modules" + tag "modules_local" + tag "mmseqs_colabfoldsearch" + profile "test_nostub" + + test("runs mmseqs colabfoldsearch") { + + when { + process { + """ + def dbDir = file('db') + dbDir.mkdirs() + file('db/dummy.db').text = 'x' + + def uniref30Dir = file('s3://proteinfold-dataset/test-data/mini_dbs/colabfold_uniref30/*', checkIfExists: true) + + input[0] = [ + [ id: 'mixed' ], + file(params.pipelines_testdata_base_path + 'testdata/sequences/mmseqs/ubq.fasta', checkIfExists: true) + ] + input[1] = file('db/*') + input[2] = uniref30Dir + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/local/mmseqs_colabfoldsearch/tests/main.nf.test.snap b/modules/local/mmseqs_colabfoldsearch/tests/main.nf.test.snap new file mode 100644 index 000000000..d72768fc4 --- /dev/null +++ b/modules/local/mmseqs_colabfoldsearch/tests/main.nf.test.snap @@ -0,0 +1,51 @@ +{ + "runs mmseqs colabfoldsearch": { + "content": [ + { + "0": [ + [ + { + "id": "mixed" + }, + "ubq.a3m:md5,48af8d06679fa8d083df615c1bbdafb4" + ] + ], + "1": [ + [ + { + "id": "mixed" + }, + "ubq.json:md5,1d038ed924926e239647485dc3219dae" + ] + ], + "2": [ + "versions.yml:md5,4655c25b6d38ecaeb0960ba8d95c2a35" + ], + "a3m": [ + [ + { + "id": "mixed" + }, + "ubq.a3m:md5,48af8d06679fa8d083df615c1bbdafb4" + ] + ], + "json": [ + [ + { + "id": "mixed" + }, + "ubq.json:md5,1d038ed924926e239647485dc3219dae" + ] + ], + "versions": [ + "versions.yml:md5,4655c25b6d38ecaeb0960ba8d95c2a35" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-05-30T13:54:01.899878117" + } +} \ No newline at end of file diff --git a/modules/local/mmseqs_colabfoldsearch/tests/nextflow.config b/modules/local/mmseqs_colabfoldsearch/tests/nextflow.config new file mode 100644 index 000000000..56feeeb30 --- /dev/null +++ b/modules/local/mmseqs_colabfoldsearch/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: MMSEQS_COLABFOLDSEARCH { + //ext.args = '--use-env=0 --pair-mode=unpaired' //https://github.com/sokrypton/ColabFold/issues/821 + ext.args = '--use-env=0' //TODO: multimer test when above bug fixed + } +} diff --git a/modules/local/split_msa/tests/main.nf.test b/modules/local/split_msa/tests/main.nf.test new file mode 100644 index 000000000..fd6b87996 --- /dev/null +++ b/modules/local/split_msa/tests/main.nf.test @@ -0,0 +1,55 @@ +nextflow_process { + + name "Test Process SPLIT_MSA" + script "../main.nf" + process "SPLIT_MSA" + + tag "modules" + tag "modules_local" + tag "split_msa" + profile "test_nostub" + + test("splits msa from af3 json using yaml template") { + + when { + process { + """ + input[0] = [ + [ id: 'mixed_sample' ], + file(params.pipelines_testdata_base_path + 'testdata/sequences/boltz/mixed.json', checkIfExists: true), + file(params.pipelines_testdata_base_path + 'testdata/sequences/boltz/mixed.yaml', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("splits msa from af3 json with renamed chains using yaml template") { + + when { + process { + """ + input[0] = [ + [ id: 'renamed_sample' ], + file(params.pipelines_testdata_base_path + 'testdata/sequences/boltz/renamed.json', checkIfExists: true), + file(params.pipelines_testdata_base_path + 'testdata/sequences/boltz/original.yaml', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/split_msa/tests/main.nf.test.snap b/modules/local/split_msa/tests/main.nf.test.snap new file mode 100644 index 000000000..9cd915bb7 --- /dev/null +++ b/modules/local/split_msa/tests/main.nf.test.snap @@ -0,0 +1,86 @@ +{ + "splits msa from af3 json using yaml template": { + "content": [ + { + "0": [ + [ + { + "id": "mixed_sample" + }, + "mixed_sample.yaml:md5,57c46f0979af6e243fc1ea73119bd508", + [ + "mixed_sample_0.csv:md5,9ff514c968f73b1cf9f6db8b7ae576d5", + "mixed_sample_2.csv:md5,6ed43aa7595c4223b963007db8cfd538" + ] + ] + ], + "1": [ + "versions.yml:md5,d68ad4d16c6dbfe3f799e7983057aa64" + ], + "boltz_data": [ + [ + { + "id": "mixed_sample" + }, + "mixed_sample.yaml:md5,57c46f0979af6e243fc1ea73119bd508", + [ + "mixed_sample_0.csv:md5,9ff514c968f73b1cf9f6db8b7ae576d5", + "mixed_sample_2.csv:md5,6ed43aa7595c4223b963007db8cfd538" + ] + ] + ], + "versions": [ + "versions.yml:md5,d68ad4d16c6dbfe3f799e7983057aa64" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-06-07T16:00:25.556454737" + }, + "splits msa from af3 json with renamed chains using yaml template": { + "content": [ + { + "0": [ + [ + { + "id": "renamed_sample" + }, + "renamed_sample.yaml:md5,b9c8781042323dcfde6e5122a0dc6946", + [ + "renamed_sample_0.csv:md5,b731c66ea86be7699860f96d79a50b8f", + "renamed_sample_1.csv:md5,b6c5b7cb2032121e60ae68cc6d4204b6", + "renamed_sample_2.csv:md5,e69ffc08e47926a49c3a69bd4f7ea02f" + ] + ] + ], + "1": [ + "versions.yml:md5,d68ad4d16c6dbfe3f799e7983057aa64" + ], + "boltz_data": [ + [ + { + "id": "renamed_sample" + }, + "renamed_sample.yaml:md5,b9c8781042323dcfde6e5122a0dc6946", + [ + "renamed_sample_0.csv:md5,b731c66ea86be7699860f96d79a50b8f", + "renamed_sample_1.csv:md5,b6c5b7cb2032121e60ae68cc6d4204b6", + "renamed_sample_2.csv:md5,e69ffc08e47926a49c3a69bd4f7ea02f" + ] + ] + ], + "versions": [ + "versions.yml:md5,d68ad4d16c6dbfe3f799e7983057aa64" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.4" + }, + "timestamp": "2026-06-07T16:00:30.862429279" + } +} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 75f1dbba6..5d22c6bb0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -378,6 +378,7 @@ profiles { singularity.runOptions = '--nv' } test { includeConfig 'conf/test.config' } + test_nostub { includeConfig 'conf/test_nostub.config' } test_alphafold2_split { includeConfig 'conf/test_alphafold_split.config' } test_alphafold2_download { includeConfig 'conf/test_alphafold_download.config' } test_alphafold3_standard { includeConfig 'conf/test_alphafold3_standard.config' } @@ -400,12 +401,11 @@ profiles { test_full_esmfold_multimer { includeConfig 'conf/test_full_esmfold_multimer.config' } test_full_helixfold3 { includeConfig 'conf/test_full_helixfold3.config' } test_full_boltz { includeConfig 'conf/test_full_boltz.config' } - test_full_rosettafold_all_atom { includeConfig 'conf/test_full_rosettafold_all_atom.config' } - test_full_rosettafold2na { includeConfig 'conf/test_full_rosettafold2na.config' } + test_full_rosettafold_all_atom { includeConfig 'conf/test_full_rosettafold_all_atom.config' } + test_full_rosettafold2na { includeConfig 'conf/test_full_rosettafold2na.config' } test_rosettafold_all_atom { includeConfig 'conf/test_rosettafold_all_atom.config' } test_helixfold3 { includeConfig 'conf/test_helixfold3.config' } test_rosettafold2na { includeConfig 'conf/test_rosettafold2na.config' } - test_full_boltz { includeConfig 'conf/test_full_boltz.config' } test_boltz { includeConfig 'conf/test_boltz.config' } } diff --git a/tests/nextflow.config b/tests/nextflow.config index 8f1aac860..16d3f8772 100644 --- a/tests/nextflow.config +++ b/tests/nextflow.config @@ -8,7 +8,9 @@ // Or any resources requirements params { modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' - pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/proteinfold/' + //pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/proteinfold/' + //TODO: revert to nfcore path when ready + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/tlitfin/test-datasets/refs/heads/proteinfold/' } aws.client.anonymous = true // fixes S3 access issues on self-hosted runners