Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2,192 changes: 2,192 additions & 0 deletions integration_tests/golden_data/protprot_complex_1.pdb

Large diffs are not rendered by default.

2,192 changes: 2,192 additions & 0 deletions integration_tests/golden_data/protprot_complex_2.pdb

Large diffs are not rendered by default.

108 changes: 108 additions & 0 deletions integration_tests/test_clustfcc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import os
import shutil
import tempfile
from pathlib import Path

import pytest

from haddock.libs.libontology import ModuleIO, PDBFile
Comment thread
rvhonorato marked this conversation as resolved.
Outdated
from haddock.modules.analysis.clustfcc import DEFAULT_CONFIG as clustfcc_pars
from haddock.modules.analysis.clustfcc import HaddockModule as ClustFCCModule

from . import golden_data


class MockPreviousIO:
    """Stand-in for the previous module's IO, feeding golden-data PDB models."""

    def __init__(self, path):
        # Directory into which the golden-data models are copied.
        self.path = path

    def retrieve_models(self, individualize: bool = False):
        """Copy the golden-data complexes into ``self.path`` and wrap them as models."""
        file_names = [
            "protprot_complex_1.pdb",
            "protprot_complex_2.pdb",
        ]
        for file_name in file_names:
            shutil.copy(
                Path(golden_data, file_name),
                Path(self.path, file_name),
            )

        # add the topology to the models
        return [
            PDBFile(file_name=file_name, path=self.path)
            for file_name in file_names
        ]

    def output(self) -> None:
        """No-op: the mock does not persist any output."""
        return None


@pytest.fixture
def output_list():
    """Clustfcc output list."""
    contact_files = [f"protprot_complex_{i}.con" for i in (1, 2)]
    return [
        "fcc.matrix",
        "cluster.out",
        *contact_files,
        "clustfcc.txt",
        "io.json",
        "clustfcc.tsv",
    ]


@pytest.fixture
def fcc_module():
    """Clustfcc module."""
    # The temporary directory lives for the duration of the test only.
    with tempfile.TemporaryDirectory() as tempdir:
        module = ClustFCCModule(
            order=1,
            path=Path(tempdir),
            initial_params=clustfcc_pars,
        )
        yield module


def test_clustfcc_output_existence(fcc_module, output_list):
    """Test clustfcc output."""
    fcc_module.previous_io = MockPreviousIO(path=fcc_module.path)

    fcc_module.run()

    # Every expected artefact must have been written to the module path.
    for file_name in output_list:
        assert Path(fcc_module.path, file_name).exists()

    # Test the fcc matrix contents
    matrix_path = Path(fcc_module.path, "fcc.matrix")
    with open(matrix_path, encoding="utf-8", mode="r") as fh:
        observed_fcc_matrix = fh.read()

    assert observed_fcc_matrix == "1 2 0.05 0.062" + os.linesep

    # Check .con files: each contact file must hold the expected line count.
    con_expectations = {
        "protprot_complex_1.con": 100,
        "protprot_complex_2.con": 119,
    }

    for con_name, expected_len in con_expectations.items():
        con_path = Path(fcc_module.path, con_name)
        with open(con_path, encoding="utf-8", mode="r") as fh:
            observed_len = len(fh.read().splitlines())

        assert observed_len == expected_len

    # Check cluster.out file.
    expected_cluster_lines = [
        "Cluster 1 -> 2 " + os.linesep,
        "Cluster 2 -> 1 " + os.linesep,
    ]
    cluster_path = Path(fcc_module.path, "cluster.out")
    with open(cluster_path, encoding="utf-8", mode="r") as fh:
        for expected, observed in zip(expected_cluster_lines, fh.readlines()):
            assert observed == expected
70 changes: 32 additions & 38 deletions src/haddock/modules/analysis/clustfcc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from pathlib import Path

from fcc.scripts import calc_fcc_matrix, cluster_fcc

from haddock import FCC_path, log
from haddock.core.defaults import MODULE_DEFAULT_YAML
from haddock.core.typing import Union
Expand All @@ -22,15 +23,15 @@
write_structure_list,
)
from haddock.libs.libsubprocess import JobInputFirst
from haddock.modules import get_engine
from haddock.modules import BaseHaddockModule, read_from_yaml_config
from haddock.modules import BaseHaddockModule, get_engine, read_from_yaml_config
from haddock.modules.analysis import get_analysis_exec_mode
from haddock.modules.analysis.clustfcc.clustfcc import (
get_cluster_centers,
iterate_clustering,
write_clusters,
write_clustfcc_file,
)
from haddock.modules.analysis import get_analysis_exec_mode


RECIPE_PATH = Path(__file__).resolve().parent
DEFAULT_CONFIG = Path(RECIPE_PATH, MODULE_DEFAULT_YAML)
Expand All @@ -42,11 +43,11 @@ class HaddockModule(BaseHaddockModule):
name = RECIPE_PATH.name

def __init__(
self,
order: int,
path: Path,
initial_params: Union[Path, str] = DEFAULT_CONFIG,
) -> None:
self,
order: int,
path: Path,
initial_params: Union[Path, str] = DEFAULT_CONFIG,
) -> None:
super().__init__(order, path, initial_params)

@classmethod
Expand Down Expand Up @@ -81,9 +82,9 @@ def _run(self) -> None:
contact_f,
contact_executable,
self.params["contact_distance_cutoff"],
)
)
contact_jobs.append(job)

exec_mode = get_analysis_exec_mode(self.params["mode"])

Engine = get_engine(exec_mode, self.params)
Expand All @@ -104,26 +105,23 @@ def _run(self) -> None:

if not_found:
# No contacts were calculated, we cannot cluster
self.finish_with_error(
"Several files were not generated:"
f" {not_found}"
)
self.finish_with_error("Several files were not generated:" f" {not_found}")

log.info("Calculating the FCC matrix")
parsed_contacts = calc_fcc_matrix.parse_contact_file(
contact_file_l,
False,
)
)

# Important: matrix is a generator object, be careful with it
matrix = calc_fcc_matrix.calculate_pairwise_matrix(
parsed_contacts,
False,
)
)

# write the matrix to a file, so we can read it afterwards and don't
# need to reinvent the wheel handling this
fcc_matrix_f = Path("fcc.matrix")
fcc_matrix_f = Path(self.path, "fcc.matrix")
with open(fcc_matrix_f, "w") as fh:
for data in list(matrix):
data_str = f"{data[0]} {data[1]} {data[2]:.2f} {data[3]:.3f}"
Expand All @@ -136,26 +134,26 @@ def _run(self) -> None:
fcc_matrix_f,
self.params["clust_cutoff"],
self.params["strictness"],
)
)

# iterate clustering until at least one cluster is found
clusters, min_population = iterate_clustering(
pool,
self.params['min_population'],
)
self.params['min_population'] = min_population
self.params["min_population"],
)
self.params["min_population"] = min_population

# Prepare output and read the elements
if clusters:
# Write the clusters
write_clusters(clusters)
write_clusters(clusters, out_filename=str(Path(self.path, "cluster.out")))

# Get the cluster centers
clt_dic, clt_centers = get_cluster_centers(
clusters,
models_to_clust,
)
)

# ranking clusters
_scores, sorted_score_dic = rank_clusters(clt_dic, min_population)

Expand All @@ -167,45 +165,41 @@ def _run(self) -> None:
models_to_clust,
self.output_models,
out_fname="clustfcc.tsv",
)
)

write_clustfcc_file(
clusters,
clt_centers,
clt_dic,
self.params,
sorted_score_dic
)
clusters, clt_centers, clt_dic, self.params, sorted_score_dic
)
else:
log.warning("No clusters were found")
self.output_models = models_to_clust # type: ignore

# Draw the matrix
if self.params['plot_matrix']:
if self.params["plot_matrix"]:
# Obtain final models indices
final_order_idx, labels, cluster_ids = [], [], []
for pdb in self.output_models:
final_order_idx.append(models_to_clust.index(pdb))
labels.append(pdb.file_name.replace('.pdb', ''))
labels.append(pdb.file_name.replace(".pdb", ""))
cluster_ids.append(pdb.clt_id)
# Get custom cluster data
matrix_cluster_dt, cluster_limits = get_cluster_matrix_plot_clt_dt(
cluster_ids
)
)

# Define output filename
html_matrix_basepath = 'fcc_matrix'
html_matrix_basepath = "fcc_matrix"
# Plot matrix
html_matrixpath = plot_cluster_matrix(
fcc_matrix_f,
final_order_idx,
labels,
dttype='FCC',
dttype="FCC",
diag_fill=1,
output_fname=html_matrix_basepath,
matrix_cluster_dt=matrix_cluster_dt,
cluster_limits=cluster_limits,
)
)
log.info(f"Plotting matrix in {html_matrixpath}")

# Export models for next module
Expand Down
Loading