Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 74 additions & 15 deletions src/recharge_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@
class Recharge_SMIRNOFF(Target):
"""A custom optimisation target which employs the `openff-recharge`
package to train bond charge correction parameters against QM derived
electrostatic potential data."""
electrostatic potential data.

Note -- this has NOT been written to work with anything but BCCs. It will
refuse a force field whose virtual sites carry charge (see
``_check_no_virtual_site_charges``); charge-free virtual sites are ignored.
"""

def __init__(self, options, tgt_opts, forcefield):

Expand All @@ -55,7 +60,7 @@ def __init__(self, options, tgt_opts, forcefield):

# Pre-calculate the expensive portion of the objective function.
self._design_matrix = None
self._target_residuals = None
self._reference_values = None

# Store a copy of the objective function details from the previous
# optimisation cycle.
Expand All @@ -72,6 +77,34 @@ def __init__(self, options, tgt_opts, forcefield):
# Initialize the target.
self._initialize()

@staticmethod
def _check_no_virtual_site_charges(force_field):
"""Raise ``NotImplementedError`` if ``force_field`` defines virtual sites
that carry charge.

The target only places charge on atoms (AM1 base charges perturbed by bond
charge corrections) and never passes a virtual site collection to
``openff-recharge``. A virtual site with a non-zero ``charge_increment``
would contribute to the electrostatic potential in a way this target cannot
represent, and would otherwise be silently ignored - biasing the fitted
bond charge corrections. Virtual sites whose charge increments are all zero
do not affect the ESP and are therefore permitted.
"""

vsite_handler = force_field.get_parameter_handler("VirtualSites")

for parameter in vsite_handler.parameters:
if any(
getattr(charge_increment, "m", charge_increment) != 0.0
for charge_increment in parameter.charge_increment
):
raise NotImplementedError(
"The Recharge_SMIRNOFF target does not support virtual sites "
"that carry charge (VirtualSites parameter '{}' has a non-zero "
"charge_increment). Only bond charge corrections on atoms can "
"be trained.".format(parameter.smirks)
)

def _initialize(self):
"""Initializes the target."""

Expand All @@ -92,6 +125,11 @@ def _initialize(self):

if bcc_handler.partial_charge_method.lower() != "am1elf10":
raise NotImplementedError()

# The target only models charge on atoms (AM1 base charges perturbed by
# bond charge corrections); a virtual site that carries charge cannot be
# represented and would otherwise be silently ignored, biasing the fit.
self._check_no_virtual_site_charges(force_field)

# TODO: it is assumed that the MDL aromaticity model should be used
# rather than the once specified in the FF as the model is not
Expand All @@ -118,20 +156,24 @@ def _initialize(self):
bcc_index = bcc_smirks.index(parameter_smirks)
bcc_to_parameter_index[bcc_index] = parameter_index

fixed_parameters = [
i for i in range(len(bcc_smirks)) if i not in bcc_to_parameter_index
# The BCC parameters being optimized, identified by their SMIRKS pattern.
# ``compute_objective_terms`` now expects the SMIRKS of the *trainable*
# parameters (previously it expected the indices of the *fixed* ones). The
# order of these keys defines the column ordering of the design matrix, so it
# must match the ``_parameter_to_bcc_map`` used to map BCCs back to FB params.
trainable_bcc_indices = [
i for i in range(len(bcc_smirks)) if i in bcc_to_parameter_index
]
bcc_parameter_keys = [bcc_smirks[i] for i in trainable_bcc_indices]

self._parameter_to_bcc_map = np.array(
[
bcc_to_parameter_index[i]
for i in range(len(bcc_collection.parameters))
if i not in fixed_parameters
]
[bcc_to_parameter_index[i] for i in trainable_bcc_indices]
)

# TODO: Currently only AM1 is supported by the SMIRNOFF handler.
charge_settings = QCChargeSettings(theory="am1", symmetrize=True, optimize=True)
charge_settings = QCChargeSettings(
theory="am1", symmetrize=True, optimize=True
)

# Pre-calculate the expensive operations which are needed to evaluate the
# objective function, but do not depend on the current parameters.
Expand All @@ -140,18 +182,29 @@ def _initialize(self):
"electric-field": ElectricFieldObjective,
}[self.recharge_property]

# Retrieve the ESP records to train against. They are gathered per molecule so
# that the ordering matches the per-molecule residual ranges computed below.
esp_records = [
esp_record
for smiles_pattern in smiles
for esp_record in esp_store.retrieve(smiles_pattern)
]

objective_terms = [
objective_term
for objective_term in optimization_class.compute_objective_terms(
smiles, esp_store, bcc_collection, fixed_parameters, charge_settings
esp_records,
charge_collection=charge_settings,
bcc_collection=bcc_collection,
bcc_parameter_keys=bcc_parameter_keys,
)
]

self._design_matrix = np.vstack(
[objective_term.design_matrix for objective_term in objective_terms]
[objective_term.atom_charge_design_matrix for objective_term in objective_terms]
)
self._target_residuals = np.vstack(
[objective_term.target_residuals for objective_term in objective_terms]
self._reference_values = np.vstack(
[objective_term.reference_values for objective_term in objective_terms]
)

# Track which residuals map to which molecule.
Expand Down Expand Up @@ -248,7 +301,7 @@ def get(self, mvals, AGrad=True, AHess=True):
bcc_values = bcc_values.flatten()

# Compute the objective function
delta = self._target_residuals - np.matmul(self._design_matrix, bcc_values)
delta = self._reference_values - np.matmul(self._design_matrix, bcc_values)
loss = (delta * delta).sum()

loss_gradient = np.zeros(len(parameter_values))
Expand Down Expand Up @@ -285,6 +338,12 @@ def get(self, mvals, AGrad=True, AHess=True):
else:
raise NotImplementedError()

# Flatten to a 1-D vector of one gradient per BCC. The ESP branch yields
# a column vector of shape (n_bcc, 1); older NumPy used to silently squeeze
# the resulting (1,)-shaped slices into the scalar assignment below, but
# newer NumPy raises instead, so squeeze explicitly here.
bcc_gradient = np.asarray(bcc_gradient).reshape(-1)

for bcc_index, parameter_index in enumerate(self._parameter_to_bcc_map):
loss_gradient[parameter_index] = bcc_gradient[bcc_index]

Expand Down
56 changes: 50 additions & 6 deletions src/tests/test_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,17 @@
EXPECTED_OPENFF_TORSIONPROFILE_RESULTS = array([-8.6810e-02, 6.7106e-03, 3.0992e-03, 1.8605e-02, -1.1292e-01, 5.6741e-02, 1.8884e-02, 7.3325e-02, -1.4203e-01, -9.2920e-03])

# expected objective function from 025 recharge methane study. (updated 08/04/20)
EXPECTED_RECHARGE_METHANE_ESP_OBJECTIVE = array([5.68107e-04])
EXPECTED_RECHARGE_METHANE_FIELD_OBJECTIVE = array([7.43711e-04])

# expected gradient elements from 025 recharge methane. (updated 08/04/20)
EXPECTED_RECHARGE_METHANE_ESP_GRADIENT = array([9.76931016e-03])
EXPECTED_RECHARGE_METHANE_FIELD_GRADIENT = array([1.12071584e-02])
# 2026-06-05: recalibrated for the openff-recharge >=0.4 API
# (QCChargeSettings/compute_objective_terms). The v2 ESP store DB was regenerated
# from the original v1 reference data under numpy 1.26 (see
# tools/migrate_esp_store_v1_to_v2.py) so its pickled arrays load under both numpy
# 1.x (GitHub CI) and 2.x.
EXPECTED_RECHARGE_METHANE_ESP_OBJECTIVE = array([5.6869324e-04])
EXPECTED_RECHARGE_METHANE_FIELD_OBJECTIVE = array([7.4438322e-04])

# expected gradient elements from 025 recharge methane. (updated 08/04/20; 2026-06-05)
EXPECTED_RECHARGE_METHANE_ESP_GRADIENT = array([9.77458642e-03])
EXPECTED_RECHARGE_METHANE_FIELD_GRADIENT = array([1.12129298e-02])

# in practice these aren't hit, we don't simulate nearly long enough
EXPECTED_VSITE_VDW_PARAMETERS = array([
Expand Down Expand Up @@ -420,6 +425,45 @@ def test_study(self):
err_msg=msgG
)


# A minimal SMIRNOFF force field with a single BondCharge virtual site whose
# leading charge increment is templated, used to check the Recharge target's
# virtual-site guard.
_VIRTUAL_SITE_FF_TEMPLATE = """<?xml version="1.0" encoding="utf-8"?>
<SMIRNOFF version="0.3" aromaticity_model="OEAroModel_MDL">
<VirtualSites version="0.3" exclusion_policy="parents">
<VirtualSite type="BondCharge" name="EP" smirks="[#7:1]#[#7:2]"
distance="1.4 * angstrom"
charge_increment1="{charge} * elementary_charge"
charge_increment2="0.0 * elementary_charge"
sigma="1.0 * angstrom" epsilon="0.0 * kilocalorie_per_mole"
match="all_permutations"/>
</VirtualSites>
</SMIRNOFF>
"""


@skip_openff_py39
def test_recharge_rejects_charged_virtual_sites():
"""Recharge_SMIRNOFF must refuse a force field whose virtual sites carry
charge, while tolerating charge-free virtual sites (and none at all)."""
pytest.importorskip("openff.toolkit")
from openff.toolkit.typing.engines.smirnoff import ForceField
from forcebalance.recharge_io import Recharge_SMIRNOFF

# A virtual site carrying charge cannot be represented -> must raise.
charged_ff = ForceField(_VIRTUAL_SITE_FF_TEMPLATE.format(charge="0.2"))
with pytest.raises(NotImplementedError):
Recharge_SMIRNOFF._check_no_virtual_site_charges(charged_ff)

# Charge-free virtual sites do not affect the ESP and must be allowed.
uncharged_ff = ForceField(_VIRTUAL_SITE_FF_TEMPLATE.format(charge="0.0"))
Recharge_SMIRNOFF._check_no_virtual_site_charges(uncharged_ff)

# A force field without any virtual sites is fine too.
Recharge_SMIRNOFF._check_no_virtual_site_charges(ForceField())


@skip_openff_py39
class TestEvaluatorWaterVSiteStudy(EvaluatorServerMixin, ForceBalanceSystemTest):
"""Check that we can co-optimize pure and mixture properties of water with a virtual site.
Expand Down
Binary file modified studies/025_openff_recharge/targets.tar.gz
Binary file not shown.
Loading