diff --git a/src/recharge_io.py b/src/recharge_io.py index 25613424e..2d76195f0 100644 --- a/src/recharge_io.py +++ b/src/recharge_io.py @@ -37,7 +37,12 @@ class Recharge_SMIRNOFF(Target): """A custom optimisation target which employs the `openff-recharge` package to train bond charge correction parameters against QM derived - electrostatic potential data.""" + electrostatic potential data. + + Note -- this has NOT been written to work with anything but BCCs. It will + refuse a force field whose virtual sites carry charge (see + ``_check_no_virtual_site_charges``); charge-free virtual sites are ignored. + """ def __init__(self, options, tgt_opts, forcefield): @@ -55,7 +60,7 @@ def __init__(self, options, tgt_opts, forcefield): # Pre-calculate the expensive portion of the objective function. self._design_matrix = None - self._target_residuals = None + self._reference_values = None # Store a copy of the objective function details from the previous # optimisation cycle. @@ -72,6 +77,34 @@ def __init__(self, options, tgt_opts, forcefield): # Initialize the target. self._initialize() + @staticmethod + def _check_no_virtual_site_charges(force_field): + """Raise ``NotImplementedError`` if ``force_field`` defines virtual sites + that carry charge. + + The target only places charge on atoms (AM1 base charges perturbed by bond + charge corrections) and never passes a virtual site collection to + ``openff-recharge``. A virtual site with a non-zero ``charge_increment`` + would contribute to the electrostatic potential in a way this target cannot + represent, and would otherwise be silently ignored - biasing the fitted + bond charge corrections. Virtual sites whose charge increments are all zero + do not affect the ESP and are therefore permitted. + """ + + vsite_handler = force_field.get_parameter_handler("VirtualSites") + + for parameter in vsite_handler.parameters: + if any( + getattr(charge_increment, "m", charge_increment) != 0.0 + for charge_increment in parameter.charge_increment + ): + raise NotImplementedError( + "The Recharge_SMIRNOFF target does not support virtual sites " + "that carry charge (VirtualSites parameter '{}' has a non-zero " + "charge_increment). Only bond charge corrections on atoms can " + "be trained.".format(parameter.smirks) + ) + def _initialize(self): """Initializes the target.""" @@ -92,6 +125,11 @@ def _initialize(self): if bcc_handler.partial_charge_method.lower() != "am1elf10": raise NotImplementedError() + + # The target only models charge on atoms (AM1 base charges perturbed by + # bond charge corrections); a virtual site that carries charge cannot be + # represented and would otherwise be silently ignored, biasing the fit. + self._check_no_virtual_site_charges(force_field) # TODO: it is assumed that the MDL aromaticity model should be used # rather than the once specified in the FF as the model is not @@ -118,20 +156,24 @@ def _initialize(self): bcc_index = bcc_smirks.index(parameter_smirks) bcc_to_parameter_index[bcc_index] = parameter_index - fixed_parameters = [ - i for i in range(len(bcc_smirks)) if i not in bcc_to_parameter_index + # The BCC parameters being optimized, identified by their SMIRKS pattern. + # ``compute_objective_terms`` now expects the SMIRKS of the *trainable* + # parameters (previously it expected the indices of the *fixed* ones). The + # order of these keys defines the column ordering of the design matrix, so it + # must match the ``_parameter_to_bcc_map`` used to map BCCs back to FB params. + trainable_bcc_indices = [ + i for i in range(len(bcc_smirks)) if i in bcc_to_parameter_index ] + bcc_parameter_keys = [bcc_smirks[i] for i in trainable_bcc_indices] self._parameter_to_bcc_map = np.array( - [ - bcc_to_parameter_index[i] - for i in range(len(bcc_collection.parameters)) - if i not in fixed_parameters - ] + [bcc_to_parameter_index[i] for i in trainable_bcc_indices] ) # TODO: Currently only AM1 is supported by the SMIRNOFF handler. - charge_settings = QCChargeSettings(theory="am1", symmetrize=True, optimize=True) + charge_settings = QCChargeSettings( + theory="am1", symmetrize=True, optimize=True + ) # Pre-calculate the expensive operations which are needed to evaluate the # objective function, but do not depend on the current parameters. @@ -140,18 +182,29 @@ def _initialize(self): "electric-field": ElectricFieldObjective, }[self.recharge_property] + # Retrieve the ESP records to train against. They are gathered per molecule so + # that the ordering matches the per-molecule residual ranges computed below. + esp_records = [ + esp_record + for smiles_pattern in smiles + for esp_record in esp_store.retrieve(smiles_pattern) + ] + objective_terms = [ objective_term for objective_term in optimization_class.compute_objective_terms( - smiles, esp_store, bcc_collection, fixed_parameters, charge_settings + esp_records, + charge_collection=charge_settings, + bcc_collection=bcc_collection, + bcc_parameter_keys=bcc_parameter_keys, ) ] self._design_matrix = np.vstack( - [objective_term.design_matrix for objective_term in objective_terms] + [objective_term.atom_charge_design_matrix for objective_term in objective_terms] ) - self._target_residuals = np.vstack( - [objective_term.target_residuals for objective_term in objective_terms] + self._reference_values = np.vstack( + [objective_term.reference_values for objective_term in objective_terms] ) # Track which residuals map to which molecule. @@ -248,7 +301,7 @@ def get(self, mvals, AGrad=True, AHess=True): bcc_values = bcc_values.flatten() # Compute the objective function - delta = self._target_residuals - np.matmul(self._design_matrix, bcc_values) + delta = self._reference_values - np.matmul(self._design_matrix, bcc_values) loss = (delta * delta).sum() loss_gradient = np.zeros(len(parameter_values)) @@ -285,6 +338,12 @@ def get(self, mvals, AGrad=True, AHess=True): else: raise NotImplementedError() + # Flatten to a 1-D vector of one gradient per BCC. The ESP branch yields + # a column vector of shape (n_bcc, 1); older NumPy used to silently squeeze + # the resulting (1,)-shaped slices into the scalar assignment below, but + # newer NumPy raises instead, so squeeze explicitly here. + bcc_gradient = np.asarray(bcc_gradient).reshape(-1) + for bcc_index, parameter_index in enumerate(self._parameter_to_bcc_map): loss_gradient[parameter_index] = bcc_gradient[bcc_index] diff --git a/src/tests/test_system.py b/src/tests/test_system.py index 5bf8aed93..ab713f260 100644 --- a/src/tests/test_system.py +++ b/src/tests/test_system.py @@ -52,12 +52,17 @@ EXPECTED_OPENFF_TORSIONPROFILE_RESULTS = array([-8.6810e-02, 6.7106e-03, 3.0992e-03, 1.8605e-02, -1.1292e-01, 5.6741e-02, 1.8884e-02, 7.3325e-02, -1.4203e-01, -9.2920e-03]) # expected objective function from 025 recharge methane study. (updated 08/04/20) -EXPECTED_RECHARGE_METHANE_ESP_OBJECTIVE = array([5.68107e-04]) -EXPECTED_RECHARGE_METHANE_FIELD_OBJECTIVE = array([7.43711e-04]) - -# expected gradient elements from 025 recharge methane. (updated 08/04/20) -EXPECTED_RECHARGE_METHANE_ESP_GRADIENT = array([9.76931016e-03]) -EXPECTED_RECHARGE_METHANE_FIELD_GRADIENT = array([1.12071584e-02]) +# 2026-06-05: recalibrated for the openff-recharge >=0.4 API +# (QCChargeSettings/compute_objective_terms). The v2 ESP store DB was regenerated +# from the original v1 reference data under numpy 1.26 (see +# tools/migrate_esp_store_v1_to_v2.py) so its pickled arrays load under both numpy +# 1.x (GitHub CI) and 2.x. +EXPECTED_RECHARGE_METHANE_ESP_OBJECTIVE = array([5.6869324e-04]) +EXPECTED_RECHARGE_METHANE_FIELD_OBJECTIVE = array([7.4438322e-04]) + +# expected gradient elements from 025 recharge methane. (updated 08/04/20; 2026-06-05) +EXPECTED_RECHARGE_METHANE_ESP_GRADIENT = array([9.77458642e-03]) +EXPECTED_RECHARGE_METHANE_FIELD_GRADIENT = array([1.12129298e-02]) # in practice these aren't hit, we don't simulate nearly long enough EXPECTED_VSITE_VDW_PARAMETERS = array([ @@ -420,6 +425,45 @@ def test_study(self): err_msg=msgG ) + +# A minimal SMIRNOFF force field with a single BondCharge virtual site whose +# leading charge increment is templated, used to check the Recharge target's +# virtual-site guard. +_VIRTUAL_SITE_FF_TEMPLATE = """ + + + + + +""" + + +@skip_openff_py39 +def test_recharge_rejects_charged_virtual_sites(): + """Recharge_SMIRNOFF must refuse a force field whose virtual sites carry + charge, while tolerating charge-free virtual sites (and none at all).""" + pytest.importorskip("openff.toolkit") + from openff.toolkit.typing.engines.smirnoff import ForceField + from forcebalance.recharge_io import Recharge_SMIRNOFF + + # A virtual site carrying charge cannot be represented -> must raise. + charged_ff = ForceField(_VIRTUAL_SITE_FF_TEMPLATE.format(charge="0.2")) + with pytest.raises(NotImplementedError): + Recharge_SMIRNOFF._check_no_virtual_site_charges(charged_ff) + + # Charge-free virtual sites do not affect the ESP and must be allowed. + uncharged_ff = ForceField(_VIRTUAL_SITE_FF_TEMPLATE.format(charge="0.0")) + Recharge_SMIRNOFF._check_no_virtual_site_charges(uncharged_ff) + + # A force field without any virtual sites is fine too. + Recharge_SMIRNOFF._check_no_virtual_site_charges(ForceField()) + + @skip_openff_py39 class TestEvaluatorWaterVSiteStudy(EvaluatorServerMixin, ForceBalanceSystemTest): """Check that we can co-optimize pure and mixture properties of water with a virtual site. diff --git a/studies/025_openff_recharge/targets.tar.gz b/studies/025_openff_recharge/targets.tar.gz index 7438e2bc9..7d9201a66 100644 Binary files a/studies/025_openff_recharge/targets.tar.gz and b/studies/025_openff_recharge/targets.tar.gz differ