Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@ and emergent properties of liquid systems.
Description
-----------

The benchmark performs an **MD** simulation using the **MLIP** model in the **NVT** ensemble at
**300 K** for **500,000 steps**, leveraging the `jax-md <https://github.com/google/jax-md>`_ engine
from the `mlip <https://github.com/instadeepai/mlip>`_ library. The starting configuration is already
The benchmark performs an **MD** simulation using the **MLIP** model in the **NPT** ensemble at
**295.15 K** and **1 atm** for **500,000 steps**, leveraging the `jax-md <https://github.com/google/jax-md>`_
engine from the `mlip <https://github.com/instadeepai/mlip>`_ library. The starting configuration is already
equilibrated. For every specific atom pair (e.g., **oxygen-oxygen** in water) the radial distribution
function (**RDF** or **g(r)**) is calculated from the simulation.
Comment on lines 22 to 23

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: Add details RE how the densities are computed and (below) what experimental values we compare to


# TODO: Add details of density computation + details RE experimental density to sections below

.. figure:: img/rdf.png
:figwidth: 35%
:align: center
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ase import Atoms, units
from ase.io import read as ase_read
from mlip.simulation import SimulationState
from mlip.simulation.enums import MDIntegrator
from pydantic import BaseModel, ConfigDict, NonNegativeFloat

from mlipaudit.benchmark import (
Expand All @@ -44,28 +45,25 @@
"snapshot_interval": 500,
"num_episodes": 1000,
"temperature_kelvin": 295.15,
"pressure_bar": 1.01325,
}

SIMULATION_CONFIG_DEV = {
"num_steps": 5,
"snapshot_interval": 1,
"num_episodes": 1,
"temperature_kelvin": 295.15,
"pressure_bar": 1.01325,
}
SIMULATION_CONFIG_FAST = {
"num_steps": 250_000,
"snapshot_interval": 250,
"num_episodes": 1000,
"temperature_kelvin": 295.15,
"pressure_bar": 1.01325,
}
NUM_DEV_SYSTEMS = 1

BOX_CONFIG = { # In Angstrom
"CCl4": 28.575,
"methanol": 29.592,
"acetonitrile": 27.816,
}

SYSTEM_ATOM_OF_INTEREST = {
"CCl4": "C",
"methanol": "O",
Expand All @@ -74,6 +72,20 @@

MIN_RADII, MAX_RADII = 0.0, 20.0 # In Angstrom

MOLECULE_CONFIG = {
"CCl4": {"mw": 153.823, "atoms_per_molecule": 5},
"methanol": {"mw": 32.042, "atoms_per_molecule": 6},
"acetonitrile": {"mw": 41.053, "atoms_per_molecule": 6},

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: Sorry for my noobiness, what's mw? Maybe a more explicit name?

}

# TODO: Check reference densities (g/cm3)
# These are the best I could find. All are at 20deg - should we change temperature?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: So this would be 2 deg less than what we have now? I don't see why not then? @lwehran?

# (https://ce.sysu.edu.cn/zhaolab/resource/tables/solvent_properties.pdf)
REFERENCE_DENSITIES = {
"CCl4": 1.594,
"acetonitrile": 0.786,
"methanol": 0.791,
}
REFERENCE_MAXIMA = {
"CCl4": {"type": "C-C", "distance": 5.9, "range": (0.0, 20.0)},
"acetonitrile": {"type": "N-N", "distance": 4.0, "range": (3.5, 4.5)},
Expand All @@ -85,6 +97,8 @@
"methanol": (0.0, 20.0),
}

AVAGADROS_CONSTANT = units.mol / 1e23


class SolventRadialDistributionModelOutput(ModelOutput):
"""Model output containing the final simulation states for
Expand All @@ -108,19 +122,23 @@ class SolventRadialDistributionStructureResult(BaseModel):

Attributes:
structure_name: The structure name.
densities: List of densities in g/cm3.
average_density: Average density over the final 4 fifths of the frames.
density_deviation: Deviation of the average density from the reference.
radii: The radii values in Angstrom.
rdf: The radial distribution function values at the
radii.
first_solvent_peak: The first solvent peak, i.e.
the radius at which the rdf is the maximum.
peak_deviation: The deviation of the
first solvent peak from the reference.
rdf: The radial distribution function values at the radii.
first_solvent_peak: The first solvent peak, i.e. the radius at which the
rdf is the maximum.
peak_deviation: The deviation of the first solvent peak from the reference.
failed: Whether the simulation was successful. If unsuccessful, the other
attributes will be not be set.
score: The score for the molecule.
"""

structure_name: str
densities: list[float] | None = None
average_density: float | None = None
density_deviation: NonNegativeFloat | None = None
radii: list[float] | None = None
rdf: list[float] | None = None
first_solvent_peak: float | None = None
Expand All @@ -136,7 +154,8 @@ class SolventRadialDistributionResult(BenchmarkResult):
Attributes:
structure_names: The names of the structures.
structures: List of per structure results.
avg_peak_deviation: The average deviation across all structures.
avg_density_deviation: The average density deviation across all structures.
avg_peak_deviation: The average peak deviation across all structures.
failed: Whether all the simulations failed and no analysis could be
performed. Defaults to False.
score: The final score for the benchmark between
Expand All @@ -145,6 +164,7 @@ class SolventRadialDistributionResult(BenchmarkResult):

structure_names: list[str]
structures: list[SolventRadialDistributionStructureResult]
avg_density_deviation: NonNegativeFloat | None = None
avg_peak_deviation: NonNegativeFloat | None = None


Expand Down Expand Up @@ -179,10 +199,11 @@ class SolventRadialDistributionBenchmark(Benchmark):
required_elements = {"N", "H", "O", "C", "Cl"}

def run_model(self) -> None:
"""Run an MD simulation for each structure.
"""Run an MD simulation for each structure using the NPT ensemble.

The MD simulation is performed using the JAX MD engine and starts from
the reference structure. NOTE: This benchmark runs a simulation in the
NVT ensemble, which is not recommended for a water RDF calculation.
the reference structure. The NPT integrator uses Langevin dynamics with
a Monte Carlo barostat.
"""
if self.run_mode == RunMode.DEV:
md_kwargs = SIMULATION_CONFIG_DEV
Expand All @@ -195,13 +216,13 @@ def run_model(self) -> None:
for system_name in self._system_names:
logger.info("Running MD for %s radial distribution function.", system_name)

md_kwargs["box"] = BOX_CONFIG[system_name]
simulation_state = run_simulation(
atoms=self._load_system(system_name),
force_field=self.force_field,
md_integrator=MDIntegrator.NPT_MC_LANGEVIN,
molecule_indices=self._load_molecule_indices(system_name),
**md_kwargs,
)

simulation_states.append(simulation_state)

self.model_output = SolventRadialDistributionModelOutput(
Expand Down Expand Up @@ -238,14 +259,17 @@ def analyze(self) -> SolventRadialDistributionResult:

num_succeeded += 1

box_length = BOX_CONFIG[system_name]
# TODO: How many frames to use for equilibration?
densities = self._compute_densities(simulation_state, system_name)
n_frames_equilibration = len(densities) // 5
average_density = np.mean(densities[n_frames_equilibration:])
density_deviation = abs(average_density - REFERENCE_DENSITIES[system_name])

traj = create_mdtraj_trajectory_from_simulation_state(
simulation_state=simulation_state,
topology_path=self.data_input_dir
/ self.name
/ self._get_pdb_file_name(system_name),
cell_lengths=(box_length, box_length, box_length),
)
pair_indices = traj.top.select(
f"symbol == {SYSTEM_ATOM_OF_INTEREST[system_name]}"
Expand Down Expand Up @@ -283,12 +307,17 @@ def analyze(self) -> SolventRadialDistributionResult:
peak_deviation = abs(
first_solvent_peak - REFERENCE_MAXIMA[system_name]["distance"]
)

# TODO: Include density_deviation in the score

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: Probably want to use a similar scheme to how the peak_deviation is calculated. Then take mean of score of peak_deviation and density_deviation.

score = math.exp(
-ALPHA * peak_deviation / REFERENCE_MAXIMA[system_name]["distance"]
)

structure_result = SolventRadialDistributionStructureResult(
structure_name=system_name,
densities=densities,
average_density=average_density,
density_deviation=density_deviation,
radii=radii.tolist(),
rdf=rdf,
first_solvent_peak=first_solvent_peak,
Expand Down Expand Up @@ -322,10 +351,10 @@ def analyze(self) -> SolventRadialDistributionResult:
@property
def _system_names(self) -> list[str]:
if self.run_mode == RunMode.STANDARD:
return list(BOX_CONFIG.keys())
return list(SYSTEM_ATOM_OF_INTEREST.keys())

# reduced number of cases for DEV and FAST run mode
return list(BOX_CONFIG.keys())[:NUM_DEV_SYSTEMS]
return list(SYSTEM_ATOM_OF_INTEREST.keys())[:NUM_DEV_SYSTEMS]

def _load_system(self, system_name) -> Atoms:
atoms = ase_read(
Expand All @@ -335,6 +364,38 @@ def _load_system(self, system_name) -> Atoms:
atoms.info["spin"] = DEFAULT_SPIN
return atoms

def _load_molecule_indices(self, system_name) -> np.ndarray:
molecule_indices_filename = self._get_molecule_indices_file_name(system_name)
molecule_indices = np.load(
self.data_input_dir / self.name / molecule_indices_filename
)
return molecule_indices

@staticmethod
def _get_pdb_file_name(system_name: str) -> str:
return f"{system_name}_eq.pdb"

@staticmethod
def _get_molecule_indices_file_name(system_name: str) -> str:
return f"{system_name}_molecule_indices.npy"

@staticmethod
def _compute_densities(
simulation_state: SimulationState, system_name: str
) -> np.ndarray:
"""Compute the density (g/cm3) for each frame of the simulation.

Returns:
densities: Computed density (g/cm3) for each frame of the simulation.
"""
mol_config = MOLECULE_CONFIG[system_name]
n_molecules = (
simulation_state.positions.shape[1] / mol_config["atoms_per_molecule"]
)
volumes = np.abs(np.linalg.det(simulation_state.cell))

density_numerator = mol_config["mw"] * 10 / 1000 * n_molecules
density_denominator = AVAGADROS_CONSTANT * volumes

densities = density_numerator / density_denominator
return densities
Loading
Loading