diff --git a/src/data/md_one.py b/src/data/md_one.py new file mode 100644 index 000000000..e71f9c3bc --- /dev/null +++ b/src/data/md_one.py @@ -0,0 +1,168 @@ +#!/usr/bin/env python + +""" +md_one +======== + +This script is a part of ForceBalance and runs a single simulation +that may be combined with others to calculate general thermodynamic +properties. + +This script is meant to be launched automatically by ForceBalance. + +""" + +#==================# +#| Global Imports |# +#==================# + +import os, sys, re +import argparse +import numpy as np +import importlib as il + +from forcebalance.nifty import click +from forcebalance.nifty import lp_dump, lp_load, wopen +from forcebalance.nifty import printcool, printcool_dictionary +from forcebalance.molecule import Molecule +from forcebalance.thermo import energy_derivatives + +from collections import OrderedDict + +from forcebalance.output import getLogger +logger = getLogger(__name__) + +#========================================================# +#| Global, user-tunable variables (simulation settings) |# +#========================================================# + +# Note: Only the simulation settings that vary across different +# simulations in a target may be specified on the command line. +parser = argparse.ArgumentParser() +parser.add_argument('-T', '--temp', '--temperature', dest='temperature', type=float, + help='Simulation temperature, leave blank for constant energy') +parser.add_argument('-P', '--pres', '--pressure', dest='pressure', type=float, + help='Simulation pressure, leave blank for constant volume') +parser.add_argument('-g', '--grad', '--gradient', dest='gradient', action='store_true', + help='Calculate gradients for output time series') + +# These settings may be specified for debugging purposes (i.e. they +# will override what we read from forcebalance.p) +parser.add_argument('-eq', '--nequil', dest='nequil', type=int, + help='Number of steps for equilibration run (leave blank to use default from forcebalance.p)') +parser.add_argument('-md', '--nsteps', dest='nsteps', type=int, + help='Number of steps for production run (leave blank to use default from forcebalance.p)') +parser.add_argument('-dt', '--timestep', dest='timestep', type=float, + help='Time step in femtoseconds (leave blank to use default from forcebalance.p)') +parser.add_argument('-sp', '--sample', dest='sample', type=float, + help='Sampling interval in picoseonds (leave blank to use default from forcebalance.p)') +parser.add_argument('-nt', '--threads', dest='threads', type=int, + help='Sampling interval in picoseonds (leave blank to use default from forcebalance.p)') +parser.add_argument('-min', '--minimize', dest='minimize', action='store_true', + help='Whether to minimize the energy before starting the simulation') +parser.add_argument('-o', '-out', '--output', dest='output', type=str, nargs='+', + help='Specify the time series which are written to disk') + +# Parse the command line options and save as a dictionary (don't save NoneTypes) +parsed = parser.parse_args() +args = OrderedDict([(i, j) for i, j in vars(parsed).items() if j != None]) + +#---- +# Load the ForceBalance pickle file which contains: +#---- +# - Force field object +# - Optimization parameters +# - Options loaded from file +FF, mvals, Sim = lp_load(open('forcebalance.p')) +FF.ffdir = '.' + +# Engine name. +engname = Sim.engname + +# Import modules and create the correct Engine object. +if engname == "openmm": + try: + from simtk.unit import * + from simtk.openmm import * + from simtk.openmm.app import * + except: + traceback.print_exc() + raise Exception("Cannot import OpenMM modules") + from forcebalance.openmmio import * + EngineClass = OpenMM +elif engname == "gromacs" or engname == "gmx": + from forcebalance.gmxio import * + EngineClass = GMX +elif engname == "tinker": + from forcebalance.tinkerio import * + EngineClass = TINKER +else: + raise Exception('OpenMM, GROMACS, and TINKER are supported at this time.') + +def main(): + + """Usage: + + (prefix.sh) md_one.py -T, --temperature + -P, --pressure + -g, --grad (if gradients of output timeseries are desired) + -eq, --nequil + -md, --nsteps + -dt, --timestep + -sp, --sample + -nt, --threads + -min, --minimize + + This program is meant to be called automatically by ForceBalance + because most options are loaded from the 'forcebalance.p' + simulation file. + + """ + + # Write the force field file. + FF.make(mvals) + + # Read the command line options (they may override the options from file.) + AGrad = args['gradient'] or Sim.gradient + for i in ['temperature', 'pressure', 'nequil', 'nsteps', 'timestep', 'sample', 'threads', 'minimize']: + if i in args: + Sim.MDOpts[i] = args[i] + + #---- + # Print some options. + # At this point, engine and MD options should be SET! + #---- + printcool("ForceBalance simulation using engine: %s" % engname.upper(), + color=4, bold=True) + printcool_dictionary(args, title="Options from command line") + printcool_dictionary(Sim.EngOpts, title="Engine options") + printcool_dictionary(Sim.MDOpts, title="Molecular dynamics options") + + #---- + # For convenience, assign some local variables. + #---- + # Finite difference step size + h = Sim.h + # Active parameters to differentiate + pgrad = Sim.pgrad + # Create instances of the MD Engine objects. + Engine = EngineClass(name=Sim.type, **Sim.EngOpts) + click() # Start timer. + # This line runs the condensed phase simulation. + Engine.molecular_dynamics(**Sim.MDOpts) + logger.info("MD simulation took %.3f seconds\n" % click()) + # Extract properties. + Results = Engine.md_extract(OrderedDict([(i, {}) for i in Sim.timeseries.keys()])) + # Calculate energy and dipole derivatives if needed. + if AGrad: + Results['derivatives'] = energy_derivatives(Engine, FF, mvals, h, pgrad, dipole='dipole' in Sim.timeseries.keys()) + # Dump results to file + logger.info("Writing final force field.\n") + pvals = FF.make(mvals) + logger.info("Writing all simulation data to disk.\n") + with wopen('md_result.p') as f: + lp_dump(Results, f) + +if __name__ == "__main__": + main() + diff --git a/src/gmxio.py b/src/gmxio.py index 273438ada..c651df3b1 100644 --- a/src/gmxio.py +++ b/src/gmxio.py @@ -8,6 +8,7 @@ import os, sys import re +import pandas as pd from forcebalance.nifty import * from forcebalance.nifty import _exec from forcebalance import BaseReader @@ -1120,13 +1121,18 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, if temperature != None: md_opts["ref_t"] = temperature md_opts["gen_vel"] = "no" + # Set some default methods for temperature coupling. md_defs["tc_grps"] = "System" md_defs["tcoupl"] = "v-rescale" md_defs["tau_t"] = 1.0 if self.pbc: md_opts["comm_mode"] = "linear" + # Removing center of mass motion at every time step should not impact performance. + # http://gromacs.5086.x6.nabble.com/COM-motion-removal-td4413458.html + md_opts["nstcomm"] = 1 if pressure != None: md_opts["ref_p"] = pressure + # Set some default methods for pressure coupling. md_defs["pcoupl"] = "parrinello-rahman" md_defs["tau_p"] = 1.5 else: @@ -1172,64 +1178,137 @@ def molecular_dynamics(self, nsteps, timestep, temperature=None, pressure=None, self.warngmx("grompp -c %s -p %s.top -f %s-md.mdp -o %s-md.tpr" % (gro2, self.name, self.name, self.name), warnings=warnings, print_command=verbose) self.callgmx("mdrun -v -deffnm %s-md -nt %i -stepout %i" % (self.name, threads, nsave), print_command=verbose, print_to_screen=verbose) + if verbose: logger.info("Finished!\n") + + # Final frame of molecular dynamics. + self.md_final = "%s-md.gro" % self.name + + # Name of the molecular dynamics trajectory. self.mdtraj = '%s-md.trr' % self.name - if verbose: logger.info("Production run finished, calculating properties...\n") - # Figure out dipoles - note we use g_dipoles and not the multipole_moments function. - self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % (self.name, self.name, self.name), stdin="System\n") + # Call md_extract and return the prop_return dictionary (backward compatibility with old functionality.) + old_map = {'potential' : 'Potentials', 'kinetic' : 'Kinetics', 'dipole' : 'Dips', 'components' : 'Ecomps', + 'density' : 'Rhos', 'volume' : 'Volumes', 'al' : 'Als', 'scd' : 'Scds'} + tsnames = ['potential', 'kinetic', 'dipole', 'components'] + if self.pbc: tsnames += ['density', 'volume'] + if bilayer: tsnames += ['al', 'scd'] + Extract = self.md_extract(tsnames) + prop_return = OrderedDict([(old_map[i], Extract[i]) for i in Extract.keys() if i in old_map]) + return prop_return + + def md_extract(self, tsnames, tsspec={}, verbose=True): + """ + Extract time series from the MD trajectory / energy file. + Since Gromacs can do so many things in a single call to + g_energy, we implement all the functionality in a single big + function (it can be split off later.) + + @param[in] tsnames List of tsnames, containing names of + timeseries that need to be evaluated. + + @param[in] tsspec Dictionary with tsnames : tsparams key/value + pairs. tsparams contains any extra information needed to + calculate the observable (e.g. atom indices in S_cd) but it + isn't strictly required. + + @return answer Dictionary with tsnames : timeseries key/value pairs. + The leading dimension of the time series is always the sample axis. + """ + + if not hasattr(self, 'mdtraj') or not os.path.exists(self.mdtraj): + logger.error('Called the md_extract method without having an MD trajectory!') + raise RuntimeError + + if verbose: logger.info("Calculating properties...\n") # Figure out which energy terms need to be printed. energyterms = self.energy_termnames(edrfile="%s-md.edr" % self.name) - ekeep = [k for k,v in energyterms.items() if v <= energyterms['Total-Energy']] - ekeep += ['Temperature', 'Volume', 'Density'] + """ + For reference the menu from g_energy may look like this. + + Select the terms you want from the following list by + selecting either (part of) the name or the number or a combination. + End your selection with an empty line or a zero. + ------------------------------------------------------------------- + 1 LJ-(SR) 2 Disper.-corr. 3 Coulomb-(SR) 4 Potential + 5 Kinetic-En. 6 Total-Energy 7 Temperature 8 Pres.-DC + 9 Pressure 10 Constr.-rmsd 11 Box-X 12 Box-Y + 13 Box-Z 14 Volume 15 Density 16 pV + 17 Enthalpy 18 Vir-XX 19 Vir-XY 20 Vir-XZ + 21 Vir-YX 22 Vir-YY 23 Vir-YZ 24 Vir-ZX + 25 Vir-ZY 26 Vir-ZZ 27 Pres-XX 28 Pres-XY + 29 Pres-XZ 30 Pres-YX 31 Pres-YY 32 Pres-YZ + 33 Pres-ZX 34 Pres-ZY 35 Pres-ZZ 36 #Surf*SurfTen + 37 Box-Vel-XX 38 Box-Vel-YY 39 Box-Vel-ZZ 40 T-System + 41 Lamb-System + """ - # Calculate deuterium order parameter for bilayer optimization. - if bilayer: - n_snap = self.n_snaps(nsteps, 1000, timestep) - Scds = self.calc_scd(n_snap, timestep) - al_vars = ['Box-Y', 'Box-X'] - self.callgmx("g_energy -f %s-md.edr -o %s-md-energy-xy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(al_vars)) - Xs = [] - Ys = [] - for line in open("%s-md-energy-xy.xvg" % self.name): - s = [float(i) for i in line.split()] - Xs.append(s[-1]) - Ys.append(s[-2]) - Xs = np.array(Xs) - Ys = np.array(Ys) - Als = (Xs * Ys) / 64 - else: - Scds = 0 - Als = 0 + # Term names that we want to get from g_energy. + ekeep = [] + # Save anything that comes before Total-Energy if doing an energy component analysis. + if 'components' in tsnames: + ecomp = [k for k,v in energyterms.items() if v <= energyterms['Total-Energy']] + ekeep += ecomp[:] + # These are time series which can be directly copied from g_energy output. + copy_keys = {'energy' : 'Total-Energy', 'potential' : 'Potential', 'kinetic' : 'Kinetic-En.', + 'temperature' : 'Temperature', 'pressure' : 'Pressure', 'volume' : 'Volume', + 'density' : 'Density', 'pv' : 'pV'} + for i in copy_keys: + if i in tsnames and copy_keys[i] not in ekeep: + ekeep.append(copy_keys[i]) + # Area per lipid requires Box-X and Box-Y time series. + if 'al' in tsnames: + ekeep += ['Box-X', 'Box-Y'] + ekeep = list(set(ekeep)) + eksort = [] + for i in energyterms.keys(): + for j in ekeep: + if j not in energyterms.keys(): + logger.error('Energy term in ekeep %s is not present in edr file' % j) + raise RuntimeError + if i == j: eksort.append(j) # Perform energy component analysis and return properties. - self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(ekeep)) - ecomp = OrderedDict() - Rhos = [] - Volumes = [] - Kinetics = [] - Potentials = [] - for line in open("%s-md-energy.xvg" % self.name): - s = [float(i) for i in line.split()] - for i in range(len(ekeep) - 2): - val = s[i+1] - if ekeep[i] in ecomp: - ecomp[ekeep[i]].append(val) - else: - ecomp[ekeep[i]] = [val] - Rhos.append(s[-1]) - Volumes.append(s[-2]) - Rhos = np.array(Rhos) - Volumes = np.array(Volumes) - Potentials = np.array(ecomp['Potential']) - Kinetics = np.array(ecomp['Kinetic-En.']) - Dips = np.array([[float(i) for i in line.split()[1:4]] for line in open("%s-md-dip.xvg" % self.name)]) - Ecomps = OrderedDict([(key, np.array(val)) for key, val in ecomp.items()]) - # Initialized property dictionary. - prop_return = OrderedDict() - prop_return.update({'Rhos': Rhos, 'Potentials': Potentials, 'Kinetics': Kinetics, 'Volumes': Volumes, 'Dips': Dips, 'Ecomps': Ecomps, 'Als': Als, 'Scds': Scds}) - if verbose: logger.info("Finished!\n") - return prop_return + self.callgmx("g_energy -f %s-md.edr -o %s-md-energy.xvg -xvg no" % (self.name, self.name), stdin="\n".join(eksort)) + + tarray = np.array([float(line.split()[0]) for line in open("%s-md-energy.xvg" % self.name)]) + times = pd.Index(tarray, name='time') + xvgdata = [[float(i) for i in line.split()[1:]] for line in open("%s-md-energy.xvg" % self.name)] + xvgdf = pd.DataFrame(xvgdata, columns=eksort, index = times) + + + # Attempt to use Pandas more effectively. + Output = OrderedDict() + + Output['time'] = tarray + # Now take the output values from g_energy and allocate them into the Output dictionary. + for i in tsnames: + if i in copy_keys: + Output[i] = np.array(xvgdf[copy_keys[i]]) + if 'components' in tsnames: + # Energy component analysis is a DataFrame. + Output['components'] = xvgdf[ecomp] + + # Area per lipid. + # HARD CODED NUMBER: number of lipid molecules! + if 'al' in tsnames: + Output['al'] = np.array(xvgdf['Box-X'])*np.array(xvgdf['Box-Y']) / 64 + + # Deuterium order parameter. + # HARD CODED: atom names of lipid tails! + if 'scd' in tsnames: + n_snap = self.n_snaps(nsteps, 1000, timestep) + Output['scd'] = self.calc_scd(n_snap, timestep) + + # Dipole moments; note we use g_dipoles and not the multipole_moments function. + if 'dipole' in tsnames: + self.callgmx("g_dipoles -s %s-md.tpr -f %s-md.trr -o %s-md-dip.xvg -xvg no" % + (self.name, self.name, self.name), stdin="System\n") + Output['dipole'] = np.array([[float(i) for i in line.split()[1:4]] + for line in open("%s-md-dip.xvg" % self.name)]) + + # We could convert it to a Panel if we wanted, but I'm not fully confident using it... + return Output def md(self, nsteps=0, nequil=0, verbose=False, deffnm=None, **kwargs): @@ -1491,12 +1570,18 @@ def __init__(self,options,tgt_opts,forcefield): self.set_option(options,'gmxpath') # Suffix for GROMACS executables. self.set_option(options,'gmxsuffix') + # Engine for calculating things locally (e.g. polarization correction) self.engine_ = GMX # Name of the engine to pass to scripts. self.engname = "gromacs" + # Valid coordinate suffix. + self.crdsfx = ['.gro', '.pdb'] + # Auxiliary (e.g. topology) files. + self.auxsfx = [['.mdp'], ['.top']] # Command prefix. self.mdpfx = "bash gmxprefix.bash" # Scripts to be copied from the ForceBalance installation directory. - self.scripts = ['gmxprefix.bash', 'md_chain.py'] + self.scripts = ['gmxprefix.bash', 'md_one.py'] ## Initialize base class. super(Thermo_GMX,self).__init__(options,tgt_opts,forcefield) + diff --git a/src/molecule.py b/src/molecule.py index f423ec254..0d69eaa81 100644 --- a/src/molecule.py +++ b/src/molecule.py @@ -1419,20 +1419,20 @@ def build_topology(self, sn=None, Fac=1.2): zidx = -1 for j in xgrd: xi = self.xyzs[sn][i][0] - if toppbc and xi < 0: xi += xmax - if toppbc and xi > xmax: xi -= xmax + while toppbc and xi < 0: xi += xmax + while toppbc and xi > xmax: xi -= xmax if xi < j: break xidx += 1 for j in ygrd: yi = self.xyzs[sn][i][1] - if toppbc and yi < 0: yi += ymax - if toppbc and yi > ymax: yi -= ymax + while toppbc and yi < 0: yi += ymax + while toppbc and yi > ymax: yi -= ymax if yi < j: break yidx += 1 for j in zgrd: zi = self.xyzs[sn][i][2] - if toppbc and zi < 0: zi += zmax - if toppbc and zi > zmax: zi -= zmax + while toppbc and zi < 0: zi += zmax + while toppbc and zi > zmax: zi -= zmax if zi < j: break zidx += 1 gasn[(xidx,yidx,zidx)].append(i) diff --git a/src/nifty.py b/src/nifty.py index 3fcc29d3b..938b4dd2a 100644 --- a/src/nifty.py +++ b/src/nifty.py @@ -237,6 +237,18 @@ def magic_string(str): #===============================# #| Math: Variable manipulation |# #===============================# +def isnpnan(var): + """ + + Determine whether a variable is np.nan. I wrote this function + because np.isnan would crash if we use it on a dtype that is not + np.float + + """ + if any([isinstance(var, x) for x in [float, np.float, np.float32, np.float64, np.double]]): + return np.isnan(var) + else: return False + def isint(word): """ONLY matches integers! If you have a decimal point? None shall pass! @@ -308,6 +320,14 @@ def flat(vec): """ return np.array(vec).reshape(-1) +def getval(dframe, col): + """ Extract the single non-NaN value from a column. """ + nnan = [i for i in dframe[col] if not isnpnan(i)] + if len(nnan) != 1: + logger.error('%i values in column %s are not NaN (expected only 1)' % (len(nnan), col)) + raise RuntimeError + return nnan[0] + def monotonic(arr, start, end): # Make sure an array is monotonically decreasing from the start to the end. a0 = arr[start] diff --git a/src/quantity.py b/src/observable.py similarity index 52% rename from src/quantity.py rename to src/observable.py index 44b137068..23fb6a0cc 100644 --- a/src/quantity.py +++ b/src/observable.py @@ -3,8 +3,9 @@ from forcebalance.finite_difference import fdwrap, f12d3p from forcebalance.molecule import Molecule -from forcebalance.nifty import col, flat, statisticalInefficiency -from forcebalance.nifty import printcool +from forcebalance.nifty import col, flat, getval +from forcebalance.nifty import printcool, statisticalInefficiency +from forcebalance.optimizer import Counter from collections import OrderedDict @@ -14,8 +15,7 @@ # method mean_stderr def mean_stderr(ts): """Return mean and standard deviation of a time series ts.""" - return np.mean(ts), \ - np.std(ts)*np.sqrt(statisticalInefficiency(ts, warn=False)/len(ts)) + return np.mean(ts), np.std(ts)*np.sqrt(statisticalInefficiency(ts, warn=False)/len(ts)) # method energy_derivatives def energy_derivatives(engine, FF, mvals, h, pgrad, length, AGrad=True): @@ -63,44 +63,36 @@ def energy_driver(mvals_): G[i,:] = EDG[:] return G -class Quantity(object): +class Observable(object): """ - Base class for thermodynamical quantity used for fitting. This can + Base class for thermodynamical observable used for fitting. This can be any experimental data that can be calculated as an ensemble average from a simulation. Data attributes --------------- name : string - Identifier for the quantity that is specified in `quantities` in Target + Identifier for the observable that is specified in `observables` in Target options. - engname : string - Use this engine to extract the quantity from the simulation results. - At present, only `gromacs` is supported. - temperature : float - Calculate the quantity at this temperature (in K). - pressure : float - Calculate the quantity at this pressure (in bar). - """ - def __init__(self, engname, temperature, pressure, name=None): - self.name = name if name is not None else "empty" - self.engname = engname - self.temperature = temperature - self.pressure = pressure - + def __init__(self, source): + # Reference data which can be useful in calculating the observable. + if 'temp' in source: self.temp = getval(source, 'temp') + if 'pres' in source: self.pres = getval(source, 'pres') + self.Data = source[self.columns] + def __str__(self): - return "quantity is " + self.name.capitalize() + "." + return "Observable = " + self.name.capitalize() + "; Columns = " + ', '.join(self.columns) def extract(self, engines, FF, mvals, h, AGrad=True): - """Calculate and extract the quantity from MD results. How this is done - depends on the quantity and the engine so this must be + """Calculate and extract the observable from MD results. How this is done + depends on the observable and the engine so this must be implemented in the subclass. Parameters ---------- engines : list - A list of Engine objects that are requred to calculate the quantity. + A list of Engine objects that are requred to calculate the observable. FF : FF Force field object. mvals : list @@ -114,92 +106,106 @@ def extract(self, engines, FF, mvals, h, AGrad=True): ------- result : (float, float, np.array) The returned tuple is (Q, Qerr, Qgrad), where Q is the calculated - quantity, Qerr is the calculated standard deviation of the quantity, + observable, Qerr is the calculated standard deviation of the observable, and Qgrad is a M-array with the calculated gradients for the - quantity, with M being the number of force field parameters that are + observable, with M being the number of force field parameters that are being fitted. """ logger.error("Extract method not implemented in base class.\n") raise NotImplementedError -# class Quantity_Density -class Quantity_Density(Quantity): - def __init__(self, engname, temperature, pressure, name=None): - """ Density. """ - super(Quantity_Density, self).__init__(engname, temperature, pressure, name) - - self.name = name if name is not None else "density" + def aggregate(self, Sims, AGrad, cycle=None): + print self.name + if cycle == None: cycle = Counter() + # Different from the Results objects in the Simulation, this + # one is keyed by the simulation type then by the time series + # data type. + self.TimeSeries = OrderedDict([(i, OrderedDict()) for i, j in self.requires.items()]) + for stype in self.requires: + for dtype in self.requires[stype]: + self.TimeSeries[stype][dtype] = np.concatenate([Sim.Results[cycle][dtype] for Sim in Sims if Sim.type == stype]) + if AGrad: + # Also aggregate the derivative information along the second axis (snapshot axis) + self.Derivatives = OrderedDict() + for stype in self.requires: + # The derivatives that we have may be obtained from the 'derivatives' data structure of the first Simulation + # that matches the required simulation type. + self.Derivatives[stype] = OrderedDict() + for dtype in [Sim.Results[cycle]['derivatives'].keys() for Sim in Sims if Sim.type == stype][0]: + self.Derivatives[stype][dtype] = np.concatenate([Sim.Results[cycle]['derivatives'][dtype] for Sim in Sims if Sim.type == stype], axis=1) + +# class Observable_Density +class Observable_Density(Observable): + + """ + The Observable_Density class implements common methods for + extracting the density from a simulation, but does not specify the + simulation itself ('requires' attribute). Don't create a + Density object directly, use the Liquid_Density and Solid_Density + derived classes. - def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): + This is due to our overall framework that each observable must + have a unique list of required simulations, yet the formula for + calculating the density and its derivative is always the same. + """ + + def __init__(self, source): + # Name of the observable. + self.name = 'density' + # Columns that are taken from the data table. + self.columns = ['density'] + super(Observable_Density, self).__init__(source) + + def evaluate(self, AGrad): #==========================================# # Physical constants and local variables. # #==========================================# # Energies in kJ/mol and lengths in nanometers. kB = 0.008314472471220214 - kT = kB*self.temperature + kT = kB*self.temp Beta = 1.0/kT mBeta = -Beta - - #======================================================# - # Get simulation properties depending on the engines. # - #======================================================# - if self.engname == "gromacs": - # Default name - deffnm = os.path.basename(os.path.splitext(engines[0].mdene)[0]) - # What energy terms are there and what is their order - energyterms = engines[0].energy_termnames(edrfile="%s.%s" % (deffnm, "edr")) - # Grab energy terms to print and keep track of energy term order. - ekeep = ['Total-Energy', 'Potential', 'Kinetic-En.', 'Temperature'] - ekeep += ['Volume', 'Density'] + phase = self.requires.keys()[0] + # Density time series. + Density = self.TimeSeries[phase]['density'] + # Average and error. + Rho_avg, Rho_err = mean_stderr(Density) + Answer = OrderedDict() + Answer['mean'] = Rho_avg + Answer['stderr'] = Rho_err + if AGrad: + G = self.Derivatives[phase]['potential'] + # Analytic first derivative. + Rho_grad = mBeta * (flat(np.matrix(G) * col(Density)) / len(Density) + - np.mean(Density) * np.mean(G, axis=1)) + Answer['grad'] = Rho_grad + return Answer - ekeep_order = [key for (key, value) in - sorted(energyterms.items(), key=lambda (k, v) : v) - if key in ekeep] +class Liquid_Density(Observable_Density): + def __init__(self, source): + # The density time series is required from the simulation. + self.requires = OrderedDict([('liquid', ['density'])]) + super(Liquid_Density, self).__init__(source) - # Perform energy component analysis and return properties. - engines[0].callgmx(("g_energy " + - "-f %s.%s " % (deffnm, "edr") + - "-o %s-energy.xvg " % deffnm + - "-xvg no"), - stdin="\n".join(ekeep)) - - # Read data and store properties by grabbing columns in right order. - data = np.loadtxt("%s-energy.xvg" % deffnm) - Energy = data[:, ekeep_order.index("Total-Energy") + 1] - Potential = data[:, ekeep_order.index("Potential") + 1] - Kinetic = data[:, ekeep_order.index("Kinetic-En.") + 1] - Volume = data[:, ekeep_order.index("Volume") + 1] - Temperature = data[:, ekeep_order.index("Temperature") + 1] - Density = data[:, ekeep_order.index("Density") + 1] +class Solid_Density(Observable_Density): + def __init__(self, source): + # The density time series is required from the simulation. + self.requires = OrderedDict([('solid', ['density'])]) + super(Solid_Density, self).__init__(source) - #============================================# - # Compute the potential energy derivatives. # - #============================================# - logger.info(("Calculating potential energy derivatives " + - "with finite difference step size: %f\n" % h)) - printcool("Initializing array to length %i" % len(Energy), - color=4, bold=True) - G = energy_derivatives(engines[0], FF, mvals, h, pgrad, len(Energy), AGrad) - - #=======================================# - # Quantity properties and derivatives. # - #=======================================# - # Average and error. - Rho_avg, Rho_err = mean_stderr(Density) - # Analytic first derivative. - Rho_grad = mBeta * (flat(np.mat(G) * col(Density)) / len(Density) \ - - np.mean(Density) * np.mean(G, axis=1)) - - return Rho_avg, Rho_err, Rho_grad - -# class Quantity_H_vap -class Quantity_H_vap(Quantity): - def __init__(self, engname, temperature, pressure, name=None): +# class Observable_H_vap +class Observable_H_vap(Observable): + def __init__(self, source): """ Enthalpy of vaporization. """ - super(Quantity_H_vap, self).__init__(engname, temperature, pressure, name) - - self.name = name if name is not None else "H_vap" + # Name of the observable. + self.name = 'hvap' + # Columns that are taken from the data table. + self.columns = ['hvap'] + # Get energy/volume from liquid simulation, and energy from gas simulation. + self.requires = OrderedDict([('liquid', ['energy', 'volume']), ('gas', ['energy'])]) + # Initialize the base class + super(Observable_H_vap, self).__init__(source) def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): #==========================================# @@ -274,9 +280,9 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): G = energy_derivatives(engines[0], FF, mvals, h, pgrad, len(Energy), AGrad) Gm = energy_derivatives(engines[1], FF, mvals, h, pgrad, len(mEnergy), AGrad) - #=======================================# - # Quantity properties and derivatives. # - #=======================================# + #=========================================# + # Observable properties and derivatives. # + #=========================================# # Average and error. E_avg, E_err = mean_stderr(Energy) Em_avg, Em_err = mean_stderr(mEnergy) @@ -298,3 +304,63 @@ def extract(self, engines, FF, mvals, h, pgrad, AGrad=True): return Hvap_avg, Hvap_err, Hvap_grad +# class Observable_Al +class Observable_Al(Observable): + def __init__(self, source): + """ Area per lipid. """ + # Name of the observable. + self.name = 'al' + # Columns that are taken from the data table. + self.columns = ['al'] + # Get area per lipid from the bilayer simulation. + self.requires = OrderedDict([('bilayer', ['al'])]) + # Initialize the base class + super(Observable_Al, self).__init__(source) + +# class Observable_Scd +class Observable_Scd(Observable): + def __init__(self, source): + """ Deuterium order parameter. """ + # Name of the observable. + self.name = 'scd' + # Columns that are taken from the data table. + self.columns = ['scd1_idx', 'scd1', 'scd2_idx', 'scd2'] + # Get deuterium order parameter from the bilayer simulation. + self.requires = OrderedDict([('bilayer', ['scd1', 'scd2'])]) + # Initialize the base class + super(Observable_Scd, self).__init__(source) + +# class Lipid_Kappa +class Lipid_Kappa(Observable): + def __init__(self, source): + """ Compressibility as calculated for lipid bilayers. """ + # Name of the observable. + self.name = 'kappa' + # Columns that are taken from the data table. + self.columns = ['kappa'] + # Get area per lipid from the bilayer simulation. + self.requires = OrderedDict([('bilayer', ['al'])]) + # Initialize the base class + super(Lipid_Kappa, self).__init__(source) + +# class Liquid_Kappa +class Liquid_Kappa(Observable): + def __init__(self, source): + """ Compressibility as calculated for liquids. """ + # Name of the observable. + self.name = 'kappa' + # Columns that are taken from the data table. + self.columns = ['kappa'] + # Get area per lipid from the bilayer simulation. + self.requires = OrderedDict([('liquid', ['volume'])]) + # Initialize the base class + super(Liquid_Kappa, self).__init__(source) + +## A mapping that takes us from observable names to possible Observable objects. +OMap = {'density' : [Liquid_Density, Solid_Density], + 'rho' : [Liquid_Density, Solid_Density], + 'hvap' : [Observable_H_vap], + 'h_vap' : [Observable_H_vap], + 'al' : [Observable_Al], + 'kappa' : [Liquid_Kappa, Lipid_Kappa], + 'scd' : [Observable_Scd]} diff --git a/src/parser.py b/src/parser.py index 0780d1d58..b424ae7dc 100644 --- a/src/parser.py +++ b/src/parser.py @@ -123,7 +123,7 @@ "adaptive_damping" : (0.5, 10, 'Damping factor that ties down the trust radius to trust0; decrease for a more variable step size.', 'Main Optimizer'), "error_tolerance" : (0.0, 10, 'Error tolerance; the optimizer will only reject steps that increase the objective function by more than this number.', 'Main Optimizer'), "search_tolerance" : (1e-4, -10, 'Search tolerance; used only when trust radius is negative, dictates convergence threshold of nonlinear search.', 'Main Optimizer with negative mintrust; advanced usage'), - "amoeba_eps" : (None, -10, 'The AMOEBA mutual polarization criterion.', 'Targets in OpenMM / TINKER that use the AMOEBA force field', ['OPENMM','TINKER']) + "amoeba_eps" : (None, -10, 'The AMOEBA mutual polarization criterion.', 'Targets in OpenMM / TINKER that use the AMOEBA force field', ['OPENMM','TINKER']), }, 'sections': {"read_mvals" : (None, 100, 'Paste mathematical parameters into the input file for them to be read in directly', 'Restarting an optimization'), "read_pvals" : (None, 100, 'Paste physical parameters into the input file for them to be read in directly', 'Restarting an optimization (recommend use_mvals instead)'), @@ -151,14 +151,15 @@ "gmx_top" : (None, -10, 'Gromacs .top files. If not provided, will search for default.', 'Targets that use GROMACS', 'GMX'), "gmx_ndx" : (None, -10, 'Gromacs .ndx files. If not provided, will search for default.', 'Targets that use GROMACS', 'GMX'), "tinker_key" : (None, -10, 'TINKER .key files. If not provided, will search for default.', 'Targets that use TINKER', 'TINKER'), - "expdata_txt" : ('expset.txt', 0, 'Text file containing experimental data.', 'Thermodynamic properties target', 'thermo'), + "source" : ('data.txt', 0, 'Text file containing source data (experimental data, parameters for observable models, weights).', 'Thermodynamic properties target', 'thermo'), "read" : (None, 50, 'Provide a temporary directory ".tmp" to read data from a previous calculation on the initial iteration (for instance, to restart an aborted run).', 'Liquid and Remote targets', 'Liquid, Remote'), }, 'allcaps' : {"type" : (None, 200, 'The type of fitting target, for instance AbInitio_GMX ; this must correspond to the name of a Target subclass.', 'All targets (important)' ,''), "engine" : (None, 180, 'The external code used to execute the simulations (GMX, TINKER, AMBER, OpenMM)', 'All targets (important)', '') }, 'lists' : {"fd_ptypes" : ([], -100, 'The parameter types that are differentiated using finite difference', 'In conjunction with fdgrad, fdhess, fdhessdiag; usually not needed'), - "quantities" : ([], 100, 'List of quantities to be fitted, each must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), + "observables" : ([], 100, 'List of observables to be fitted, each must have corresponding Quantity subclass', 'Thermodynamic properties target', 'thermo'), + "simulations" : ([], 100, 'List of simulations to be run (in order to calculate fitting observables)', 'Thermodynamic properties target', 'thermo'), }, 'ints' : {"shots" : (-1, 0, 'Number of snapshots; defaults to all of the snapshots', 'Energy + Force Matching', 'AbInitio'), "fitatoms" : (0, 0, 'Number of fitting atoms; defaults to all of them', 'Energy + Force Matching', 'AbInitio'), @@ -175,7 +176,6 @@ "save_traj" : (0, -10, 'Whether to save trajectories. 0 = Never save; 1 = Delete if optimization step is good; 2 = Always save', 'Condensed phase properties', 'Liquid, Lipid'), "eq_steps" : (20000, 0, 'Number of time steps for the equilibration run.', 'Thermodynamic property targets', 'thermo'), "md_steps" : (50000, 0, 'Number of time steps for the production run.', 'Thermodynamic property targets', 'thermo'), - "n_sim_chain" : (1, 0, 'Number of simulations required to calculate quantities.', 'Thermodynamic property targets', 'thermo'), }, 'bools' : {"whamboltz" : (0, -100, 'Whether to use WHAM Boltzmann Weights', 'Ab initio targets with Boltzmann weights (advanced usage)', 'AbInitio'), "sampcorr" : (0, -150, 'Whether to use the archaic sampling correction', 'Energy + Force Matching, very old option, do not use', 'AbInitio'), @@ -239,6 +239,8 @@ "self_pol_mu0" : (0.0, -150, 'Gas-phase dipole parameter for self-polarization correction (in debye).', 'Condensed phase property targets', 'liquid'), "self_pol_alpha" : (0.0, -150, 'Polarizability parameter for self-polarization correction (in debye).', 'Condensed phase property targets', 'liquid'), "epsgrad" : (0.0, -150, 'Gradient below this threshold will be set to zero.', 'All targets'), + "timestep" : (1.0, 0, 'Time step for molecular dynamics (in femtoseconds).', 'Thermodynamic property targets', 'thermo'), + "interval" : (1.0, 0, 'Sampling interval for molecular dynamics (in picoseconds).', 'Thermodynamic property targets', 'thermo'), }, 'sections': {} } @@ -286,6 +288,8 @@ "gas_equ_steps" : "gas_eq_steps", "lipid_prod_steps" : "lipid_md_steps", "lipid_equ_steps" : "lipid_eq_steps", + "expdata_txt" : "source", + "quantities" : "observables", } ## Listing of sections in the input file. diff --git a/src/target.py b/src/target.py index cb79b6006..66011cec2 100644 --- a/src/target.py +++ b/src/target.py @@ -715,7 +715,7 @@ def submit_jobs(self, mvals, AGrad=False, AHess=False): wq = getWorkQueue() - logger.info("Sending target '%s' to work queue for remote evaluation\n" % self.name) + # logger.info("Sending target '%s' to work queue for remote evaluation\n" % self.name) # input: # forcebalance.p: pickled mvals, options, and forcefield # rtarget.py: remote target evaluation script @@ -726,7 +726,7 @@ def submit_jobs(self, mvals, AGrad=False, AHess=False): forcebalance.nifty.queue_up(wq, "python rtarget.py > rtarget.out 2>&1", ["forcebalance.p", "rtarget.py", "target.tar.bz2"], ['objective.p', 'indicate.log', 'rtarget.out'], - tgt=self) + tgt=self, verbose=False) def read(self,mvals,AGrad=False,AHess=False): return self.get(mvals, AGrad, AHess) diff --git a/src/thermo.py b/src/thermo.py index b5150604b..76a20d1cf 100644 --- a/src/thermo.py +++ b/src/thermo.py @@ -1,24 +1,486 @@ import os +import re +import csv +import copy import errno +import shutil import numpy as np +import pandas as pd +import itertools +import cStringIO +from forcebalance.molecule import Molecule +from forcebalance.observable import OMap from forcebalance.target import Target -from forcebalance.finite_difference import in_fd -from forcebalance.nifty import flat, col, row -from forcebalance.nifty import lp_dump, lp_load, wopen, _exec -from forcebalance.nifty import LinkFile, link_dir_contents +from forcebalance.finite_difference import in_fd, fdwrap, f12d3p +from forcebalance.nifty import flat, col, row, isint, isnpnan +from forcebalance.nifty import lp_dump, lp_load, wopen, _exec, getval +from forcebalance.nifty import GoInto, LinkFile, link_dir_contents from forcebalance.nifty import printcool, printcool_dictionary +from forcebalance.nifty import getWorkQueue +from forcebalance.optimizer import Counter -from collections import OrderedDict +from collections import defaultdict, OrderedDict -from forcebalance.output import getLogger +import forcebalance +from forcebalance.output import * logger = getLogger(__name__) -# +# print logger.parent.parent.handlers[0] +# logger.parent.parent.handlers = [] + +class TextParser(object): + """ Parse a text file. """ + def __init__(self, fnm): + self.fnm = fnm + self.parse() + + def is_empty_line(self): + return all([len(fld.strip()) == 0 for fld in self.fields]) + + def is_comment_line(self): + return re.match('^[\'"]?#',self.fields[0].strip()) + + def process_header(self): + """ Function for setting more attributes using the header line, if needed. """ + self.heading = [i.strip() for i in self.fields[:]] + + def process_data(self): + """ Function for setting more attributes using the current line, if needed. """ + trow = [] + for ifld in range(len(self.heading)): + if ifld < len(self.fields): + trow.append(self.fields[ifld].strip()) + else: + trow.append('') + return trow + + def sanity_check(self): + """ Extra sanity checks. """ + + def parse(self): + self.heading = [] # Fields in header line + meta = defaultdict(list) # Dictionary of metadata + found_header = 0 # Whether we found the header line + table = [] # List of data records + self.generate_splits() # Generate a list of records for each line. + self.ln = 0 # Current line number + for line, fields in zip(open(self.fnm).readlines(), self.splits): + # Set attribute so methods can use it. + self.fields = fields + # Skip over empty lines or comment lines. + if self.is_empty_line(): + logger.debug("\x1b[96mempt\x1b[0m %s\n" % line.replace('\n','')) + self.ln += 1 + continue + if self.is_comment_line(): + logger.debug("\x1b[96mcomm\x1b[0m %s\n" % line.replace('\n','')) + self.ln += 1 + continue + # Indicates metadata mode. + is_meta = 0 + # Indicates whether this is the header line. + is_header = 0 + # Split line by tabs. + for ifld, fld in enumerate(fields): + fld = fld.strip() + # Stop parsing when we encounter a comment line. + if re.match('^[\'"]?#',fld): break + # The first word would contain the name of the metadata key. + if ifld == 0: + mkey = fld + # Check if the first field is an equals sign (turn on metadata mode). + if ifld == 1: + # Activate metadata mode. + if fld == "=": + is_meta = 1 + # Otherwise, this is the header line. + elif not found_header: + is_header = 1 + found_header = 1 + # Read in metadata. + if ifld > 1 and is_meta: + meta[mkey].append(fld) + # Set field start, field end, and field content for the header. + if is_header: + logger.debug("\x1b[1;96mhead\x1b[0m %s\n" % line.replace('\n','')) + self.process_header() + elif is_meta: + logger.debug("\x1b[96mmeta\x1b[0m %s\n" % line.replace('\n','')) + else: + # Build the row of data to be appended to the table. + # Loop through the fields in the header and inserts fields + # in the data line accordingly. Ignores trailing tabs/spaces. + logger.debug("\x1b[96mdata\x1b[0m %s\n" % line.replace('\n','')) + table.append(self.process_data()) + self.ln += 1 + self.sanity_check() + if logger.level == DEBUG: + printcool("%s parsed as %s" % (self.fnm.replace(os.getcwd()+'/',''), self.format), color=6) + self.metadata = meta + self.table = table + +class CSV_Parser(TextParser): + + """ + Parse a comma-separated file. This class is for all + source files that are .csv format (characterized by having the + same number of comma-separated fields in each line). Fields are + separated by commas but they may contain commas as well. + + In contrast to the other formats, .csv MUST contain the same + number of commas in each line. .csv format is easily prepared + using Excel. + """ + + def __init__(self, fnm): + self.format = "comma-separated values (csv)" + super(CSV_Parser, self).__init__(fnm) + + def generate_splits(self): + with open(self.fnm, 'r') as f: self.splits = list(csv.reader(f)) + +class TAB_Parser(TextParser): + + """ + Parse a tab-delimited file. This function is called for all + source files that aren't csv and contain at least one tab. + Fields are separated by tabs and do not contain tabs. + + Tab-delimited format is easy to prepare using programs like Excel. + It is easier to read than .csv but represented differently by + different editors. + + Empty fields must still exist (represented using multiple tabs). + """ + + def __init__(self, fnm): + self.format = "tab-delimited text" + super(TAB_Parser, self).__init__(fnm) + + def generate_splits(self): + self.splits = [line.split('\t') for line in open(self.fnm).readlines()] + +class FIX_Parser(TextParser): + + """ + Parse a fixed width format file. This function is called for all + source files that aren't csv and contain no tabs. + + Fixed width is harder to prepare by hand but easiest to read, + because it looks the same in all text editors. The field width is + determined by the header line (first line in the data table), + i.e. the first non-empty, non-comment, non-metadata line. + + Empty fields need to be filled with the correct number of spaces. + All fields must have the same alignment (left or right). The + start and end of each field is determined from the header line and + used to determine alignment. If the alignment cannot be determined + then it will throw an error. + + Example of a left-aligned fixed width file: + + T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 + 323.15 1 0.631 1 C15 C34 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 + + """ + + def __init__(self, fnm): + self.format = "fixed-width text" + self.fbegs_dat = [] + self.fends_dat = [] + super(FIX_Parser, self).__init__(fnm) + + def generate_splits(self): + # This regular expression splits a string looking like this: + # "Density (kg m^-3) Hvap (kJ mol^-1) Alpha Kappa". But I + # don't want to split in these places: "Density_(kg_m^-3) + # Hvap_(kJ_mol^-1) Alpha Kappa" + allfields = [list(re.finditer('[^\s(]+(?:\s*\([^)]*\))?', line)) for line in open(self.fnm).readlines()] + self.splits = [] + # Field start / end positions for each line in the file + self.fbegs = [] + self.fends = [] + for line, fields in zip(open(self.fnm).readlines(), allfields): + self.splits.append([fld.group(0) for fld in fields]) + self.fbegs.append([fld.start() for fld in fields]) + self.fends.append([fld.end() for fld in fields]) + + def process_header(self): + super(FIX_Parser, self).process_header() + # Field start / end positions for the header line + self.hbeg = self.fbegs[self.ln] + self.hend = self.fends[self.ln] + + def process_data(self): + trow = [] + hbeg = self.hbeg + hend = self.hend + fbeg = self.fbegs[self.ln] + fend = self.fends[self.ln] + fields = self.fields + # Check alignment and throw an error if incorrectly formatted. + if not ((set(fbeg).issubset(hbeg)) or (set(fend).issubset(hend))): + logger.error("This \x1b[91mdata line\x1b[0m is not aligned with the \x1b[92mheader line\x1b[0m!\n") + logger.error("\x1b[92m%s\x1b[0m\n" % header.replace('\n','')) + logger.error("\x1b[91m%s\x1b[0m\n" % line.replace('\n','')) + raise RuntimeError + # Left-aligned case + if set(fbeg).issubset(hbeg): + for hpos in hbeg: + if hpos in fbeg: + trow.append(fields[fbeg.index(hpos)]) + else: + trow.append('') + # Right-aligned case + if set(fend).issubset(hend): + for hpos in hend: + if hpos in fend: + trow.append(fields[fend.index(hpos)].strip()) + else: + trow.append('') + # Field start / end positions for the line of data + self.fbegs_dat.append(fbeg[:]) + self.fends_dat.append(fend[:]) + return trow + + def sanity_check(self): + if set(self.hbeg).issuperset(set(itertools.chain(*self.fbegs_dat))): + self.format = "left-aligned fixed width text" + elif set(self.hend).issuperset(set(itertools.chain(*self.fends_dat))): + self.format = "right-aligned fixed width text" + else: + # Sanity check - it should never get here unless the parser is incorrect. + logger.error("Fixed-width format detected but columns are neither left-aligned nor right-aligned!\n") + raise RuntimeError + +def parse1(fnm): + + """Determine the format of the source file and call the + appropriate parsing function.""" + + # CSV files have the same number of comma separated fields in every line, they are the simplest to parse. + with open(fnm, 'r') as f: csvf = list(csv.reader(f)) + if len(csvf[0]) > 1 and len(set([len(i) for i in csvf])) == 1: + return CSV_Parser(fnm) + + # Strip away comments and empty lines. + nclines = [re.sub('[ \t]*#.*$','',line) for line in open(fnm).readlines() + if not (line.strip().startswith("#") or not line.strip())] + + # Print the sanitized lines to a new file object. + # Note the file object needs ot be rewound every time we read or write to it. + fdat = cStringIO.StringIO() + for line in nclines: + print >> fdat, line, + fdat.seek(0) + + # Now the file can either be tab-delimited or fixed width. + # If ANY tabs are found in the sanitized lines, then it is taken to be + # a tab-delimited file. + have_tabs = any(['\t' in line for line in fdat.readlines()]) ; fdat.seek(0) + if have_tabs: + return TAB_Parser(fnm) + else: + return FIX_Parser(fnm) + return + +def fix_suffix(obs, head, suffixs, standard_suffix): + + """ Standardize the suffix in a column heading. """ + + if head in suffixs: + if obs == '': + logger.error('\x1b[91mEncountered heading %s but there is no observable to the left\x1b[0m\n' % head) + raise RuntimeError + return obs + '_' + standard_suffix, False + elif len(head.split('_')) > 1 and head.split('_')[-1] in suffixs: + newhl = head.split('_') + newhl[-1] = standard_suffix + return '_'.join(newhl), False + else: + return head, True + +def stand_head(head, obs): + + """ + Standardize a column heading. Does the following: + + 1) Make lowercase + 2) Split off the physical unit + 3) If a weight, uncertainty or atom index, prepend the observable name + 4) Shorten temperature and pressure + 5) Determine if this is a new observable + + Parameters: + head = Name of the heading + obs = Name of the observable (e.g. from a previously read field) + """ + + head = head.lower() + usplit = re.split(' *\(', head, maxsplit=1) + punit = '' + if len(usplit) > 1: + hfirst = usplit[0] + punit = re.sub('\)$','',usplit[1].strip()) + logger.debug("header %s split into %s, %s" % (head, hfirst, punit)) + else: + hfirst = head + newh = hfirst + newh, o1 = fix_suffix(obs, newh, ['w', 'wt', 'wts', 'weight', 'weights'], 'wt') + newh, o2 = fix_suffix(obs, newh, ['s', 'sig', 'sigma', 'sigmas'], 'sig') + newh, o3 = fix_suffix(obs, newh, ['i', 'idx', 'index', 'indices'], 'idx') + if newh in ['t', 'temp', 'temperature']: newh = 'temp' + if newh in ['p', 'pres', 'pressure']: newh = 'pres' + if all([o1, o2, o3]): + obs = newh + if newh != hfirst: + logger.debug("header %s renamed to %s\n" % (hfirst, newh)) + return newh, punit, obs + +def find_file(tgtdir, index, stype, sufs, iscrd, icn=0): + """ + Search for a suitable file that matches the simulation index, + type, suffix and IC number. This can be used to search for + initial coordinates, but also auxiliary files for the + simulation (e.g. .top and .mdp files for a Gromacs simulation, + or .key files for a Tinker simulation.) + + Generally, it is preferred to provide files where the base + name matches the simulation type. However, since it is also + okay to put all files for a simulation type into a + subdirectory, generic file names like 'topol' and 'conf' may + be used. + + Initial condition files will be searched for in the following priority (suf stands for suffix) + targets/target_name/index/stype/ICs/stype_#.suf + targets/target_name/index/stype/ICs/stype#.suf + targets/target_name/index/stype/ICs/#.suf + targets/target_name/index/stype/ICs/stype.suf + targets/target_name/index/stype/ICs/coords.suf + targets/target_name/index/stype/ICs/conf.suf + targets/target_name/index/stype/ICs/topol.suf + targets/target_name/index/stype/ICs/grompp.suf + targets/target_name/index/stype/ICs/input.suf + targets/target_name/index/stype/ICs/tinker.suf + targets/target_name/index/stype/stype.suf + targets/target_name/index/stype/coords.suf + targets/target_name/index/stype.suf + targets/target_name/stype.suf + + @param[in] tgtdir Name of the target directory to look in + @param[in] index Name of the index directory to look in (within tgtdir) + @param[in] stype Name of the simulation type to look for + @param[in] sufs List of suffixes to look for in order of priority + @param[in] iscrd Whether the file is a coordinate file (false for auxiliary files like .mdp). + @param[in] icn Initial coordinate number (will look for sequentially numbered file, or single file with multiple structures) + """ + found = '' + # The 2-tuple here corresponds to: + # - Search path for the file + # - Whether the file that we're looking for is 'numbered' + # (i.e. a different file for each structure); otherwise the + # single file may contain multiple structures + pfxs = [stype, 'coords', 'conf', 'topol', 'grompp', 'input', 'tinker', ''] + + basefnms = list(itertools.chain(*[[(os.path.join(index, stype, 'ICs', pfx+'_'+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx+("%i" % icn)), True), + (os.path.join(index, stype, 'ICs', pfx), False), + (os.path.join(index, stype, pfx), False), + (os.path.join(index, pfx), False), + (os.path.join(pfx), False)] for pfx in pfxs])) + + paths = OrderedDict() + for fnm, numbered in basefnms: + for suf in sufs: + fpath = os.path.join(tgtdir, fnm+suf if suf.startswith('.') else fnm+'.'+suf) + paths[fpath] = os.path.exists(fpath) + if os.path.exists(fpath): + if found != '': + logger.info('Target %s Index %s Simulation %s : ' + '%s overrides %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) + else: + if iscrd and not numbered: + M = Molecule(fpath) + if len(M) <= icn: + logger.error("Target %s Index %s Simulation %s : " + "file %s doesn't have enough structures\n" % + (os.path.basename(tgtdir), index, stype, fpath)) + raise RuntimeError + logger.info('Target %s Index %s Simulation %s : ' + 'found file %s\n' % (os.path.basename(tgtdir), index, stype, fpath)) + found = os.path.abspath(fpath) + if found == '': + logger.error("Can't find a file for index %s, simulation %s, suffix %s in the search path" % (index, stype, '/'.join(sufs))) + raise RuntimeError + return found, 0 if numbered else icn + +def energy_derivatives(engine, FF, mvals, h, pgrad, dipole=False): + + """ + Compute the first and second derivatives of a set of snapshot + energies with respect to the force field parameters. + + This basically calls the finite difference subroutine on the + energy_driver subroutine also in this script. + + In the future we may need to be more sophisticated with + controlling the quantities which are differentiated, but for + now this is okay.. + + @param[in] engine Engine object for calculating energies + @param[in] FF Force field object + @param[in] mvals Mathematical parameter values + @param[in] h Finite difference step size + @param[in] pgrad List of active parameters for differentiation + @param[in] dipole Switch for dipole derivatives. + @return G First derivative of the energies in a N_param x N_coord array + @return GDx First derivative of the box dipole moment x-component in a N_param x N_coord array + @return GDy First derivative of the box dipole moment y-component in a N_param x N_coord array + @return GDz First derivative of the box dipole moment z-component in a N_param x N_coord array + + """ + def single_point(mvals_): + FF.make(mvals_) + if dipole: + return engine.energy_dipole() + else: + return engine.energy() + + ED0 = single_point(mvals) + G = OrderedDict() + G['potential'] = np.zeros((FF.np, ED0.shape[0])) + if dipole: + G['dipole'] = np.zeros((FF.np, ED0.shape[0], 3)) + for i in pgrad: + logger.info("%i %s\r" % (i, (FF.plist[i] + " "*30))) + edg, _ = f12d3p(fdwrap(single_point,mvals,i),h,f0=ED0) + if dipole: + G['potential'][i] = edg[:,0] + G['dipole'][i] = edg[:,1:] + else: + G['potential'][i] = edg[:] + return G + class Thermo(Target): """ - A target for fitting general experimental data sets. The - experimental data is described in a .txt file and is handled with a - `Quantity` subclass. + A target for fitting general experimental data sets. The source + data is described in a text file formatted according to the + Specification. """ def __init__(self, options, tgt_opts, forcefield): @@ -26,137 +488,308 @@ def __init__(self, options, tgt_opts, forcefield): super(Thermo, self).__init__(options, tgt_opts, forcefield) ## Parameters - # Reference experimental data - self.set_option(tgt_opts, "expdata_txt", forceprint=True) - # Quantities to calculate - self.set_option(tgt_opts, "quantities", forceprint=True) + # Source data (experimental data, model parameters and weights) + self.set_option(tgt_opts, "source", forceprint=True) + # Observables to calculate + self.set_option(tgt_opts, "observables", "user_observable_names", forceprint=True) # Length of simulation chain - self.set_option(tgt_opts, "n_sim_chain", forceprint=True) + self.set_option(tgt_opts, "simulations", "user_simulation_names", forceprint=True) # Number of time steps in the equilibration run self.set_option(tgt_opts, "eq_steps", forceprint=True) # Number of time steps in the production run self.set_option(tgt_opts, "md_steps", forceprint=True) + # Time step (in femtoseconds) + self.set_option(tgt_opts, "timestep", forceprint=True) + # Sampling interval (in picoseconds) + self.set_option(tgt_opts, "interval", forceprint=True) + # Save trajectories? + self.set_option(tgt_opts, "save_traj", forceprint=True) ## Variables # Prefix names for simulation data self.simpfx = "sim" - # Data points for quantities + # Data points for observables self.points = [] - # Denominators for quantities + # Denominators for observables self.denoms = {} - # Weights for quantities + # Weights for observables self.weights = {} + # The list of simulations that we'll be running. + self.SimNames = [i.lower() for i in self.user_simulation_names] + # Store the dictionary of allowed suffixes + self.OptionDict['crdsfx'] = self.crdsfx + + ## Read source data and initialize points; creates self.Data, self.Indices and self.Columns objects. + self.read_source(os.path.join(self.root, self.tgtdir, self.source)) - ## Read experimental data and initialize points - self._read_expdata(os.path.join(self.root, - self.tgtdir, - self.expdata_txt)) + ## Set up self.Observables. + self.initialize_observables() + + ## Set up self.Simulations. + self.initialize_simulations() ## Copy run scripts from ForceBalance installation directory for f in self.scripts: LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), os.path.join(self.root, self.tempdir, f)) - - def _read_expdata(self, expdata): - """Read and store experimental data. + + def read_source(self, srcfnm): + """Read and store source data. Parameters ---------- - expdata : string - Read experimental data from this filename. + srcfnm : string + Read source data from this filename. Returns ------- Nothing """ - fp = open(expdata) - - line = fp.readline() - foundHeader = False - names = None - units = None - label_header = None - label_unit = None - count = 0 - while line: - # Skip comments and blank lines - if line.lstrip().startswith("#") or not line.strip(): - line = fp.readline() + + logger.info('Parsing source file %s\n' % srcfnm) + source = parse1(srcfnm) + printcool_dictionary(source.metadata, title="Metadata") + revhead = [] + col = '' + colnames = [] + + units = defaultdict(str) + + for i, head in enumerate(source.heading): + if i == 0 and head.lower() == 'index': # Treat special case because index can also mean other things + revhead.append('index') continue + newh, punit, col = stand_head(head, col) + if col not in colnames + ['temp', 'pres', 'n_ic']: colnames.append(col) + revhead.append(newh) + if punit != '': + units[newh] = punit + source.heading = revhead + + if len(set(revhead)) != len(revhead): + logger.error('Column headings : ' + str(revhead) + '\n') + logger.error('\x1b[91mColumn headings are not unique!\x1b[0m\n') + raise RuntimeError - if "=" in line: # Read variable - param, value = line.split("=") - param = param.strip().lower() - if param == "denoms": - for e, v in enumerate(value.split()): - self.denoms[self.quantities[e]] = float(v) - elif param == "weights": - for e, v in enumerate(value.split()): - self.weights[self.quantities[e]] = float(v) - elif foundHeader: # Read exp data - count += 1 - vals = line.split() - - label = (vals[0], label_header, label_unit) - refs = np.array(vals[1:-2:2]).astype(float) - wts = np.array(vals[2:-2:2]).astype(float) - temperature = float(vals[-2]) - pressure = None if vals[-1].lower() == "none" else \ - float(vals[-1]) + if revhead[0] != 'index': + logger.error('\x1b[91mIndex column heading is not present\x1b[0m\n(Add an Index column on the left!)\n') + raise RuntimeError + + uqidx = [] + saveidx = '' + index = [] + snum = 0 + drows = [] + # thisidx = Index that is built from the current row (may be empty) + # saveidx = Index that may have been saved from a previous row + # snum = Subindex number + # List of (index, heading) tuples which contain file references. + fref = OrderedDict() + for rn, row in enumerate(source.table): + this_insert = [] + thisidx = row[0] + if thisidx != '': + saveidx = thisidx + snum = 0 + if saveidx in uqidx: + logger.error('Index %s is duplicated in data table\n' % i) + raise RuntimeError + uqidx.append(saveidx) + index.append((saveidx, snum)) + if saveidx == '': + logger.error('Row of data : ' + str(row) + '\n') + logger.error('\x1b[91mThis row does not have an index!\x1b[0m\n') + raise RuntimeError + snum += 1 + if any([':' in fld for fld in row[1:]]): + # Here we read rows from another data table. + # Other files may be referenced in the cell of a primary + # table using filename:column_number (numbered from 1). + # Rules: (1) No matter where the filename appears in the column, + # the column is inserted at the beginning of the system index. + # (2) There can only be one file per system index / column. + # (3) The column heading in the secondary file that's being + # referenced must match that of the reference in the primary file. + col2 = '' + for cid_, fld in enumerate(row[1:]): + if ':' not in fld: continue + cid = cid_ + 1 + def reffld_error(reason=''): + logger.error('Row: : ' + ' '.join(row) + '\n') + logger.error('Entry : ' + fld + '\n') + logger.error('This filename:column reference is not valid!%s' % + (' (%s)' % reason if reason != '' else '')) + raise RuntimeError + if len(fld.split(':')) != 2: + reffld_error('Wrong number of colon-separated fields') + if not isint(fld.split(':')[1]): + reffld_error('Must be an integer after the colon') + fnm = fld.split(':')[0] + fcol_ = int(fld.split(':')[1]) + fpath = os.path.join(os.path.split(srcfnm)[0], fnm) + if not os.path.exists(fpath): + reffld_error('%s does not exist' % fpath) + if (saveidx, revhead[cid]) in fref: + reffld_error('%s already contains a file reference' % (saveidx, revhead[cid])) + subfile = parse1(fpath) + fcol = fcol_ - 1 + head2, punit2, col2 = stand_head(subfile.heading[fcol], col2) + if revhead[cid] != head2: + reffld_error("Column heading of %s (%s) doesn't match original (%s)" % (fnm, head2, revhead[cid])) + fref[(saveidx, revhead[cid])] = [row2[fcol] for row2 in subfile.table] + + # Insert the file-referenced data tables appropriately into + # our main data table. + for (saveidx, head), newcol in fref.items(): + inum = 0 + for irow in range(len(source.table)): + if index[irow][0] != saveidx: continue + lrow = irow + cidx = revhead.index(head) + source.table[irow][cidx] = newcol[inum] + inum += 1 + if inum >= len(newcol): break + for inum1 in range(inum, len(newcol)): + lrow += 1 + nrow = ['' for i in range(len(revhead))] + nrow[cidx] = newcol[inum1] + source.table.insert(lrow, nrow) + index.insert(lrow, (saveidx, inum1)) - dp = Point(count, label=label, refs=refs, weights=wts, - names=names, units=units, - temperature=temperature, pressure=pressure) - self.points.append(dp) - else: # Read headers - foundHeader = True - headers = zip(*[tuple(h.split("_")) for h in line.split() - if h != "w"]) - - label_header = list(headers[0])[0] - label_unit = list(headers[1])[0] - names = list(headers[0][1:-2]) - units = list(headers[1][1:-2]) - - line = fp.readline() - - def retrieve(self, dp): - """Retrieve the molecular dynamics (MD) results and store the calculated - quantities in the Point object dp. + for rn, row in enumerate(source.table): + drows.append([i if i != '' else np.nan for i in row[1:]]) - Parameters - ---------- - dp : Point - Store the calculated quantities in this point. + # Turn it into a pandas DataFrame. + self.Data = pd.DataFrame(drows, columns=revhead[1:], index=pd.MultiIndex.from_tuples(index, names=['index', 'subindex'])) - Returns - ------- - Nothing - - """ - abspath = os.path.join(os.getcwd(), '%d/md_result.p' % dp.idnr) + def intcol(col): + if col in self.Data.columns: + for idx in self.Data.index: + if not isnpnan(self.Data[col][idx]): + self.Data[col][idx] = int(self.Data[col][idx]) - if os.path.exists(abspath): - logger.info('Reading data from ' + abspath + '.\n') + def floatcol(col): + if col in self.Data.columns: + self.Data[col] = self.Data[col].astype(float) - vals, errs, grads = lp_load(open(abspath)) + intcol('n_ic') + floatcol('temp') + floatcol('pres') - dp.data["values"] = vals - dp.data["errors"] = errs - dp.data["grads"] = grads + # A list of indices (i.e. top-level indices) which correspond + # to sets of simulations that we'll be running. + self.Indices = [] + for idx in self.Data.index: + if idx[0] not in self.Indices: + self.Indices.append(idx[0]) - else: - msg = 'The file ' + abspath + ' does not exist so we cannot read it.\n' - logger.warning(msg) + # List of columns in the main data table. + self.Columns = self.Data.columns + + # Certain things (e.g. run parameters like temp, pres) are keyed to the index only. + chead = [] + crows = [] + for index in self.Indices: + crow = [] + for head in ['temp', 'pres']: + if head not in self.Data: continue + if head not in chead: chead.append(head) + rlist = list(set([i for i in self.Data.ix[index][head][:] if not isnpnan(i)])) + if len(rlist) != 1: + logger.error('Heading %s should appear once for index %s (found %i)' % (head, index, len(rlist))) + raise RuntimeError + crow.append(rlist[0]) + crows.append(crow[:]) - dp.data["values"] = np.zeros((len(self.quantities))) - dp.data["errors"] = np.zeros((len(self.quantities))) - dp.data["grads"] = np.zeros((len(self.quantities), self.FF.np)) + # Now create the mini data table. + self.Data2 = pd.DataFrame(crows, columns=chead, index=self.Indices) + + return + + def initialize_observables(self): + """ + Determine Observable objects to be created. Checks to see + whether simulations are consistent with observables (i.e. no + missing simulations or ambiguities.) + + In order to implement a new observable, create a class in + observable.py and add it to OMap. + """ + self.Observables = OrderedDict() + for oname in [stand_head(i, '')[2] for i in self.user_observable_names]: + if oname in self.Observables: + logger.error('%s was already specified as an observable' % (oname)) + raise RuntimeError + self.Observables[oname] = OrderedDict() + for index in self.Indices: + if oname in OMap: + Objs = [] + Reqs = [] + for OClass in OMap[oname]: + OObj = OClass(self.Data.ix[index]) + Reqs.append(OObj.requires.keys()) + if all([i in self.SimNames for i in OObj.requires.keys()]): + Objs.append(OObj) + if len(Objs) == 0: + logger.error('Observable %s is specified but required simulations are missing; choose %s' % (oname, ' or '.join([str(r) for r in Reqs]))) + raise RuntimeError + if len(Objs) > 1: + logger.error("Observable %s not uniquely mapped to simulations (choose between %s)" % (oname, ' or '.join([o.name in Objs]))) + raise RuntimeError + logger.info("Creating %s observable object for index %s\n" % (Objs[0].name, index)) + self.Observables[oname][index] = Objs[0] + else: + logger.error('%s is specified but there is no corresponding Observable class\n' % oname) + raise RuntimeError + return + + def initialize_simulations(self): + + """ + + Prepare simulations to be launched. Set initial conditions + and create directories. This function is intended to be run + at the start of each optimization cycle, so that initial + conditions may be easily set. + + """ + # print narrow() + self.Simulations = OrderedDict([(i, []) for i in self.Indices]) + # Dictionary of time series to extract from each simulation. + SimTS = defaultdict(set) + # Check to see whether each observable can be unambiguously calculated from the specified simulations + for obsname in self.Observables.keys(): + sreq = self.Observables[obsname][self.Indices[0]].requires + for i, j in sreq.items(): + SimTS[i].update(set(j)) + printcool_dictionary(sreq, title="Observable %s uses these simulations : timeseries" % obsname) + printcool_dictionary({i:' '.join(sorted(list(SimTS[i]))) for i in sorted(SimTS.keys())}, + title="Needed Simulations : Extracted Timeseries") + unused = sorted(list(set(self.SimNames).difference(set(SimTS.keys())))) + if len(unused) > 0: + logger.error("Simulation %s is specified but it's never used to calculate any observables" % ', '.join(unused)) + raise RuntimeError + + for index in self.Indices: + for stype, tsset in SimTS.items(): + if 'n_ic' in self.Data2.ix[index]: + n_ic = self.Data2.ix[index]['n_ic'] + print n_ic + if n_ic < 1: + logger.error("n_ic must >= 1") + raise RuntimeError + else: + n_ic = 1 + for icn in range(n_ic): + sname = "%s_%i" % (stype, icn) if n_ic > 1 else stype + self.Simulations[index].append(Simulation(self, self.Data.ix[index], sname, index, stype, icn, sorted(list(tsset)))) + return + def submit_jobs(self, mvals, AGrad=True, AHess=True): """This routine is called by Objective.stage() and will run before "get". - It submits the jobs and the stage() function will wait for jobs + It submits the jobs (or runs them locally) and the stage() function will wait for jobs to complete. Parameters @@ -173,46 +806,61 @@ def submit_jobs(self, mvals, AGrad=True, AHess=True): Nothing. """ - # Set up and run the simulation chain on all points. - for pt in self.points: - # Create subdir - try: - os.makedirs(str(pt.idnr)) - except OSError as exception: - if exception.errno != errno.EEXIST: - raise - - # Goto subdir - os.chdir(str(pt.idnr)) - # Link dir contents from target subdir to current temp directory. - for f in self.scripts: - LinkFile(os.path.join(self.root, self.tempdir, f), - os.path.join(os.getcwd(), f)) - - link_dir_contents(os.path.join(self.root, self.tgtdir, - str(pt.idnr)), os.getcwd()) - - # Dump the force field to a pickle file - with wopen('forcebalance.p') as f: - lp_dump((self.FF, mvals, self.OptionDict, AGrad), f) - - # Run the simulation chain for point. - cmdstr = ("%s python md_chain.py " % self.mdpfx + - " ".join(self.quantities) + " " + - "--engine %s " % self.engname + - "--length %d " % self.n_sim_chain + - "--name %s " % self.simpfx + - "--temperature %f " % pt.temperature + - "--pressure %f " % pt.pressure + - "--nequil %d " % self.eq_steps + - "--nsteps %d " % self.md_steps) - _exec(cmdstr, copy_stderr=True, outfnm='md_chain.out') - - os.chdir('..') + printcool("Submitting jobs") + cwd = os.getcwd() + wq = getWorkQueue() + for index in self.Indices: + # if 'temp' in self.Data: + # tset = set([iself.Data['temp'].ix[index][:]) + temp = self.Data2['temp'].ix[index] if 'temp' in self.Data2 else None + pres = self.Data2['pres'].ix[index] if 'pres' in self.Data2 else None + for Sim in self.Simulations[index]: + Sim.gradient = AGrad + simd = os.path.join(os.getcwd(), index, Sim.name) + Sim.RunDirs[Counter()] = simd + GoInto(simd) + # Submit or run the simulation if the result file does not exist. + if not (os.path.exists('result.p') or os.path.exists('result.p.bz2')): + # Write coordinate file in the current location. + M = Molecule(os.path.join(self.root, Sim.initial))[Sim.iframe] + M.write(Sim.EngOpts['coords']) + # Copy auxiliary files to the current location. + for i, j in Sim.faux.values(): + shutil.copy2(i, j) + # Write to disk: Force field object, current parameter values, target options + with wopen('forcebalance.p') as f: lp_dump((self.FF,mvals,Sim),f) + # Copy scripts to the current location. + for f in self.scripts: + LinkFile(os.path.join(os.path.split(__file__)[0], "data", f), + os.path.join(os.getcwd(), f)) + # Put together the command. + cmdlist = ['%s python md_one.py' % (self.mdpfx)] + if temp != None: + cmdlist.append('-T %g' % float(temp)) + if pres != None: + cmdlist.append('-P %g' % float(pres)) + # if AGrad or AHess: + # cmdlist.append('-g') + # cmdlist.append('-o') + # cmdlist += Sim.timeseries.keys() + cmdstr = ' '.join(cmdlist) + print cmdstr + # # cmdstr = '%s python md1.py %s %.3f %.3f' % (self.runpfx, temperature, pressure) + # if wq == None: + # logger.info("Running condensed phase simulation locally.\n") + # logger.info("You may tail -f %s/npt.out in another terminal window\n" % os.getcwd()) + _exec(cmdstr, copy_stderr=True, outfnm='md_one.out') + # else: + # queue_up(wq, command = cmdstr+' &> npt.out', + # input_files = self.nptfiles + self.scripts + ['forcebalance.p'], + # output_files = ['npt_result.p.bz2', 'npt.out'] + self.extra_output, tgt=self) + os.chdir(cwd) + return def indicate(self): """Shows optimization state.""" + return AGrad = hasattr(self, 'Gp') PrintDict = OrderedDict() @@ -237,7 +885,7 @@ def print_item(key, physunit): (self.Xp[key], self.Wp[key], self.Xp[key]*self.Wp[key]))) - for i, q in enumerate(self.quantities): + for i, q in enumerate(self.observables): print_item(q, self.points[0].ref["units"][i]) PrintDict['Total'] = "% 10s % 8s % 14.5e" % ("","", self.Objective) @@ -247,14 +895,14 @@ def print_item(key, physunit): printcool_dictionary(PrintDict, color=4, title=Title, keywidth=31) return - def objective_term(self, quantity): + def objective_term(self, observable): """Calculates the contribution to the objective function (the term) for a - given quantity. + given observable. Parameters ---------- - quantity : string - Calculate the objective term for this quantity. + observable : string + Calculate the objective term for this observable. Returns ------- @@ -269,18 +917,18 @@ def objective_term(self, quantity): Gradient = np.zeros(self.FF.np) Hessian = np.zeros((self.FF.np, self.FF.np)) - # Grab ref data for quantity - qid = self.quantities.index(quantity) + # Grab ref data for observable + qid = self.observables.index(observable) Exp = np.array([pt.ref["refs"][qid] for pt in self.points]) Weights = np.array([pt.ref["weights"][qid] for pt in self.points]) - Denom = self.denoms[quantity] + Denom = self.denoms[observable] # Renormalize weights Weights /= np.sum(Weights) logger.info("Renormalized weights to " + str(np.sum(Weights)) + "\n") - logger.info(("Physical quantity '%s' uses denominator = %g %s\n" % - (quantity.capitalize(), Denom, - self.points[0].ref["units"][self.quantities.index(quantity)]))) + logger.info(("Physical observable '%s' uses denominator = %g %s\n" % + (observable.capitalize(), Denom, + self.points[0].ref["units"][self.observables.index(observable)]))) # Grab calculated values values = np.array([pt.data["values"][qid] for pt in self.points]) @@ -316,7 +964,7 @@ def objective_term(self, quantity): GradMapPrint.append([' %8.2f %8.1f' % (temp, press)] + ["% 9.3e" % i for i in grads[pt.idnr-1]]) - o = wopen('gradient_%s.dat' % quantity) + o = wopen('gradient_%s.dat' % observable) for line in GradMapPrint: print >> o, ' '.join(line) o.close() @@ -333,7 +981,7 @@ def objective_term(self, quantity): def get(self, mvals, AGrad=True, AHess=True): """Return the contribution to the total objective function. This is a - weighted average of the calculated quantities. + weighted average of the calculated observables. Parameters ---------- @@ -358,22 +1006,35 @@ def get(self, mvals, AGrad=True, AHess=True): Gradient = np.zeros(self.FF.np) Hessian = np.zeros((self.FF.np, self.FF.np)) + # Retrieve simulation results. + for index in self.Indices: + for Sim in self.Simulations[index]: + Sim.retrieve() + + # Calculate observable values. + for oname in self.Observables.keys(): + for index in self.Indices: + self.Observables[oname][index].aggregate(self.Simulations[index], AGrad) + if oname == 'density': self.Observables[oname][index].evaluate(AGrad) + + return { "X": Objective, "G": Gradient, "H": Hessian} + for pt in self.points: # Update data point with MD results self.retrieve(pt) obj = OrderedDict() reweighted = [] - for q in self.quantities: + for q in self.observables: # Returns dict with keys "X"=objective term value, "G"=the # gradient, "H"=the hessian, and "info"=printed info about points obj[q] = self.objective_term(q) - # Apply weights for quantities (normalized) + # Apply weights for observables (normalized) if obj[q]["X"] == 0: self.weights[q] = 0.0 - # Store weights sorted in the order of self.quantities + # Store weights sorted in the order of self.observables reweighted.append(self.weights[q]) # Normalize weights @@ -381,16 +1042,16 @@ def get(self, mvals, AGrad=True, AHess=True): wtot = np.sum(reweighted) reweighted = reweighted/wtot if wtot > 0 else reweighted - # Picks out the "X", "G" and "H" keys for the quantities sorted in the - # order of self.quantities. Xs is N-array, Gs is NxM-array and Hs is - # NxMxM-array, where N is number of quantities and M is number of + # Picks out the "X", "G" and "H" keys for the observables sorted in the + # order of self.observables. Xs is N-array, Gs is NxM-array and Hs is + # NxMxM-array, where N is number of observables and M is number of # parameters. Xs = np.array([dic["X"] for dic in obj.values()]) Gs = np.array([dic["G"] for dic in obj.values()]) Hs = np.array([dic["H"] for dic in obj.values()]) # Target contribution is (normalized) weighted averages of the - # individual quantity terms. + # individual observable terms. Objective = np.average(Xs, weights=(None if np.all(reweighted == 0) else \ reweighted), axis=0) if AGrad: @@ -403,7 +1064,7 @@ def get(self, mvals, AGrad=True, AHess=True): if not in_fd(): # Store results to show with indicator() function self.Xp = {q : dic["X"] for (q, dic) in obj.items()} - self.Wp = {q : reweighted[self.quantities.index(q)] + self.Wp = {q : reweighted[self.observables.index(q)] for (q, dic) in obj.items()} self.Pp = {q : dic["info"] for (q, dic) in obj.items()} @@ -451,4 +1112,158 @@ def __str__(self): return "\n".join(msg) +class Simulation(object): + + """ + Data container for a MD simulation (specified by index, simulation + type, initial condition). These settings are written to a file + then passed to md_one.py. + + The Simulation object is passed between the master ForceBalance + process and the remote script (e.g. md_one.py). + """ + + type_settings = {'gas': {'pbc' : 0}, + 'liquid': {'pbc' : 1}, + 'solid': {'pbc' : 1, 'anisotropic_box' : 1}, + 'bilayer': {'pbc' : 1, 'anisotropic_box' : 1}} + + def __init__(self, tgt, data, name, index, stype, icn, tsnames): + + # The name of the simulation (refers to a directory under job.tmp/target/iter_x/index/name) + self.name = name + # The Index that the simulation belongs to. + self.index = index + # The type of simulation (liquid, gas, solid, bilayer...) + if stype not in Simulation.type_settings.keys(): + logger.error('Simulation type %s is not supported at this time') + raise RuntimeError + # The reference data! May contain parameters for calculating observables. + self.Data = copy.deepcopy(data) + # Type of the simulation (map to simulation settings) + self.type = stype + # Root directory of the ForceBalance job + self.root = tgt.root + # Locate the initial coordinate file and frame number. + self.initial, self.iframe = find_file(os.path.join(tgt.root, tgt.tgtdir), index, stype, tgt.crdsfx, True, icn) + # The time series for the simulation. + self.timeseries = OrderedDict([(i, []) for i in tsnames]) + self.timeseries['potential'] = [] # ALWAYS require the potential energy. + # The file extension that the coordinate file will be written with. + self.fext = os.path.splitext(self.initial)[1] + # Auxiliary files to be copied to the current location prior to running the simulation. + self.faux = OrderedDict() + for sfx in tgt.auxsfx: + auxf = find_file(os.path.join(tgt.root, tgt.tgtdir), index, stype, sfx, False)[0] + self.faux[os.path.splitext(auxf)[1]] = (auxf, "%s%s" % (self.type, os.path.splitext(auxf)[1])) + # Name of the simulation engine + self.engname = tgt.engname + # Whether to use the CUDA platform (OpenMM only). + self.force_cuda = tgt.OptionDict.get('force_cuda', 0) + # Finite difference step size. + self.h = tgt.h + # Active parameters to differentiate over. + self.pgrad = tgt.pgrad + # List of ITERATION : directory pairs. + self.RunDirs = OrderedDict() + # List of ITERATION : result data structures. + self.Results = OrderedDict() + + pbc = Simulation.type_settings[self.type]['pbc'] + + #---- + # MD options, passed straight to the molecular_dynamics() method + #---- + self.MDOpts = OrderedDict() + # The time step in femtoseconds. + self.MDOpts['timestep'] = tgt.timestep + # The number of equilibration MD steps. + self.MDOpts['nequil'] = tgt.eq_steps + # The number of production MD steps. + self.MDOpts['nsteps'] = tgt.md_steps + # The number of MD steps between sampling. + self.MDOpts['nsave'] = int(1000 * tgt.interval / self.MDOpts['timestep']) + # Flag for minimizing the energy. + self.MDOpts['minimize'] = tgt.OptionDict.get('minimize_energy', 0) + # The number of threads for this simulation (no-PBC simulations are 1 thread). + self.MDOpts['threads'] = tgt.OptionDict.get('md_threads', 1) if pbc else 1 + # Whether to use multiple timestep integrator. + self.MDOpts['mts'] = tgt.OptionDict.get('mts_integrator', 0) + # The number of beads in an RPMD simulation. + self.MDOpts['rpmd_beads'] = tgt.OptionDict.get('rpmd_beads', 0) + # Print out lots of information. + self.MDOpts['verbose'] = True + # Save trajectory to disk. + self.MDOpts['save_traj'] = tgt.save_traj + # Number of MD steps between successive calls to Monte Carlo barostat (OpenMM only). + self.MDOpts['nbarostat'] = tgt.OptionDict.get('n_mcbarostat', 25) + # Flag for anisotropic simulation cell (OpenMM only). + self.MDOpts['anisotropic'] = tgt.OptionDict.get('anisotropic_box', 0) + # The time step for the 'fast forces' in femtoseconds in MTS integrators. + self.MDOpts['timestep'] = tgt.OptionDict.get('faststep', 0.25) + # Simulation temperature in Kelvin. + self.MDOpts['temperature'] = getval(self.Data, 'temp') if 'temp' in self.Data else None + # Simulation pressure in bar. + self.MDOpts['pressure'] = getval(self.Data, 'pres') if 'pres' in self.Data else None + + #---- + # Engine options, used in creating the Engine object + #---- + self.EngOpts = OrderedDict() + # Whether to use periodic boundary conditions. + self.EngOpts['pbc'] = pbc + # The name of the coordinate file to be written prior to running the simulation. + self.EngOpts['coords'] = "%s%s" % (self.type, self.fext) + # Software-specific options. + if self.engname == 'openmm': + self.EngOpts['platname'] = 'CUDA' if self.EngOpts['pbc'] else 'Reference' + else: + if self.force_cuda: + logger.error("force_cuda option is set, but has no effect on Gromacs engine.") ; raise RuntimeError + if self.MDOpts['rpmd_beads'] > 0: + logger.error('Only the OpenMM engine can handle RPMD simulations.') ; raise RuntimeError + if self.MDOpts['mts']: + logger.error('Only OpenMM is configured to use multiple timestep integrator.') ; raise RuntimeError + if self.MDOpts['anisotropic']: + logger.error('Only OpenMM is configured to use anisotropic pressure coupling.') ; raise RuntimeError + + if self.engname == 'gromacs': + self.EngOpts['gmxpath'] = tgt.gmxpath + self.EngOpts['gmxsuffix'] = tgt.gmxsuffix + self.EngOpts['gmx_top'] = self.faux['.top'][1] + self.EngOpts['gmx_mdp'] = self.faux['.mdp'][1] + + if self.engname == 'tinker': + self.EngOpts['tinkerpath'] = tgt.tinkerpath + self.EngOpts['tinker_key'] = self.faux['.key'][1] + def __str__(self): + msg = [] + msg.append("Simulation: Name %s, Index %s, Type %s" % (self.name, self.index, self.type)) + msg.append("Initial Conditions: File %s Frame %i" % (self.initial, self.iframe)) + msg.append("Timeseries Names: %s" % (', '.join(self.timeseries.keys()))) + return "\n".join(msg) + + def retrieve(self, cycle=None): + """Retrieve the molecular dynamics (MD) results and store the calculated + observables in the Simulation object. + + Parameters + ---------- + dp : Point + Store the calculated observables in this point. + + Returns + ------- + Nothing + + """ + if cycle == None: cycle = Counter() + + abspath = os.path.join(self.RunDirs[cycle], 'md_result.p') + if os.path.exists(abspath): + logger.info('Simulation %s reading data from ' % self.name + abspath.replace(self.root+'/', '') + ' .\n') + self.Results[cycle] = lp_load(open(abspath)) + else: + logger.warning('The file ' + abspath + ' does not exist so we cannot read it.\n') + self.Results[cycle] = None diff --git a/studies/004_thermo_liquid_bromine/forcefield/about.txt b/studies/004_thermo/forcefield/about.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/forcefield/about.txt rename to studies/004_thermo/forcefield/about.txt diff --git a/studies/004_thermo_liquid_bromine/forcefield/bro.itp b/studies/004_thermo/forcefield/bro.itp similarity index 100% rename from studies/004_thermo_liquid_bromine/forcefield/bro.itp rename to studies/004_thermo/forcefield/bro.itp diff --git a/studies/004_thermo_liquid_bromine/forcefield/bro.orig.itp b/studies/004_thermo/forcefield/bro.orig.itp similarity index 100% rename from studies/004_thermo_liquid_bromine/forcefield/bro.orig.itp rename to studies/004_thermo/forcefield/bro.orig.itp diff --git a/studies/004_thermo_liquid_bromine/optimize.in b/studies/004_thermo/optimize.in similarity index 100% rename from studies/004_thermo_liquid_bromine/optimize.in rename to studies/004_thermo/optimize.in diff --git a/studies/004_thermo_liquid_bromine/single.in b/studies/004_thermo/single.in similarity index 96% rename from studies/004_thermo_liquid_bromine/single.in rename to studies/004_thermo/single.in index 8bfd6281f..1ec7dbde5 100644 --- a/studies/004_thermo_liquid_bromine/single.in +++ b/studies/004_thermo/single.in @@ -66,9 +66,11 @@ $target name LiquidBromine type Thermo_GMX weight 1.0 -expdata_txt expset.txt +source expset.txt quantities density h_vap -n_sim_chain 2 +simulations liquid gas md_steps 100000 eq_steps 50000 +interval 0.1 +timestep 2.0 $end diff --git a/studies/004_thermo/targets/Lipid_MIX/lipidcol1.txt b/studies/004_thermo/targets/Lipid_MIX/lipidcol1.txt new file mode 100644 index 000000000..ca440c7e5 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MIX/lipidcol1.txt @@ -0,0 +1,67 @@ +metadata = 'Mao' + +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 C15 scd323.txt:2 C34 scd323.txt:4 1 58 1 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 +60C 333.15 1 0.65 1 C15 scd333.txt:2 C34 scd333.txt:4 0 58 0 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 +65C 338.15 1 0.671 1 C15 scd338.txt:2 C34 scd338.txt:4 1 58 0 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 +80C 353.15 1 0.719 1 C15 scd353.txt:2 C34 scd353.txt:4 1 58 0 10 + C17 C36 + C18 C37 + C19 C38 + C20 C39 + C21 C40 + C22 C41 + C23 C42 + C24 C43 + C25 C44 + C26 C45 + C27 C46 + C28 C47 + C29 C48 + C30 C49 + C31 C50 diff --git a/studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt b/studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt new file mode 100644 index 000000000..8d97a0bcc --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MIX/lipidcol2a.txt @@ -0,0 +1,5 @@ +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 +60C 333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 +65C 338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 +80C 353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 diff --git a/studies/004_thermo/targets/Lipid_MIX/scd323.txt b/studies/004_thermo/targets/Lipid_MIX/scd323.txt new file mode 100644 index 000000000..57c1cfa5b --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MIX/scd323.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.198144 C36 0.198144 +C18 0.198128 C37 0.198128 +C19 0.198111 C38 0.198111 +C20 0.198095 C39 0.198095 +C21 0.198079 C40 0.198079 +C22 0.197799 C41 0.197537 +C23 0.198045 C42 0.198046 +C24 0.178844 C43 0.178844 +C25 0.167527 C44 0.178565 +C26 0.148851 C45 0.16751 +C27 0.134117 C46 0.148834 +C28 0.119646 C47 0.1341 +C29 0.100969 C48 0.110956 +C30 0.07546 C49 0.087549 +C31 C50 diff --git a/studies/004_thermo/targets/Lipid_MIX/scd333.txt b/studies/004_thermo/targets/Lipid_MIX/scd333.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MIX/scd333.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo/targets/Lipid_MIX/scd338.txt b/studies/004_thermo/targets/Lipid_MIX/scd338.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MIX/scd338.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo/targets/Lipid_MIX/scd353.txt b/studies/004_thermo/targets/Lipid_MIX/scd353.txt new file mode 100644 index 000000000..31434af01 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MIX/scd353.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.162535 C36 0.162535 +C18 0.162817 C37 0.162817 +C19 0.162535 C38 0.162535 +C20 0.162535 C39 0.162535 +C21 0.162817 C40 0.162817 +C22 0.151268 C41 0.151268 +C23 0.142254 C42 0.142254 +C24 0.127606 C43 0.127606 +C25 0.117465 C44 0.117465 +C26 0.101972 C45 0.117183 +C27 0.092676 C46 0.102535 +C28 0.081408 C47 0.092676 +C29 0.068732 C48 0.073239 +C30 0.051267 C49 0.056901 +C31 C50 diff --git a/studies/004_thermo/targets/Lipid_MUL/lipidcol2a.txt b/studies/004_thermo/targets/Lipid_MUL/lipidcol2a.txt new file mode 100644 index 000000000..8d97a0bcc --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MUL/lipidcol2a.txt @@ -0,0 +1,5 @@ +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 scd323.txt:1 scd323.txt:2 scd323.txt:3 scd323.txt:4 1 58 1 10 +60C 333.15 1 0.65 1 scd333.txt:1 scd333.txt:2 scd333.txt:3 scd333.txt:4 0 58 0 10 +65C 338.15 1 0.671 1 scd338.txt:1 scd338.txt:2 scd338.txt:3 scd338.txt:4 1 58 0 10 +80C 353.15 1 0.719 1 scd353.txt:1 scd353.txt:2 scd353.txt:3 scd353.txt:4 1 58 0 10 diff --git a/studies/004_thermo/targets/Lipid_MUL/scd323.txt b/studies/004_thermo/targets/Lipid_MUL/scd323.txt new file mode 100644 index 000000000..57c1cfa5b --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MUL/scd323.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.198144 C36 0.198144 +C18 0.198128 C37 0.198128 +C19 0.198111 C38 0.198111 +C20 0.198095 C39 0.198095 +C21 0.198079 C40 0.198079 +C22 0.197799 C41 0.197537 +C23 0.198045 C42 0.198046 +C24 0.178844 C43 0.178844 +C25 0.167527 C44 0.178565 +C26 0.148851 C45 0.16751 +C27 0.134117 C46 0.148834 +C28 0.119646 C47 0.1341 +C29 0.100969 C48 0.110956 +C30 0.07546 C49 0.087549 +C31 C50 diff --git a/studies/004_thermo/targets/Lipid_MUL/scd333.txt b/studies/004_thermo/targets/Lipid_MUL/scd333.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MUL/scd333.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo/targets/Lipid_MUL/scd338.txt b/studies/004_thermo/targets/Lipid_MUL/scd338.txt new file mode 100644 index 000000000..26ee01c85 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MUL/scd338.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.181121 C36 0.181121 +C18 0.180807 C37 0.180807 +C19 0.181055 C38 0.181055 +C20 0.180741 C39 0.180741 +C21 0.180989 C40 0.180989 +C22 0.168579 C41 0.168579 +C23 0.169109 C42 0.169109 +C24 0.149104 C43 0.149104 +C25 0.138945 C44 0.138945 +C26 0.123439 C45 0.138629 +C27 0.112717 C46 0.123968 +C28 0.098056 C47 0.112121 +C29 0.083396 C48 0.089303 +C30 0.062266 C49 0.070424 +C31 C50 diff --git a/studies/004_thermo/targets/Lipid_MUL/scd353.txt b/studies/004_thermo/targets/Lipid_MUL/scd353.txt new file mode 100644 index 000000000..31434af01 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_MUL/scd353.txt @@ -0,0 +1,17 @@ +Scd1_idx Scd1 Scd2_idx Scd2 +C15 C34 +C17 0.162535 C36 0.162535 +C18 0.162817 C37 0.162817 +C19 0.162535 C38 0.162535 +C20 0.162535 C39 0.162535 +C21 0.162817 C40 0.162817 +C22 0.151268 C41 0.151268 +C23 0.142254 C42 0.142254 +C24 0.127606 C43 0.127606 +C25 0.117465 C44 0.117465 +C26 0.101972 C45 0.117183 +C27 0.092676 C46 0.102535 +C28 0.081408 C47 0.092676 +C29 0.068732 C48 0.073239 +C30 0.051267 C49 0.056901 +C31 C50 diff --git a/studies/004_thermo/targets/Lipid_RIT/lipidcol1.txt b/studies/004_thermo/targets/Lipid_RIT/lipidcol1.txt new file mode 100644 index 000000000..b2824acc2 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_RIT/lipidcol1.txt @@ -0,0 +1,67 @@ +metadata = 'Mao' + +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic + 50C 323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 + 60C 333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 + 65C 338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 + 80C 353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo/targets/Lipid_SPC/lipidcol1.txt b/studies/004_thermo/targets/Lipid_SPC/lipidcol1.txt new file mode 100644 index 000000000..9aadee124 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_SPC/lipidcol1.txt @@ -0,0 +1,67 @@ +metadata = 'Mao' + +Index T P (atm) Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 323.15 1 0.631 1 C15 C34 1 58 1 10 + C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +60C 333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +65C 338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +80C 353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt new file mode 100644 index 000000000..2001d85b1 --- /dev/null +++ b/studies/004_thermo/targets/Lipid_TAB/lipidcol1.txt @@ -0,0 +1,65 @@ +Index T P Al Al_wt Scd1_idx Scd1 Scd2_idx Scd2 Scd1_wt Kappa Kappa_wt n_ic +50C 1 0.631 1 C15 C34 1 58 1 10 + 323.15 C17 0.198144 C36 0.198144 + C18 0.198128 C37 0.198128 + C19 0.198111 C38 0.198111 + C20 0.198095 C39 0.198095 + C21 0.198079 C40 0.198079 + C22 0.197799 C41 0.197537 + C23 0.198045 C42 0.198046 + C24 0.178844 C43 0.178844 + C25 0.167527 C44 0.178565 + C26 0.148851 C45 0.16751 + C27 0.134117 C46 0.148834 + C28 0.119646 C47 0.1341 + C29 0.100969 C48 0.110956 + C30 0.07546 C49 0.087549 + C31 C50 +60C 333.15 1 0.65 1 C15 C34 0 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +65C 338.15 1 0.671 1 C15 C34 1 58 0 10 + C17 0.181121 C36 0.181121 + C18 0.180807 C37 0.180807 + C19 0.181055 C38 0.181055 + C20 0.180741 C39 0.180741 + C21 0.180989 C40 0.180989 + C22 0.168579 C41 0.168579 + C23 0.169109 C42 0.169109 + C24 0.149104 C43 0.149104 + C25 0.138945 C44 0.138945 + C26 0.123439 C45 0.138629 + C27 0.112717 C46 0.123968 + C28 0.098056 C47 0.112121 + C29 0.083396 C48 0.089303 + C30 0.062266 C49 0.070424 + C31 C50 +80C 353.15 1 0.719 1 C15 C34 1 58 0 10 + C17 0.162535 C36 0.162535 + C18 0.162817 C37 0.162817 + C19 0.162535 C38 0.162535 + C20 0.162535 C39 0.162535 + C21 0.162817 C40 0.162817 + C22 0.151268 C41 0.151268 + C23 0.142254 C42 0.142254 + C24 0.127606 C43 0.127606 + C25 0.117465 C44 0.117465 + C26 0.101972 C45 0.117183 + C27 0.092676 C46 0.102535 + C28 0.081408 C47 0.092676 + C29 0.068732 C48 0.073239 + C30 0.051267 C49 0.056901 + C31 C50 diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.gro b/studies/004_thermo/targets/LiquidBromine/1/gas.gro similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.gro rename to studies/004_thermo/targets/LiquidBromine/1/gas.gro diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.mdp b/studies/004_thermo/targets/LiquidBromine/1/gas.mdp similarity index 79% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.mdp rename to studies/004_thermo/targets/LiquidBromine/1/gas.mdp index 1a64558b6..2a7427065 100644 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.mdp +++ b/studies/004_thermo/targets/LiquidBromine/1/gas.mdp @@ -11,16 +11,16 @@ nstxtcout = 50 xtc_grps = System energygrps = System -nstlist = 10 -ns_type = grid -rlist = 0.9 +nstlist = 0 +ns_type = simple +rlist = 0.0 vdwtype = cut-off coulombtype = cut-off -rcoulomb = 0.9 -rvdw = 0.9 -rvdw_switch = 0.9 +rcoulomb = 0.0 +rvdw = 0.0 +rvdw_switch = 0.0 constraints = all-bonds -pbc = xyz +pbc = no tcoupl = v-rescale tc_grps = System diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.top b/studies/004_thermo/targets/LiquidBromine/1/gas.top similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim2.top rename to studies/004_thermo/targets/LiquidBromine/1/gas.top diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.gro b/studies/004_thermo/targets/LiquidBromine/1/liquid.gro similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.gro rename to studies/004_thermo/targets/LiquidBromine/1/liquid.gro diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.mdp b/studies/004_thermo/targets/LiquidBromine/1/liquid.mdp similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.mdp rename to studies/004_thermo/targets/LiquidBromine/1/liquid.mdp diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.top b/studies/004_thermo/targets/LiquidBromine/1/liquid.top similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/1/sim1.top rename to studies/004_thermo/targets/LiquidBromine/1/liquid.top diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/about.txt b/studies/004_thermo/targets/LiquidBromine/about.txt similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/about.txt rename to studies/004_thermo/targets/LiquidBromine/about.txt diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/data.csv b/studies/004_thermo/targets/LiquidBromine/data.csv similarity index 100% rename from studies/004_thermo_liquid_bromine/targets/LiquidBromine/data.csv rename to studies/004_thermo/targets/LiquidBromine/data.csv diff --git a/studies/004_thermo/targets/LiquidBromine/expset.txt b/studies/004_thermo/targets/LiquidBromine/expset.txt new file mode 100644 index 000000000..766c88b32 --- /dev/null +++ b/studies/004_thermo/targets/LiquidBromine/expset.txt @@ -0,0 +1,8 @@ +# Experimental data for liquid bromine. + + Index Temp (K) Pressure (bar) Density (kg/m^3) w Hvap ( kJ/mol ) w + 1 298.15 1.01325 3102.8 1.0 29.96 1.0 + +# Variables: Denominators and weights for quantities +Denoms = 30 0.3 +Weights = 1.0 1.0 diff --git a/studies/004_thermo/targets/LiquidBromine_CSV/data.csv b/studies/004_thermo/targets/LiquidBromine_CSV/data.csv new file mode 100644 index 000000000..354b70778 --- /dev/null +++ b/studies/004_thermo/targets/LiquidBromine_CSV/data.csv @@ -0,0 +1,8 @@ +"# Experimental data for liquid, bromine.",,,,,, +,,,,,, +Index,Temp (K),Density (kg/m^3),w,Hvap (kJ/mol),w,Pressure (bar) +298.15K-1.0atm,298.15,3102.8,1,29.96,1,1.01325 +,,,,,, +# Variables: Denominators and weights for quantities,,,,,, +Denoms,=,30,0.3,,, +Weights,=,1.0,1.0,,, diff --git a/studies/004_thermo/targets/LiquidBromine_TAB/data.tab.txt b/studies/004_thermo/targets/LiquidBromine_TAB/data.tab.txt new file mode 100644 index 000000000..155adc470 --- /dev/null +++ b/studies/004_thermo/targets/LiquidBromine_TAB/data.tab.txt @@ -0,0 +1,8 @@ +"# Experimental data for liquid, bromine." + +Index Temp (K) Density (kg/m^3) w Hvap (kJ/mol) w Pressure (bar) +0 298.15 3102.8 1 29.96 1 1.01325 + +# Variables: Denominators and weights for quantities +Denoms = 30 0.3 +Weights = 1 1 diff --git a/studies/004_thermo/test_parse.in b/studies/004_thermo/test_parse.in new file mode 100644 index 000000000..89edb69a7 --- /dev/null +++ b/studies/004_thermo/test_parse.in @@ -0,0 +1,156 @@ +# ForceBalance input file generated by MakeInputFile.py + +# The octothorpe '#' is a comment symbol + +# Note: If the specified value is 'None' then the option will truly be set to +# None - not the string 'None' + +# Note: 'Section' option types are more complicated and may require you to read +# the documentation + +# Note: Boolean option types require no value, the key being present implies +# 'True' + +$options +# (string) Directory containing force fields, relative to project directory +ffdir forcefield + +# (string) Type of the penalty, L2 or L1 in the optimizer +penalty_type L2 + +# (allcap) The job type, defaults to a single-point evaluation of objective +# function +jobtype newton + +# (list) The names of force fields, corresponding to directory +# forcefields/file_name.(itp|gen) +forcefield bro.itp + +# (int) Maximum number of steps in an optimization +maxstep 100 + +# (float) Convergence criterion of step size (just needs to fall below this +# threshold) +convergence_step 0.05 + +# (float) Convergence criterion of objective function (in MainOptimizer this is +# the stdev of x2 over 10 steps) +convergence_objective 0.5 + +# (float) Convergence criterion of gradient norm +convergence_gradient 0.01 + +# (float) Minimum eigenvalue for applying steepest descent correction in the +# MainOptimizer +eig_lowerbound 0.01 + +# (float) Step size for finite difference derivatives in many functions +# (get_(G/H) in fitsim, FDCheckG) +finite_difference_h 0.001 + +# (float) Factor for multiplicative penalty function in objective function +penalty_additive 1.0 + +trust0 1.0 +mintrust 0.05 +error_tolerance 1.0 +adaptive_factor 0.2 +adaptive_damping 1.0 +normalize_weights no +print_hessian + +# Charge constraints are taken care of using "evals". +constrain_charge false +verbose_options false +backup false + +$end + +$target +name LiquidBromine +type Thermo_GMX +weight 1.0 +source expset.txt +observables density h_vap +simulations liquid gas +md_steps 100000 +eq_steps 50000 +$end + +$target +name LiquidBromine_CSV +type Thermo_GMX +weight 1.0 +source data.csv +observables density h_vap +simulations liquid gas +md_steps 100000 +eq_steps 50000 +$end + +$target +name LiquidBromine_TAB +type Thermo_GMX +weight 1.0 +source data.tab.txt +observables density h_vap +simulations liquid gas +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_SPC +type Thermo_GMX +weight 1.0 +source lipidcol1.txt +observables al scd kappa +simulations bilayer +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_RIT +type Thermo_GMX +weight 1.0 +source lipidcol1.txt +observables al scd kappa +simulations bilayer +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_TAB +type Thermo_GMX +weight 1.0 +source lipidcol1.txt +observables al scd kappa +simulations bilayer +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_MUL +type Thermo_GMX +weight 1.0 +source lipidcol2a.txt +observables al scd kappa +simulations bilayer +md_steps 100000 +eq_steps 50000 +$end + +$target +name Lipid_MIX +type Thermo_GMX +weight 1.0 +source lipidcol1.txt +observables al scd kappa +simulations bilayer +md_steps 100000 +eq_steps 50000 +$end + diff --git a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt b/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt deleted file mode 100644 index 8f211bdaa..000000000 --- a/studies/004_thermo_liquid_bromine/targets/LiquidBromine/expset.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Experimental data for liquid bromine. - - Temp_K Density_kg/m^3 w Enthalpy_kJ/mol w Temperature_K Pressure_bar - 298.15 3102.8 1.0 29.96 1.0 298.15 1.01325 - -# Variables: Denominators and weights for quantities -Denoms = 30 0.3 -Weights = 1.0 1.0 diff --git a/test/test_system.py b/test/test_system.py index 927e7e783..9f87b9cd4 100644 --- a/test/test_system.py +++ b/test/test_system.py @@ -179,7 +179,7 @@ def runTest(self): class TestThermoBromineStudy(ForceBalanceTestCase): def setUp(self): super(ForceBalanceTestCase,self).setUp() - os.chdir('studies/004_thermo_liquid_bromine') + os.chdir('studies/004_thermo') def tearDown(self): os.system('rm -rf results *.bak *.tmp') diff --git a/test/test_thermo.py b/test/test_thermo.py new file mode 100644 index 000000000..9baa65e1b --- /dev/null +++ b/test/test_thermo.py @@ -0,0 +1,53 @@ +import unittest +import sys, os, re +import forcebalance +import abc +import numpy +from __init__ import ForceBalanceTestCase +from collections import defaultdict, OrderedDict + +class TestParser(ForceBalanceTestCase): + def setUp(self): + os.chdir(os.path.join(os.getcwd(), 'studies', '004_thermo')) + input_file='test_parse.in' + options, tgt_opts = forcebalance.parser.parse_inputs(input_file) + forcefield = forcebalance.forcefield.FF(options) + self.objective = forcebalance.objective.Objective(options, tgt_opts, forcefield) + + def test_lipid_parser(self): + """Test for equality amongst multiple ways to parse lipid experimental data""" + # Build a dictionary of target name : dataframes + lipid_data = OrderedDict() + for tgt in self.objective.Targets: + if 'lipid' in tgt.name.lower(): + lipid_data[tgt.name] = tgt.Data + # Double loop over different targets + for i, ikey in enumerate(lipid_data.keys()): + for j, jkey in enumerate(lipid_data.keys()): + # Check column headings and row indices + self.assertTrue(all(lipid_data[ikey].columns == lipid_data[jkey].columns), msg='\nColumn headings not equal for %s and %s' % (ikey, jkey)) + self.assertTrue(all(lipid_data[ikey].index == lipid_data[jkey].index), msg='\nRow indices not equal for %s and %s' % (ikey, jkey)) + # Make dictionary representation of dataframes + dicti = lipid_data[ikey].to_dict() + dictj = lipid_data[jkey].to_dict() + # Here's where it gets complicated. + # Loop over data columns. + for column in dicti.keys(): + dseti = defaultdict(set) + dsetj = defaultdict(set) + # For each data column, the dataframe contains a + # set of data which is keyed by the system index. + # Each row is further keyed by the subindex, but + # this test assumes that the subindices are + # irrelevant (equivalent to saying the ordering of + # rows - or the relative vertical position of data + # in cells across columns - is not important. Not + # entirely true but anyway...) + for idx in dicti[column].keys(): + dseti[idx[0]].add(dicti[column][idx]) + dsetj[idx[0]].add(dictj[column][idx]) + self.assertEqual(dseti, dsetj, msg='\n%s data column not equal for targets %s and %s' % (i, ikey, jkey)) + + +if __name__ == '__main__': + unittest.main()