Source code for OpenSMOG

# Copyright (c) 2020-2022 The Center for Theoretical Biological Physics (CTBP) - Rice University and Northeastern University
# This file is from the OpenSMOG project, released under the MIT License. 

R"""  
The :class:`~.OpenSMOG` classes perform molecular dynamics using Structure-Based Models (SBM) for biomolecular simulations.
:class:`~.OpenSMOG` uses force fields generated by SMOG 2, and it allows the simulations of a wide variety of potential forms, including commonly employed C-alpha and all-atom variants.
Details about the default models in SMOG 2 can be found in the following resources:
    - **SMOG server**: https://smog-server.org/smog2/
    - **C-alpha**: Clementi, C., Nymeyer, H. and Onuchic, J.N., 2000. Topological and energetic factors: what determines the structural details of the transition state ensemble and “en-route” intermediates for protein folding? An investigation for small globular proteins. Journal of molecular biology, 298(5), pp.937-953.
    - **All-Atom**: Whitford, P.C., Noel, J.K., Gosavi, S., Schug, A., Sanbonmatsu, K.Y. and Onuchic, J.N., 2009. An all‐atom structure‐based potential for proteins: bridging minimal models with all‐atom empirical forcefields. Proteins: Structure, Function, and Bioinformatics, 75(2), pp.430-441.
"""

# with OpenMM 7.7.0, the import calls have changed. So, try both, if needed
try:
    try:
        # >=7.7.0
        from openmm.app import *
        from openmm import *
        from openmm.unit import *
    except:
        # earlier
        print('Unable to load OpenMM as \'openmm\'. Will try the older way \'simtk.openmm\'')
        from simtk.openmm.app import *
        from simtk.openmm import *
        from simtk.unit import *
except:
    SBM.opensmog_quit('Failed to load OpenMM. Check your configuration.')


import os
import numpy as np
import xml.etree.ElementTree as ET
from lxml import etree
import sys
#from .OpenSMOG_Reporter import forcesReporter, stateReporter
import re as regex
from pathlib import Path
from sys import stdout
import subprocess
from contextlib import redirect_stdout

[docs]class SBM: version="1.1.1" R""" The :class:`~.SBM` class performs Molecular dynamics simulations using structure-based (SMOG) models to investigate a broad range of biomolecular dynamics, including domain rearrangements in proteins, folding and ligand binding in RNA, and large-scale rearrangements in ribonucleoprotein assemblies. In its simplest form, a structure-based model defines a particular structure (usually obtained from X-ray, cryo-EM, or NMR methods) as the energetic global minimum. Find more information about SMOG models and OpenSMOG at http://smog-server.org The :class:`~.SBM` sets the environment to start the molecular dynamics simulations. Args: time_step (float, required): Simulation time step in units of :math:`\tau`. collision_rate (float, required): Friction/Damping constant in units of reciprocal time (:math:`1/\tau`). r_cutoff (float, required): Cutoff distance to consider non-bonded interactions in units of nanometers. pbc (boolean, optional): Turn PBC on/off. Default value: False cmm (boolean, optional): Remove center of mass motion. Default value: True temperature (float,required): Temperature in reduced units name (str): Name used in the output files. (Default value: :code:`OpenSMOG`) warn (boolean, optional): Give cautionary warnings... (Default value: True) """ def __init__(self, time_step, collision_rate, r_cutoff, temperature = -1, cmm = True, pbc = False, name = "OpenSMOG", warn = True): if not isinstance(temperature,(float,int)): print('ERROR: temperature must be a numeric value') sys.exit(1) if not isinstance(r_cutoff,(float,int)): print('ERROR: r_cutoff must be a numeric value') sys.exit(1) if not isinstance(time_step,(float,int)): print('ERROR: time_step must be a numeric value') sys.exit(1) if not isinstance(collision_rate,(float,int)): print('ERROR: collision_rate must be a numeric value') sys.exit(1) self.name = name self.warn = warn self.dt = time_step * picoseconds self.started=0 self.gamma = collision_rate / picosecond self.rcutoff = r_cutoff * nanometers if temperature == -1: print(''' ERROR: Temperature was not given. ''') sys.exit(1) if self.warn: if not time_step in [0.0005, 0.002]: print(''' WARNING: The given time_step value is not the one usually employed in the SBM models. Make sure this value is correct. The suggested values are: time_step=0.0005 for default C-alpha and time_step = 0.002 for the default all-atom model. ''') if collision_rate != 1.0: print(''' WARNING: The given collision_rate value is not the one usually employed in the SBM models. Make sure this value is correct. The suggested value for many models is: collision_rate=1.0. ''') if not r_cutoff in [1.1 ,0.65]: print(''' NOTE: The given r_cutoff value is not the one usually employed in the SBM models with OpenSMOG. Make sure this value is correct. The suggested values for r_cutoff are: 1.1 for the default C-alpha model and 0.65 for the default all-atom model. Depending on your model, other choices may be more appropriate. ''') self.temperature = (temperature / 0.00831446261815) * kelvin self.forceApplied = False self.loaded = False self.folder = "." self.forceCount = 0 self.pbc=pbc self.cmm=cmm self.nonbonded_present = False self.setupCheck = False
[docs] def help(): R"""Prints information about using OpenSMOG. """ print(""" ############################################################################# Information about using OpenSMOG ############################################################################# Units in OpenSMOG Time: When using OpenSMOG, it is important to consider the units. Since OpenSMOG is built on OpenMM libraries, some unit labels can be misleading. Specifically, SMOG models use reduced units (R.U.). While it is possible to estimate the effective timescales in these models, OpenMM reporters may label the timescales as \"ns\". One should not consider this label to be meaningful. In terms of accounting, 1 \"ns\" is equal to 1000 reduced time units. For detailed discussions of units, one can consult any number of recent papers by the SMOG team, such as Yang et al. JCP 2019, 151, 085102, which estimated that one reduced unit corresponds to 50-1000 ps. Temperature: When initializing your simulation with the SBM class, the temperature is provided in reduced units, where a typical protein will fold at around 1 reduced unit. Energy: OpenSMOG will report all energies in reduced units. However, OpenMM libraries may still label the energies as units of kJ/mol. This label should be ignored, unless you calibrated your SMOG model to use this energy scale. Usage Example While a broad range of simulation protocols may be used, here is a very basic example for how to launch simulations using the OpenSMOG module with OpenMM. >from OpenSMOG import SBM Choose some basic runtime settings. We will call our system 2ci2 >SMOGrun = SBM(name='2ci2', time_step=0.002, collision_rate=1.0, r_cutoff=1.2, temperature=0.5) Note: By default, PBC is not turned on. If you want to include PBCs, then use: >SMOGrun = SBM(name='2ci2', time_step=0.002, collision_rate=1.0, r_cutoff=1.2, temperature=0.5, pbc=True) Select a platform and GPU IDs (if needed) >SMOGrun.setup_openmm(platform='cuda',GPUindex='default') Decide where to save your data (here, output_2ci2) >SMOGrun.saveFolder('output_2ci2') You may optionally set some input file names to variables >SMOG_grofile = '2ci2.gro' >SMOG_topfile = '2ci2.top' >SMOG_xmlfile = '2ci2.xml' Load your force field data >SMOGrun.loadSystem(Grofile=SMOG_grofile, Topfile=SMOG_topfile, Xmlfile=SMOG_xmlfile) Create the context, and prepare the simulation to run >SMOGrun.createSimulation() Perform energy minimization >SMOGrun.minimize(tolerance=1) Decide how frequently to save data >SMOGrun.createReporters(trajectory=True, energies=True, energy_components=True, interval=10**3) Launch the simulation >SMOGrun.run(nsteps=10**6, report=True, interval=10**3) Note: One can also run for clock time, rather than number of timesteps. For example, to run for 10 hours, you could use: >SMOGrun.runForClockTime(time=10) Optional additional considerations: It can be useful to save the state, or checkpoint at the end of your run, so that you may continue the run later. Here are some examples that build upon the above use case. To save the state file: >statefilename='smog.state' >SMOGrun.simulation.saveState(statefilename) To save the checkpoint file: >checkpointfilename='smog.checkpoint' >SMOGrun.saveCheckpoint(checkpointfilename) If you saved a state or checkpoint in a previous run, here is an example for how to continue the simulation. >from OpenSMOG import SBM >SMOGrun2 = SBM(name='2ci2', time_step=0.002, collision_rate=1.0, r_cutoff=1.2, temperature=0.5) >SMOGrun2.setup_openmm(platform='cuda',GPUindex='default') >SMOGrun2.saveFolder('output_2ci2') >SMOG_grofile = '2ci2.gro' >SMOG_topfile = '2ci2.top' >SMOG_xmlfile = '2ci2.xml' >SMOGrun2.loadSystem(Grofile=SMOG_grofile, Topfile=SMOG_topfile, Xmlfile=SMOG_xmlfile) >SMOGrun2.createSimulation() >SMOGrun2.createReporters(trajectory=True, energies=True, energy_components=True, interval=10**3) Load the previous state OR checkpoint file. How to load state file >statefilename='smog.state' >SMOGrun2.simulation.loadState(statefilename) How to load a checkpoint >checkpointfilename='smog.checkpoint' >SMOGrun2.simulation.loadCheckpoint(checkpointfilename) After loading either the state, or checkpoint, then run the simulation >SMOGrun2.run(nsteps=10**6, report=True, interval=10**3) For more information and help, see the OpenSMOG and SMOG 2 websites. OpenSMOG: https://opensmog.readthedocs.io/en/latest/ SMOG 2: https://smog-server.org If you have questions/suggestions, you can also email us at info@smog-server.org """)
[docs] def opensmog_quit(message): print("\n\nOpenSMOG error: {}\n\n".format(message)) sys.exit(1)
[docs] def minimize(self,tolerance=1.0,maxIterations=0): R"""Wrapper for L-BFGS energy minimization. Args: tolerance (float, required): Stopping criteria value between iterations. When the error between iteration is below this value, the minimization stops. (Default value: :code:`1.0`). maxIteration (int, required): Number of maximum steps to be performed in the minimization simulation. (Default value: :code:`0`). """ self.simulation.minimizeEnergy(tolerance=tolerance,maxIterations=maxIterations) # it is very important that we reset the velocities. minimization warps the velocity values self.simulation.context.setVelocitiesToTemperature(self.temperature*kelvin)
[docs] def setup_openmm(self, platform='opencl', precision='single', GPUindex='default', integrator="langevin"): R"""Sets up the parameters of the simulation OpenMM platform. Args: platform (str, optional): Platform to use in the simulations. Opitions are *CUDA*, *OpenCL*, *HIP*, *CPU*, *Reference*. (Default value: :code:`OpenCL`). precision (str, optional): Numerical precision type of the simulation. Options are *single*, *mixed*, *double*. (Default value: :code:`single`). For details check the `OpenMM Documentation <http://docs.openmm.org/latest/developerguide/developer.html#numerical-precision>`__. GPUIndex (str, optional): Set of Platform device index IDs. Ex: 0,1,2 for the system to use the devices 0, 1 and 2. integrator (str, or integrator object): Integrator to use in the simulations. Options are *langevin*, *langevinMiddle, *variableLangevin* and, *brownian*. You may also build your own integrator object and pass it, rather than use a named integrator. (Default value: :code:`langevin`). """ precision = precision.lower() if precision not in ["mixed", "single", "double"]: SBM.opensmog_quit("Precision must be mixed, single or double") properties = {} properties["Precision"] = precision if GPUindex.lower() != "default": properties["DeviceIndex"] = GPUindex self.properties = properties if platform.lower() == "opencl": platformObject = Platform.getPlatformByName('OpenCL') elif platform.lower() == "reference": platformObject = Platform.getPlatformByName('Reference') self.properties = {} elif platform.lower() == "cuda": platformObject = Platform.getPlatformByName('CUDA') elif platform.lower() == "cpu": platformObject = Platform.getPlatformByName('CPU') self.properties = {} elif platform.lower() == "hip": platformObject = Platform.getPlatformByName('HIP') else: SBM.opensmog_quit("Unknown platform") self.platform = platformObject if isinstance(integrator,str): if integrator.lower() == "langevin": self.integrator = LangevinIntegrator(self.temperature, self.gamma, self.dt) elif integrator.lower() == "langevinmiddle": self.integrator = LangevinMiddleIntegrator(self.temperature, self.gamma, self.dt) elif integrator.lower() == "variablelangevin": self.integrator = VariableLangevinIntegrator(self.temperature, self.gamma, self.dt) elif integrator.lower() == "brownian": self.integrator = BrownianIntegrator(self.temperature, self.gamma, self.dt) else: SBM.opensmog_quit("Unknown/unsupported integrator name: {}".format(integrator)) self.integrator_type = integrator else: self.integrator = integrator self.integrator_type = "UserDefined" self.setupCheck = True self.forceDict = {} self.forcesDict = {}
[docs] def saveFolder(self, folder): R"""Sets the folder path to save data. Args: folder (str, required): Folder path to save the simulation data. If the folder path does not exist, the function will create the directory. """ Path(folder).mkdir( parents=True, exist_ok=True ) self.folder = folder
[docs] def loadSystem(self, Grofile, Topfile, Xmlfile): R"""Loads the input files in the OpenMM system platform. The input files are generated using SMOG2 software with the flag :code:`-OpenSMOG`. Details on how to create the files can be found in the `SMOG2 User Manual <https://smog-server.org/smog2/>`__. Tutorials for how to generate the inputs files can be found on the smog-server page, or `here <https://opensmog.readthedocs.io>`__. Args: Grofile (file, required): Initial structure for the MD simulations in *.gro* file format generated by SMOG2 software with the flag :code:`-OpenSMOG`. (Default value: :code:`None`). Topfile (file, required): Topology *.top* file format generated by SMOG2 software with the flag :code:`-OpenSMOG`. The topology file lists the interactions between the system atoms except for the "Native Contacts" potential that is provided to OpenSMOG in a *.xml* file. (Default value: :code:`None`). Xmlfile (file, required): The *.xml* file can contain the information that defines the "Contact" and nonbonded potentials. The *.xml* file is generated by SMOG2 software with the flag :code:`-OpenSMOG`, which support custom potential functions. (Default value: :code:`None`). """ def _checknames(f1,f2,f3): fn1 = os.path.basename(f1).rsplit('.', 1)[0] fn2 = os.path.basename(f2).rsplit('.', 1)[0] fn3 = os.path.basename(f3).rsplit('.', 1)[0] if fn1 == fn2 and fn1 ==fn3: return False else: return True if _checknames(Grofile, Topfile, Xmlfile) and self.warn : print('WARNING: The Gro, Top and Xml files have different prefixes. Most people use the same name, so this may be a mistake.') self.inputNames = [Grofile, Topfile, Xmlfile] if (self.setupCheck): self._check_file(Grofile, '.gro') self.loadGro(Grofile) self._check_file(Topfile, '.top') self.loadTop(Topfile) self._check_file(Xmlfile, '.xml') self.loadXml(Xmlfile) print("Loaded force field and config files.") else: SBM.opensmog_quit("setup_openmm() is not defined. Please set it before using LoadSystem()\n")
def _check_file(self, filename, ext): if not (filename.lower().endswith(ext)): SBM.opensmog_quit('Wrong file extension: {} must to be {} extension'.format(filename, ext))
[docs] def loadGro(self, Grofile): R"""Loads the *.gro* file format in the OpenMM system platform. The input files are generated using SMOG2 software with the flag :code:`-OpenSMOG`. Details on how to create the files can be found in the `SMOG2 User Manual <https://smog-server.org/smog2/>`__. Tutorial on how to generate the inputs files can be found on the smog-server page, or `here <https://opensmog.readthedocs.io>`__. Args: Grofile (file, required): Initial structure for the MD simulations in *.gro* file format. This is typically generated by SMOG2 with the flag :code:`-OpenSMOG`. (Default value: :code:`None`). """ if not os.path.exists(Grofile): SBM.opensmog_quit("Could not find gro file {}".format(Grofile)) try: self.Gro = GromacsGroFile(Grofile) except: SBM.opensmog_quit("Failed to load {}".format(Grofile))
[docs] def loadTop(self, Topfile): R"""Loads the *.top* file format in the OpenMM system platform. The input files are generated using SMOG2 software with the flag :code:`-OpenSMOG`. Details on how to create the files can be found in the `SMOG2 User Manual <https://smog-server.org/smog2/>`__. Tutorial on how to generate the inputs files can be found on the smog-server page, or `here <https://opensmog.readthedocs.io>`__. Args: Topfile (file, required): Topology *.top* file format generated by SMOG2 software with the flag :code:`-OpenSMOG`. The topology file defines the interactions between atoms, but the "Contacts" and nonbonded potentials can be provided with the *.xml* file. (Default value: :code:`None`). """ if not os.path.exists(Topfile): SBM.opensmog_quit("Could not find top file {}".format(Topfile)) if self.pbc == True: print('This simulation will use periodic boundary conditions') self.Top = GromacsTopFile(Topfile,unitCellDimensions=self.Gro.getUnitCellDimensions()) self.system = self.Top.createSystem(nonbondedMethod=CutoffPeriodic,nonbondedCutoff=self.rcutoff,removeCMMotion = self.cmm) else: print('This simulation will not use Periodic boundary conditions') self.Top = GromacsTopFile(Topfile) self.system = self.Top.createSystem(nonbondedMethod=CutoffNonPeriodic,nonbondedCutoff=self.rcutoff,removeCMMotion = self.cmm) nforces = len(self.system.getForces()) for force_id, force in enumerate(self.system.getForces()): force.setForceGroup(force_id) self.forcesDict[force.__class__.__name__] = force self.forceCount +=1
def _splitForces_contacts(self): #Contacts cont_data=self.data['contacts'] n_forces = len(cont_data[3]) forces = {} for n in range(n_forces): forces[cont_data[3][n]] = [cont_data[0][n], cont_data[1][n], cont_data[2][n]] self.contacts = forces def _splitForces_nb(self): #Contacts nb_data=self.data['nonbond'] n_forces = len(nb_data[0]) forces = {} for n in range(n_forces): forces[nb_data[0][n]] = [nb_data[1][n], nb_data[2][n], nb_data[3][n]] self.nonbond = forces def _customSmogForce(self, name, data): #first set the equation contacts_ff = CustomBondForce(data[0]) #second set the number of variable for pars in data[1]: contacts_ff.addPerBondParameter(pars) #third, apply the bonds from each pair of atoms and the related variables. pars = [pars for pars in data[1]] for iteraction in data[2]: atom_index_i = int(iteraction['i'])-1 atom_index_j = int(iteraction['j'])-1 parameters = [float(iteraction[k]) for k in pars] contacts_ff.addBond(atom_index_i, atom_index_j, parameters) #forth, if the are global variables, add them to the force if self.constants_present==True: for const_key in self.data['constants']: contacts_ff.addGlobalParameter(const_key,self.data['constants'][const_key]) self.forcesDict[name] = contacts_ff contacts_ff.setForceGroup(self.forceCount) self.forceCount +=1 def _customSmogForce_nb(self, name, data): #first set the equation nonbond_ff = CustomNonbondedForce(data[0]) #Define per particle parameters nonbond_ff.addPerParticleParameter('q') nonbond_ff.addPerParticleParameter('type') #If the are global variables, add them to the force if self.constants_present==True: for const_key in self.data['constants']: nonbond_ff.addGlobalParameter(const_key,self.data['constants'][const_key]) #Add cutoff as global parameter nonbond_ff.addGlobalParameter('r_c',self.rcutoff.value_in_unit(nanometer)) #Load old nonbonded force for later use original_nonbonded=self.system.getForce(0) original_customnonbonded=self.system.getForce(1) #Get atoms types and number of types atom_types=[] for i in range(len(data[2])): atom_types.append(data[2][i]['type1']) atom_types.append(data[2][i]['type2']) self.atom_types=np.unique(np.array(atom_types)) natom_types=len(self.atom_types) #Generate tables for each parameter tables={} for par in data[1]: tables[par]=np.ones([natom_types,natom_types])*np.nan #Fill tables with nonbond_param for nonbond_params in data[2]: #Get atoms id (name to number) type1=np.where(self.atom_types==nonbond_params['type1'])[0][0] type2=np.where(self.atom_types==nonbond_params['type2'])[0][0] for par in tables: tables[par][type1][type2]=nonbond_params[par] tables[par][type2][type1]=nonbond_params[par] missing={} #Generate Function from tables table_fun={} for par in data[1]: #Check none have nans if np.sum(np.isnan(tables[par]))!=0: en=np.argwhere(np.isnan(tables[par])) pairs="" for i in en: if i[0] <= i[1]: stri=str(i[0])+","+str(i[1]) if stri in missing: missing[str(stri)].append(par) else: missing[str(stri)]=[par] table_fun[par]=Discrete2DFunction(natom_types,natom_types,list(np.ravel(tables[par]))) nonbond_ff.addTabulatedFunction(par,table_fun[par]) if len(missing) != 0: message="" for i in missing.keys(): j=i.split(",") ati=self.atom_types[int(j[0])] atj=self.atom_types[int(j[1])] message=message+ati+","+atj+" : " for j in missing[i]: message=message+j+" " message=message+"\n" SBM.opensmog_quit('XML file error:\n Atom-type pairs are missing the following parameters\n{}'.format(message)) #Get exceptions from topfile import for i in range(original_customnonbonded.getNumExclusions()): exclusion_id = original_customnonbonded.getExclusionParticles(i) nonbond_ff.addExclusion(exclusion_id[0],exclusion_id[1]) ## ADD PARTICLES TO THE FORCE BASED ON SPECIFYING TYPE AND CHARGE ## CHARGES ## From nonbondedforce we get the charges for each atom atom_charges=[] ## Loop over every atom for i in range(self.system.getNumParticles()): atom_charge=original_nonbonded.getParticleParameters(i)[0] atom_charges.append(atom_charge.value_in_unit(constants.elementary_charge)) ## ATOM TYPES ## From molecule information we get atom types atom_types=[] ## Get name and multiplicity of each molecule molecules_keys=[self.Top._molecules[i][0] for i in range(len(self.Top._molecules)) ] molecules_mul=[self.Top._molecules[i][1] for i in range(len(self.Top._molecules)) ] ## Loop over molecules for i in range(len(molecules_keys)): molecule=molecules_keys[i] mult=molecules_mul[i] # Loop over atoms in each molecule for atom in self.Top._moleculeTypes[molecule].atoms: # If atoms are repeated, then include the same entrie (CL,CL,CL,... or MG,MG,MG,...) for _ in range(mult): atom_types.append(atom[1]) for i in range(self.system.getNumParticles()): # GET ATOM TYPE at_type=np.where(atom_types[i]==self.atom_types)[0][0] # GET ATOM CHARGE charge=atom_charges[i] # ADD PARTICLE TO EACH FORCE WITH CORRESPONDING CHARGE AND TYPE nonbond_ff.addParticle([charge,at_type]) #Set cutoff and nonbonded method if self.pbc == True: nonbond_ff.setNonbondedMethod(NonbondedForce.CutoffPeriodic) else: nonbond_ff.setNonbondedMethod(NonbondedForce.CutoffNonPeriodic) nonbond_ff.setCutoffDistance(self.rcutoff.value_in_unit(nanometer)) self.forcesDict['Nonbonded'+str(name)] = nonbond_ff nonbond_ff.setForceGroup(self.forceCount) self.forceCount +=1
[docs] def loadXml(self, Xmlfile): R"""Loads the *.xml* file format in the OpenMM system platform. The input files are generated using SMOG2 software with the flag :code:`-OpenSMOG`. Details on how to create the files can be found in the `SMOG2 User Manual <https://smog-server.org/smog2/>`__. Tutorial on how to generate the inputs files can be found on the smog-server page, or `here <https://opensmog.readthedocs.io>`__. Args: Xmlfile (file, required): The *.xml* file can contain information that defines the "Contact" and nonbonded potentials. The *.xml* file is generated by SMOG2 software with the flag :code:`-OpenSMOG`, which support custom potential energy functions. (Default value: :code:`None`). """ def validate(Xmlfile): path = "share/OpenSMOG.xsd" pt = os.path.dirname(os.path.realpath(__file__)) filepath = os.path.join(pt,path) xmlschema_doc = etree.parse(filepath) xmlschema = etree.XMLSchema(xmlschema_doc) if not os.path.exists(Xmlfile): SBM.opensmog_quit("Could not find XML file {}".format(Xmlfile)) xml_doc = etree.parse(Xmlfile) result = xmlschema.validate(xml_doc) log = xmlschema.error_log return result,log def import_xml2OpenSMOG(file_xml): XML_potential = ET.parse(file_xml) root = XML_potential.getroot() xml_data={} ## Constants self.constants_present=False if root.find('constants') != None: self.constants_present=True constants={} constants_xml = root.find('constants') for const in constants_xml.iter('constant'): constants[const.attrib['name']]=float(const.attrib['value']) xml_data['constants']=constants ## Contacts Force_Names=[] Expression=[] Parameters=[] Pairs=[] self.contacts_present=False if root.find('contacts') == None: print(''' NOTE: No contacts were found in the XML file. While this is allowed, it is unusual for a SMOG model to not contain contacts. Accordingly, This message may be due to an error in your workflow. However, if you are expecting your model to lack contacts, then you can safely ignore this message. ''') else: self.contacts_present=True contacts_xml=root.find('contacts') for i in range(len(contacts_xml)): for name in contacts_xml[i].iter('contacts_type'): Force_Names.append(name.attrib['name']) for expr in contacts_xml[i].iter('expression'): Expression.append(expr.attrib['expr']) internal_Param=[] for par in contacts_xml[i].iter('parameter'): internal_Param.append(par.text) Parameters.append(internal_Param) internal_Pairs=[] for atompairs in contacts_xml[i].iter('interaction'): internal_Pairs.append(atompairs.attrib) Pairs.append(internal_Pairs) xml_data['contacts']=[Expression,Parameters,Pairs,Force_Names] #Launch contact force function self.nonbond_present=False if root.find('nonbond') != None: self.nonbond_present=True nonbond_xml=root.find('nonbond') NonBond_Num=[] NBExpression=[] NBExpressionParameters=[] NBParameters=[] nonbond_xml=root.find('nonbond') for i in range(len(nonbond_xml)): for cut in nonbond_xml[i].iter('cutoff'): cutoff=cut.attrib['distance'] print("\nMODEL-SPECIFIC CUTOFF VALUE FOUND IN XML FILE!!!!\nWILL SET NON-BONDED CUTOFF TO {} nm\n".format(cutoff)) self.rcutoff=float(cutoff) * nanometer for types in nonbond_xml[i].iter('nonbond_bytype'): NonBond_Num.append(i) for expr in nonbond_xml[i].iter('expression'): NBExpression.append(expr.attrib['expr']) internal_Param=[] for par in nonbond_xml[i].iter('parameter'): internal_Param.append(par.text) NBExpressionParameters.append(internal_Param) internal_NBParam=[] for nbpar in nonbond_xml[i].iter('nonbond_param'): internal_NBParam.append(nbpar.attrib) NBParameters.append(internal_NBParam) xml_data['nonbond']=[NonBond_Num,NBExpression,NBExpressionParameters,NBParameters] else: self.nonbond_present=False return xml_data if not (self.forceApplied): result,log=validate(Xmlfile) if not result: SBM.opensmog_quit("The xml file does not adhere to the required schema (same as that used by SMOG 2). Error message:\n\n"+str(log)+"\n\n This typically means your xml file was corrupted, or you are using an incompatible version of SMOG 2. See smog-server.org for a list of OpenSMOG-SMOG2 versions that are compatible.") self.data = import_xml2OpenSMOG(Xmlfile) if self.contacts_present==True: self._splitForces_contacts() for force in self.contacts: print("Creating Contacts force {:} from xml file".format(force)) self._customSmogForce(force, self.contacts[force]) self.system.addForce(self.forcesDict[force]) if self.nonbond_present==True: self._splitForces_nb() for force in self.nonbond: print("Creating Nonbonded force {:} from xml file".format(force)) self._customSmogForce_nb(force, self.nonbond[force]) self.system.addForce(self.forcesDict['Nonbonded'+str(force)]) ## REMOVE OTHER NONBONDED FORCES self.system.removeForce(0) self.system.removeForce(0) self.forceApplied = True else: print('\n OpenSMOG forces already applied!!! \n')
[docs] def createSimulation(self): R"""Creates the simulation context and loads into the OpenMM platform. """ if not self.loaded: self.simulation = Simulation(self.Top.topology, self.system, self.integrator, self.platform, self.properties) self.simulation.context.setPositions(self.Gro.positions) self.simulation.context.setVelocitiesToTemperature(self.temperature) self.loaded = True else: print('\n Simulations context already created! \n')
def _checkFile(self,filename): if os.path.isfile(filename): i = 1 ck = True while i <= 10 and ck: newname = filename + "_" + str(i) if not os.path.isfile(newname): print("{:} already exists. Backing up to {:}".format(filename,newname)) os.rename(filename, newname) ck = False else: i += 1
[docs] def createReporters(self, trajectory=True, trajectoryName=None, trajectoryFormat='dcd', energies=True, energiesName=None, energy_components=False, energy_componentsName=None, logFileName='OpenSMOG.log', interval=1000): R"""Creates the reporters to provide the output data. Args: logFileName (str, optional): Name of log file. (Default value: :code:`OpenSMOG.log`). trajectory (bool, optional): Whether to save the trajectory *.dcd* file containing the position of the atoms as a function of time. (Default value: :code:`True`). trajectoryName (str, optional): Name of the trajectory file. trajectoryFormat (str, optional): File format of the trajectory file. Options are dcd, pdb, pdbx, hdf5, netcdf, and xtc. Saving the trajectory in the file formats hdf5, netcdf, and xtc require the MDTraj package. (Default value: :code:`dcd`). energies (bool, optional): Whether to save the energies in a *.txt* file containing five columns, comma-delimited. The header of the files shows the information of each collum: #"Step","Potential Energy (kJ/mole)","Kinetic Energy (kJ/mole)","Total Energy (kJ/mole)","Temperature (K)". (Default value: :code:`True`). energiesName (str, optional): Name of the energy file. energy_components (bool, optional): Whether to save the potential energy for each applied force in a *.txt* file containing several columns, comma-delimited. The header of the files shows the information of each column. An example of the header is: #"Step","electrostatic","Non-Contacts","Bonds","Angles","Dihedrals","contact_1-10-12". (Default value: :code:`False`). energy_componentsName (str, optional): Name of the energy_components file. logFileName (str, optional): Name of the log file. interval (int, required): Frequency to write the data to the output files. (Default value: :code:`10**3`) """ if os.path.basename(logFileName) != logFileName: SBM.opensmog_quit('logFileName is invalid. To specify the path, use the saveFolder method.') if not regex.search(".log$",logFileName): logFileName= logFileName + ".log" self.logFileName=logFileName self.outputNames = [] if trajectory: trajectoryFormat=trajectoryFormat.lower() if trajectoryName is None: trajfile = os.path.join(self.folder, self.name + '_trajectory.'+trajectoryFormat) else: if os.path.basename(trajectoryName) != trajectoryName: SBM.opensmog_quit('trajectoryName is invalid. To specify the path, use the saveFolder method.') trajfile = os.path.join(self.folder, trajectoryName + "."+trajectoryFormat) self._checkFile(trajfile) self.outputNames.append(trajfile) for i in ['hdf5', 'xtc', 'netcdf']: if i == trajectoryFormat: print(""" The """+str(trajectoryFormat)+""" trajectory format requires mdtraj to be loaded. Will try to import mdtraj...""") import mdtraj as md if trajectoryFormat == 'dcd': self.simulation.reporters.append(DCDReporter(trajfile, interval)) elif trajectoryFormat == 'pdb': self.simulation.reporters.append(PDBReporter(trajfile, interval)) elif trajectoryFormat == 'pdbx': self.simulation.reporters.append(PDBxReporter(trajfile, interval)) elif trajectoryFormat == 'hdf5': self.simulation.reporters.append(md.reporters.HDF5Reporter(trajfile, interval)) elif trajectoryFormat == 'netcdf': self.simulation.reporters.append(md.reporters.NetCDFReporter(trajfile, interval)) elif trajectoryFormat == 'xtc': self.simulation.reporters.append(md.reporters.XTCReporter(trajfile, interval)) else: SBM.opensmog_quit("Trajectory format "+str(trajectoryFormat)+" not recognized") if energies: if energiesName is None: energyfile = os.path.join(self.folder, self.name+ '_energies.txt') else: if os.path.basename(energiesName) != energiesName: SBM.opensmog_quit('energiesName is invalid. To specify the path, use the saveFolder method.') if regex.search(".txt$",energiesName): energyfile = os.path.join(self.folder, energiesName) else: energyfile = os.path.join(self.folder, energiesName + ".txt") self._checkFile(energyfile) self.outputNames.append(energyfile) self.simulation.reporters.append(stateReporter(energyfile, interval, step=True, potentialEnergy=True, kineticEnergy=True, totalEnergy=True,temperature=True, separator=",")) if energy_components: if energy_componentsName is None: forcefile = os.path.join(self.folder, self.name + '_forces.txt') else: if os.path.basename(energy_componentsName) != energy_componentsName: SBM.opensmog_quit('energy_componentsName is invalid. To specify the path, use the saveFolder method.') if regex.search(".txt$",energy_componentsName): forcefile = os.path.join(self.folder, energy_componentsName) else: forcefile = os.path.join(self.folder, energy_componentsName + '.txt') self._checkFile(forcefile) self.outputNames.append(forcefile) self.simulation.reporters.append(forcesReporter(forcefile, interval, self.forcesDict, step=True))
[docs] def run(self, nsteps, report=True, interval=10**4): if self.started != 0: SBM.opensmog_quit('The run or runForClockTime method was already called. Calling it a second time can lead to unpredictable behavior. If you want to extend a simulation, it is more appropriate to use checkpoint/state files. Use SBM.help() for more information on checkpoint/state file usage in OpenSMOG.') self.started=1 R"""Run the molecular dynamics simulation. Args: nsteps (int, required): Number of steps to be performed in the simulation. (Default value: :code:`10**7`) report (bool, optional): Whether to print the simulation progress. (Default value: :code:`True`). interval (int, optional): Frequency to print the simulation progress. (Default value: :code:`10**4`) """ if report: self.simulation.reporters.append(StateDataReporter(sys.stdout, interval, step=True, remainingTime=True, progress=True, speed=True, totalSteps=nsteps, separator="\t")) self._createLogfile() self.simulation.step(nsteps)
[docs] def runForClockTime(self, time, report=True, interval=10**4,checkpointFile=None, stateFile=None, checkpointInterval=None): if self.started != 0: SBM.opensmog_quit('The run or runForClockTime method was already called. Calling it a second time can lead to unpredictable behavior. If you want to extend a simulation, it is more appropriate to use checkpoint files. Use SBM.help() for more information on checkpoint/state file usage in OpenSMOG.') self.started=1 R"""Run the molecular dynamics simulation. Args: time (float, required): Walltime to run the simulation (units of hours, if units are not given) report (bool, optional): Whether to print the simulation progress. (Default value: :code:`True`). interval (int, optional): Frequency to print the simulation progress. (Default value: :code:`10**4`) checkpointFile (str, optional): Name of checkpoint file to save. (Default value: :code:`None`). stateFile (str, optional): Name of state file to save. (Default value: :code:`None`). checkpointInterval (str, optional): Time between checkpoint/state file saves. """ if report: self.simulation.reporters.append(StateDataReporter(sys.stdout, interval, step=True, remainingTime=False, progress=False, speed=True, separator="\t")) self._createLogfile() self.simulation.runForClockTime(time=time,checkpointFile=checkpointFile, stateFile=stateFile, checkpointInterval=checkpointInterval) print("\nOpenSMOG simulation completed.\n")
def _createLogfile(self): import platform import datetime logFilename = os.path.join(self.folder, self.logFileName) self._checkFile(logFilename) self.outputNames.append(logFilename) with open(logFilename, 'w') as f: ori = sys.stdout sys.stdout = f sys.stdout = ori #system_information f.write('\nSystem Information:\n') f.write('-------------------\n') f.write('Date and time: {:}\n'.format(datetime.datetime.now())) f.write('Machine information: {:} : {:}, {:} : {:}\n'.format("System", platform.uname().system, "Version", platform.uname().version)) f.write('OpenSMOG version: {:}\n'.format(SBM.version)) f.write('Platform: {:}\n'.format(self.platform.getName())) if (self.platform.getName() in ["CUDA", "OpenCL", "HIP"]): f.write('Precision: {:}\n'.format(self.properties['Precision'])) f.write('Integrator: {:}\n'.format(self.integrator_type)) f.write('Savefolder: {:}\n'.format(self.folder)) f.write('\nSimulation Information:\n') f.write('-----------------------\n') f.write('Name: {:}\n'.format(self.name)) f.write('Time step: {:}\n'.format(self.dt/picoseconds)) f.write('Collision Rate: {:}\n'.format(self.gamma*picosecond)) f.write('r_cutoff: {:}\n'.format(self.rcutoff/nanometers)) f.write('Temperature: {:}\n'.format(self.temperature * 0.008314/kelvin)) f.write('\nInput Files:\n') f.write('------------\n') f.write('GroFile: {:}\n'.format(self.inputNames[0])) f.write('TopFile: {:}\n'.format(self.inputNames[1])) f.write('XmlFile: {:}\n'.format(self.inputNames[2])) f.write('\nOutput Files:\n') f.write('-------------\n') for n in self.outputNames: f.write(os.path.basename(n)+"\n") sys.stdout = ori # this is for testing that OpenSMOG and SMOG2 are working together. # the interested user may also be interested in how to compare energies
[docs] def opensmogcheck(): # define some functions that will be used for testing purposes def runSMOG(sysname): # based on the sysname, find the flags and run SMOG2 with open(sysname+"smogcommands", "r") as f: for line in f: pt = os.path.dirname(os.path.realpath(__file__)) line=regex.sub("OSDIR", pt, line) command = line.split() try: out=subprocess.run(command,stdout=subprocess.PIPE,stderr=subprocess.PIPE,check=True) except subprocess.CalledProcessError as e: print("SMOG 2 failed to generate OpenSMOG input!!!!\n") print("STDERR:\n") print((e.stderr).decode()) print("STDOUT:\n") print((e.stdout).decode()) sys.exit(1) def prepOpenSMOG(runsmog,sysname,platform,PBC): # make and return a system. if runsmog: topname="opentest.top" xmlname="opentest.xml" else: topname=sysname+"opentest.top" xmlname=sysname+"opentest.xml" f = open(os.devnull, 'w') with redirect_stdout(f): sbm_test = SBM(name='testss', time_step=0.002, collision_rate=1.0, r_cutoff=2, temperature=0.1, pbc = PBC, warn = False) sbm_test.setup_openmm(platform=platform,GPUindex='default') sbm_test.saveFolder('.') sbm_test.loadSystem(Grofile=sysname+"frame.0.gro", Topfile=topname, Xmlfile=xmlname) sbm_test.createSimulation() return sbm_test def comparevalues(ref,open): absdiff=abs(float(ref)-float(open)) reldiff=abs(absdiff/float(ref)) if absdiff < maxabsdiff or reldiff < maxreldiff: return 1 else: print("BAD VALUES:"+ref.rstrip()+" (reference) and "+open+" (OpenSMOG)") return 0 def which(exe): for dir in os.getenv("PATH").split(':'): if (os.path.exists(os.path.join(dir, exe))): print ('SMOG2 version found:%s/%s\n' % (dir, exe)) return 1 print (''' NOTE: Did not find smog2 in path. Will test OpenSMOG library, but not the generation of force fields with SMOG2. Use smog-check to ensure SMOG 2 is also functioning properly. ''') return 0 # maxreldiff is the max relative deviation allowed maxreldiff=0.00001 # maxabsdiff is the max absolute deviation allowed maxabsdiff=0.002 # The name of each test is listed in tests/listoftests # each line defines a test, which is a subdir of tests/ # each subdirectory has a "flags" file that gives the exact # flags used when calling smog2 # energies contains the gromacs-generated (or any reference # calculation) energies for the 10 frame..gro files print( ''' OpenSMOG-check This will use the version of SMOG2 that is found in the path and try to generate OpenSMOG force fields. It will then calculate the energy using the OpenSMOG force field for reference configurations of each system. Finally, it will check if the potential energies are the same as a set of predefined reference values (typically from Gromacs). ''') # nummatch is the total number of energies that # will need to be close, in order to pass # it is incremented as tests are applied nummatch=0 # mcount is the number of matching energies mcount=0 runsmog = which("smog2") print('What platform would you like to test?') print("\tOptions: OpenCL, HIP, CUDA, CPU, reference") plats = ['opencl', 'hip', 'cuda', 'cpu', 'reference'] for line in sys.stdin: platform = line.rstrip() break if not (platform.lower() in plats): print('''Unrecognized platform! Options are (case-insensitive): OpenCL, HIP, CUDA, CPU, reference Unable to perform tests. ''') sys.exit(1) print("Will test platform \""+platform+"\"\n") listoftests = "share/tests/listoftests" pt = os.path.dirname(os.path.realpath(__file__)) listoftests = os.path.join(pt,listoftests) list = open(listoftests, "r") for testname in list: testname=testname.strip() PBC=False if regex.search(".PBC$",testname): PBC=True sysname = "share/tests/"+testname+"/" pt = os.path.dirname(os.path.realpath(__file__)) sysname = os.path.join(pt,sysname) nummatch+=10 print("STARTING TEST FOR SYSTEM IN "+testname) if runsmog: runSMOG(sysname) sbm_test = prepOpenSMOG(runsmog,sysname,platform,PBC) energies = open(sysname+"energies", "r") for frame in range(10): newgro = app.GromacsGroFile(sysname+"frame."+str(frame)+".gro") sbm_test.simulation.context.setPositions(newgro.positions) new=newgro.getPeriodicBoxVectors() sbm_test.simulation.context.setPeriodicBoxVectors(new[0],new[1],new[2]) e_pot_frame = sbm_test.simulation.context.getState(getEnergy=True).getPotentialEnergy() num=str(e_pot_frame).split(" ",1)[0] openv=energies.readline() mcount += comparevalues(openv,num) print("COMPLETED TEST FOR SYSTEM IN "+testname+"\n") # these two lines can be uncommented, if we want to regenerate the reference templates for fall-back option #subprocess.run("cp opentest.xml "+sysname,shell=True) #subprocess.run("cp opentest.top "+sysname,shell=True) ############## if runsmog: subprocess.run('rm opentest*',shell=True) print(str(mcount) + " of " + str(nummatch) + " values matched") if nummatch == 0: print("\n\nNO TESTS PERFORMED. SOMETHING WENT WRONG!!!!\n\n") sys.exit(1) elif mcount == nummatch: print("\n\nPASSED ALL CHECKS!!!!\n\n") sys.exit(0) else: print("\n\nFAILED CHECKS!!!!\n\n") sys.exit(1)
#end of subroutines print('{:^96s}'.format("****************************************************************************************")) print('{:^96s}'.format("**** *** *** *** *** *** *** *** OpenSMOG (v"+version+") *** *** *** *** *** *** *** ****")) print('') print('{:^96s}'.format("The OpenSMOG class is used to perform molecular dynamics simulations using")) print('{:^96s}'.format("Structure-Based Models (SBM) for biomolecular systems,")) print('{:^96s}'.format("and it allows for the simulation of a wide variety of potential forms.")) print('{:^96s}'.format("OpenSMOG uses force field files generated by SMOG 2.")) print('{:^96s}'.format("OpenSMOG documentation is available at")) print('{:^96s}'.format("https://opensmog.readthedocs.io and https://smog-server.org")) print('') print('{:^96s}'.format("OpenSMOG is described in: Oliveira and Contessoto et al,")) print('{:^96s}'.format("SMOG 2 and OpenSMOG: Extending the limits of structure-based models.")) print('{:^96s}'.format("Protein Science, 31, 158-172 (2022) DOI:10.1002/pro.4209")) print('') print('{:^96s}'.format("This package is the product of contributions from a number of people, including:")) print('{:^96s}'.format("Jeffrey Noel, Mariana Levi, Antonio Oliveira, Vinícius Contessoto,")) print('{:^96s}'.format("Esteban Dodero-Rojas, Mohit Raghunathan, Joyce Yang, Prasad Bandarkar,")) print('{:^96s}'.format("Udayan Mohanty, Ailun Wang, Heiko Lammert, Ryan Hayes,")) print('{:^96s}'.format("Jose Onuchic & Paul Whitford")) print('') print('{:^96s}'.format("Copyright (c) 2021, 2022, The SMOG development team at")) print('{:^96s}'.format("Rice University and Northeastern University")) print("\n\n") print('{:^96s}'.format("For more information, including descriptions of units and examples of")) print('{:^96s}'.format("how to launch a simulation with OpenSMOG, issue the command SBM.help()")) print('{:^96s}'.format("To check your installation of OpenSMOG/SMOG2, use SBM.opensmogcheck()")) print('{:^96s}'.format("Additional information available at https://smog-server.org")) print('{:^96s}'.format("****************************************************************************************")) sys.stdout.flush()