# Copyright (c) 2020-2021 The Center for Theoretical Biological Physics (CTBP) - Rice University and Northeastern University
# This file is from the OpenSMOG project, released under the MIT License.
R"""
The :class:`~.OpenSMOG` classes perform molecular dynamics using Structure-Based Models (SBM) for biomolecular simulations.
:class:`~.OpenSMOG` uses force fields generated by SMOG 2, and it allows the simulations of a wide variety of potential forms, including commonly employed C-alpha and all-atom variants.
Details about the default models in SMOG 2 can be found in the following resources:
- **SMOG server**: https://smog-server.org/smog2/
- **C-alpha**: Clementi, C., Nymeyer, H. and Onuchic, J.N., 2000. Topological and energetic factors: what determines the structural details of the transition state ensemble and “en-route” intermediates for protein folding? An investigation for small globular proteins. Journal of molecular biology, 298(5), pp.937-953.
- **All-Atom**: Whitford, P.C., Noel, J.K., Gosavi, S., Schug, A., Sanbonmatsu, K.Y. and Onuchic, J.N., 2009. An all‐atom structure‐based potential for proteins: bridging minimal models with all‐atom empirical forcefields. Proteins: Structure, Function, and Bioinformatics, 75(2), pp.430-441.
"""
from simtk.openmm.app import *
from simtk.openmm import *
from simtk.unit import *
import os
import numpy as np
import xml.etree.ElementTree as ET
import warnings
from lxml import etree
import sys
#from .OpenSMOG_Reporter import forcesReporter
[docs]class SBM:
R"""
The :class:`~.SBM` class performs Molecular dynamics simulations using structure-based models to investigate a broad range of biomolecular dynamics, including domain rearrangements in proteins, folding and ligand binding in RNA, and large-scale rearrangements in ribonucleoprotein assemblies. In its simplest form, a structure-based model defines a particular structure (usually obtained from X-ray, or NMR, methods) as the energetic global minimum.
The :class:`~.SBM` sets the environment to start the molecular dynamics simulations.
Args:
time_step (float, required):
Simulation time step in units of :math:`\tau`.
collision_rate (float, required):
Friction/Damping constant in units of reciprocal time (:math:`1/\tau`).
r_cutoff (float, required):
Cutoff distance to consider non-bonded interactions in units of nanometers.
temperature (float, required):
Temperature in reduced units.
name (str):
Name used in the output files. (Default value: :code:`OpenSMOG`).
"""
def __init__(self, time_step, collision_rate, r_cutoff, temperature, name = "OpenSMOG"):
self.printHeader()
self.name = name
self.dt = time_step * picoseconds
if not time_step in [0.0005, 0.002]:
print('[WARNING] The given time_step value is not the one usually employed in the SBM models. Make sure this value is correct. The suggested values are: time_step=0.0005 for C-alpha and time_step = 0.002 for All-Atoms.')
self.gamma = collision_rate / picosecond
if collision_rate != 1.0:
print('[WARNING] The given collision_rate value is not the one usually employed in the SBM models. Make sure this value is correct. The suggested value is: collision_rate=1.0.')
self.rcutoff = r_cutoff * nanometers
if not r_cutoff in [3.0, 1.2]:
print('[WARNING] The given r_cutoff value is not the one usually employed in the SBM models. Make sure this value is correct. The suggested values are: r_cutoff=3.0 for C-alpha and r_cutoff=1.2 for All-Atoms.')
self.temperature = (temperature / 0.008314) * kelvin
self.forceApplied = False
self.loaded = False
self.folder = "."
self.forceNamesCA = {0 : "eletrostastic", 1 : "Non-Contacts", 2 : "Bonds", 3 : "Angles", 4 : "Dihedrals"}
self.forceNamesAA = {0 : "eletrostastic", 1 : "Non-Contacts", 2 : "Bonds", 3 : "Angles", 4 : "Dihedrals", 5 : "Impropers"}
self.forceCount = 0
[docs] def setup_openmm(self, platform='opencl', precision='single', GPUindex='default', integrator="langevin"):
R"""Sets up the parameters of the simulation OpenMM platform.
Args:
platform (str, optional):
Platform to use in the simulations. Opitions are *CUDA*, *OpenCL*, *HIP*, *CPU*, *Reference*. (Default value: :code:`OpenCL`).
precision (str, optional):
Numerical precision type of the simulation. Options are *single*, *mixed*, *double*. (Default value: :code:`single`). For details check the `OpenMM Documentation <http://docs.openmm.org/latest/developerguide/developer.html#numerical-precision>`__.
GPUIndex (str, optional):
Set of Platform device index IDs. Ex: 0,1,2 for the system to use the devices 0, 1 and 2. (Use only when GPU != default).
integrator (str):
Integrator to use in the simulations. Options are *langevin*, *variableLangevin*, *verlet*, *variableVerlet* and, *brownian*. (Default value: :code:`langevin`).
"""
precision = precision.lower()
if precision not in ["mixed", "single", "double"]:
raise ValueError("Precision must be mixed, single or double")
properties = {}
properties["Precision"] = precision
if GPUindex.lower() != "default":
properties["DeviceIndex"] = GPUindex
self.properties = properties
if platform.lower() == "opencl":
platformObject = Platform.getPlatformByName('OpenCL')
elif platform.lower() == "reference":
platformObject = Platform.getPlatformByName('Reference')
self.properties = {}
elif platform.lower() == "cuda":
platformObject = Platform.getPlatformByName('CUDA')
elif platform.lower() == "cpu":
platformObject = Platform.getPlatformByName('CPU')
self.properties = {}
elif platform.lower() == "hip":
platformObject = Platform.getPlatformByName('HIP')
else:
self.exit("\n!!!! Unknown platform !!!!\n")
self.platform = platformObject
if integrator.lower() == "langevin":
self.integrator = LangevinIntegrator(self.temperature,
self.gamma, self.dt)
self.integrator_type = integrator
else:
self.integrator = integrator
self.integrator_type = "UserDefined"
self.forceDict = {}
self.forcesDict = {}
[docs] def saveFolder(self, folder):
R"""Sets the folder path to save data.
Args:
folder (str, optional):
Folder path to save the simulation data. If the folder path does not exist, the function will create the directory.
"""
if os.path.exists(folder) == False:
os.mkdir(folder)
self.folder = folder
[docs] def loadSystem(self, Grofile, Topfile, Xmlfile):
R"""Loads the input files in the OpenMM system platform. The input files are generated using SMOG2 software with the flag :code:`-OpenSMOG`. Details on how to create the files can be found in the `SMOG2 User Manual <https://smog-server.org/smog2/>`__.
A tutorial on how to generate the inputs files for default all-atom and C-alpha models can be found `here <https://opensmog.readthedocs.io>`__.
Args:
Grofile (file, required):
Initial structure for the MD simulations in *.gro* file format generated by SMOG2 software with the flag :code:`-OpenSMOG`. (Default value: :code:`None`).
Topfile (file, required):
Topology *.top* file format generated by SMOG2 software with the flag :code:`-OpenSMOG`. The topology file lists the interactions between the system atoms except for the "Native Contacts" potential that is provided to OpenSMOG in a *.xml* file. (Default value: :code:`None`).
Xmlfile (file, required):
The *.xml* file that contains the all information that defines the "Contact" potential. The *.xml* file is generated by SMOG2 software with the flag :code:`-OpenSMOG`, which support custom potential functions. (Default value: :code:`None`).
"""
def _checknames(f1,f2,f3):
fn1 = os.path.basename(f1).rsplit('.', 1)[0]
fn2 = os.path.basename(f2).rsplit('.', 1)[0]
fn3 = os.path.basename(f3).rsplit('.', 1)[0]
if fn1 == fn2 and fn1 ==fn3:
return False
else:
return True
if _checknames(Grofile, Topfile, Xmlfile):
warnings.warn('The Gro, Top and Xml files have different prefixes. Most people use the same name, so this may be a mistake.')
self.inputNames = [Grofile, Topfile, Xmlfile]
self._check_file(Grofile, '.gro')
self.loadGro(Grofile)
self._check_file(Topfile, '.top')
self.loadTop(Topfile)
self._check_file(Xmlfile, '.xml')
self.loadXml(Xmlfile)
print("Files loaded in the system.")
def _check_file(self, filename, ext):
if not (filename.lower().endswith(ext)):
raise ValueError('Wrong file extension: {} must to be {} extension'.format(filename, ext))
[docs] def loadGro(self, Grofile):
R"""Loads the *.gro* file format in the OpenMM system platform. The input files are generated using SMOG2 software with the flag :code:`-OpenSMOG`. Details on how to create the files can be found in the `SMOG2 User Manual <https://smog-server.org/smog2/>`__.
A tutorial on how to generate the inputs files for the default all-atom and C-alpha models can be found `here <https://opensmog.readthedocs.io>`__.
Args:
Grofile (file, required):
Initial structure for the MD simulations in *.gro* file format generated by SMOG2 software with the flag :code:`-OpenSMOG`. (Default value: :code:`None`).
"""
self.Gro = GromacsGroFile(Grofile)
[docs] def loadTop(self, Topfile):
R"""Loads the *.top* file format in the OpenMM system platform. The input files are generated using SMOG2 software with the flag :code:`-OpenSMOG`. Details on how to create the files can be found in the `SMOG2 User Manual <https://smog-server.org/smog2/>`__.
A tutorial on how to generate the inputs files for the default all-atom and C-alpha models can be found `here <https://opensmog.readthedocs.io>`__.
Args:
Topfile (file, required):
Topology *.top* file format generated by SMOG2 software with the flag :code:`-OpenSMOG`. The topology file defines the interactions between atoms, except for the "Native Contacts" potential that is provided to OpenSMOG in the form of a *.xml* file. (Default value: :code:`None`).
"""
self.Top = GromacsTopFile(Topfile)
self.system = self.Top.createSystem(nonbondedMethod=CutoffNonPeriodic,nonbondedCutoff=self.rcutoff)
nforces = len(self.system.getForces())
for force_id, force in enumerate(self.system.getForces()):
if nforces == 7:
if force_id <=4:
force.setForceGroup(force_id)
self.forcesDict[self.forceNamesCA[force_id]] = force
self.forceCount +=1
else:
force.setForceGroup(30)
elif nforces == 8:
if force_id <=5:
force.setForceGroup(force_id)
self.forcesDict[self.forceNamesAA[force_id]] = force
self.forceCount +=1
else:
force.setForceGroup(30)
def _splitForces(self):
n_forces = len(self.data[3])
forces = {}
for n in range(n_forces):
forces[self.data[3][n]] = [self.data[0][n], self.data[1][n], self.data[2][n]]
self.contacts = forces
def _customSmogForce(self, name, data):
#first set the equation
contacts_ff = CustomBondForce(data[0])
#second set the number of variable
for pars in data[1]:
contacts_ff.addPerBondParameter(pars)
#third, apply the bonds from each pair of atoms and the related variables.
pars = [pars for pars in data[1]]
for iteraction in data[2]:
atom_index_i = int(iteraction['i'])-1
atom_index_j = int(iteraction['j'])-1
parameters = [float(iteraction[k]) for k in pars]
contacts_ff.addBond(atom_index_i, atom_index_j, parameters)
self.forcesDict[name] = contacts_ff
contacts_ff.setForceGroup(self.forceCount)
self.forceCount +=1
[docs] def loadXml(self, Xmlfile):
R"""Loads the *.xml* file format in the OpenMM system platform. The input files are generated using SMOG2 software with the flag :code:`-OpenSMOG`. Details on how to create the files can be found in the `SMOG2 User Manual <https://smog-server.org/smog2/>`__.
A tutorial on how to generate the inputs files for default all-atom and C-alpha models can be found `here <https://opensmog.readthedocs.io>`__.
Args:
Xmlfile (file, required):
The *.xml* file that contains all information that defines the "Contact" potentials. The *.xml* file is generated by SMOG2 software with the flag :code:`-OpenSMOG`, which support custom potential energy functions. (Default value: :code:`None`).
"""
def validate(Xmlfile):
path = "share/OpenSMOG.xsd"
pt = os.path.dirname(os.path.realpath(__file__))
filepath = os.path.join(pt,path)
xmlschema_doc = etree.parse(filepath)
xmlschema = etree.XMLSchema(xmlschema_doc)
xml_doc = etree.parse(Xmlfile)
result = xmlschema.validate(xml_doc)
return result
def import_xml2OpenSMOG(file_xml):
XML_potential = ET.parse(file_xml)
Expression = []
Parameters = []
Pairs = []
Force_Names= []
root = XML_potential.getroot()
for i in range(len(root[0])):
for name in root[0][i].iter('contacts_type'):
Force_Names.append(name.attrib['name'])
for expr in root[0][i].iter('expression'):
Expression.append(expr.attrib['expr'])
internal_Param=[]
for par in root[0][i].iter('parameter'):
internal_Param.append(par.text)
Parameters.append(internal_Param)
internal_Pairs=[]
for atompairs in root[0][i].iter('interaction'):
internal_Pairs.append(atompairs.attrib)
Pairs.append(internal_Pairs)
return Expression,Parameters,Pairs,Force_Names
if not (self.forceApplied):
if not validate(Xmlfile):
raise ValueError("The xml file is not in the correct format")
self.data = import_xml2OpenSMOG(Xmlfile)
self._splitForces()
for force in self.contacts:
print("creating force {:} from xml file".format(force))
self._customSmogForce(force, self.contacts[force])
self.system.addForce(self.forcesDict[force])
self.forceApplied = True
else:
print('\n Contacts forces already applied!!! \n')
[docs] def createSimulation(self):
R"""Creates the simulation context and loads into the OpenMM platform.
"""
if not self.loaded:
self.simulation = Simulation(self.Top.topology, self.system, self.integrator, self.platform, self.properties)
self.simulation.context.setPositions(self.Gro.positions)
self.simulation.context.setVelocitiesToTemperature(self.temperature)
self.loaded = True
else:
print('\n Simulations context already created! \n')
def _checkFile(self,filename):
if os.path.isfile(filename):
i = 1
ck = True
while i <= 10 and ck:
newname = filename + "_" + str(i)
if not os.path.isfile(newname):
print("{:} already exists. Backing up to {:}".format(filename,newname))
os.rename(filename, newname)
ck = False
else:
i += 1
[docs] def createReporters(self, trajectory=True, trajectoryName=None, energies=True, energiesName=None, energy_components=False, energy_componentsName=None, interval=1000):
R"""Creates the reporters to provide the output data.
Args:
trajectory (bool, optional):
Whether to save the trajectory *.dcd* file containing the position of the atoms as a function of time. (Default value: :code:`True`).
energies (bool, optional):
Whether to save the energies in a *.txt* file containing five columns, comma-delimited. The header of the files shows the information of each collum: #"Step","Potential Energy (kJ/mole)","Kinetic Energy (kJ/mole)","Total Energy (kJ/mole)","Temperature (K)". (Default value: :code:`True`).
forces (bool, optional):
Whether to save the potential energy for each applied force in a *.txt* file containing several columns, comma-delimited. The header of the files shows the information of each column. An example of the header is: #"Step","eletrostastic","Non-Contacts","Bonds","Angles","Dihedrals","contact_1-10-12". (Default value: :code:`False`).
interval (int, required):
Frequency to write the data to the output files. (Default value: :code:`10**3`)
"""
self.outputNames = []
if trajectory:
if trajectoryName is None:
dcdfile = os.path.join(self.folder, self.name + '_trajectory.dcd')
else:
dcdfile = os.path.join(self.folder, trajectoryName + ".dcd")
self._checkFile(dcdfile)
self.outputNames.append(dcdfile)
self.simulation.reporters.append(DCDReporter(dcdfile, interval))
if energies:
if energiesName is None:
energyfile = os.path.join(self.folder, self.name+ '_energies.txt')
else:
energyfile = os.path.join(self.folder, energiesName + ".txt")
self._checkFile(energyfile)
self.outputNames.append(energyfile)
self.simulation.reporters.append(StateDataReporter(energyfile, interval, step=True,
potentialEnergy=True, kineticEnergy=True,
totalEnergy=True,temperature=True, separator=","))
if energy_components:
if energy_componentsName is None:
forcefile = os.path.join(self.folder, self.name + '_forces.txt')
else:
forcefile = os.path.join(self.folder, energy_componentsName + '.txt')
self._checkFile(forcefile)
self.outputNames.append(forcefile)
self.simulation.reporters.append(forcesReporter(forcefile, interval, self.forcesDict, step=True))
[docs] def run(self, nsteps, report=True, interval=10**4):
R"""Run the molecular dynamics simulation.
Args:
nsteps (int, required):
Number of steps to be performed in the simulation. (Default value: :code:`10**7`)
report (bool, optional):
Whether to print the simulation progress. (Default value: :code:`True`).
interval (int, required):
Frequency to print the simulation progress. (Default value: :code:`10**4`)
"""
if report:
self.simulation.reporters.append(StateDataReporter(sys.stdout, interval, step=True, remainingTime=True,
progress=True, totalSteps=nsteps, separator="\t"))
self._createLogfile()
self.simulation.step(nsteps)
def _createLogfile(self):
import platform
import datetime
logFilename = os.path.join(self.folder, 'OpenSMOG.log')
self._checkFile(logFilename)
self.outputNames.append(logFilename)
with open(logFilename, 'w') as f:
ori = sys.stdout
sys.stdout = f
self.printHeader()
sys.stdout = ori
#system_information
f.write('\nSystem Information:\n')
f.write('-------------------\n')
f.write('Date and time: {:}\n'.format(datetime.datetime.now()))
f.write('Machine information: {:} : {:}, {:} : {:}\n'.format("System", platform.uname().system, "Version", platform.uname().version))
f.write('Platform: {:}\n'.format(self.platform.getName()))
if (self.platform.getName() in ["CUDA", "OpenCL", "HIP"]):
f.write('Precision: {:}\n'.format(self.properties['Precision']))
f.write('Integrator: {:}\n'.format(self.integrator_type))
f.write('Savefolder: {:}\n'.format(self.folder))
f.write('\nSimulation Information:\n')
f.write('-----------------------\n')
f.write('Name: {:}\n'.format(self.name))
f.write('Time step: {:}\n'.format(self.dt/picoseconds))
f.write('Collision Rate: {:}\n'.format(self.gamma*picosecond))
f.write('r_Cutoff: {:}\n'.format(self.rcutoff/nanometers))
f.write('Temperature: {:}\n'.format(self.temperature * 0.008314/kelvin))
f.write('\nInput Files:\n')
f.write('------------\n')
f.write('GroFile: {:}\n'.format(self.inputNames[0]))
f.write('TopFile: {:}\n'.format(self.inputNames[1]))
f.write('XmlFile: {:}\n'.format(self.inputNames[2]))
f.write('\nOutput Files:\n')
f.write('-------------\n')
for n in self.outputNames:
f.write(n+"\n")
sys.stdout = ori