Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extend watfinder to more water models and ions #1953

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions prody/atomic/flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,12 @@
'pyrimidine': set(['CYT', 'C', 'THY', 'T', 'URA', 'U']),

'water': set(['HOH', 'DOD', 'WAT', 'TIP3', 'H2O', 'OH2', 'TIP', 'TIP2',
'TIP4']),
'TIP4', 'SPC', 'SOL']),

'ion': set(['AL', 'BA', 'CA', 'CD', 'CL', 'CO', 'CS', 'CU', 'CU1', 'CUA',
'HG', 'IN', 'IOD', 'K', 'MG', 'MN', 'MN3', 'NA', 'PB', 'PT', 'RB',
'TB', 'TL', 'WO4', 'YB', 'ZN']),
'ion_other': set(['CAL', 'CES', 'CLA', 'POT', 'SOD', 'ZN2']),
'ion_other': set(['CAL', 'CES', 'CLA', 'POT', 'SOD', 'ZN2', 'CU2', 'CU2P']),


'lipid': set(['GPE', 'LPP', 'OLA', 'SDS', 'STE']),
Expand Down Expand Up @@ -503,15 +503,16 @@ def changeBackbone(flag, names):

water
indices `HOH`_ and `DOD`_ recognized by *PDB* and also WAT, TIP3, H2O,
OH2, TIP, TIP2, and TIP4 recognized by molecular dynamics (MD) force
OH2, TIP, TIP2, TIP4 and SPC recognized by molecular dynamics (MD) force
fields.

.. _HOH: http://www.pdb.org/pdb/ligand/ligandsummary.do?hetId=HOH

.. _DOD: http://www.pdb.org/pdb/ligand/ligandsummary.do?hetId=DOD

Previously used water types HH0, OHH, and SOL conflict with other
compounds in the *PDB*, so are removed from the definition of this flag.
compounds in the *PDB*, so are removed from the definition of this flag
except SOL (restored) as compound SOL (L-sorbose) is only used 3 times.


ion
Expand Down Expand Up @@ -552,10 +553,15 @@ def changeBackbone(flag, names):
POT potassium No CHARMM Yes
SOD sodium No CHARMM Yes
ZN2 zinc No CHARMM No
CU2P copper (ii) No CHARMM No
CU2 copper (ii) No CHARMM No
======= ================== ===== ======== ==========

Ion identifiers that are obsoleted by *PDB* (MO3, MO4, MO5, MO6, NAW,
OC7, and ZN1) are removed from this definition.
OC7, and ZN1) are removed from this definition.

CU2 comes from CU2P if parsing PDB files without long_resname
or writing them again (always trims to 3-character resnames).


lipid
Expand Down
163 changes: 128 additions & 35 deletions prody/proteins/waterbridges.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,14 @@ def auto(it=count()):
'calcBridgingResiduesHistogram', 'calcWaterBridgesDistribution',
'savePDBWaterBridges', 'savePDBWaterBridgesTrajectory',
'saveWaterBridges', 'parseWaterBridges', 'findClusterCenters',
'filterStructuresWithoutWater', 'selectSurroundingsBox']
'filterStructuresWithoutWater', 'selectSurroundingsBox',
'findCommonSelectionTraj']


class ResType(Enum):
WATER = auto()
PROTEIN = auto()
ION = auto()


class AtomNode:
Expand Down Expand Up @@ -154,6 +156,22 @@ def checkIsHBondAngle(donor, acceptor, angleRange):

return False

class CoordinationBond:
def __init__(self, donor, acceptor):
self.donor = donor
self.acceptor = acceptor

@staticmethod
def checkIsCoorBond(donor, acceptor, constraints):
if donor.type != ResType.WATER and donor.atom.getName() not in constraints.donors:
return False
if acceptor.type != ResType.WATER and acceptor.atom.getName() not in constraints.acceptors:
return False
if donor.type != ResType.ION and acceptor.type != ResType.ION:
return False

return True


def calcBridges(relations, hydrophilicList, method, maxDepth, maxNumResidues):
waterBridges = []
Expand Down Expand Up @@ -318,7 +336,7 @@ def getAtomicOutput(waterBridges, relations):
for bridge in waterBridges:
proteinAtoms, waterAtoms = [], []
for atomIndex in bridge:
if relations[atomIndex].type == ResType.PROTEIN:
if relations[atomIndex].type in [ResType.PROTEIN, ResType.ION]:
proteinAtoms.append(relations[atomIndex].atom)
else:
waterAtoms.append(relations[atomIndex].atom)
Expand Down Expand Up @@ -394,6 +412,10 @@ def calcWaterBridges(atoms, **kwargs):
:func:`.selectSurroundingsBox`, selecting a box surrounding it.
Default is **False**
:type expand_selection: bool

:arg considered_atoms_sel: selection string for which atoms to consider
Default is **"protein"**
:type considered_atoms_sel: str
"""

method = kwargs.pop('method', 'chain')
Expand All @@ -410,6 +432,7 @@ def calcWaterBridges(atoms, **kwargs):
isInfoLog = kwargs.pop('isInfoLog', True)
DIST_COVALENT_H = 1.4
prefix = kwargs.pop('prefix', '')
considered_atoms_sel = kwargs.pop('considered_atoms_sel', "protein")

if method not in ['chain', 'cluster']:
raise TypeError('Method should be chain or cluster.')
Expand All @@ -422,7 +445,7 @@ def calcWaterBridges(atoms, **kwargs):

expand_selection = kwargs.pop('expand_selection', False)
if expand_selection:
atoms = selectSurroundingsBox(atoms, selection).copy()
atoms = selectSurroundingsBox(atoms, selection, **kwargs).copy()
else:
atoms = selection.copy()

Expand All @@ -449,14 +472,18 @@ def calcWaterBridges(atoms, **kwargs):
relations[oxygen].hydrogens.append(hydrogen)

proteinHydrophilic = consideredAtoms.select(
'protein and name "{0}" and within {1} of water'.format(
'({0}) and name "{1}" and within {2} of water'.format(considered_atoms_sel,
getElementsRegex(set(donors+acceptors)), distWR))

proteinHydrogens = consideredAtoms.select('protein and hydrogen') or []
proteinHydrogens = consideredAtoms.select('({0}) and hydrogen'.format(considered_atoms_sel)) or []
proteinHydroPairs = findNeighbors(
proteinHydrophilic, DIST_COVALENT_H, proteinHydrogens) if proteinHydrogens else []
for hydrophilic in proteinHydrophilic:
relations.addNode(hydrophilic, ResType.PROTEIN)
if hydrophilic is not None:
if hydrophilic.getFlag('ion'):
relations.addNode(hydrophilic, ResType.ION)
else:
relations.addNode(hydrophilic, ResType.PROTEIN)
for pair in proteinHydroPairs:
hydrophilic, hydrogen, _ = pair
relations[hydrophilic].hydrogens.append(hydrogen)
Expand All @@ -476,12 +503,18 @@ def calcWaterBridges(atoms, **kwargs):
else:
LOGGER.info('No hydrogens detected, angle criteria will not be used.')

constraints2 = HBondConstraints(acceptors, donors) # not taking angles

for pair in contactingWaterNodes + contactingWaterProteinNodes:
for a, b in [(0, 1), (1, 0)]:
if HydrogenBond.checkIsHBond(pair[a], pair[b], constraints):
newHBond = HydrogenBond(pair[a].atom, pair[b].atom)
relations.addHBond(newHBond)

elif CoordinationBond.checkIsCoorBond(pair[a], pair[b], constraints2):
newBond = CoordinationBond(pair[a].atom, pair[b].atom)
relations.addHBond(newBond)

relations.removeUnbonded()

waterBridgesWithIndices = calcBridges(
Expand Down Expand Up @@ -549,14 +582,19 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs):
:arg return_selection: whether to return the combined common selection
Default is **False** to keep expected behaviour.
However, this output is required when using selstr.
:type return_selection: bool
:type return_selection: bool

:arg considered_atoms_sel: selection string for which atoms to consider
Default is **"protein"**
:type considered_atoms_sel: str
"""
start_frame = kwargs.pop('start_frame', 0)
stop_frame = kwargs.pop('stop_frame', -1)
max_proc = kwargs.pop('max_proc', mp.cpu_count()//2)
selstr = kwargs.pop('selstr', None)
expand_selection = kwargs.pop('expand_selection', False)
return_selection = kwargs.pop('return_selection', False)
padding = kwargs.pop('padding', 0)

if trajectory is not None:
if isinstance(trajectory, Atomic):
Expand All @@ -571,26 +609,14 @@ def calcWaterBridgesTrajectory(atoms, trajectory, **kwargs):
else:
traj = trajectory[start_frame:stop_frame+1]

indices = None
selection = atoms
atoms_copy = atoms.copy()
if selstr is not None:
LOGGER.info('Finding common selection')
indices = []
for frame0 in traj:
atoms_copy = atoms.copy()
atoms_copy.setCoords(frame0.getCoords())
selection = atoms_copy.select(selstr)

if expand_selection:
selection = selectSurroundingsBox(atoms_copy, selection)

indices.extend(list(selection.getIndices()))

indices = np.unique(indices)
selection = atoms_copy[indices]

LOGGER.info('Common selection found with {0} atoms and {1} protein chains'.format(selection.numAtoms(),
len(list(selection.protein.getHierView()))))
selection, indices = findCommonSelectionTraj(atoms, traj, selstr,
expand_selection=expand_selection,
return_selection=False,
padding=padding)
LOGGER.info('Common selection found with {0} atoms and {1} protein chains'.format(
selection.numAtoms(), len(list(selection.protein.getHierView()))))

def analyseFrame(j0, start_frame, frame0, interactions_all):
LOGGER.info('Frame: {0}'.format(j0))
Expand Down Expand Up @@ -725,19 +751,20 @@ def removeDuplicateKeys(self, criterion):
del self.values[key]


def getResInfo(atoms):
def getResInfo(atoms, **kwargs):
considered_atoms_sel = kwargs.pop('considered_atoms_sel', "protein")
dict = {}
nums = atoms.select('protein').getResnums()
names = atoms.select('protein').getResnames()
chids = atoms.select('protein').getChids()
nums = atoms.select(considered_atoms_sel).getResnums()
names = atoms.select(considered_atoms_sel).getResnames()
chids = atoms.select(considered_atoms_sel).getChids()

for i, num in enumerate(nums):
dict[num] = "{names[i]}{num}{chids[i]}"
dict[num] = "{0}{1}{2}".format(names[i], num, chids[i])

return dict


def getWaterBridgeStatInfo(stats, atoms):
def getWaterBridgeStatInfo(stats, atoms, **kwargs):
"""Converts calcWaterBridgesStatistic indices output to info output from stat.

:arg stats: statistics returned by calcWaterBridgesStatistics, output='indices'
Expand All @@ -746,7 +773,7 @@ def getWaterBridgeStatInfo(stats, atoms):
:arg atoms: Atomic object from which atoms are considered
:type atoms: :class:`.Atomic`
"""
residueInfo = getResInfo(atoms)
residueInfo = getResInfo(atoms, **kwargs)
infoOutput = {}
for key, value in stats.items():
x_id, y_id = key
Expand All @@ -771,12 +798,20 @@ def calcWaterBridgesStatistics(frames, trajectory, **kwargs):
:arg filename: name of file to save statistic information if wanted
default is None
:type filename: string

:arg considered_atoms_sel: selection string for which atoms to consider
Default is **"protein"**
:type considered_atoms_sel: str
"""
output = kwargs.pop('output', 'indices')
filename = kwargs.pop('filename', None)
if output not in ['info', 'indices']:
raise TypeError('Output should be info or indices!')

considered_atoms_sel = kwargs.pop('considered_atoms_sel', None)
if considered_atoms_sel is not None:
trajectory.select(considered_atoms_sel)

allCoordinates = trajectory.getCoordsets()
interactionCount = DictionaryList(0)
distances = DictionaryList([])
Expand Down Expand Up @@ -1429,9 +1464,14 @@ def filterStructuresWithoutWater(structures, min_water=0, filenames=None):
return list(reversed(new_structures))


def selectSurroundingsBox(atoms, select, padding=0, return_selstr=False):
def selectSurroundingsBox(atoms, select, **kwargs):
"""Select the surroundings of *select* within *atoms* using
a bounding box with optional *padding*."""
a bounding box with optional *padding*.

:arg return_selstr: whether to return the final selstr
Default False
:type return_selstr: bool
"""

if not isinstance(atoms, Atomic):
raise TypeError('atoms should be an Atomic object')
Expand All @@ -1442,11 +1482,16 @@ def selectSurroundingsBox(atoms, select, padding=0, return_selstr=False):
if not isinstance(select, Atomic):
raise TypeError('select should be a valid selection or selection string')

padding = kwargs.get('padding', 0)
if not isinstance(padding, Number):
raise TypeError('padding should be a number')
if padding < 0:
raise ValueError('padding should be a positive number')

return_selstr = kwargs.get('return_selstr', False)
if not isinstance(return_selstr, bool):
raise TypeError('return_selstr should be a bool')

minCoords = select.getCoords().min(axis=0)
maxCoords = select.getCoords().max(axis=0)

Expand All @@ -1460,3 +1505,51 @@ def selectSurroundingsBox(atoms, select, padding=0, return_selstr=False):
if return_selstr:
return selstr
return atoms.select(selstr)


def findCommonSelectionTraj(atoms, traj, selstr, **kwargs):
"""Select *selstr* within *atoms* for each frame in *traj*
using a bounding box with optional *padding*.

:arg expand_selection: whether to expand selections with
:meth:`.selectSurroundingsBox`. Default False
:type expand_selection: bool

Returns the common selection and corresponding indices and
optionally the corresponding selstr if *return_selstr* is **True**
"""

if not isinstance(atoms, Atomic):
raise TypeError('atoms should be an Atomic object')

if not isinstance(selstr, str):
raise TypeError('selstr should be a string')

expand_selection = kwargs.get('expand_selection', False)
if not isinstance(expand_selection, bool):
raise TypeError('expand_selection should be a bool')

return_selstr = kwargs.pop('return_selstr', False) # not passed on
if not isinstance(return_selstr, bool):
raise TypeError('return_selstr should be a bool')

indices = None
if selstr is not None:
LOGGER.info('Finding common selection')
indices = []
for frame0 in traj:
atoms_copy = atoms.copy()
atoms_copy.setCoords(frame0.getCoords())
selection = atoms_copy.select(selstr)

if expand_selection:
selection = selectSurroundingsBox(atoms_copy, selection, **kwargs)

indices.extend(list(selection.getIndices()))

indices = np.unique(indices)
selection = atoms_copy[indices]

if return_selstr:
return selection, indices, selstr
return selection, indices
Loading