diff --git a/.gitignore b/.gitignore index 0760f8129e4..e9f3571ca8e 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,8 @@ examples/output.txt # ignore Vagrant virtual machines .vagrant # ignore coverage files -.coverage +.coverage* +!.coveragerc .noseids htmlcov # ignore trajectory offset caches diff --git a/package/CHANGELOG b/package/CHANGELOG index 7efe1751e54..802a17f0f33 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -19,6 +19,8 @@ The rules for this file: * 2.4.0 Fixes + * Update hbond analysis doc string to use exclusive bound language + (Issue #3847) * XTC and TRR readers now fail with IOError when a status except EOK (=0) is detected on reading a frame instead of silently continuing * Consolidate license files across MDAnalysis library packages (PR #3939) @@ -39,6 +41,10 @@ Fixes (e.g. bonds, angles) (PR #3779). Enhancements + * Added ability for hbond analysis to use types when resnames are not + present (Issue #3847) + * Added explanatory warnings when hbond analysis doesn't find any hbonds + (Issue #3847) * Improve C content of libxdr Cython, add `read_direct` methods to read coordinates, velocities and forces directly into memoryviews of `Timestep` attributes, make `TRR` timestep have positions, velocities and forces on @@ -48,8 +54,8 @@ Enhancements NumPy array was added to ProtoReader (PR #3890) * MDAnalysis now officially supports py3.11 (Issue #3878) * LAMMPSDump Reader optionally unwraps trajectories with image flags upon - loading (#3843 - * LAMMPSDump Reader now imports velocities and forces (#3843) + loading (Issue #3843) + * LAMMPSDump Reader now imports velocities and forces (Issue #3843) * Minimum NumPy version for py3.11 is now set to 1.23.2. * Added decorator to check for an empty atom group, applied in topological attributes(Issue #3837) diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py index 2d2dcf07211..21b08a3d457 100644 --- a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py @@ -174,6 +174,16 @@ protein-water and protein-protein hydrogen bonds will be found, but no water-water hydrogen bonds. +One can also define hydrogen bonds with atom types:: + + from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis as HBA + hbonds = HBA( + universe=u, + donors_sel='type 2', + hydrogens_sel='type 1', + acceptors_sel='type 2', + ) + In order to compute the hydrogen bond lifetime, after finding hydrogen bonds one can use the :attr:`lifetime` function:: @@ -236,6 +246,7 @@ from MDAnalysis.lib.correlations import autocorrelation, correct_intermittency from MDAnalysis.exceptions import NoDataError from MDAnalysis.core.groups import AtomGroup +from MDAnalysis.analysis.hydrogenbonds.hbond_autocorrel import find_hydrogen_donors from ...due import due, Doi @@ -260,6 +271,10 @@ def __init__(self, universe, """Set up atom selections and geometric criteria for finding hydrogen bonds in a Universe. + Hydrogen bond selections with `donors_sel` , `hydrogens_sel`, and + `acceptors_sel` may be achieved with either a *resname*, atom *name* + combination, or when those are absent, with atom *type* selections. + Parameters ---------- universe : Universe @@ -310,8 +325,13 @@ def __init__(self, universe, information is used, as this is the only way that guarantees the correct identification of donor-hydrogen pairs. + .. versionadded:: 2.0.0 Added `between` keyword + .. versionchanged:: 2.4.0 + Added use of atom types in selection strings for hydrogen atoms, + bond donors, or bond acceptors + """ self.u = universe @@ -370,29 +390,45 @@ def guess_hydrogens(self, Parameters ---------- select: str (optional) - Selection string for atom group from which hydrogens will be identified. + :ref:`Selection string ` for atom group + from which hydrogens will be identified. (e.g., ``(resname X and + name H1)`` or ``type 2``) max_mass: float (optional) - Maximum allowed mass of a hydrogen atom. + The mass of a hydrogen atom must be less than this value. + min_mass: float (optional) + The mass of a hydrogen atom must be greater than this value. min_charge: float (optional) - Minimum allowed charge of a hydrogen atom. + The charge of a hydrogen atom must be greater than this value. Returns ------- potential_hydrogens: str - String containing the :attr:`resname` and :attr:`name` of all hydrogen atoms potentially capable of forming - hydrogen bonds. + String containing the :attr:`resname` and :attr:`name` of all + hydrogen atoms potentially capable of forming hydrogen bonds. Notes ----- - This function makes use of atomic masses and atomic charges to identify which atoms are hydrogen atoms that are - capable of participating in hydrogen bonding. If an atom has a mass less than :attr:`max_mass` and an atomic - charge greater than :attr:`min_charge` then it is considered capable of participating in hydrogen bonds. + Hydrogen selections may be achieved with either a resname, atom + name combination, or when those are absent, atom types. - If :attr:`hydrogens_sel` is `None`, this function is called to guess the selection. + This function makes use of atomic masses and atomic charges to identify + which atoms are hydrogen atoms that are capable of participating in + hydrogen bonding. If an atom has a mass less than :attr:`max_mass` and + an atomic charge greater than :attr:`min_charge` then it is considered + capable of participating in hydrogen bonds. + + If :attr:`hydrogens_sel` is `None`, this function is called to guess + the selection. - Alternatively, this function may be used to quickly generate a :class:`str` of potential hydrogen atoms involved - in hydrogen bonding. This str may then be modified before being used to set the attribute + Alternatively, this function may be used to quickly generate a + :class:`str` of potential hydrogen atoms involved in hydrogen bonding. + This str may then be modified before being used to set the attribute :attr:`hydrogens_sel`. + + + .. versionchanged:: 2.4.0 + Added ability to use atom types + """ if min_mass > max_mass: @@ -407,109 +443,168 @@ def guess_hydrogens(self, )) ] - hydrogens_list = np.unique( - [ - '(resname {} and name {})'.format(r, p) for r, p in zip(hydrogens_ag.resnames, hydrogens_ag.names) - ] - ) - - return " or ".join(hydrogens_list) + return self._group_categories(hydrogens_ag) def guess_donors(self, select='all', max_charge=-0.5): - """Guesses which atoms could be considered donors in the analysis. Only use if the universe topology does not - contain bonding information, otherwise donor-hydrogen pairs may be incorrectly assigned. + """Guesses which atoms could be considered donors in the analysis. Only + use if the universe topology does not contain bonding information, + otherwise donor-hydrogen pairs may be incorrectly assigned. Parameters ---------- select: str (optional) - Selection string for atom group from which donors will be identified. + :ref:`Selection string ` for atom group + from which donors will be identified. (e.g., ``(resname X and name + O1)`` or ``type 2``) max_charge: float (optional) - Maximum allowed charge of a donor atom. + The charge of a donor atom must be less than this value. Returns ------- potential_donors: str - String containing the :attr:`resname` and :attr:`name` of all atoms that potentially capable of forming - hydrogen bonds. + String containing the :attr:`resname` and :attr:`name` of all atoms + that are potentially capable of forming hydrogen bonds. Notes ----- - This function makes use of and atomic charges to identify which atoms could be considered donor atoms in the - hydrogen bond analysis. If an atom has an atomic charge less than :attr:`max_charge`, and it is within - :attr:`d_h_cutoff` of a hydrogen atom, then it is considered capable of participating in hydrogen bonds. + Donor selections may be achieved with either a resname, atom + name combination, or when those are absent, atom types. + + This function makes use of and atomic charges to identify which atoms + could be considered donor atoms in the hydrogen bond analysis. If an + atom has an atomic charge less than :attr:`max_charge`, and it is + within :attr:`d_h_cutoff` of a hydrogen atom, then it is considered + capable of participating in hydrogen bonds. - If :attr:`donors_sel` is `None`, and the universe topology does not have bonding information, this function is - called to guess the selection. + If :attr:`donors_sel` is `None`, and the universe topology does not + have bonding information, this function is called to guess the + selection. - Alternatively, this function may be used to quickly generate a :class:`str` of potential donor atoms involved - in hydrogen bonding. This :class:`str` may then be modified before being used to set the attribute - :attr:`donors_sel`. + Alternatively, this function may be used to quickly generate a + :class:`str` of potential donor atoms involved in hydrogen bonding. + This :class:`str` may then be modified before being used to set the + attribute :attr:`donors_sel`. + + + .. versionchanged:: 2.4.0 + Added ability to use atom types """ # We need to know `hydrogens_sel` before we can find donors - # Use a new variable `hydrogens_sel` so that we do not set `self.hydrogens_sel` if it is currently `None` + # Use a new variable `hydrogens_sel` so that we do not set + # `self.hydrogens_sel` if it is currently `None` if self.hydrogens_sel is None: hydrogens_sel = self.guess_hydrogens() else: hydrogens_sel = self.hydrogens_sel hydrogens_ag = self.u.select_atoms(hydrogens_sel) - ag = hydrogens_ag.residues.atoms.select_atoms( - "({donors_sel}) and around {d_h_cutoff} {hydrogens_sel}".format( - donors_sel=select, - d_h_cutoff=self.d_h_cutoff, - hydrogens_sel=hydrogens_sel + # We're using u._topology.bonds rather than u.bonds as it is a million + # times faster to access. This is because u.bonds also calculates + # properties of each bond (e.g bond length). See: + # https://github.com/MDAnalysis/mdanalysis/issues/2396#issuecomment-596251787 + if (hasattr(self.u._topology, 'bonds') + and len(self.u._topology.bonds.values) != 0): + donors_ag = find_hydrogen_donors(hydrogens_ag) + donors_ag = donors_ag.intersection(self.u.select_atoms(select)) + else: + donors_ag = hydrogens_ag.residues.atoms.select_atoms( + "({donors_sel}) and around {d_h_cutoff} {hydrogens_sel}".format( + donors_sel=select, + d_h_cutoff=self.d_h_cutoff, + hydrogens_sel=hydrogens_sel + ) ) - ) - donors_ag = ag[ag.charges < max_charge] - donors_list = np.unique( - [ - '(resname {} and name {})'.format(r, p) for r, p in zip(donors_ag.resnames, donors_ag.names) - ] - ) - return " or ".join(donors_list) + donors_ag = donors_ag[donors_ag.charges < max_charge] + + return self._group_categories(donors_ag) def guess_acceptors(self, select='all', max_charge=-0.5): """Guesses which atoms could be considered acceptors in the analysis. + Acceptor selections may be achieved with either a resname, atom + name combination, or when those are absent, atom types. + Parameters ---------- select: str (optional) - Selection string for atom group from which acceptors will be identified. + :ref:`Selection string ` for atom group + from which acceptors will be identified. (e.g., ``(resname X and + name O1)`` or ``type 2``) max_charge: float (optional) - Maximum allowed charge of an acceptor atom. + The charge of an acceptor atom must be less than this value. Returns ------- potential_acceptors: str - String containing the :attr:`resname` and :attr:`name` of all atoms that potentially capable of forming - hydrogen bonds. + String containing the :attr:`resname` and :attr:`name` of all atoms + that potentially capable of forming hydrogen bonds. Notes ----- - This function makes use of and atomic charges to identify which atoms could be considered acceptor atoms in the - hydrogen bond analysis. If an atom has an atomic charge less than :attr:`max_charge` then it is considered - capable of participating in hydrogen bonds. + Acceptor selections may be achieved with either a resname, atom + name combination, or when those are absent, atom types. - If :attr:`acceptors_sel` is `None`, this function is called to guess the selection. + This function makes use of and atomic charges to identify which atoms + could be considered acceptor atoms in the hydrogen bond analysis. If + an atom has an atomic charge less than :attr:`max_charge` then it is + considered capable of participating in hydrogen bonds. - Alternatively, this function may be used to quickly generate a :class:`str` of potential acceptor atoms involved - in hydrogen bonding. This :class:`str` may then be modified before being used to set the attribute - :attr:`acceptors_sel`. + If :attr:`acceptors_sel` is `None`, this function is called to guess + the selection. + + Alternatively, this function may be used to quickly generate a + :class:`str` of potential acceptor atoms involved in hydrogen bonding. + This :class:`str` may then be modified before being used to set the + attribute :attr:`acceptors_sel`. + + + .. versionchanged:: 2.4.0 + Added ability to use atom types """ ag = self.u.select_atoms(select) acceptors_ag = ag[ag.charges < max_charge] - acceptors_list = np.unique( - [ - '(resname {} and name {})'.format(r, p) for r, p in zip(acceptors_ag.resnames, acceptors_ag.names) - ] - ) - return " or ".join(acceptors_list) + return self._group_categories(acceptors_ag) + + @staticmethod + def _group_categories(group): + """ Find categories according to universe constraints + + Parameters + ---------- + group : AtomGroup + AtomGroups corresponding to either hydrogen bond acceptors, + donors, or hydrogen atoms that meet their respective charge + and mass constraints. + + Returns + ------- + select : str + String for each hydrogen bond acceptor/donor/hydrogen atom category. + + + .. versionadded:: 2.4.0 + + """ + + if hasattr(group, "resnames") and hasattr(group, "names"): + group_list = np.unique([ + '(resname {} and name {})'.format(r, + p) for r, p in zip(group.resnames, group.names) + ]) + else: + group_list = np.unique( + [ + 'type {}'.format(tp) for tp in group.types + ] + ) + + return " or ".join(group_list) def _get_dh_pairs(self): """Finds donor-hydrogen pairs. @@ -517,7 +612,8 @@ def _get_dh_pairs(self): Returns ------- donors, hydrogens: AtomGroup, AtomGroup - AtomGroups corresponding to all donors and all hydrogens. AtomGroups are ordered such that, if zipped, will + AtomGroups corresponding to all donors and all hydrogens. + AtomGroups are ordered such that, if zipped, will produce a list of donor-hydrogen pairs. """ @@ -622,6 +718,13 @@ def _single_frame(self): return_distances=True, ) + if np.size(d_a_indices) == 0: + warnings.warn( + "No hydrogen bonds were found given d-a cutoff of " + f"{self.d_a_cutoff} between Donor, {self.donors_sel}, and " + f"Acceptor, {self.acceptors_sel}." + ) + # Remove D-A pairs more than d_a_cutoff away from one another tmp_donors = self._donors[d_a_indices.T[0]] tmp_hydrogens = self._hydrogens[d_a_indices.T[0]] @@ -644,6 +747,13 @@ def _single_frame(self): ) hbond_indices = np.where(d_h_a_angles > self.d_h_a_angle)[0] + if np.size(hbond_indices) == 0: + warnings.warn( + "No hydrogen bonds were found given angle of " + f"{self.d_h_a_angle} between Donor, {self.donors_sel}, and " + f"Acceptor, {self.acceptors_sel}." + ) + # Retrieve atoms, distances and angles of hydrogen bonds hbond_donors = tmp_donors[hbond_indices] hbond_hydrogens = tmp_hydrogens[hbond_indices] @@ -788,20 +898,27 @@ def count_by_type(self): Returns ------- counts : numpy.ndarray - Each row of the array contains the donor resname, donor atom type, acceptor resname, acceptor atom type and - the total number of times the hydrogen bond was found. + Each row of the array contains the donor resname, donor atom type, + acceptor resname, acceptor atom type and the total number of times + the hydrogen bond was found. Note ---- - Unique hydrogen bonds are determined through a consideration of the resname and atom type of the donor and - acceptor atoms in a hydrogen bond. + Unique hydrogen bonds are determined through a consideration of the + resname and atom type of the donor and acceptor atoms in a hydrogen bond. """ d = self.u.atoms[self.results.hbonds[:, 1].astype(np.intp)] a = self.u.atoms[self.results.hbonds[:, 3].astype(np.intp)] - tmp_hbonds = np.array([d.resnames, d.types, a.resnames, a.types], - dtype=str).T + if hasattr(d, "resnames"): + d_res = d.resnames + a_res = a.resnames + else: + d_res = len(d.types) * ["None"] + a_res = len(a.types) * ["None"] + + tmp_hbonds = np.array([d_res, d.types, a_res, a.types], dtype=str).T hbond_type, type_counts = np.unique( tmp_hbonds, axis=0, return_counts=True) hbond_type_list = [] diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py index 2fd191073c3..c7295631862 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py +++ b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py @@ -23,6 +23,7 @@ import numpy as np import logging import pytest +import copy from numpy.testing import (assert_allclose, assert_equal, assert_array_almost_equal, assert_array_equal, @@ -188,6 +189,13 @@ def hydrogen_bonds(universe): h.run() return h + def test_count_by_type(self, hydrogen_bonds): + + # Only one type of hydrogen bond in this system + ref_count = 2 + + counts = hydrogen_bonds.count_by_type() + assert int(counts[0, 2]) == ref_count def test_no_bond_info_exception(self, universe): @@ -204,6 +212,23 @@ def test_no_bond_info_exception(self, universe): h = HydrogenBondAnalysis(universe, **kwargs) h._get_dh_pairs() + def test_no_bond_donor_sel(self, universe): + + kwargs = { + 'donors_sel': "type O", + 'hydrogens_sel': None, + 'acceptors_sel': None, + 'd_h_cutoff': 1.2, + 'd_a_cutoff': 3.0, + 'd_h_a_angle_cutoff': 120.0 + } + u = universe.copy() + n_residues = 2 + u.add_TopologyAttr('mass', [15.999, 1.008, 1.008] * n_residues) + u.add_TopologyAttr('charge', [-1.04, 0.52, 0.52] * n_residues) + h = HydrogenBondAnalysis(u, **kwargs) + donors = u.select_atoms(h.guess_donors()) + def test_first_hbond(self, hydrogen_bonds): assert len(hydrogen_bonds.results.hbonds) == 2 frame_no, donor_index, hydrogen_index, acceptor_index, da_dst, angle =\ @@ -246,11 +271,97 @@ def test_logging_step_not_1(self, universe, caplog): caplog.set_level(logging.WARNING) hbonds.lifetime(tau_max=2, intermittency=1) - warning = \ - "Autocorrelation: Hydrogen bonds were computed with step > 1." + warning = ("Autocorrelation: Hydrogen bonds were computed with " + "step > 1.") assert any(warning in rec.getMessage() for rec in caplog.records) +class TestHydrogenBondAnalysisNoRes(TestHydrogenBondAnalysisIdeal): + + kwargs = { + 'donors_sel': 'type O', +# 'hydrogens_sel': 'type H H', + 'acceptors_sel': 'type O', + 'd_h_cutoff': 1.2, + 'd_a_cutoff': 3.0, + 'd_h_a_angle_cutoff': 120.0 + } + + @staticmethod + @pytest.fixture(scope='class', autouse=True) + def universe(): + # create two water molecules + """ + H4 + \ + O1-H2 .... O2-H3 + / + H1 + """ + n_residues = 2 + u = MDAnalysis.Universe.empty( + n_atoms=n_residues*3, + n_residues=n_residues, + atom_resindex=np.repeat(range(n_residues), 3), + residue_segindex=[0] * n_residues, + trajectory=True, # necessary for adding coordinates + ) + + u.add_TopologyAttr('type', ['O', 'H', 'H'] * n_residues) + u.add_TopologyAttr('id', list(range(1, (n_residues * 3) + 1))) + u.add_TopologyAttr('mass', [15.999, 1.008, 1.008] * n_residues) + u.add_TopologyAttr('charge', [-1.04, 0.52, 0.52] * n_residues) + + # Atomic coordinates with a single hydrogen bond between O1-H2---O2 + pos1 = np.array([[0, 0, 0], # O1 + [-0.249, -0.968, 0], # H1 + [1, 0, 0], # H2 + [2.5, 0, 0], # O2 + [3., 0, 0], # H3 + [2.250, 0.968, 0] # H4 + ]) + + # Atomic coordinates with no hydrogen bonds + pos2 = np.array([[0, 0, 0], # O1 + [-0.249, -0.968, 0], # H1 + [1, 0, 0], # H2 + [4.5, 0, 0], # O2 + [5., 0, 0], # H3 + [4.250, 0.968, 0] # H4 + ]) + + coordinates = np.empty((3, # number of frames + u.atoms.n_atoms, + 3)) + coordinates[0] = pos1 + coordinates[1] = pos2 + coordinates[2] = pos1 + u.load_new(coordinates, order='fac') + + return u + + @staticmethod + @pytest.fixture(scope='class') + def hydrogen_bonds(universe): + h = HydrogenBondAnalysis( + universe, + **TestHydrogenBondAnalysisNoRes.kwargs + ) + h.run() + return h + + def test_no_hydrogen_bonds(self, universe): + tmp_kwargs = copy.deepcopy(self.kwargs) + tmp_kwargs["d_h_a_angle_cutoff"] = 50 + hbonds = HydrogenBondAnalysis(universe, **tmp_kwargs) + + with pytest.warns(UserWarning, + match=("No hydrogen bonds were found given angle " + "of 50 between Donor, type O, and Acceptor," + " type O.")): + hbonds.run(step=1) + + class TestHydrogenBondAnalysisBetween(object): kwargs = {