diff --git a/docs/releasehistory.rst b/docs/releasehistory.rst index 04cafbdb..1733c993 100644 --- a/docs/releasehistory.rst +++ b/docs/releasehistory.rst @@ -8,6 +8,22 @@ Releases follow the ``major.minor.micro`` scheme recommended by `PEP440 `_: New options for log handlers. + +Bugfixes +"""""""" +- `PR #167 `_: Bug fixes for rotamer libraries and affected tests +- `PR #168 `_: Compatibility changes for the ffld server shipped with latest Schrodinger version +- `PR #169 `_: Support new RDKit versions. + + 1.4.1 - Bug fixes for heteromolecules extraction --------------------------------------------------------- diff --git a/examples/alchemistry/ethylene_to_chlorofom.ipynb b/examples/alchemistry/ethylene_to_chlorofom.ipynb index 2c8e967c..71ed1ccb 100644 --- a/examples/alchemistry/ethylene_to_chlorofom.ipynb +++ b/examples/alchemistry/ethylene_to_chlorofom.ipynb @@ -467,7 +467,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -481,9 +481,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/peleffy/data/tests/alchemical_0.rot.assign b/peleffy/data/tests/alchemical_0.rot.assign index 194d5c6f..60646aea 100644 --- a/peleffy/data/tests/alchemical_0.rot.assign +++ b/peleffy/data/tests/alchemical_0.rot.assign @@ -1,4 +1,4 @@ rot assign res HYB & - sidelib FREE30 _N1_ _C4_ & + sidelib FREE30 _C4_ _N1_ & newgrp & - sidelib FREE30 _C5_ _C4_ & + sidelib FREE30 _C4_ _C5_ & diff --git a/peleffy/data/tests/alchemical_1.rot.assign b/peleffy/data/tests/alchemical_1.rot.assign index cefeefa6..bb4acaf7 100644 --- a/peleffy/data/tests/alchemical_1.rot.assign +++ b/peleffy/data/tests/alchemical_1.rot.assign @@ -1,5 +1,5 @@ rot assign res HYB & - sidelib FREE30 _C4_ _N1_ & + sidelib FREE30 _N1_ _C4_ & sidelib FREE30 _C4_ _C5_ & newgrp & - sidelib FREE30 _C6_ _N1_ & + sidelib FREE30 _N1_ _C6_ & diff --git a/peleffy/data/tests/alchemical_2.rot.assign b/peleffy/data/tests/alchemical_2.rot.assign index 94f73e54..f3ac4c6c 100644 --- a/peleffy/data/tests/alchemical_2.rot.assign +++ b/peleffy/data/tests/alchemical_2.rot.assign @@ -1,2 +1,2 @@ rot assign res HYB & - sidelib FREE30 _N1_ _C6_ & + sidelib FREE30 _C6_ _N1_ & diff --git a/peleffy/data/tests/alchemical_structure.pdb b/peleffy/data/tests/alchemical_structure.pdb index eabc19a2..acb1c13a 100644 --- a/peleffy/data/tests/alchemical_structure.pdb +++ b/peleffy/data/tests/alchemical_structure.pdb @@ -34,10 +34,10 @@ CONECT 3 12 13 14 CONECT 4 15 16 17 CONECT 5 6 18 19 CONECT 6 7 7 8 -CONECT 20 21 25 +CONECT 20 21 21 25 CONECT 21 22 26 -CONECT 22 23 27 +CONECT 22 23 23 27 CONECT 23 24 28 -CONECT 24 25 29 +CONECT 24 25 25 29 CONECT 25 30 END diff --git a/peleffy/forcefield/parameters.py b/peleffy/forcefield/parameters.py index 9026b850..1d92df7d 100644 --- a/peleffy/forcefield/parameters.py +++ b/peleffy/forcefield/parameters.py @@ -1200,6 +1200,12 @@ def from_ffld_output(molecule, ffld_output): params['charges'] = unit.Quantity(charges, unit.elementary_charge) + # Check params is not empty (it would mean that ffld_server failed) + if len(params['atom_types']) == 0: + raise ValueError(f"ffld server did not produce a valid output. " + + f"Instead, the following output was obtained: " + + f"{ffld_output}") + opls_parameters_wrapper = OPLS2005ParameterWrapper(params) OPLS2005ParameterWrapper._add_SGBNP_solvent_parameters( opls_parameters_wrapper) diff --git a/peleffy/template/impact.py b/peleffy/template/impact.py index 2eba70cd..f960aec6 100644 --- a/peleffy/template/impact.py +++ b/peleffy/template/impact.py @@ -532,6 +532,402 @@ def topology(self): """ return self._topology + @classmethod + def from_file(cls, impact_file): + """ + It loads an Impact template object from a file. + + Parameters + ---------- + impact_file : str + The path to the impact file + + Returns + ------- + molecule : a peleffy.topology.Molecule object + The resulting Molecule object + """ + molecule_tag, n_atoms, n_bonds, n_angles, n_propers_impropers, _ = \ + cls._retrive_template_information(impact_file) + + with open(impact_file) as file: + impact_template_content = file.read() + + resx_section = cls._get_section(impact_template_content, + molecule_tag, n_atoms) + nbon_section = cls._get_section(impact_template_content, + 'NBON', n_atoms) + bond_section = cls._get_section(impact_template_content, + 'BOND', n_bonds) + thet_section = cls._get_section(impact_template_content, + 'THET', n_angles) + phi_section, iphi_section = cls._get_phi_iphi_sections( + impact_template_content, n_propers_impropers) + + molecule = cls._build_molecule(resx_section, bond_section) + + return molecule + + @staticmethod + def _retrive_template_information(impact_file): + """ + Given an Impact template, it retrieves the basic template + information. + + Parameters + ---------- + impact_file : str + The path to the impact file + + Returns + ------- + molecule_tag : str + The tag of the molecule + n_atoms : int + The number of atoms + n_bonds : int + The number of bonds + n_angles : int + The number of angles + n_propers_impropers : int + The number of propers and impropers + n_interactions : int + The number of interactions + """ + with open(impact_file) as file: + for line in file: + line = line.strip() + + # Ignore lines starting by '*' + if line.startswith('*'): + continue + + # Parse template information + if len(line) < 39: + raise ValueError("Template information line in " + + "'resx' section has an invalid " + + "format") + + try: + molecule_tag = line[:3] + n_atoms = int(line[5:11]) + n_bonds = int(line[11:17]) + n_angles = int(line[17:23]) + n_propers_impropers = int(line[23:31]) + n_interactions = int(line[31:39]) + except TypeError: + raise ValueError("Template information line in " + + "'resx' section has an invalid " + + "format") + + break + + else: + raise ValueError("Template information line in " + + "'resx' section has an invalid " + + "format") + + return molecule_tag, n_atoms, n_bonds, n_angles, \ + n_propers_impropers, n_interactions + + @staticmethod + def _get_section(impact_template_content, section_name, n_lines): + """ + Given the content of an impact template, it retrieves a + specific section. + + Parameters + ---------- + impact_template_content : str + The content of the impact template + section_name : str + The name of the section + n_lines : int + The number of lines to retrieve from this section + + Returns + ------- + section : list[str] + List of lines that belong to the chosen section + """ + impact_template_lines = impact_template_content.split('\n') + + for line_num, line in enumerate(impact_template_lines, + start=1): + if line.startswith(section_name): + break + else: + raise ValueError(f"Supplied template has an invalid format. " + + f"'{section_name}' section could not be " + + f"found") + + section = impact_template_lines[line_num: line_num + n_lines] + + return section + + @staticmethod + def _get_phi_iphi_sections(impact_template_content, n_lines): + """ + Given the content of an impact template, it retrieves the phi + and iphi sections. + + Parameters + ---------- + impact_template_content : str + The content of the impact template + n_lines : int + The number of lines to retrieve from phi and iphi sections + + Returns + ------- + phi_section : str + List of lines that belong to the phi section + iphi_section : str + List of lines that belong to the iphi section + """ + impact_template_lines = impact_template_content.split('\n') + + phi_section = list() + iphi_section = list() + + inside_phi = False + inside_iphi = False + + for line_num, line in enumerate(impact_template_lines, + start=1): + if line.startswith('PHI'): + inside_iphi = False + inside_phi = True + continue + + if line.startswith('IPHI'): + inside_phi = False + inside_iphi = True + continue + + if line.startswith('END'): + break + + if inside_phi: + phi_section.append(line) + + if inside_iphi: + iphi_section.append(line) + + if len(phi_section) + len(iphi_section) == n_lines: + break + + else: + raise ValueError(f"Supplied template has an invalid format. " + + f"'PHI' and 'IPHI' sections could not be " + + f"found") + + if len(phi_section) + len(iphi_section) != n_lines: + raise ValueError(f"Supplied template has an invalid format. " + + f"'PHI' and 'IPHI' sections have a wrong " + + f"number of parameters") + + return phi_section, iphi_section + + @staticmethod + def _get_atomic_number_from_pdb_name(pdb_name): + """ + Given a PDB atom name, it inferres the atomic element and + returns its atomic number. + + Parameters + ---------- + pdb_name : str + The PDB atom name + + Returns + ------- + atomic_number : int + The atomic number corresponding to the inferred atomic element + """ + elements = ['H', 'He', 'Li', 'Be', 'B', 'C', 'N', + 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', + 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', + 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', + 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', + 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', + 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', + 'Sn', 'Sb', 'Te', 'I', 'Xe'] # without periods 6 and 7, and lanthanides and actinides + + non_digit_pdb_name = ''.join([c for c in pdb_name + if not c.isdigit() and + c != '_']) + non_digit_pdb_name = non_digit_pdb_name[0].upper() + \ + non_digit_pdb_name[1:].lower() + + if non_digit_pdb_name in elements: + atomic_number = elements.index(non_digit_pdb_name) + 1 + return atomic_number + + one_char_less_pdb_name = non_digit_pdb_name + + while len(one_char_less_pdb_name) > 1: + one_char_less_pdb_name = one_char_less_pdb_name[:-1] + + if one_char_less_pdb_name in elements: + atomic_number = elements.index(one_char_less_pdb_name) + 1 + return atomic_number + + raise ValueError(f"Atomic number cannot be inferred " + f"from {pdb_name}") + + @staticmethod + def _from_zmatrix_to_cartesians(zmatrix): + """ + Given a zmatrix it finds the corresponding cartesian coordinates. + + Inspired by https://github.com/jevandezande/zmatrix + """ + import numpy as np + from peleffy.utils import rotation_matrix + + n_atoms = len(zmatrix) + coords = np.zeros((n_atoms, 3)) + + for atom_id, zmatrix_row in enumerate(zmatrix): + if atom_id == 0: + q_coords = np.array((1.0, 0.0, 0.0)) + r_coords = np.array((0.0, 0.0, 1.0)) + s_coords = np.array((0.0, 0.0, 0.0)) + + elif atom_id == 1: + q_coords = coords[atom_id - 1] + r_coords = np.array((1.0, 0.0, 0.0)) + s_coords = np.array((0.0, 0.0, 1.0)) + + elif atom_id == 2: + q_coords = coords[atom_id - 1] + r_coords = coords[atom_id - 2] + s_coords = np.array((1.0, 0.0, 0.0)) + + else: + q_coords = coords[atom_id - 1] + r_coords = coords[atom_id - 2] + s_coords = coords[atom_id - 3] + + # Take data from zmatrix row + distance, angle, dihedral = zmatrix_row + + # Vector pointing from q to r + a_vector = r_coords - q_coords + # Vector pointing from s to r + b_vector = r_coords - s_coords + + # Vector of length distance pointing from q to r + d_vector = distance * a_vector / np.sqrt(np.dot(a_vector, a_vector)) + + # Vector normal to plane defined by q, r, s + n_vector = np.cross(a_vector, b_vector) + + # Rotate d vector by the angle around the normal to the + # plane defined by q, r, s + d_vector = np.dot(rotation_matrix(n_vector, angle), d_vector) + + # Rotate d around a by the dihedral + d_vector = np.dot(rotation_matrix(a_vector, dihedral), d_vector) + + # Add d to the position of q to get the new coordinates of the atom + p_coords = q_coords + d_vector + + # Append d coordinates to cartesian coordinates list + coords[atom_id] = p_coords + + return coords + + @staticmethod + def _build_molecule(resx_section, bond_section): + """ + Given an Impact template, it generates the corresponding + molecule object. + + Parameters + ---------- + resx_section : list[str] + The 'resx' section + bond_section : list[str] + The 'bond' section + + Returns + ------- + molecule : a peleffy.topology.Molecule object + The resulting Molecule object + """ + # TODO transfer to RDKit toolkit + from rdkit.Chem import AllChem as Chem + + import numpy as np + + molecule = Chem.Mol() + editable_molecule = Chem.EditableMol(molecule) + + n_atoms = len(resx_section) + zmatrix = np.zeros((n_atoms, 3)) + + for atom_id, atom_data in enumerate(resx_section): + try: + _ = int(atom_data[0:5]) # index + _ = int(atom_data[6:11]) # parent index + _ = atom_data[12] # core + _ = atom_data[15:19] # opls_type + pdb_name = atom_data[22:26] + _ = int(atom_data[27:32]) # unknown + zmatrix_x = float(atom_data[33:44]) + zmatrix_y = float(atom_data[45:56]) + zmatrix_z = float(atom_data[58:69]) + except TypeError: + raise ValueError(f"Molecule could not " + + f"be built. Unexpected format " + + f"found in line: {atom_data}") + + zmatrix[atom_id] = (zmatrix_x, zmatrix_y, zmatrix_z) + + atomic_number = Impact._get_atomic_number_from_pdb_name(pdb_name) + + atom = Chem.Atom(atomic_number) + atom.SetNoImplicit(True) + editable_molecule.AddAtom(atom) + + for bond_data in bond_section: + try: + idx1 = int(bond_data[1:6]) # atom 1 index + idx2 = int(bond_data[7:12]) # atom 2 index + _ = float(bond_data[13:22]) # spring constant + _ = float(bond_data[23:29]) # equilibrium distance + except TypeError: + raise ValueError(f"Molecule could not " + + f"be built. Unexpected format " + + f"found in line: {bond_data}") + + editable_molecule.AddBond(idx1 - 1, idx2 - 1) + + molecule = editable_molecule.GetMol() + + cartesians = Impact._from_zmatrix_to_cartesians(zmatrix) + + # Uncomment when transformation from zmatrix to cartesians is ready + """ + from rdkit.Geometry import Point3D + + conformer = Chem.Conformer() + for atom_id in range(molecule.GetNumAtoms()): + x, y, z = cartesians[atom_id] + conformer.SetAtomPosition(atom_id, Point3D(x, y, z)) + + molecule.RemoveAllConformers() + molecule.AddConformer(conformer) + """ + + # While cartesians from zmatrix cannot be obtained, infer them with rdkit + # Generate 3D coordinates + Chem.EmbedMolecule(molecule) + + return molecule + class WritableWrapper(object): """ diff --git a/peleffy/tests/test_utils.py b/peleffy/tests/test_utils.py index ce81fe7a..978a9f43 100644 --- a/peleffy/tests/test_utils.py +++ b/peleffy/tests/test_utils.py @@ -191,6 +191,61 @@ def test_get_logger_level(self): level = logger.get_level() assert level == 'CRITICAL', 'Unexpected Logger level' + def test_stdout_handler(self): + """ + It tests the stdout handler of peleffy's logger. + """ + import io + import logging + from contextlib import redirect_stdout + from peleffy.utils import Logger + + # Force a hard reset of logging library and the logger it manages + from importlib import reload + logging.shutdown() + reload(logging) + + logger = Logger() + + with io.StringIO() as buf: + with redirect_stdout(buf): + logger.set_stdout_handler() + + logger.debug('This message must not be printed') + logger.info('This message must be printed') + + output = buf.getvalue() + + assert output.strip() == 'This message must be printed', \ + 'Unexpected logger message' + + def test_file_handler(self): + """ + It tests the file handler of peleffy's logger. + """ + import logging + from peleffy.utils import Logger + from peleffy.utils import temporary_cd + + # Force a hard reset of logging library and the logger it manages + from importlib import reload + logging.shutdown() + reload(logging) + + logger = Logger() + + with tempfile.TemporaryDirectory() as tmpdir: + with temporary_cd(tmpdir): + logger.set_file_handler('log.txt') + + logger.debug('This message must not be printed') + logger.info('This message must be printed') + + with open('log.txt') as f: + output = '\n'.join(f.readlines()) + + assert output.strip() == 'This message must be printed', \ + 'Unexpected logger message' class TestOutputPathHandler(object): """ diff --git a/peleffy/topology/rotamer.py b/peleffy/topology/rotamer.py index 82589215..ba65034c 100644 --- a/peleffy/topology/rotamer.py +++ b/peleffy/topology/rotamer.py @@ -548,9 +548,13 @@ def _get_sorted_bonds_per_group(self, core_atom_per_group, rot_bonds_per_group): sorting_dict = dict() for bond in rot_bonds: - min_d = min([distances[core_atom][bond[0]], - distances[core_atom][bond[1]]]) - sorting_dict[bond] = min_d + if (distances[core_atom][bond[0]] < + distances[core_atom][bond[1]]): + sorting_dict[(bond[0], bond[1])] = \ + distances[core_atom][bond[0]] + else: + sorting_dict[(bond[1], bond[0])] = \ + distances[core_atom][bond[1]] sorted_rot_bonds_per_group.append( [i[0] for i in diff --git a/peleffy/utils/toolkits.py b/peleffy/utils/toolkits.py index f3cbf386..23f3aee3 100644 --- a/peleffy/utils/toolkits.py +++ b/peleffy/utils/toolkits.py @@ -957,6 +957,10 @@ def alchemical_combination(self, mol1, mol2, atom_mapping, for atom in mol_combo.GetAtoms(): atom.SetIsAromatic(False) + # Sanitize it + Chem.SanitizeMol(mol_combo, + Chem.SANITIZE_ALL ^ Chem.SANITIZE_KEKULIZE ^ Chem.SANITIZE_SETAROMATICITY) + return mol_combo @@ -1323,14 +1327,20 @@ def run_ffld_server(self, molecule): with tempfile.TemporaryDirectory() as tmpdir: with temporary_cd(tmpdir): - self._rdkit_toolkit_wrapper.to_pdb_file( - molecule, tmpdir + '/molecule.pdb') - - subprocess.check_output([ffld_server_exec, - "-ipdb", "molecule.pdb", - "-version", "14", - "-print_parameters", - "-out_file", "parameters.txt"]) + self._rdkit_toolkit_wrapper.to_sdf_file( + molecule, tmpdir + '/molecule.sdf') + + errors = subprocess.check_output([ffld_server_exec, + "-isdf", "molecule.sdf", + "-version", "14", + "-print_parameters", + "-out_file", + "parameters.txt"]) + + if errors: + raise SystemError('FFLD_SERVER has failed with the ' + + 'following error message: \n ' + + '{}'.format(errors.decode("utf-8"))) with open('parameters.txt') as parameters_file: return parameters_file.read() diff --git a/peleffy/utils/utils.py b/peleffy/utils/utils.py index 3b194b6d..89a015b0 100644 --- a/peleffy/utils/utils.py +++ b/peleffy/utils/utils.py @@ -13,6 +13,7 @@ "convert_all_quantities_to_string", "string_to_quantity", "parse_charges_from_mae", + "rotation_matrix", "Logger" ] @@ -313,7 +314,7 @@ def parse_charges_from_mae(path, parameters): import re # Read external file containing the partial charges information - params_info, params_list = ([] for i in range(2)) + params_info, params_list = ([], []) copy = False with open(path, 'r') as file: for line in file.readlines(): @@ -332,15 +333,21 @@ def parse_charges_from_mae(path, parameters): params_list = [l.replace('"', '').split() for l in params_list[1:-1]] # Get the index of the atom name and charge from the parameter's list - idx_charges, idx_atom_name = (None for i in range(2)) + idx_charges, idx_atom_name = (None, None) for idx, line in enumerate(params_info): + # Get PDB atom name if 's_m_pdb_atom_name' in line: idx_atom_name = idx + + # Get precomputed charges if 'r_m_charge1' in line: idx_charges = idx - if idx_charges is None or idx_atom_name is None: - raise ValueError( - " {} does not contain charges information. ".format(path)) + + if idx_charges is None: + raise ValueError(f"{path} does not contain charges information") + + if idx_atom_name is None: + raise ValueError(f"{path} does not contain PDB atom names information") # Creates a charges by atom name dictionary d = {} @@ -363,6 +370,38 @@ def parse_charges_from_mae(path, parameters): return parameters +def rotation_matrix(axis, angle): + """ + It applies the Euler-Rodrigues formula to generate the rotation + matrix. + + Parameters + ---------- + axis : Union[List, Tuple, np.array] + The 3D array that defines the rotation axis + angle : float + The rotation angle, in radians + + Returns + ------- + rotation_matrix : np.array + The resulting rotation matrix + """ + import numpy as np + + # Normalize axis + axis = axis / np.sqrt(np.dot(axis, axis)) + + a = np.cos(angle / 2) + b, c, d = -axis * np.sin(angle / 2) + return np.array([[a * a + b * b - c * c - d * d, + 2 * (b * c - a * d), 2 * (b * d + a * c)], + [2 * (b * c + a * d), + a * a + c * c - b * b - d * d, 2 * (c * d - a * b)], + [2 * (b * d - a * c), + 2 * (c * d + a * b), a * a + d * d - b * b - c * c]]) + + class Logger(object): """ It contains all the required methods to handle logging messages. @@ -378,16 +417,74 @@ def __init__(self): if 'peleffy_log' not in logging.root.manager.loggerDict: self._logger = logging.getLogger('peleffy_log') self._logger.setLevel(self.DEFAULT_LEVEL) + + # Add stream handler + self.set_stdout_handler() else: self._logger = logging.getLogger('peleffy_log') - # If no handler is found add stream handler - if not len(self._logger.handlers): - ch = logging.StreamHandler() - ch.setLevel(self.DEFAULT_LEVEL) - formatter = logging.Formatter('%(message)s') - ch.setFormatter(formatter) - self._logger.addHandler(ch) + def set_stdout_handler(self): + """ + It unsets current Logger's handlers and sets the stream handler + that points to the standard output. + """ + import sys + import logging + + # Unset current file handlers + self._unset_handlers() + + # Initialize stream handler + stream_handler = logging.StreamHandler(sys.stdout) + + # Assign logger's level + level = self.get_level() + stream_handler.setLevel(level) + + # Set up logger's format + stream_handler.setFormatter(logging.Formatter('%(message)s')) + + # Add handler + self._logger.addHandler(stream_handler) + + def set_file_handler(self, log_file): + """ + It unsets current Logger's handlers and sets the file handler + that points to the supplied path. + + Parameters + ---------- + log_file : str + Path where to save logger's output + """ + import os + import logging + + # Unset current file handlers + self._unset_handlers() + + # Initialize file handler + if not os.path.isfile(log_file): + file_handler = logging.FileHandler(log_file, mode="w+") + else: + file_handler = logging.FileHandler(log_file, mode="a") + + # Assign logger's level + level = self.get_level() + file_handler.setLevel(level) + + # Set up logger's format + file_handler.setFormatter(logging.Formatter('%(message)s')) + + # Add handler + self._logger.addHandler(file_handler) + + def _unset_handlers(self): + """ + It removes any handler of this Logger. + """ + for handler in self._logger.handlers: + self._logger.removeHandler(handler) def set_level(self, level): """ @@ -400,6 +497,7 @@ def set_level(self, level): CRITICAL] """ import logging + logging.basicConfig() if level.upper() == 'DEBUG': logging_level = logging.DEBUG