Skip to content

Commit

Permalink
Merge pull request #736 from padix-key/rdkit
Browse files Browse the repository at this point in the history
Add interface to RDKit
  • Loading branch information
padix-key authored Jan 23, 2025
2 parents fab175e + c178d7c commit a778010
Show file tree
Hide file tree
Showing 19 changed files with 941 additions and 36 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test_and_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ env:
--durations=50
--ignore={project}//tests//sequence//align//test_statistics.py
--ignore={project}//tests//application
--ignore={project}//tests//interface
--ignore={project}//tests//database
--ignore={project}//tests//test_doctest.py
--ignore={project}//tests//test_modname.py
Expand Down Expand Up @@ -223,6 +224,7 @@ jobs:
tests//test_modname.py
tests//database
tests//application
tests//interface
test-muscle5:
Expand Down
5 changes: 5 additions & 0 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
"sphinx.ext.doctest",
"sphinx.ext.mathjax",
"sphinx.ext.linkcode",
"sphinx.ext.intersphinx",
"sphinxcontrib.bibtex",
"sphinx_gallery.gen_gallery",
"sphinx_design",
Expand Down Expand Up @@ -111,6 +112,10 @@

notfound_urls_prefix = "/latest/"

intersphinx_mapping = {"rdkit": ("https://www.rdkit.org/docs/", None)}
intersphinx_timeout = 60


#### HTML ####

html_theme = "pydata_sphinx_theme"
Expand Down
39 changes: 5 additions & 34 deletions doc/switcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,60 +5,31 @@
__author__ = "Patrick Kunzmann"
__all__ = ["create_switcher_json"]

import functools
import json
import re
from dataclasses import dataclass
import requests
from packaging.version import Version
import biotite

RELEASE_REQUEST = "https://api.github.com/repos/biotite-dev/biotite/releases"
BIOTITE_URL = "https://www.biotite-python.org"
SEMVER_TAG_REGEX = r"^v?(\d+)\.(\d+)\.(\d+)"


@functools.total_ordering
@dataclass(frozen=True)
class Version:
major: ...
minor: ...
patch: ...

@staticmethod
def from_tag(tag):
match = re.match(SEMVER_TAG_REGEX, tag)
if match is None:
raise ValueError(f"Invalid tag: {tag}")
major, minor, patch = map(int, match.groups())
return Version(major, minor, patch)

def __str__(self):
return f"{self.major}.{self.minor}.{self.patch}"

def __ge__(self, other):
return (self.major, self.minor, self.patch) >= (
other.major,
other.minor,
other.patch,
)


def _get_previous_versions(min_tag, n_versions, current_version):
# The current version might already be released on GitHub
# -> request one more version than necessary
response = requests.get(RELEASE_REQUEST, params={"per_page": n_versions + 1})
release_data = json.loads(response.text)
versions = [Version.from_tag(release["tag_name"]) for release in release_data]
versions = [Version(release["tag_name"]) for release in release_data]
applicable_versions = [
version
for version in versions
if version >= Version.from_tag(min_tag) and version < current_version
if version >= Version(min_tag) and version < current_version
]
return applicable_versions[:n_versions]


def _get_current_version():
return Version(*biotite.__version_tuple__[:3])
return Version(biotite.__version__)


def create_switcher_json(file_path, min_tag, n_versions):
Expand All @@ -81,7 +52,7 @@ def create_switcher_json(file_path, min_tag, n_versions):
versions.append(current_version)
versions.sort()
for version in versions:
if version.patch != 0:
if version.micro != 0:
# Documentation is not uploaded for patch versions
continue
version_config.append(
Expand Down
3 changes: 2 additions & 1 deletion doc/tutorial/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,4 +103,5 @@ provided by the mentioned subpackages.
database/index
sequence/index
structure/index
application/index
application/index
interface/index
38 changes: 38 additions & 0 deletions doc/tutorial/interface/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
:sd_hide_title: true

.. include:: /tutorial/preamble.rst

##########################
``interface`` subpackage
##########################

Connecting the ecosystem - The ``interface`` subpackage
=======================================================

.. currentmodule:: biotite.interface

In the last section we learned that :mod:`biotite.application` encapsulates entire
external application runs with subsequent calls of ``start()`` and ``join()``.
In contrast, :mod:`biotite.interface` provides flexible interfaces to other Python
packages in the bioinformatics ecosystem.
Its purpose is to convert between native Biotite objects, such as :class:`.AtomArray`
and :class:`.Sequence`, and the corresponding objects in the respective interfaced
package.
Each interface is located in a separate subpackage with the same name as the
interfaced package.
For example, the interface to ``rdkit`` is placed in the subpackage
:mod:`biotite.interface.rdkit`.

.. note::

Like in :mod:`biotite.application`, the interfaced Python packages are not
dependencies of the ``biotite`` package.
Hence, they need to be installed separately.

The following chapters will give you an overview of the different interfaced packages.

.. toctree::
:maxdepth: 1
:hidden:

rdkit
66 changes: 66 additions & 0 deletions doc/tutorial/interface/rdkit.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
.. include:: /tutorial/preamble.rst

Interface to RDKit
==================

.. currentmodule:: biotite.interface.rdkit

`RDKit <https://www.rdkit.org/>`_ is a popular cheminformatics package
and thus can be used to supplement *Biotite* with a variety of functionalities focused
on small molecules, such as conversion from/to textual representations
(e.g. *SMILES* and *InChI*) and visualization as structural formulas.
Basically, the :mod:`biotite.interface.rdkit` subpackage provides only two functions:
:func:`to_mol()` to obtain a :class:`rdkit.Chem.rdchem.Mol` from an :class:`.AtomArray`
and :func:`from_mol()` for the reverse direction.
The rest happens within the realm of *RDKit*.
This tutorial will only give a brief glimpse of how the interface can be used.
For comprehensive documentation refer to the
`RDKit documentation <https://www.rdkit.org/docs/>`_.

First example: Depiction as structural formula
----------------------------------------------
*RDKit* allows rendering structural formulas using
`pillow <https://pillow.readthedocs.io/en/stable/>`_.
For a proper structural formula, we need to compute suitable 2D coordinates first.

.. jupyter-execute::

import biotite.interface.rdkit as rdkit_interface
import biotite.structure.info as struc
from rdkit.Chem.Draw import MolToImage
from rdkit.Chem.rdDepictor import Compute2DCoords
from rdkit.Chem.rdmolops import RemoveHs

penicillin = struc.residue("PNN")
mol = rdkit_interface.to_mol(penicillin)
# We do not want to include explicit hydrogen atoms in the structural formula
mol = RemoveHs(mol)
Compute2DCoords(mol)
image = MolToImage(mol, size=(600, 400))
display(image)

Second example: Creating a molecule from SMILES
-----------------------------------------------
Although the *Chemical Component Dictionary* accessible from
:mod:`biotite.structure.info` already provides all compounds found in the PDB,
there are a myriad of compounds out there that are not part of it.
One way to obtain them as :class:`.AtomArray` is passing a *SMILES* string to
*RDKit* to obtain the topology of the molecule and then computing the coordinates.

.. jupyter-execute::

from rdkit.Chem import MolFromSmiles
from rdkit.Chem.rdDistGeom import EmbedMolecule
from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule
from rdkit.Chem.rdmolops import AddHs

ERTAPENEM_SMILES = "C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)[C@@H](C)O"

mol = MolFromSmiles(ERTAPENEM_SMILES)
# RDKit uses implicit hydrogen atoms by default, but Biotite requires explicit ones
mol = AddHs(mol)
# Create a 3D conformer
conformer_id = EmbedMolecule(mol)
UFFOptimizeMolecule(mol)
ertapenem = rdkit_interface.from_mol(mol, conformer_id)
print(ertapenem)
4 changes: 4 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ dependencies:
- msgpack-python >=0.5.6
- networkx >=2.0
- numpy >=2.0
- packaging >=24.0
- requests >=2.12
# Testing
- pytest >=7.0
Expand Down Expand Up @@ -53,3 +54,6 @@ dependencies:
- pydot >=1.4
- scikit-learn >=0.18
- scipy >=1.8.0
- pip:
# Conda provides no recent version of RDKit (required for biotite.interface)
- rdkit
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ dependencies = [
"requests >= 2.12",
"msgpack >= 0.5.6",
"networkx >= 2.0",
"requests >= 2.12",
"packaging >= 24.0",
]
dynamic = ["version"]

Expand Down
19 changes: 19 additions & 0 deletions src/biotite/interface/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# This source code is part of the Biotite package and is distributed
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

"""
This subpackage provides interfaces to other Python packages in the bioinformatics
ecosystem.
Its purpose is to convert between native Biotite objects, such as :class:`.AtomArray`
and :class:`.Sequence`, and the corresponding objects in the respective interfaced
package.
In contrast to :mod:`biotite.application`, where an entire application run is handled
under the hood, :mod:`biotite.interface` only covers the object conversion, allowing
for more flexibility.
"""

__name__ = "biotite.interface"
__author__ = "Patrick Kunzmann"

from .warning import *
15 changes: 15 additions & 0 deletions src/biotite/interface/rdkit/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# This source code is part of the Biotite package and is distributed
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

"""
This subpackage provides an interface to the `RDKit <https://www.rdkit.org/>`_
cheminformatics package.
It allows conversion between :class:`.AtomArray` and :class:`rdkit.Chem.rdchem.Mol`
objects.
"""

__name__ = "biotite.interface.rdkit"
__author__ = "Patrick Kunzmann"

from .mol import *
Loading

0 comments on commit a778010

Please sign in to comment.