Skip to content

Commit

Permalink
SMILES naming fix and associated tests (#236)
Browse files Browse the repository at this point in the history
* Add tests and SMILES naming fix

* Cleanup
  • Loading branch information
wukevin authored Dec 10, 2024
1 parent 91adda8 commit b6e7fa1
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 1 deletion.
8 changes: 7 additions & 1 deletion chai_lab/data/sources/rdkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# See the LICENSE file for details.

import logging
from collections import defaultdict
from pathlib import Path

import antipickle
Expand Down Expand Up @@ -157,8 +158,13 @@ def generate(self, smiles: str) -> ConformerData:

AllChem.EmbedMultipleConfs(mol_with_hs, numConfs=1, params=params)
AllChem.RemoveHs(mol_with_hs)

element_counter: dict = defaultdict(int)
for atom in mol_with_hs.GetAtoms():
atom.SetProp("name", atom.GetSymbol())
elem = atom.GetSymbol()
element_counter[elem] += 1 # Start each counter at 1
atom.SetProp("name", elem + str(element_counter[elem]))

retval = self._load_ref_conformer_from_rdkit(mol_with_hs)
retval.atom_names = [a.upper() for a in retval.atom_names]
return retval
Expand Down
9 changes: 9 additions & 0 deletions tests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,12 @@ def test_fasta_parsing():
assert records[0].sequence == "RKDES"
assert records[1].header == "bar"
assert records[1].sequence == "KEDESRRR"


def test_smiles_parsing():
    """Check that a FASTA file with one SMILES entry yields exactly one record.

    The entry is written to a throwaway file inside a temporary directory
    and read back through ``read_fasta``.
    """
    fasta_text = ">smiles\nCc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C"
    with TemporaryDirectory() as scratch_dir:
        fasta_path = Path(scratch_dir) / "test.fasta"
        fasta_path.write_text(fasta_text)
        parsed = read_fasta(fasta_path)
    record_count = len(parsed)
    assert record_count == 1
24 changes: 24 additions & 0 deletions tests/test_rdkit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (c) 2024 Chai Discovery, Inc.
# Licensed under the Apache License, Version 2.0.
# See the LICENSE file for details.

from chai_lab.data.sources.rdkit import RefConformerGenerator


def test_ref_conformer_from_smiles():
    """Atom names of a conformer generated from SMILES must all be distinct."""
    ligand_smiles = "Cc1cc2nc3c(=O)[nH]c(=O)nc-3n(C[C@H](O)[C@H](O)[C@H](O)CO)c2cc1C"
    generated = RefConformerGenerator().generate(ligand_smiles)

    # Any duplicated name would make the set smaller than the atom count.
    distinct_names = set(generated.atom_names)
    assert len(distinct_names) == generated.num_atoms


def test_ref_conformer_glycan_ccd():
    """Look up the CCD code "MAN" (a sugar ring) and check its atom names.

    The lookup must return a conformer, and every atom name in it must be
    unique.
    """
    generator = RefConformerGenerator()
    man_conformer = generator.get("MAN")
    assert man_conformer is not None

    # Any duplicated name would make the set smaller than the atom count.
    distinct_names = set(man_conformer.atom_names)
    assert len(distinct_names) == man_conformer.num_atoms

0 comments on commit b6e7fa1

Please sign in to comment.