From c4326bd9026b97984b4800f9fb080c2a67f853fe Mon Sep 17 00:00:00 2001 From: Alejandro Martinez Leon Date: Sun, 14 Jan 2024 20:06:33 +0100 Subject: [PATCH] Better handling of sa_score and qed calculation --- docs/source/CHANGELOG.md | 6 ++---- src/moldrug/fitness.py | 14 ++++++-------- src/moldrug/utils.py | 5 +---- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/docs/source/CHANGELOG.md b/docs/source/CHANGELOG.md index bae899c..c89dcda 100644 --- a/docs/source/CHANGELOG.md +++ b/docs/source/CHANGELOG.md @@ -11,17 +11,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- Calculate SA_score with the molecule without explicit hydrogens. Based on: [https://github.com/rdkit/rdkit/discussions/7047](https://github.com/rdkit/rdkit/discussions/7047) +- Calculate SA_score with the molecule without explicit hydrogens for all built-in fitness functions. Based on: [https://github.com/rdkit/rdkit/discussions/7047](https://github.com/rdkit/rdkit/discussions/7047) ### Added - The `moldrug.constraintconf.clashes_present` function. -- Attribute `_mol_no_hs` to `moldrug.utils.Inidividual` (the molecule without explicit hydrogens) ### Changed -- Filtering of conformation that clashes with the protein is no longer with `bio.PDB` object. Now the coordinates are retrieved from the RDKit molecule object and the distance is calculated with NumPy. -- The `smiles` property of the `moldrug.utils.Inidividual` is obtained from `_mol_no_hs` and not from `mol` +- `bio.PDB` object is no longer used for filtering of conformations that clash with the protein. Now the coordinates are retrieved from the RDKit molecule object and the distance is calculated with NumPy. ### Removed diff --git a/src/moldrug/fitness.py b/src/moldrug/fitness.py index 503f0ae..5d71151 100644 --- a/src/moldrug/fitness.py +++ b/src/moldrug/fitness.py @@ -149,8 +149,6 @@ def __get_mol_cost(mol: Chem.rdchem.Mol, update_dict=desirability ) - mol_no_hs = Chem.RemoveHs(mol) - if not os.path.exists(wd): os.makedirs(wd) # Initializing result dict @@ -160,10 +158,10 @@ def __get_mol_cost(mol: Chem.rdchem.Mol, # multicriteria optimization,Optimization of Several Response Variables # Getting estimate of drug-likness - results['qed'] = QED.weights_mean(mol_no_hs) + results['qed'] = QED.weights_mean(Chem.RemoveHs(mol)) # Getting synthetic accessibility score - results['sa_score'] = sascorer.calculateScore(mol_no_hs) + results['sa_score'] = sascorer.calculateScore(Chem.RemoveHs(mol)) # Getting vina_score and update pdbqt # Making the ligand pdbqt @@ -582,10 +580,10 @@ def Cost( sascorer = utils.import_sascorer() # multicriteria optimization,Optimization of Several Response Variables # Getting estimate of drug-likness - Individual.qed = QED.weights_mean(Individual._mol_no_hs) + Individual.qed = QED.weights_mean(Chem.RemoveHs(Individual.mol)) # Getting synthetic accessibility score - Individual.sa_score = sascorer.calculateScore(Individual._mol_no_hs) + Individual.sa_score = sascorer.calculateScore(Chem.RemoveHs(Individual.mol)) # Getting vina_score and update pdbqt Individual.vina_score, Individual.pdbqt = _vinadock( @@ -899,10 +897,10 @@ def CostMultiReceptors( ad4map = [None] * len(receptor_pdbqt_path) sascorer = utils.import_sascorer() - Individual.qed = QED.weights_mean(Individual._mol_no_hs) + Individual.qed = QED.weights_mean(Chem.RemoveHs(Individual.mol)) # Getting synthetic accessibility score - Individual.sa_score = sascorer.calculateScore(Individual._mol_no_hs) + Individual.sa_score = sascorer.calculateScore(Chem.RemoveHs(Individual.mol)) # Getting Vina score pdbqt_list = [] diff --git a/src/moldrug/utils.py b/src/moldrug/utils.py index 3e5ea80..e43eca3 100644 --- a/src/moldrug/utils.py +++ b/src/moldrug/utils.py @@ -690,8 +690,6 @@ class Individual: ---------- mol: Chem.rdchem.Mol The molecule object - _mol_no_hs: Chem.rdchem.Mol - The molecule object without explicit hydrogens. idx: Union[int, str] The identifier pdbqt: str @@ -748,7 +746,6 @@ def __init__(self, mol: Chem.rdchem.Mol, idx: Union[int, str] = 0, pdbqt: str = for the attribute pdbqt on multiple runs. If None, the RNG will not be seeded, by default None """ self.mol = mol - self._mol_no_hs = Chem.RemoveHs(self.mol) if not pdbqt: try: @@ -763,7 +760,7 @@ def __init__(self, mol: Chem.rdchem.Mol, idx: Union[int, str] = 0, pdbqt: str = @property def smiles(self): - return Chem.MolToSmiles(self._mol_no_hs) + return Chem.MolToSmiles(Chem.RemoveHs(self.mol)) def __repr__(self): return f"{self.__class__.__name__}(idx = {self.idx}, smiles = {self.smiles}, cost = {self.cost})"