From 6fed5834e17bc9717444e8eddf2a196c04c93d49 Mon Sep 17 00:00:00 2001 From: ale94mleon Date: Wed, 23 Nov 2022 15:42:20 +0100 Subject: [PATCH] add to the main fitness module the option ad4map --- Contrib/Zn/ZnFitness.py | 402 ---------------------------------- Contrib/adme_models/README.md | 25 ++- docs/source/CHANGELOG.md | 6 +- moldrug/fitness.py | 74 +++++-- moldrug/utils.py | 14 +- tests/test_moldrug.py | 2 +- 6 files changed, 96 insertions(+), 427 deletions(-) delete mode 100644 Contrib/Zn/ZnFitness.py diff --git a/Contrib/Zn/ZnFitness.py b/Contrib/Zn/ZnFitness.py deleted file mode 100644 index 27c11dc..0000000 --- a/Contrib/Zn/ZnFitness.py +++ /dev/null @@ -1,402 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -from copy import deepcopy -from moldrug import utils, constraintconf -from rdkit import Chem -from rdkit.Chem import QED -import os -import numpy as np -from typing import Dict, List -import warnings -from meeko import MoleculePreparation - - -def __vinadock( - Individual:utils.Individual, - wd:str = '.vina_jobs', - vina_executable:str = 'vina', - receptor_pdbqt_path:str = None, - boxcenter:List[float] = None, - boxsize:List[float] = None, - exhaustiveness:int = 8, - ad4map:str = None, - ncores:int = 1, - num_modes:int = 1, - constraint:bool = False, - constraint_type = 'score_only', # score_only, local_only - constraint_ref:Chem.rdchem.Mol = None, - constraint_receptor_pdb_path:str = None, - constraint_num_conf:int = 100, - constraint_minimum_conf_rms:int = 0.01): - """ - This function is intend to be used to perform docking for all the cost functions implemented here. - If ad4map is set, the last version of vina (`releases `_) - must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow - `this tutorial _` - - Parameters - ---------- - Individual : utils.Individual - An Individual with the pdbqt attribute (only needed for non constraint docking). - wd : str, optional - The working directory to execute the docking jobs, by default '.vina_jobs' - vina_executable : str, optional - This is the name of the vina executable, could be a path to the binary object, by default 'vina' - receptor_pdbqt_path : str, optional - Where the receptor pdbqt file is located, by default None - boxcenter : List[float], optional - A list of three floats with the definition of the center of the box in angstrom for docking (x, y, z), by default None - boxsize : List[float], optional - A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None - exhaustiveness : int, optional - Parameter of vina that controls the accuracy of the docking searching, by default 8 - ad4map : str, optional - The path where the AD4 maps are, by default None - ncores : int, optional - Number of cpus to use in Vina, by default 1 - num_modes : int, optional - How many modes should Vina export, by default 1 - constraint : bool, optional - Controls if constraint docking will be perform, by default False - constraint_type : str, optional - This is the type of constraint docking. Could be local_only - (vina will perform local optimization and score the resulted pose) - or score_only (in this case the provided pose by the internal conformer - generator will only be scored), by default 'score_only' - constraint_ref : Chem.rdchem.Mol, optional - The part of the molecule that we would like to constraint, by default None - constraint_receptor_pdb_path : str, optional - The same as constraint_receptor_pdbqt_path but in pdb format, by default None - constraint_num_conf : int, optional - Maximum number of conformer to be generated internally by MolDrug , by default 100 - constraint_minimum_conf_rms : int, optional - RMS to filter duplicate conformers, by default 0.01 - - Returns - ------- - tuple - A tuple with two elements: - (vina score, pdbqt string) - - Raises - ------ - Exception - Inappropriate constraint_type. must be local_only or score_only. Only will be checked if constraint is set to True. - """ - - # Creating the working directory if needed - if not os.path.exists(wd): - os.makedirs(wd) - # Creating the command line for vina - cmd_vina_str = f"{vina_executable}"\ - f" --cpu {ncores} --exhaustiveness {exhaustiveness} --num_modes {num_modes}" - - if ad4map: - cmd_vina_str += f" --scoring ad4 --maps {os.path.abspath(ad4map)}" - else: - cmd_vina_str += f" --receptor {receptor_pdbqt_path}"\ - f" --center_x {boxcenter[0]} --center_y {boxcenter[1]} --center_z {boxcenter[2]}"\ - f" --size_x {boxsize[0]} --size_y {boxsize[1]} --size_z {boxsize[2]}"\ - - if constraint: - # Check for the correct type of docking - if constraint_type in ['score_only', 'local_only']: - cmd_vina_str += f" --{constraint_type}" - else: - raise Exception("constraint_type only admit two possible values: score_only, local_only.") - - # Generate constrained conformer - try: - out_mol = constraintconf.generate_conformers( - mol = Chem.RemoveHs(Individual.mol), - ref_mol = Chem.RemoveHs(constraint_ref), - num_conf = constraint_num_conf, - #ref_smi=Chem.MolToSmiles(constraint_ref), - minimum_conf_rms=constraint_minimum_conf_rms) - out_mol = Chem.AddHs(out_mol) - except Exception: - vina_score_pdbqt = (np.inf, "NonValidConformer") - return vina_score_pdbqt - - # Remove conformers that clash with the protein - clash_filter = constraintconf.ProteinLigandClashFilter(protein_pdbpath = constraint_receptor_pdb_path, distance=1.5) - clashIds = [conf.GetId() for conf in out_mol.GetConformers() if clash_filter(conf)] - _ = [out_mol.RemoveConformer(clashId) for clashId in clashIds] - - # Check first if some valid conformer exist - if len(out_mol.GetConformers()): - vina_score_pdbqt = (np.inf, None) - for conf in out_mol.GetConformers(): - temp_mol = deepcopy(out_mol) - temp_mol.RemoveAllConformers() - temp_mol.AddConformer(out_mol.GetConformer(conf.GetId()), assignId=True) - - preparator = MoleculePreparation() - preparator.prepare(temp_mol) - preparator.write_pdbqt_file(os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}.pdbqt')) - - # Make a copy to the vina command string and add the out (is needed) and ligand options - cmd_vina_str_tmp = cmd_vina_str[:] - cmd_vina_str_tmp += f" --ligand {os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}.pdbqt')}" - if constraint_type == 'local_only': - cmd_vina_str_tmp += f" --out {os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}_out.pdbqt')}" - - try: - cmd_vina_result = utils.run(cmd_vina_str_tmp) - except Exception as e: - if os.path.isfile(receptor_pdbqt_path): - with open(receptor_pdbqt_path, 'r') as f: - receptor_str = f.read() - else: - receptor_str = None - - error = { - 'Exception': e, - 'Individual': Individual, - f'used_mol_conf_{conf.GetId()}': temp_mol, - f'used_ligand_pdbqt_conf_{conf.GetId()}': preparator.write_pdbqt_string(), - 'receptor_str': receptor_str, - 'boxcenter': boxcenter, - 'boxsize': boxsize, - } - utils.compressed_pickle(f'{Individual.idx}_conf_{conf.GetId()}_error', error) - warnings.warn(f"\nVina failed! Check: {Individual.idx}_conf_{conf.GetId()}_error.pbz2 file.\n") - vina_score_pdbqt = (np.inf, preparator.write_pdbqt_string()) - return vina_score_pdbqt - - vina_score = np.inf - for line in cmd_vina_result.stdout.split('\n'): - # Check over different vina versions - if line.startswith('Affinity'): - vina_score = float(line.split()[1]) - break - elif 'Estimated Free Energy of Binding' in line: - vina_score = float(line.split(':')[1].split()[0]) - break - if vina_score < vina_score_pdbqt[0]: - if constraint_type == 'local_only': - if os.path.isfile(os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}_out.pdbqt')): - with open(os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}_out.pdbqt'), 'r') as f: - pdbqt = f.read() - else: - pdbqt = "NonExistedFileToRead" - vina_score_pdbqt = (vina_score, pdbqt) - else: - vina_score_pdbqt = (vina_score, preparator.write_pdbqt_string()) - else: - vina_score_pdbqt = (np.inf, "NonValidConformer") - # "Normal" docking - else: - cmd_vina_str += f" --ligand {os.path.join(wd, f'{Individual.idx}.pdbqt')} --out {os.path.join(wd, f'{Individual.idx}_out.pdbqt')}" - with open(os.path.join(wd, f'{Individual.idx}.pdbqt'), 'w') as lig_pdbqt: - lig_pdbqt.write(Individual.pdbqt) - try: - utils.run(cmd_vina_str) - except Exception as e: - if os.path.isfile(receptor_pdbqt_path): - with open(receptor_pdbqt_path, 'r') as f: - receptor_str = f.read() - else: - receptor_str = None - - error = { - 'Exception': e, - 'Individual': Individual, - 'receptor_str': receptor_str, - 'boxcenter': boxcenter, - 'boxsize': boxsize, - } - utils.compressed_pickle(f'{Individual.idx}_error', error) - warnings.warn(f"\nVina failed! Check: {Individual.idx}_error.pbz2 file.\n") - - vina_score_pdbqt = (np.inf, 'VinaFailed') - return vina_score_pdbqt - - # Getting the information - best_energy = utils.VINA_OUT(os.path.join(wd, f'{Individual.idx}_out.pdbqt')).BestEnergy() - vina_score_pdbqt = (best_energy.freeEnergy, ''.join(best_energy.chunk)) - return vina_score_pdbqt - -def Cost( - Individual:utils.Individual, - wd:str = '.vina_jobs', - vina_executable:str = 'vina', - receptor_pdbqt_path:str = None, - boxcenter:List[float] = None, - boxsize:List[float] = None, - exhaustiveness:int = 8, - ad4map:str = None, - ncores:int = 1, - num_modes:int = 1, - constraint:bool = False, - constraint_type = 'score_only', # score_only, local_only - constraint_ref:Chem.rdchem.Mol = None, - constraint_receptor_pdb_path:str = None, - constraint_num_conf:int = 100, - constraint_minimum_conf_rms:int = 0.01, - desirability:Dict = None, - ): - """ - This is the main Cost function of the module. It use the concept of desirability functions. The response variables are: - - #. `Vina score. `_ - #. `Quantitative Estimation of Drug-likeness (QED). `_ - #. `Synthetic accessibility score. `_ - - If ad4map is set, the last version of vina (`releases `_) - must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow - `this tutorial _` - - Parameters - ---------- - Individual : utils.Individual - A Individual with the pdbqt attribute - wd : str, optional - The working directory to execute the docking jobs, by default '.vina_jobs' - vina_executable : str, optional - This is the name of the vina executable, could be a path to the binary object, by default 'vina' - receptor_pdbqt_path : str, optional - Where the receptor pdbqt file is located, by default None - boxcenter : list[float], optional - A list of three floats with the definition of the center of the box in angstrom for docking (x, y, z), by default None - boxsize : list[float], optional - A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None - exhaustiveness : int, optional - Parameter of vina that controls the accuracy of the docking searching, by default 8 - ad4map : str, optional - The path where the AD4 maps are, by default None - ncores : int, optional - Number of cpus to use in Vina, by default 1 - num_modes : int, optional - How many modes should Vina export, by default 1 - constraint : bool, optional - Controls if constraint docking will be perform, by default False - constraint_type : str, optional - This is the type of constraint docking. - Could be local_only (vina will perform local optimization and score the resulted pose) - or score_only (in this case the provided pose by the internal conformer generator will only be scored), - by default 'score_only' - constraint_ref : Chem.rdchem.Mol, optional - The part of the molecule that we would like to constraint, by default None - constraint_receptor_pdb_path : str, optional - The same as constraint_receptor_pdbqt_path but in pdb format, by default None - constraint_num_conf : int, optional - Maximum number of conformer to be generated internally by MolDrug , by default 100 - constraint_minimum_conf_rms : int, optional - RMS to filter duplicate conformers, by default 0.01 - desirability : dict, optional - The definition of the desirability to use for each used variable = [qed, sa_score, vina_score]. - Each variable only will accept the keys [w, and the name of the desirability function of :meth:`moldrug.utils.DerringerSuichDesirability`], - by default None which means that it will be used: - desirability = { - 'qed': {'w': 1,'LargerTheBest': {'LowerLimit': 0.1,'Target': 0.75, 'r': 1} - }, - 'sa_score': {'w': 1,'SmallerTheBest': {'Target': 3,'UpperLimit': 7,'r': 1} - }, - 'vina_score': {'w': 1,'SmallerTheBest': {'Target': -12,'UpperLimit': -6,'r': 1} - } - } - Returns - ------- - utils.Individual - A new instance of the original Individual with the the new attributes: - pdbqt, qed, vina_score, sa_score and cost. - cost attribute will be a number between 0 and 1, been 0 the optimal value. - Example - ------- - .. ipython:: python - - from moldrug import utils, fitness - from rdkit import Chem - import tempfile, os - from moldrug.data import ligands, boxes, receptor_pdbqt - tmp_path = tempfile.TemporaryDirectory() - ligand_mol = Chem.MolFromSmiles(ligands.r_x0161) - I = utils.Individual(ligand_mol) - receptor_path = os.path.join(tmp_path.name,'receptor.pdbqt') - with open(receptor_path, 'w') as r: r.write(receptor_pdbqt.r_x0161) - box = boxes.r_x0161['A'] - # Using the default desirability - NewI = fitness.Cost( - Individual = I,wd = tmp_path.name, - receptor_pdbqt_path = receptor_path,boxcenter = box['boxcenter'], - boxsize = box['boxsize'],exhaustiveness = 4,ncores = 4) - print(NewI.cost, NewI.vina_score, NewI.qed, NewI.sa_score) - """ - if not desirability: - desirability = { - 'qed': { - 'w': 1, - 'LargerTheBest': { - 'LowerLimit': 0.1, - 'Target': 0.75, - 'r': 1 - } - }, - 'sa_score': { - 'w': 1, - 'SmallerTheBest': { - 'Target': 3, - 'UpperLimit': 7, - 'r': 1 - } - }, - 'vina_score': { - 'w': 1, - 'SmallerTheBest': { - 'Target': -12, - 'UpperLimit': -6, - 'r': 1 - } - } - } - - sascorer = utils.import_sascorer() - # multicriteria optimization,Optimization of Several Response Variables - # Getting estimate of drug-likness - Individual.qed = QED.weights_mean(Individual.mol) - - # Getting synthetic accessibility score - Individual.sa_score = sascorer.calculateScore(Individual.mol) - - # Getting vina_score and update pdbqt - Individual.vina_score, Individual.pdbqt = __vinadock( - Individual = Individual, - wd = wd, - vina_executable = vina_executable, - receptor_pdbqt_path = receptor_pdbqt_path, - boxcenter = boxcenter, - boxsize = boxsize, - exhaustiveness = exhaustiveness, - ad4map = ad4map, - ncores = ncores, - num_modes = num_modes, - constraint = constraint, - constraint_type = constraint_type, - constraint_ref = constraint_ref, - constraint_receptor_pdb_path = constraint_receptor_pdb_path, - constraint_num_conf = constraint_num_conf, - constraint_minimum_conf_rms = constraint_minimum_conf_rms, - ) - # Adding the cost using all the information of qed, sas and vina_cost - # Construct the desirability - # Quantitative estimation of drug-likeness (ranges from 0 to 1). We could use just the value perse, but using LargerTheBest we are more permissible. - - base = 1 - exponent = 0 - for variable in desirability: - for key in desirability[variable]: - if key == 'w': - w = desirability[variable][key] - elif key in utils.DerringerSuichDesirability(): - d = utils.DerringerSuichDesirability()[key](getattr(Individual, variable), **desirability[variable][key]) - else: - raise RuntimeError(f"Inside the desirability dictionary you provided for the variable = {variable} "\ - f"a non implemented key = {key}. Only are possible: 'w' (standing for weight) and any "\ - f"possible Derringer-Suich desirability function: {utils.DerringerSuichDesirability().keys()}") - base *= d**w - exponent += w - - # We are using a geometric mean. And because we are minimizing we have to return - Individual.cost = 1 - base**(1/exponent) - return Individual \ No newline at end of file diff --git a/Contrib/adme_models/README.md b/Contrib/adme_models/README.md index bb6c062..dcb1e89 100644 --- a/Contrib/adme_models/README.md +++ b/Contrib/adme_models/README.md @@ -1,9 +1,32 @@ # ADME models -The fitness model: `fitness_plus_admes.py` is able to use the models `hppb.jilib` and `clearence.jlib`. +The fitness module: `fitness_plus_admes.py` is able to use the models `hppb.jlib` and `clearence.jlib` by default and any other `jlib` model must be specified through the keywords: `adme_models` and `desirability`. The models will be evaluated as follow: + +```python +# The classes Featurizer and Predictors are already inside fitness_plus_admes.py +predictor = Predictors(featurizer=Featurizer(), models=adme_models.values()) +predictions = predictor.predict(Individual.mol) +``` The scripts `train_models.py`, `featurize.py` are used for the construction sof the models. `use_models.py` is a simple example on how to use the models. These three scripts were kindly provided by [Miha Skalic](https://github.com/miha-skalic). The class `Featurizer` and `Predictors` from `featurize.py` are integrated in `fitness_plus_admes.py`. +## Getting the models + +Install dependencies: + +```bash +pip install sklearn +``` + +The other dependencies are already inside **MolDrug**. Yopu must have in the same folder the scripts: `train_models.py`, `featurize.py` and then just: + +```bash +python train_models.py +``` + +You should get the files: `clearance.jlib` and `hppb.jlib`. + + ## Future models Here is a detail list of public data base to construct ADME models: [adme](https://tdcommons.ai/single_pred_tasks/adme/). diff --git a/docs/source/CHANGELOG.md b/docs/source/CHANGELOG.md index b8279a9..956d7b2 100644 --- a/docs/source/CHANGELOG.md +++ b/docs/source/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [unreleased] +## [3.0.0] + ### Changed - Name of `moldrug.fitness.get_mol_cost` to `moldrug.fitness.__get_mol_cost` function. @@ -24,6 +26,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- `ad4map` in all the cost functions of the `moldrug.fitness` module. This parameters specify the path where the ad4 map files are. To use this feature you must have the AutoDcok Vina v1.2.3 of above. Now you can use the force fields of AD4 inside of Vina. Future release will extend the integration with this versions. - `moldrug.utils.to_dataframe`. THis function was previously isolated as a method of the class `moldrug.utils.GA`; now it could also be called as a function. - `kept_gens` attribute to the `Individual`s inside of `moldrug.utils.GA`. This is a set that contains the generations for which the Individual was conserved. - `acceptance` attribute to `moldrug.utils.GA`. This is a dictionary that has as keyword the generation ID, and as values a dictionary with keywords: `accepted` (number of generated molecules accepted on the current generation) and `generated` (number of total molecules generated) @@ -206,7 +209,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Minor code cleaning. - Better code covered during testing -[unreleased]: https://github.com/ale94mleon/MolDrug/compare/2.1.7...HEAD +[unreleased]: https://github.com/ale94mleon/MolDrug/compare/3.0.0...HEAD +[3.0.0]: https://github.com/ale94mleon/MolDrug/compare/2.1.12...3.0.0 [2.1.12]: https://github.com/ale94mleon/MolDrug/compare/2.1.7...2.1.12 [2.1.7]: https://github.com/ale94mleon/MolDrug/compare/2.1.0...2.1.7 [2.1.0]: https://github.com/ale94mleon/MolDrug/compare/2.0.0...2.1.0 diff --git a/moldrug/fitness.py b/moldrug/fitness.py index 04face5..0c9da1c 100644 --- a/moldrug/fitness.py +++ b/moldrug/fitness.py @@ -168,6 +168,7 @@ def __vinadock( boxcenter:List[float] = None, boxsize:List[float] = None, exhaustiveness:int = 8, + ad4map:str = None, ncores:int = 1, num_modes:int = 1, constraint:bool = False, @@ -178,6 +179,9 @@ def __vinadock( constraint_minimum_conf_rms:int = 0.01): """ This function is intend to be used to perform docking for all the cost functions implemented on :mod:`moldrug.fitness` + If ad4map (available from moldrug-3.0.0) is set, the last version of vina (`releases `_) + must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow + `this tutorial _` Parameters ---------- @@ -196,6 +200,8 @@ def __vinadock( A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None exhaustiveness : int, optional Parameter of vina that controls the accuracy of the docking searching, by default 8 + ad4map : str, optional + The path where the AD4 maps are, by default None ncores : int, optional Number of cpus to use in Vina, by default 1 num_modes : int, optional @@ -231,17 +237,17 @@ def __vinadock( # Creating the working directory if needed if not os.path.exists(wd): os.makedirs(wd) - - # If the vina_executable is a path - if os.path.isfile(vina_executable): - vina_executable = os.path.abspath(vina_executable) - # Creating the command line for vina - cmd_vina_str = f"{vina_executable} --receptor {receptor_pdbqt_path}"\ - f" --center_x {boxcenter[0]} --center_y {boxcenter[1]} --center_z {boxcenter[2]}"\ - f" --size_x {boxsize[0]} --size_y {boxsize[1]} --size_z {boxsize[2]}"\ + cmd_vina_str = f"{vina_executable}"\ f" --cpu {ncores} --exhaustiveness {exhaustiveness} --num_modes {num_modes}" + if ad4map: + cmd_vina_str += f" --scoring ad4 --maps {os.path.abspath(ad4map)}" + else: + cmd_vina_str += f" --receptor {receptor_pdbqt_path}"\ + f" --center_x {boxcenter[0]} --center_y {boxcenter[1]} --center_z {boxcenter[2]}"\ + f" --size_x {boxsize[0]} --size_y {boxsize[1]} --size_z {boxsize[2]}"\ + if constraint: # Check for the correct type of docking if constraint_type in ['score_only', 'local_only']: @@ -370,6 +376,7 @@ def Cost( boxcenter:List[float] = None, boxsize:List[float] = None, exhaustiveness:int = 8, + ad4map:str = None, ncores:int = 1, num_modes:int = 1, constraint:bool = False, @@ -387,6 +394,10 @@ def Cost( #. `Quantitative Estimation of Drug-likeness (QED). `_ #. `Synthetic accessibility score. `_ + If ad4map is set, the last version of vina (`releases `_) + must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow + `this tutorial _` + Parameters ---------- Individual : utils.Individual @@ -506,6 +517,7 @@ def Cost( boxcenter = boxcenter, boxsize = boxsize, exhaustiveness = exhaustiveness, + ad4map = ad4map, ncores = ncores, num_modes = num_modes, constraint = constraint, @@ -546,6 +558,7 @@ def CostOnlyVina( boxcenter:List[float] = None, boxsize:List[float] = None, exhaustiveness:int = 8, + ad4map:str = None, ncores:int = 1, num_modes:int = 1, constraint:bool = False, @@ -557,7 +570,7 @@ def CostOnlyVina( wt_cutoff:float = None, ): """ - This Cost function performs Docking and return the vina_score as cost. + This Cost function performs Docking and return the vina_score as Cost. Parameters ---------- @@ -575,6 +588,8 @@ def CostOnlyVina( A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None exhaustiveness : int, optional Parameter of vina that controls the accuracy of the docking searching, by default 8 + ad4map : str, optional + The path where the AD4 maps are, by default None ncores : int, optional Number of cpus to use in Vina, by default 1 num_modes : int, optional @@ -640,6 +655,7 @@ def CostOnlyVina( boxcenter = boxcenter, boxsize = boxsize, exhaustiveness = exhaustiveness, + ad4map = ad4map, ncores = ncores, num_modes = num_modes, constraint = constraint, @@ -658,9 +674,10 @@ def CostMultiReceptors( vina_executable:str = 'vina', receptor_pdbqt_path:List[str] = None, vina_score_type:List[str] = None, - boxcenter:List[float] = None, - boxsize:List[float] = None, + boxcenter:List[List[float]] = None, + boxsize:List[List[float]] = None, exhaustiveness:int = 8, + ad4map:List[str] = None, ncores:int = 1, num_modes:int = 1, constraint:bool = False, @@ -681,6 +698,10 @@ def CostMultiReceptors( In this case every vina score (for all the provided receptors) will be used for the construction of the desirability. + If ad4map is set, the last version of vina (`releases `_) + must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow + `this tutorial _` + Parameters ---------- Individual : utils.Individual @@ -696,12 +717,14 @@ def CostMultiReceptors( This is a list with the keywords 'min' and/or 'max'. E.g. If two receptor were provided and for the first one we would like to find a minimum in the vina scoring function and for the other one a maximum (selectivity for the first receptor); we must provided the list: ['min', 'max'], by default None - boxcenter : list[float], optional + boxcenter : list[list[float]], optional A list of three floats with the definition of the center of the box in angstrom for docking (x, y, z), by default None - boxsize : list[float], optional + boxsize : list[list[float]], optional A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None exhaustiveness : int, optional Parameter of vina that controls the accuracy of the docking searching, by default 8 + ad4map : list[str], optional + A list of path where the AD4 maps are. For every receptor you should have a separate directory with all the maps, by default None ncores : int, optional Number of cpus to use in Vina, by default 1 num_modes : int, optional @@ -807,6 +830,10 @@ def CostMultiReceptors( } } } + + if not ad4map: + ad4map = [None]*len(receptor_pdbqt_path) + sascorer = utils.import_sascorer() Individual.qed = QED.weights_mean(Individual.mol) @@ -827,6 +854,7 @@ def CostMultiReceptors( boxcenter = boxcenter[i], boxsize = boxsize[i], exhaustiveness = exhaustiveness, + ad4map = ad4map[i], ncores = ncores, num_modes = num_modes, constraint = constraint, @@ -845,6 +873,7 @@ def CostMultiReceptors( boxcenter = boxcenter[i], boxsize = boxsize[i], exhaustiveness = exhaustiveness, + ad4map = ad4map[i], ncores = ncores, num_modes = num_modes, ) @@ -899,9 +928,10 @@ def CostMultiReceptorsOnlyVina( vina_executable:str = 'vina', receptor_pdbqt_path:List[str] = None, vina_score_type:List[str] = None, - boxcenter:List[float] = None, - boxsize:List[float] = None, + boxcenter:List[List[float]] = None, + boxsize:List[List[float]] = None, exhaustiveness:int = 8, + ad4map:List[str] = None, ncores:int = 1, num_modes:int = 1, constraint:bool = False, @@ -919,6 +949,10 @@ def CostMultiReceptorsOnlyVina( It also use the concept of desirability. The response variables are the vina scores on each receptor. + If ad4map is set, the last version of vina (`releases `_) + must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow + `this tutorial _` + Parameters ---------- Individual : utils.Individual @@ -926,7 +960,8 @@ def CostMultiReceptorsOnlyVina( wd : str, optional The working directory to execute the docking jobs, by default '.vina_jobs' vina_executable : str, optional - This is the name of the vina executable, could be a path to the binary object, which must have execution permits (chmod a+x ), by default 'vina' + This is the name of the vina executable, could be a path to the binary object, which must have + execution permits (chmod a+x ), by default 'vina' receptor_pdbqt_path : list[str], optional A list of location of the receptors pdbqt files, by default None vina_score_type : list[str], optional @@ -939,6 +974,8 @@ def CostMultiReceptorsOnlyVina( A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None exhaustiveness : int, optional Parameter of vina that controls the accuracy of the docking searching, by default 8 + ad4map : list[str], optional + A list of path where the AD4 maps are. For every receptor you should have a separate directory with all the maps, by default None ncores : int, optional Number of cpus to use in Vina, by default 1 num_modes : int, optional @@ -1026,6 +1063,9 @@ def CostMultiReceptorsOnlyVina( pdbqt_list = [] Individual.vina_score = [] + if not ad4map: + ad4map = [None]*len(receptor_pdbqt_path) + # If the molecule is heavy, don't perform docking and assign infinite to the cost attribute. Add "TooHeavy" string to pdbqts and np.inf to vina_scores if wt_cutoff: if Descriptors.MolWt(Individual.mol) > wt_cutoff: @@ -1051,6 +1091,7 @@ def CostMultiReceptorsOnlyVina( boxcenter = boxcenter[i], boxsize = boxsize[i], exhaustiveness = exhaustiveness, + ad4map = ad4map[i], ncores = ncores, num_modes = num_modes, constraint = constraint, @@ -1069,6 +1110,7 @@ def CostMultiReceptorsOnlyVina( boxcenter = boxcenter[i], boxsize = boxsize[i], exhaustiveness = exhaustiveness, + ad4map = ad4map[i], ncores = ncores, num_modes = num_modes, ) diff --git a/moldrug/utils.py b/moldrug/utils.py index 789e937..1668a6e 100644 --- a/moldrug/utils.py +++ b/moldrug/utils.py @@ -1017,7 +1017,10 @@ def to_dataframe(individuals:List[Individual]): class GA: """An implementation of genetic algorithm to search in the chemical space. """ - def __init__(self, seed_mol:Union[Chem.rdchem.Mol, Iterable[Chem.rdchem.Mol]], costfunc:object, costfunc_kwargs:Dict, crem_db_path:str, maxiter:int, popsize:int, beta:float = 0.001, pc:float = 1, get_similar:bool = False, mutate_crem_kwargs:Dict = None, save_pop_every_gen:int = 0, deffnm:str = 'ga',AddHs:bool = False) -> None: + def __init__(self, seed_mol:Union[Chem.rdchem.Mol, Iterable[Chem.rdchem.Mol]], + costfunc:object, costfunc_kwargs:Dict, crem_db_path:str, maxiter:int, popsize:int, + beta:float = 0.001, pc:float = 1, get_similar:bool = False, mutate_crem_kwargs:Dict = None, + save_pop_every_gen:int = 0, deffnm:str = 'ga', AddHs:bool = False) -> None: """This is the generator Parameters @@ -1099,14 +1102,13 @@ def __init__(self, seed_mol:Union[Chem.rdchem.Mol, Iterable[Chem.rdchem.Mol]], c self.AddHs = AddHs # Convert to list the seed_mol in case that it is not - if not is_iter(seed_mol) and isinstance(seed_mol, Chem.rdchem.Mol): self._seed_mol = [seed_mol] elif all([isinstance(mol, Chem.rdchem.Mol) for mol in seed_mol]): self._seed_mol = seed_mol else: - raise TypeError(f"seed_mol is not Chem.rdchem.Mol neither a Iterable[Chem.rdchem.Mol]") - + raise TypeError("seed_mol is not Chem.rdchem.Mol neither a Iterable[Chem.rdchem.Mol]") + if self.AddHs: self._seed_mol = [Chem.AddHs(mol) for mol in self._seed_mol] # if 'protected_ids' in self.mutate_crem_kwargs or 'replace_ids' in self.mutate_crem_kwargs: @@ -1167,7 +1169,7 @@ def __call__(self, njobs:int = 1): else: # Everything is ok! pass - + # Adding the inputs to the intial population for i, mol in enumerate(self._seed_mol): individual = Individual(mol, idx = i) @@ -1182,7 +1184,7 @@ def __call__(self, njobs:int = 1): individual = Individual(mol, idx = i + len(self._seed_mol)) if individual.pdbqt: self.pop.append(individual) - + # Make sure that the population do not have more than popsize members and it is without repeated elemnts. # That could happens if seed_mol has more molecules that popsize self.pop = list(set(self.pop))[:self.popsize] diff --git a/tests/test_moldrug.py b/tests/test_moldrug.py index ba2d594..7d390f6 100644 --- a/tests/test_moldrug.py +++ b/tests/test_moldrug.py @@ -355,4 +355,4 @@ def test_constraintconf(): if __name__ == '__main__': - test_single_receptor_command_line() + test_fitness_module()