-
Notifications
You must be signed in to change notification settings - Fork 24
/
molchemaxon2pdb.py
executable file
·133 lines (110 loc) · 6.08 KB
/
molchemaxon2pdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python3
# author : Aleksandra Nikonenko
# date : 17.05.2021
# license : BSD-3
# ==============================================================================
__author__ = 'aleksandra'
import argparse
from functools import partial
import os
import subprocess
import sys
from io import BytesIO
from multiprocessing import Pool, cpu_count
from rdkit import Chem
from rdkit.Chem import AllChem
import read_input
cmd_run = "cxcalc majormicrospecies -H {pH} -f {out_format} -M -K '{fname}'"
def gen_conf(mol, useRandomCoords, randomSeed):
params = AllChem.ETKDGv3()
params.useRandomCoords = useRandomCoords
params.randomSeed = randomSeed
conf_stat = AllChem.EmbedMolecule(mol, params)
return mol, conf_stat
def convert2pdb(mol_iter, outpath, preserve_coord, seed, out_format):
m, name = mol_iter
if not m:
sys.stderr.write('Incorrect molecule: {0}\t{1}\n'.format(Chem.MolToSmiles(m), name))
sys.stderr.flush()
return None
m = Chem.AddHs(m, addCoords=True)
if not preserve_coord:
m, conf_stat = gen_conf(m, useRandomCoords=False, randomSeed=seed)
if conf_stat == -1:
# if molecule is big enough and rdkit cannot generate a conformation - use params.useRandomCoords = True
m, conf_stat = gen_conf(m, useRandomCoords=True, randomSeed=seed)
if conf_stat == -1:
sys.stderr.write(
'Could not get conformation of the molecule: {0}\t{1}\n'.format(Chem.MolToSmiles(m), name))
sys.stderr.flush()
return None
AllChem.UFFOptimizeMolecule(m, maxIters=100)
if out_format == 'pdb':
pdb = Chem.MolToPDBBlock(m)
with open(os.path.join(outpath, name + '.pdb'), 'w') as data:
data.write(pdb)
if out_format == 'sdf':
w = Chem.SDWriter(os.path.join(outpath, name + '.sdf'))
w.write(m)
w.close()
def iter_molname(fname, field):
for _, name in read_input.read_input(fname,
input_format=None,
id_field_name=field,
sanitize=False):
yield name
def calc(fname, outpath, ncpu, field, pH, preserve_coord, save_sdf, no_protonation, seed, out_format):
if outpath and not os.path.exists(outpath):
os.mkdir(outpath)
if no_protonation:
mol_gener = read_input.read_input(fname,
input_format=None,
id_field_name=field,
sanitize=True)
else:
sdf_pHprotonate = subprocess.check_output(cmd_run.format(fname=fname, out_format='sdf', pH=pH), shell=True)
if save_sdf is not None:
with open(save_sdf, 'w') as out_sdf:
out_sdf.write(sdf_pHprotonate.decode())
mol_gener = zip(Chem.ForwardSDMolSupplier(BytesIO(sdf_pHprotonate), sanitize=True), iter_molname(fname, field))
if ncpu > 1:
p = Pool(max(1, min(ncpu, cpu_count())))
p.map(partial(convert2pdb, outpath=outpath, preserve_coord=preserve_coord, seed=seed,
out_format=out_format), mol_gener)
else:
list(map(partial(convert2pdb, outpath=outpath, preserve_coord=preserve_coord, seed=seed,
out_format=out_format), mol_gener))
def main():
parser = argparse.ArgumentParser(description='''Conversion of input file to separate pdb files.
Conformer generation is performed by RDKit.
Major tautomeric form at the given pH is generated by ChemAxon.''')
parser.add_argument('-i', '--input', metavar='FILENAME', required=True,
help='input file with compounds. Supported formats: SMILES (*.smi) or SDF (*.sdf). '
'Note: For SDF format always use the neutral form of molecules.')
parser.add_argument('-f', '--field_name', metavar='STRING', required=False, default=None,
help='Is used only if input format is *.sdf.'
' Name of the field containing molecule name, if None molecule title will be taken.')
parser.add_argument('-o', '--output', metavar='DIRNAME', required=False, default=None,
help='output path. If None save to current directory.')
parser.add_argument('--pH', metavar='FLOAT', required=False, default=7.4,
help='To get major tautomer at this pH value.')
parser.add_argument('--save_sdf', metavar='FILENAME', default=None,
help='Output filename to save major tautomer at given pH in SDF format. '
'Saving without hydrogens.')
parser.add_argument('--preserve_coord', action='store_true', default=False,
help='If set current coordinates will be saved. Is used only for input SDF format.')
parser.add_argument('-n', '--ncpu', metavar='INTEGER', required=False, default=1, type=int,
help='number of CPUs to use for computation.')
parser.add_argument('--no_protonation', action='store_true', default=False,
help="if set ChemAxon protonation won't be used.")
parser.add_argument('--seed', metavar='INTEGER', required=False, type=int, default=0,
help='seed to make results reproducible.')
parser.add_argument('--out_format', metavar='SDF or PDB', default='pdb', type=lambda x: x.lower(),
help="Choose one of the output formats - SDF or PDB (case insensitive).")
args = parser.parse_args()
if args.preserve_coord and args.input.split('.')[-1].lower() != 'sdf':
raise ValueError('Please use --preserve_coord argument only for input SDF format.')
calc(args.input, args.output if args.output is not None else os.getcwd(), args.ncpu, args.field_name, args.pH,
args.preserve_coord, args.save_sdf, args.no_protonation, seed=args.seed, out_format=args.out_format)
if __name__ == '__main__':
main()