|
| 1 | +#!/Applications/PyMOL3.app/Contents/bin/python3 |
| 2 | +#-*- coding: utf-8 -*- |
| 3 | + |
| 4 | +"""rna_calc_rmsd_pymol.py - calculate RMSDs of structures with PyMOL |
| 5 | +
|
| 6 | +""" |
| 7 | +from __future__ import print_function |
| 8 | + |
| 9 | +import warnings |
| 10 | +from Bio import BiopythonDeprecationWarning |
| 11 | +warnings.filterwarnings("ignore", category=BiopythonDeprecationWarning) |
| 12 | + |
| 13 | +import sys |
| 14 | +import argparse |
| 15 | +import sys |
| 16 | +import math |
| 17 | +import glob |
| 18 | +import re |
| 19 | +import os |
| 20 | + |
| 21 | +try: |
| 22 | + import __main__ |
| 23 | + __main__.pymol_argv = ['pymol', '-qc'] |
| 24 | + import pymol # import cmd, finish_launching |
| 25 | + |
| 26 | + from pymol import cmd |
| 27 | + #cmd.feedback("disable") |
| 28 | + cmd.set("logging", 0) # optional; turns off logging |
| 29 | + cmd.set("suspend_updates", 1) # optional; disables GUI redraws (if in GUI) |
| 30 | + pymol.finish_launching() |
| 31 | + |
| 32 | +except ImportError: |
| 33 | + print('calc_rmsd_pymol: you need to have installed PyMOL') |
| 34 | + sys.exit(0) # no error |
| 35 | + |
| 36 | + |
| 37 | + |
| 38 | +def get_rna_models_from_dir(files): |
| 39 | + """ |
| 40 | + :param models: a list of filenames |
| 41 | +
|
| 42 | + Example of the list:: |
| 43 | +
|
| 44 | + ['test_data/rp17/2_restr1_Michal1.pdb_clean.pdb', 'test_data/rp17/2a_nonrestr2_Michal1.pdb_clean.pdb', |
| 45 | + 'test_data/rp17/3_nonrestr1_Michal1.pdb_clean.pdb', 'test_data/rp17/5_restr1_Michal3.pdb_clean.pdb']""" |
| 46 | + |
| 47 | + models = [] |
| 48 | + #if not os.path.exists(directory): |
| 49 | + # raise Exception('Dir does not exist! ', directory) |
| 50 | + #files = glob.glob(directory + "/*.pdb") |
| 51 | + files_sorted = sort_nicely(files) |
| 52 | + for f in files_sorted: |
| 53 | + models.append(f) |
| 54 | + return models |
| 55 | + |
| 56 | +def sort_nicely( l ): |
| 57 | + """ Sort the given list in the way that humans expect. |
| 58 | +
|
| 59 | + http://blog.codinghorror.com/sorting-for-humans-natural-sort-order/ |
| 60 | + """ |
| 61 | + convert = lambda text: int(text) if text.isdigit() else text |
| 62 | + alphanum_key = lambda key: [ convert(c) for c in re.split('([0-9]+)', key) ] |
| 63 | + l.sort( key=alphanum_key ) |
| 64 | + return l |
| 65 | + |
| 66 | +def calc_rmsd_pymol(pdb1, pdb2, method, verbose=False): |
| 67 | + """Calculate rmsd using PyMOL. Two methods are available: align and fit |
| 68 | +
|
| 69 | + See: |
| 70 | +
|
| 71 | + - Align: <http://www.pymolwiki.org/index.php/Align> |
| 72 | + - Fit: <http://www.pymolwiki.org/index.php/Fit> |
| 73 | +
|
| 74 | + Align can return a list with 7 items: |
| 75 | +
|
| 76 | + - RMSD after refinement |
| 77 | + - Number of aligned atoms after refinement |
| 78 | + - Number of refinement cycles |
| 79 | + - RMSD before refinement |
| 80 | + - Number of aligned atoms before refinement |
| 81 | + - Raw alignment score |
| 82 | + - Number of residues aligned |
| 83 | +
|
| 84 | + in this version of function, the function returns `RMSD before refinement`. |
| 85 | +
|
| 86 | + Install on OSX: ``brew install brewsci/bio/pymol`` or get |
| 87 | +
|
| 88 | + If you have a problem:: |
| 89 | +
|
| 90 | + Match-Error: unable to open matrix file '/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/data/pymol/matrices/BLOSUM62'. |
| 91 | +
|
| 92 | + then find BLOSUM62, e.g.:: |
| 93 | +
|
| 94 | + mdfind -name BLOSUM62 | grep pymol |
| 95 | + /Users/magnus/miniconda2/envs/py37/lib/python3.7/site-packages/pymol/pymol_path/data/pymol/matrices/BLOSUM62 |
| 96 | + /usr/local/Cellar/pymol/2.4.0_3/libexec/lib/python3.9/site-packages/pymol/pymol_path/data/pymol/matrices/BLOSUM62 |
| 97 | + /Users/magnus/miniconda2/pkgs/pymol-2.4.2-py37h06d7bae_0/share/pymol/data/pymol/matrices/BLOSUM62 |
| 98 | + /Users/magnus/work/opt/pymol-open-source/data/pymol/matrices/BLOSUM62 |
| 99 | +
|
| 100 | + and then define ``PYMOL_DATA`` in your .bashrc/.zshrc, e.g.:: |
| 101 | +
|
| 102 | + export PYMOL_DATA="/Users/magnus/work/opt/pymol-open-source/data/pymol" |
| 103 | +
|
| 104 | + """ |
| 105 | + |
| 106 | + pymol.cmd.reinitialize() |
| 107 | + pymol.cmd.delete('all') |
| 108 | + pymol.cmd.load(pdb1, 's1') |
| 109 | + pymol.cmd.load(pdb2, 's2') |
| 110 | + |
| 111 | + if method == 'align': |
| 112 | + # experiments with align <https://pymolwiki.org/index.php/Align> |
| 113 | + # quiet = 0/1: suppress output {default: 0 in command mode, 1 in API} |
| 114 | + # (4.130036354064941, 60, 3, 4.813207626342773, 64, 30.0, 3) |
| 115 | + values = pymol.cmd.align('s1', 's2',quiet=1, object='aln') |
| 116 | + |
| 117 | + from Bio import pairwise2 |
| 118 | + |
| 119 | + def get_sequence(obj): |
| 120 | + """Extract 1-letter FASTA sequence from PyMOL object""" |
| 121 | + fasta = cmd.get_fastastr(obj) |
| 122 | + lines = fasta.splitlines() |
| 123 | + return ''.join(lines[1:]) # Skip the >header line |
| 124 | + |
| 125 | + def calc_sequence_identity(seq1, seq2): |
| 126 | + """Calculate global sequence identity""" |
| 127 | + alignments = pairwise2.align.globalxx(seq1, seq2) |
| 128 | + best = alignments[0] |
| 129 | + matches = sum(a == b for a, b in zip(best.seqA, best.seqB)) |
| 130 | + return matches / max(len(seq1), len(seq2)) |
| 131 | + |
| 132 | + seq1 = get_sequence("s1") |
| 133 | + seq2 = get_sequence("s2") |
| 134 | + print(seq1, seq2) |
| 135 | + |
| 136 | + identity = calc_sequence_identity(seq1, seq2) |
| 137 | + print(f"Sequence identity: {identity:.2%}") |
| 138 | + |
| 139 | + print(values) |
| 140 | + return values[0], values[3], identity # (,#0) #, pymol.cmd.align('s1','s2')[4]) |
| 141 | + #raw_aln = pymol.cmd.get_raw_alignment('aln') |
| 142 | + #print raw_aln |
| 143 | + #for idx1, idx2 in raw_aln: |
| 144 | + # print '%s`%d -> %s`%d' % tuple(idx1 + idx2) |
| 145 | + #pymol.cmd.save('aln.aln', 'aln') |
| 146 | + |
| 147 | + if method == 'fit': |
| 148 | + return (pymol.cmd.fit('s1', 's2'), 'fit') |
| 149 | + |
| 150 | +def calc_rmsd(a, b, target_selection, target_ignore_selection, model_selection, model_ignore_selection, way, verbose): |
| 151 | + """ |
| 152 | + Calculate RMSD between two XYZ files |
| 153 | +
|
| 154 | + by: Jimmy Charnley Kromann <[email protected]> and Lars Andersen Bratholm <[email protected]> |
| 155 | + project: https://github.com/charnley/rmsd |
| 156 | + license: https://github.com/charnley/rmsd/blob/master/LICENSE |
| 157 | +
|
| 158 | + a is model |
| 159 | + b is target |
| 160 | +
|
| 161 | + :params: a = filename of structure a |
| 162 | + :params: b = filename of structure b |
| 163 | +
|
| 164 | + :return: rmsd, number of atoms |
| 165 | + """ |
| 166 | + if verbose: print('in:', a) |
| 167 | + |
| 168 | + atomsP, P, atoms = get_coordinates(a, model_selection, model_ignore_selection, 'pdb', True, way) |
| 169 | + atomsQ, Q, atoms = get_coordinates(b, target_selection,target_ignore_selection, 'pdb', True, way) |
| 170 | + |
| 171 | + if verbose: |
| 172 | + print(atomsP, P) |
| 173 | + print(atomsQ, Q) |
| 174 | + |
| 175 | + if atomsQ != atomsP: |
| 176 | + print('Error: number of atoms is not equal target (' + b + '):' + str(atomsQ) + ' vs model (' + a + '):' + str(atomsP)) |
| 177 | + return (-1,0) # skip this RNA |
| 178 | + # Calculate 'dumb' RMSD |
| 179 | + normal_rmsd = rmsd(P, Q, atoms) |
| 180 | + # Create the centroid of P and Q which is the geometric center of a |
| 181 | + # N-dimensional region and translate P and Q onto that center. |
| 182 | + # http://en.wikipedia.org/wiki/Centroid |
| 183 | + Pc = centroid(P) |
| 184 | + Qc = centroid(Q) |
| 185 | + P -= Pc |
| 186 | + Q -= Qc |
| 187 | + |
| 188 | + if False: |
| 189 | + V = rotate(P, Q) |
| 190 | + V += Qc |
| 191 | + write_coordinates(atomsP, V) |
| 192 | + quit() |
| 193 | + |
| 194 | + return round(kabsch_rmsd(P, Q, atoms),2), atomsP |
| 195 | + |
| 196 | +def get_parser(): |
| 197 | + import argparse |
| 198 | + class SmartFormatter(argparse.HelpFormatter): |
| 199 | + def _split_lines(self, text, width): |
| 200 | + if text.startswith('R|'): |
| 201 | + return text[2:].splitlines() |
| 202 | + # this is the RawTextHelpFormatter._split_lines |
| 203 | + return argparse.HelpFormatter._split_lines(self, text, width) |
| 204 | + |
| 205 | + parser = argparse.ArgumentParser(description=__doc__, formatter_class=SmartFormatter)#formatter_class=argparse.RawDescriptionHelpFormatter) |
| 206 | + |
| 207 | + parser.add_argument('-t',"--target-fn", |
| 208 | + default='', required = True, |
| 209 | + help="pdb file") |
| 210 | + |
| 211 | + parser.add_argument('--ignore-files', help='files to be ingored, .e.g, \'solution\'', default='') |
| 212 | + |
| 213 | + parser.add_argument("--target-selection", |
| 214 | + default='', |
| 215 | + help="selection, e.g. A:10-16+20, where #16 residue is included") |
| 216 | + |
| 217 | + parser.add_argument("--target-ignore-selection", |
| 218 | + default='', |
| 219 | + help="A/10/O2\'") |
| 220 | + |
| 221 | + parser.add_argument("--model-selection", |
| 222 | + default='', |
| 223 | + help="selection, e.g. A:10-16+20, where #16 residue is included") |
| 224 | + |
| 225 | + parser.add_argument("--model-ignore-selection", |
| 226 | + default='', |
| 227 | + help="A/10/O2\'") |
| 228 | + |
| 229 | + parser.add_argument('-m', "--method", |
| 230 | + default='all-atom-built-in', |
| 231 | + help="align, fit") |
| 232 | + |
| 233 | + parser.add_argument('-o', "--rmsds-fn", |
| 234 | + default='rmsds.csv', |
| 235 | + help="ouput, matrix") |
| 236 | + |
| 237 | + parser.add_argument("-v", "--verbose", action="store_true", |
| 238 | + help="verbose") |
| 239 | + |
| 240 | + parser.add_argument('-pr', '--print-results', |
| 241 | + action="store_true") |
| 242 | + |
| 243 | + parser.add_argument('-sr', '--sort-results', |
| 244 | + action="store_true") |
| 245 | + |
| 246 | + parser.add_argument('-pp', '--print-progress', |
| 247 | + default=False, |
| 248 | + action="store_true") |
| 249 | + |
| 250 | + parser.add_argument('--way', help="""R|c1p = C1' |
| 251 | +backbone = P OP1 OP2 O5' C5' C4' C3' O3' |
| 252 | +po = P OP1 OP2 |
| 253 | +no-backbone = all - po |
| 254 | +bases, backbone+sugar, sugar |
| 255 | +pooo = P OP1 OP2 O5' |
| 256 | +alpha = P OP1 OP2 O5' C5' |
| 257 | +""", default='all') |
| 258 | + |
| 259 | + parser.add_argument("--name-rmsd-column", help="default: fn,rmsd, with this cols will be fn,<name-rmsd-column>") |
| 260 | + |
| 261 | + parser.add_argument("--target-column-name", action="store_true", |
| 262 | + help="") |
| 263 | + |
| 264 | + parser.add_argument('files', help='files', nargs='+') |
| 265 | + |
| 266 | + return parser |
| 267 | +# main |
| 268 | +if __name__ == '__main__': |
| 269 | + parser = get_parser() |
| 270 | + args = parser.parse_args() |
| 271 | + |
| 272 | + input_files = args.files # opts.input_dir |
| 273 | + tmp = [] |
| 274 | + if args.ignore_files: |
| 275 | + for f in input_files: |
| 276 | + if args.ignore_files in f: |
| 277 | + continue |
| 278 | + tmp.append(f) |
| 279 | + input_files = tmp |
| 280 | + |
| 281 | + rmsds_fn = args.rmsds_fn |
| 282 | + target_fn = args.target_fn |
| 283 | + method = args.method |
| 284 | + |
| 285 | + if args.verbose: |
| 286 | + print('method:', method) |
| 287 | + |
| 288 | + models = get_rna_models_from_dir(input_files) |
| 289 | + |
| 290 | + if args.verbose: |
| 291 | + print('target:', target_fn) |
| 292 | + print('of models:', len(models)) |
| 293 | + |
| 294 | + f = open(rmsds_fn, 'w') |
| 295 | + #t = 'target:' + os.path.basename(target_fn) + ' , rmsd_all\n' |
| 296 | + if args.name_rmsd_column: |
| 297 | + t = 'fn,' + args.name_rmsd_column + '\n' |
| 298 | + elif args.target_column_name: |
| 299 | + t = 'fn,' + os.path.basename(args.target_fn) + '\n' |
| 300 | + else: |
| 301 | + t = 'fn,rmsd_all,seq_identity\n' |
| 302 | + |
| 303 | + if not args.verbose: |
| 304 | + t = '' |
| 305 | + c = 1 |
| 306 | + for r1 in models: |
| 307 | + if method == 'align' or method == 'fit': |
| 308 | + rmsd_curr, atoms, seq_identity = calc_rmsd_pymol(r1, target_fn, method, args.verbose) |
| 309 | + r1_basename = os.path.basename(r1) |
| 310 | + if args.print_progress: print(r1_basename, rmsd_curr, atoms) |
| 311 | + t += r1_basename + ',' + str(round(rmsd_curr,3)) + ',' + str(seq_identity) |
| 312 | + c += 1 |
| 313 | + t += '\n' |
| 314 | + |
| 315 | + f.write(t) |
| 316 | + f.close() |
| 317 | + |
| 318 | + if args.verbose: |
| 319 | + print('number of atoms used:', atoms) |
| 320 | + |
| 321 | + try: |
| 322 | + import pandas as pdx |
| 323 | + pd.set_option('display.max_rows', 1000) |
| 324 | + |
| 325 | + except: |
| 326 | + print(t.strip()) # matrix |
| 327 | + sys.exit(0) |
| 328 | + |
| 329 | + df = pd.read_csv(rmsds_fn) |
| 330 | + df = df.round(2) |
| 331 | + if args.sort_results: |
| 332 | + df = df.sort_values('rmsd_all', ascending=True) |
| 333 | + if args.print_results: |
| 334 | + print(df) |
| 335 | + df.to_csv(rmsds_fn, sep=',', index=False) # easy to set \t here! |
| 336 | + |
| 337 | + # print('# csv was created! ', rmsds_fn) |
0 commit comments