-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpyscalatomtz.py
186 lines (161 loc) · 5.76 KB
/
pyscalatomtz.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/python
# Class that takes in a file name and other optional inputs and converts the scafile to an mtz file
from optparse import OptionParser
import aurigaerrors
import os
import sys
from project_settings import auriga_proj_dir
import subprocess
from Queue import Queue
from threading import Thread,Lock
class scalatomtz(object):
"""Class that accepts filename, (optional) spacegroup,cell_dimensions, wavelength and number of residues in the ASU as imput. Runs truncate and outputs the mtz file.
"""
def __init__(self,filename,spag=None,cell_dimensions_tuple=None,wavelength=None,number_of_residues_in_asu=130 ):
"""Assume only filename and infer cell dimensions and spacegroup
"""
self.filename = filename.strip()
self.auriga_proj_dir = auriga_proj_dir
self.file = open(self.filename.strip(),"r")
self.spag = self.extract_spag()
self.cell_dimensions_string = self.extract_cell_dimensions_string()
self.wavelength = wavelength
self.number_of_residues_in_asu = number_of_residues_in_asu
self.title = os.path.splitext(os.path.split(self.filename)[1])[0] + "_autoproc"
self.proj_name = os.path.splitext(os.path.split(self.filename)[1])[0].upper()
self.scalepack2mtz_dict = dict(title = self.title ,spag = self.spag , cell = self.cell_dimensions_string , wavelength = self.wavelength , proj_name = self.proj_name)
def extract_cell_dimensions_string(self):
"""
Arguments:
- `self`:
"""
self.file.seek(0)
self.file.readline()
self.file.readline()
cell_dimensions_string = ",".join(self.file.readline().split()[:-1])
print "CELL DIM STR" , cell_dimensions_string
return cell_dimensions_string
def extract_spag(self):
self.file.seek(0)
self.file.readline()
self.file.readline()
cell_line = self.file.readline().split()
print "CELL LINE" , cell_line
return cell_line[-1]
def describe(self):
description = """Scalepack file:{self.filename},{self.spag},{self.cell_dimensions_string}""".format(self=self)
print description
def create_and_return_runscript_file(self):
scrfile1 = open(os.path.join(self.auriga_proj_dir,"%s_1.sh" % self.proj_name),"w")
scrfile2 = open(os.path.join(self.auriga_proj_dir,"%s_2.sh" % self.proj_name),"w")
scrfile3 = open(os.path.join(self.auriga_proj_dir,"%s_3.sh" % self.proj_name),"w")
scr1 = """#!/bin/sh
#set -e
# bug # 3192 - run-all examples produce harvest files - well to counteract
# this here set HARVESTHOME to somewhere in $CCP4_SCR
HARVESTHOME=${self.auriga_proj_dir}
export HARVESTHOME
# from /home/hari/official_ccp4/ccp4-6.1.3/examples/unix/runnable
# SCALEPACK2MTZ
#
# h k l I+ SigI+ I- SigI- were extracted from aucn.na4
# (acentric data only), and put into scalepack format.
# This is simply to illustrate the procedure for getting
# scalepack data into CCP4. I don't really know if it
# is a good example.
#
# (You can use the same procedure whether or not you have
# anomalous data.)
scalepack2mtz hklin {self.filename} hklout $CCP4_SCR/{self.proj_name}_junk1.mtz <<eof
name project {self.proj_name} crystal {self.proj_name} dataset {self.proj_name}
symm {self.spag}
end
eof
""".format(self = self)
scr2 = """#!/bin/sh
# convert Is to Fs and Ds.
truncate hklin $CCP4_SCR/{self.proj_name}_junk1.mtz hklout $CCP4_SCR/{self.proj_name}_junk2.mtz <<eof
title {self.proj_name} red aucn2 data
truncate yes
nresidue {self.number_of_residues_in_asu}
labout F=FP_{self.proj_name} SIGF=SIGFP_self.proj_name
end
eof
""".format(self = self)
scr3 = """#!/bin/sh
# get correct sort order and asymmetric unit
cad hklin1 $CCP4_SCR/{self.proj_name}_junk2.mtz hklout $CCP4_SCR/{self.proj_name}_trn.mtz <<eof
labi file 1 ALL
sort H K L
end
eof
#""".format(self = self)
scrfile1.write(scr1)
scrfile2.write(scr2)
scrfile3.write(scr3)
scrfile1.close()
scrfile2.close()
scrfile3.close()
os.chmod(scrfile1.name,0755)
os.chmod(scrfile2.name,0755)
os.chmod(scrfile3.name,0755)
return scrfile.name
# print scrfile.name
# mutex.acquire()
# subprocess.call([scrfile.name])
# sys.stdout.flush()
# mutex.release()
def report(message):
mutex.acquire()
print message
sys.stdout.flush()
mutex.release()
class ScaToMtzConvertor(Thread):
def __init__(self, in_queue, out_queue):
Thread.__init__(self)
self.in_queue = in_queue
self.out_queue = out_queue
def run(self):
while True:
path = self.in_queue.get()
sys.stdout.flush()
if path is None:
break
print "GOTPATH" , path
report("Converting %s" % path)
myfile = scalatomtz(path)
scrfile = myfile.create_and_return_runscript_file()
subprocess.call([scrfile])
#report(myfile.describe())
report("Done %s" % path)
self.out_queue.put(path)
in_queue = Queue()
in_queue = Queue()
out_queue = Queue()
mutex = Lock()
THREAD_COUNT = 8
worker_list = []
for i in range(THREAD_COUNT):
worker = ScaToMtzConvertor(in_queue, out_queue)
worker.start()
worker_list.append(worker)
if __name__ == '__main__':
sca_listfile = "scafiles.txt"
for i in open(sca_listfile,"r"):
in_queue.put(i)
for i in range(THREAD_COUNT):
in_queue.put(None)
for worker in worker_list:
worker.join()
# with 4
#real 1m17.021s
#user 1m2.530s
#sys 0m5.300s
#with 1
#real 1m19.310s
#user 1m4.490s
#sys 0m5.410s
#with 8
#real 1m13.971s
#user 1m1.020s
#sys 0m5.070s