-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.py
175 lines (139 loc) · 6 KB
/
functions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import re
import os
import pandas as pd
import json
import subprocess
import time
import random
# ANSI escape codes for colors
class Color:
    """Namespace of ANSI escape sequences used to colorize terminal output.

    Each attribute is a plain string; emit one before a message to switch
    the foreground color and emit ``RESET`` afterwards.
    """

    # Restores the terminal's default text attributes.
    RESET = '\033[0m'

    # Bright foreground colors (SGR codes 91-93).
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
def print_colored(message, color):
    """Print ``message`` to stdout wrapped in an ANSI color.

    Parameters
    ----------
    message : object
        Text (or any formattable object) to display.
    color : str
        Escape sequence emitted before the message, e.g. ``Color.YELLOW``.
    """
    # Always append the reset code so the color does not leak into
    # whatever is printed next.
    colored_text = f"{color}{message}{Color.RESET}"
    print(colored_text)
def _require_match(pattern, text, field, z_value):
    """Return the first match of ``pattern`` in ``text`` or raise a clear error."""
    found = re.search(pattern, text)
    if found is None:
        raise ValueError(
            f"{field} not found in the [ZValue = {z_value}] section of the mdoc file")
    return found


def readmdoc(mdoc_file, config_file='config_TomoPrep.json'):
    """Extract information from an mdoc file into a pandas DataFrame.

    The readmdoc function extracts information from mdoc files produced by
    Tomography 5 (TFS) and stores it within a pandas dataframe.

    Parameters
    ----------
    mdoc_file : str
        Path of the mdoc file to read.
    config_file : str, optional
        Path of the JSON configuration file providing the
        ``modify_subframe_path`` key. Defaults to ``config_TomoPrep.json``
        in the current working directory. It is only opened when the mdoc
        file contains at least one ``[ZValue = n]`` section.

    Returns
    -------
    pandas.DataFrame
        One row per ``[ZValue = n]`` section with the columns ``TiltAngle``
        (float), ``SubFramePath`` (file name only, directories removed) and
        ``NumSubFrames`` (float), plus the header values ``Voltage``,
        ``TiltAxisAngle`` and ``ImageFile`` (with ``.mrc`` removed) repeated
        on every row. A header value that is absent is stored as ``None``.

    Raises
    ------
    ValueError
        If a Z section lacks ``TiltAngle``, ``SubFramePath`` or
        ``NumSubFrames``.
    KeyError
        If the configuration has no ``modify_subframe_path`` key.
    """
    with open(mdoc_file, "r") as file:
        mdoc_content = file.read()

    # Accept both "300" and "300.0": the fractional part is optional.
    decimal = r"\d+(?:\.\d+)?"

    # Header values are optional; the first occurrence in the file is used.
    voltage_match = re.search(rf"Voltage = ({decimal})", mdoc_content)
    voltage = float(voltage_match.group(1)) if voltage_match else None

    tilt_axis_angle_match = re.search(
        rf"TiltAxisAngle = ([-+]?{decimal})", mdoc_content)
    tilt_axis_angle = (
        float(tilt_axis_angle_match.group(1)) if tilt_axis_angle_match else None)

    image_file_match = re.search(r"ImageFile = (.+)", mdoc_content)
    if image_file_match:
        image_file = image_file_match.group(1).strip().replace(".mrc", "")
    else:
        image_file = None

    # re.split with a capture group yields
    # [header, z0, body0, z1, body1, ...]; drop the header and pair the rest.
    z_groups = re.split(r"\[ZValue = (-?\d+)]", mdoc_content)[1:]
    z_sections = list(zip(z_groups[0::2], z_groups[1::2]))

    # The configuration does not change between tilt images, so it is read
    # once here instead of once per Z section.
    rename_fractions = False
    if z_sections:
        with open(config_file, 'r') as f:
            config = json.load(f)
        rename_fractions = config['modify_subframe_path'] == "YES"

    data = []
    for z_value, z_data in z_sections:
        tilt_angle = _require_match(
            rf"TiltAngle = ([-+]?{decimal})", z_data, "TiltAngle", z_value).group(1)
        raw_path = _require_match(
            r"SubFramePath = (.+)", z_data, "SubFramePath", z_value).group(1).strip()
        # Keep only the last path component; the character class excludes
        # both slash styles and characters that are illegal in file names.
        subframe_path = _require_match(
            r"[^\\/:*?\"<>|\r\n]+$", raw_path, "SubFramePath file name", z_value).group()
        number_of_frames = _require_match(
            r"NumSubFrames = (\d+)", z_data, "NumSubFrames", z_value).group(1)

        if rename_fractions:
            # Replace "_Fractions." with "_fractions." in subframe_path
            subframe_path = subframe_path.replace("_Fractions.", "_fractions.")

        data.append((float(tilt_angle), subframe_path, float(number_of_frames)))

    mdoc_df = pd.DataFrame(data, columns=["TiltAngle", "SubFramePath", "NumSubFrames"])

    # Add the header information to each DataFrame entry
    mdoc_df["Voltage"] = voltage
    mdoc_df["TiltAxisAngle"] = tilt_axis_angle
    mdoc_df['ImageFile'] = image_file
    return mdoc_df
def parse_config(config_file):
    """Load a JSON configuration file and return the parsed object.

    Parameters
    ----------
    config_file : str
        Path of the JSON file to read.

    Returns
    -------
    object
        Whatever the JSON document decodes to (a dict for a JSON object).
    """
    with open(config_file, 'r') as handle:
        # json.load reads the whole stream and decodes it in one step.
        return json.load(handle)
def get_position_name(mdoc_file, config):
    """Derive the position prefix and its processing directory from an mdoc file.

    Parameters
    ----------
    mdoc_file : str
        Path of the mdoc file; it is parsed with ``readmdoc``.
    config : dict
        Parsed configuration. The keys ``file_type`` and
        ``processing_directory`` are read from it.

    Returns
    -------
    tuple of (str, str)
        ``(position_prefix, position_directory)``: the ``ImageFile`` name with
        the ``.<file_type>`` suffix removed, and that prefix joined onto
        ``processing_directory``.

    Raises
    ------
    ValueError
        If the mdoc file yields no rows or has no ``ImageFile`` entry.
    """
    # Read mdoc and config file to get key variables
    mdoc_df = readmdoc(mdoc_file)
    if mdoc_df.empty:
        raise ValueError(
            f"{mdoc_file}: no tilt images found, cannot determine the position name")

    # ImageFile holds the same header value on every row, so take the first
    # row by position. The previous label lookup of row 1 failed whenever
    # the mdoc described a single tilt image.
    position_name = mdoc_df["ImageFile"].iloc[0]
    if not isinstance(position_name, str):
        raise ValueError(f"{mdoc_file}: no ImageFile entry found in the mdoc file")

    file_type = config.get('file_type')
    processing_directory = config.get('processing_directory')

    # extract the position name and directory
    position_prefix = position_name.replace(f".{file_type}", "")
    position_directory = os.path.join(processing_directory, position_prefix)
    return position_prefix, position_directory
def queue_submit(position_prefix, job_name, slurm_script_path, config):
    """Submit a SLURM script once the user's queue has room for it.

    Polls ``squeue`` for the current user's jobs and waits (sleeping a random
    1-10 seconds between polls) while the count is at or above
    ``config['max_jobs']``, then runs ``sbatch`` on the script and reports
    the outcome.

    Parameters
    ----------
    position_prefix : str
        Label placed at the start of every printed status message.
    job_name : str
        Human-readable job name used in the status message.
    slurm_script_path : str
        Path of the batch script handed to ``sbatch``.
    config : dict
        Parsed configuration; ``max_jobs`` is read from it.
    """
    max_jobs = int(config['max_jobs'])
    # The user does not change while waiting, so resolve it once.
    user = subprocess.check_output(['whoami']).decode().strip()
    message_printed = False

    while True:
        # "-h" suppresses the header line so only real jobs are counted;
        # counting it made the reported job count one too high.
        # The exit status is deliberately not checked: if squeue fails, its
        # empty output counts as zero jobs, as the previous pipeline did.
        squeue = subprocess.run(['squeue', '-h', '-u', user], stdout=subprocess.PIPE)
        job_count = sum(
            1 for line in squeue.stdout.decode().splitlines() if line.strip())

        if job_count >= max_jobs:
            if not message_printed:
                print_colored(
                    f"{position_prefix} : Maximum number of SLURM jobs running ({job_count}). Waiting for the queue to go down...",
                    Color.YELLOW)
                message_printed = True
            # Random back-off so concurrent waiters do not poll in lockstep.
            time.sleep(random.randint(1, 10))
            continue

        # Submit a new job using sbatch
        submission = subprocess.run(['sbatch', slurm_script_path])
        if submission.returncode == 0:
            print_colored(f"{position_prefix} : {job_name} job submitted.",
                          Color.RED)
        else:
            # Do not claim success when sbatch rejected the script.
            print_colored(
                f"{position_prefix} : {job_name} job submission failed (sbatch exit code {submission.returncode}).",
                Color.RED)
        break  # Exit the loop after one submission attempt
def modify_tltfile(tlt_file_path, tiltcom, position_prefix=None):
    """Rewrite a tlt file and a tilt.com file for RELION compatibility.

    Waits until both files exist, then:

    * strips trailing whitespace from every line of the tlt file and writes
      the lines back joined by newlines, with no newline after the last one;
    * adds 1 to every view number on each ``EXCLUDELIST`` line of the
      tilt.com file and writes the list back comma-separated.

    The increment is applied on every call, so calling this twice on the
    same tilt.com shifts the list twice.

    Parameters
    ----------
    tlt_file_path : str
        Path of the tlt file to clean up in place.
    tiltcom : str
        Path of the tilt.com file to modify in place.
    position_prefix : str, optional
        Label for the waiting message. Defaults to the tlt file name without
        its extension.
    """
    if position_prefix is None:
        position_prefix = os.path.splitext(os.path.basename(tlt_file_path))[0]

    message_printed = False
    while not os.path.exists(tiltcom) or not os.path.exists(tlt_file_path):
        if not message_printed:
            print_colored(
                f"{position_prefix} : Waiting to modify tlt and tilt.com files for RELION compatibility...",
                Color.YELLOW)
            message_printed = True
        # Poll once per second instead of spinning at full CPU.
        time.sleep(1)

    # Remove the trailing whitespace of every line
    with open(tlt_file_path, 'r') as file:
        lines = [line.rstrip() for line in file]

    # Write the modified content back to the file
    with open(tlt_file_path, 'w') as file:
        file.write('\n'.join(lines))

    # modify the EXCLUDE list in the tilt.com file to match the RELION naming scheme.
    def increment_entry(entry):
        # A range such as "3-5" has both endpoints shifted; anything else
        # must be a single integer.
        view_range = re.fullmatch(r'(\d+)-(\d+)', entry)
        if view_range:
            first, last = (int(value) + 1 for value in view_range.groups())
            return f'{first}-{last}'
        return str(int(entry) + 1)

    def process_numbers(match):
        entries = match.group(1).replace(',', ' ').split()
        incremented_numbers = ','.join(increment_entry(entry) for entry in entries)
        return f'EXCLUDELIST {incremented_numbers}'

    with open(tiltcom, 'r') as file:
        content = file.read()

    # Only spaces/tabs may separate the keyword from its list, so an empty
    # EXCLUDELIST line cannot swallow the line that follows it.
    pattern = r'EXCLUDELIST[ \t]+(.*?)$'
    content = re.sub(pattern, process_numbers, content, flags=re.MULTILINE)

    with open(tiltcom, 'w') as file:
        file.write(content)