Skip to content

Commit 470b3d3

Browse files
committed
Software Update 3.2.3
Improvements to user interface and plotting normalized t13 data
1 parent 6e34b82 commit 470b3d3

File tree

4 files changed

+158
-34
lines changed

4 files changed

+158
-34
lines changed

analyze_run.py

Lines changed: 72 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@
5151
barcode_assignment = match.group(1)
5252
print(f"IFC barcode: {barcode_assignment}")
5353

54+
#####
55+
# instantiate Data Reader from reader.py
56+
reader = DataReader() # make an empty class object that corresponds to the DataReader() object from reader.py
5457

5558
# then load in the data file
5659
data_files = sorted([fname for fname in all_files if fname.suffix == ".csv"])
@@ -64,8 +67,6 @@
6467
file_like_object = BytesIO(data_file.read_bytes())
6568
df_data = pd.read_csv(file_like_object, on_bad_lines="skip")
6669
print(f"This is the Data File that was loaded: {data_file.name}")
67-
68-
reader = DataReader() # make am empty class object that corresponds to the DataReader() object from reader.py
6970

7071
phrases_to_find = [
7172
"Raw Data for Passive Reference ROX",
@@ -80,13 +81,20 @@
8081

8182
# at this point, we have loaded the assignment sheet and have sorted through the loaded data file to create a dict of dataframes
8283

84+
#####
8385
# instantiate DataProcessor from norm.py
8486
processor = DataProcessor()
8587
# normalize the signal
8688
normalized_dataframes = processor.background_processing(dataframes)
8789

90+
# The premise of the code is that different viral panels require different thresholding
91+
# So the user will specify command line arguments as per the ReadMe instructions
92+
# and this portion of the code is meant to access the CLI arguments and modify the threshold specified in the code
93+
94+
CLI_arg = sys.argv
95+
8896
# make an output folder in your path's wd if it hasn't been made already
89-
output_folder = f'output_{barcode_assignment}'
97+
output_folder = f'output_{barcode_assignment}_[{CLI_arg[1]}]'
9098
if not os.path.exists(output_folder):
9199
os.makedirs(output_folder)
92100

@@ -95,6 +103,7 @@
95103
normalized_dataframes['signal_norm'].to_csv(os.path.join(output_folder, 'signal_norm.csv'), index=True)
96104
normalized_dataframes['ref_norm'].to_csv(os.path.join(output_folder, 'ref_norm.csv'), index=True)
97105

106+
#####
98107
# instantiate DataMatcher from matcher.py
99108
matcher = DataMatcher()
100109
assigned_norms, assigned_lists = matcher.assign_assays(assignment_files[0], normalized_dataframes['ref_norm'], normalized_dataframes['signal_norm'])
@@ -104,9 +113,11 @@
104113
assigned_norms['signal_norm_raw'].to_csv(os.path.join(output_folder, 'assigned_signal_norm.csv'), index=True)
105114
assigned_norms['ref_norm_raw'].to_csv(os.path.join(output_folder, 'assigned_ref_norm.csv'), index=True)
106115

107-
samples_list = assigned_lists['samples_list']
116+
# collect the assays/samples from the layout assays/samples in the assignment sheet (this extraction is done in matcher.py)
108117
crRNA_assays = assigned_lists['assay_list']
118+
samples_list = assigned_lists['samples_list']
109119

120+
#####
110121
# instantiate ntcContaminationChecker from ntc_con_check.py
111122
ntcCheck = ntcContaminationChecker()
112123

@@ -115,16 +126,24 @@
115126
# create df of filtered assigned_signal_norm by applying the NTC check to remove any NTCs whose raw signal suggests contamination
116127
assigned_signal_norm_with_NTC_check = ntcCheck.ntc_cont(assigned_signal_norm) # feed this into MedianSort
117128

129+
# temporarily save assigned_signal_norm_with_NTC_check
130+
assigned_signal_norm_with_NTC_check.to_csv(os.path.join(output_folder, 'assigned_signal_norm_with_NTC_check.csv'), index=True)
131+
132+
133+
#####
118134
# instantiate MedianSort from median_frame.py
119135
median = MedianSort(crRNA_assays)
120136
final_med_frames = median.create_median(assigned_signal_norm_with_NTC_check)
121137

138+
# temporarily print final_med_frames
139+
#print(final_med_frames)
140+
122141
# Output needs to be rounded to 4 digits
123142
rounded_final_med_frames = {}
124143
# Define the number of decimals for rounding
125144
decimals = 5
126145

127-
# Iterate through each row and column
146+
# Iterate through each row and column, round each value
128147
for key, df in final_med_frames.items():
129148
rounded_df = pd.DataFrame(index=df.index, columns=df.columns)
130149
for i in range(len(df.index)):
@@ -133,11 +152,18 @@
133152
rounded_df.iloc[i, j] = round(df.iloc[i, j], decimals)
134153
rounded_final_med_frames[key] = rounded_df
135154

155+
# Make subfolder in the output folder in your path's wd if it hasn't been made already
156+
timepoints_subfolder = os.path.join(output_folder, f'timepoints_quantData_{barcode_assignment}')
157+
if not os.path.exists(timepoints_subfolder):
158+
os.makedirs(timepoints_subfolder)
159+
160+
# Save the dataframes per timepoint in the timepoints_quantData subfolder
136161
timepoints = list(rounded_final_med_frames.keys())
137162
for i, t in enumerate(timepoints, start=1):
138-
filename = os.path.join(output_folder, f't{i}_{barcode_assignment}.csv')
163+
filename = os.path.join(timepoints_subfolder, f't{i}_{barcode_assignment}.csv')
139164
csv = rounded_final_med_frames[t].to_csv(filename, index=True)
140165

166+
141167
# since we want to explicitly manipulate t13_csv, it is helpful to have the t13 df referenced outside of the for loop
142168
last_key = list(rounded_final_med_frames.keys())[-1]
143169
t13_dataframe_orig = rounded_final_med_frames[last_key]
@@ -147,12 +173,7 @@
147173
# at this point, we have created a t1 thru t13 dataframe and exported all these dataframes as csv files in our output folder
148174
# now we need to threshold the t13 csv and mark signals >= threshold as positive and < threshold as negative
149175

150-
# The premise of the code is that different viral panels require different thresholding
151-
# So the user will specifiy command line arguments as per the ReadMe instructions
152-
# and this portion of the code is meant to access the CI arguments and modify the threshold specified in the code
153-
154-
CLI_arg = sys.argv
155-
176+
#####
156177
# instantiate Thresholder from threshold.py
157178
thresholdr = Thresholder()
158179
unique_crRNA_assays = list(set(crRNA_assays))
@@ -161,7 +182,6 @@
161182
# and save the file to your working directory
162183
ntc_PerAssay, ntc_thresholds_output, t13_hit_output = thresholdr.raw_thresholder(unique_crRNA_assays, assigned_norms['signal_norm_raw'], t13_dataframe_copy1, CLI_arg[1])
163184

164-
165185
# make copies of t13_hit_output csv for downstream summaries and quality control checks
166186
t13_hit_output_copy1 = pd.DataFrame(t13_hit_output).copy() # make a copy of t13_hit_output # used in ndc qual check
167187
t13_hit_output_copy2 = pd.DataFrame(t13_hit_output).copy() # make a copy of t13_hit_output # used in cpc qual check
@@ -176,13 +196,15 @@
176196
ntc_thresholds_output.to_csv(ntc_thresholds_output_file_path, index=True)
177197
t13_hit_output.to_csv(hit_output_file_path, index=True)
178198

199+
#####
179200
# instantiate NTC_Normalized from ntcnorm.py
180201
ntcNorm = Normalized()
181202
# apply ntc_normalizr to the t13_dataframe to produce a new dataframe with all values divided by the mean NTC for that assay
182-
t13_quant_hit_norm = ntcNorm.normalizr(t13_dataframe_copy2)
183-
quant_hit_output_ntcNorm_file_path = os.path.join(output_folder, f't13_{barcode_assignment}_quant_ntcNorm.csv')
184-
t13_quant_hit_norm.to_csv(quant_hit_output_ntcNorm_file_path, index=True)
203+
t13_quant_norm = ntcNorm.normalizr(t13_dataframe_copy2)
204+
quant_output_ntcNorm_file_path = os.path.join(output_folder, f't13_{barcode_assignment}_normalized.csv')
205+
t13_quant_norm.to_csv(quant_output_ntcNorm_file_path, index=True)
185206

207+
#####
186208
# instantiate Binary_Converter from binary_results.py
187209
binary_num_converter = Binary_Converter()
188210
# apply hit_numeric_conv to the the t13_hit_output to produce a new dataframe with all pos/neg converted to binary 1/0 output
@@ -194,14 +216,15 @@
194216
t13_hit_binary_output_copy1 = pd.DataFrame(t13_hit_binary_output).copy() # used in coninf check
195217
t13_hit_binary_output_copy2 = pd.DataFrame(t13_hit_binary_output).copy()
196218

219+
#####
197220
# instantiate Summarized from summary.py
198221
summary = Summarized()
199222
# apply summarizer to the t13_dataframe to produce a new dataframe tabulating all of the positive samples
200223
summary_samples_df = summary.summarizer(t13_hit_output)
201224
summary_pos_samples_file_path = os.path.join(output_folder, f'Positives_Summary_{barcode_assignment}.csv')
202225
summary_samples_df.to_csv(summary_pos_samples_file_path, index=True)
203226

204-
227+
#####
205228
# instantiate Plotter from plotting.py
206229
heatmap_generator = Plotter()
207230

@@ -211,20 +234,38 @@
211234
unique_crRNA_assays = list(OrderedDict.fromkeys(crRNA_assays))
212235
heatmap = heatmap_generator.plt_heatmap(tgap, barcode_assignment,final_med_frames, samples_list, unique_crRNA_assays, timepoints)
213236

237+
# Make subfolder in the output folder in your path's wd if it hasn't been made already
238+
heatmaps_subfolder = os.path.join(output_folder, f'heatmaps_{barcode_assignment}')
239+
if not os.path.exists(heatmaps_subfolder):
240+
os.makedirs(heatmaps_subfolder)
241+
214242
# save heatmap per timepoint
215243
for i, t in enumerate(timepoints, start=1):
216244
#csv = convert_df(final_med_frames[t])
217-
heatmap_filename = os.path.join(output_folder, f'Heatmap_t{i}_{barcode_assignment}.png')
245+
heatmap_filename = os.path.join(heatmaps_subfolder, f'Heatmap_t{i}_{barcode_assignment}.png')
218246
fig = heatmap[t].savefig(heatmap_filename, bbox_inches = 'tight', dpi=80)
219247
plt.close(fig)
220248

221-
print(f"The heatmap plots saved to the folder, {output_folder}")
249+
print(f"The heatmap plots saved to the folder, {heatmaps_subfolder} in {output_folder}")
222250

251+
heatmap_t13_quant_norm = heatmap_generator.t13_plt_heatmap(tgap, barcode_assignment,t13_quant_norm, samples_list, unique_crRNA_assays, timepoints)
252+
heatmap_t13_quant_norm_filename = os.path.join(output_folder, f'Heatmap_t13_{barcode_assignment}_normalized.png')
253+
fig = heatmap_t13_quant_norm.savefig(heatmap_t13_quant_norm_filename, bbox_inches = 'tight', dpi=80)
254+
plt.close(fig)
255+
256+
257+
#####
223258
# instantiate Assay_QC_Score
224259
assayScorer = Assay_QC_Score()
225260
# take in t13_hit_binary_output as the df to build off of
226261
QC_score_per_assay_df = assayScorer.assay_level_score(t13_hit_binary_output)
227-
assay_lvl_QC_score_file_path = os.path.join(output_folder, f'Assay_Level_QC_Metrics_{barcode_assignment}.csv')
262+
263+
# Make subfolder in the output folder in your path's wd if it hasn't been made already
264+
assayQC_subfolder = os.path.join(output_folder, f'assay_performance_evaluation_{barcode_assignment}')
265+
if not os.path.exists(assayQC_subfolder):
266+
os.makedirs(assayQC_subfolder)
267+
268+
assay_lvl_QC_score_file_path = os.path.join(assayQC_subfolder, f'Assay_Performance_QC_Test_Results_{barcode_assignment}.csv')
228269
QC_score_per_assay_df.to_csv(assay_lvl_QC_score_file_path, index=True)
229270

230271
# write text file explaining the QC score
@@ -264,16 +305,22 @@
264305
assayScores_Explanation.append("The final score per assay is included for easy comparison of assay performance.\n")
265306

266307
# create and save an output text file containing the quality control checks
267-
assayScores_file_path = os.path.join(output_folder, f'Assay_Performance_QC_Tests_{barcode_assignment}.txt')
308+
assayScores_file_path = os.path.join(assayQC_subfolder, f'Assay_Performance_QC_Tests_{barcode_assignment}.txt')
268309
with open(assayScores_file_path, 'w') as f:
269310
for line in assayScores_Explanation:
270311
f.write(line + '\n')
271312

272-
print(f"The four quality control tests to evaluate assay performance are complete. Their results have been saved to the folder, {output_folder}")
313+
print(f"The four quality control tests to evaluate assay performance are complete. Their results have been saved to the folder, {assayQC_subfolder}")
273314

315+
#####
274316
# instantiate Qual_Ctrl_Checks from qual-checks.py
275317
qual_checks = Qual_Ctrl_Checks()
276318

319+
# Make subfolder in the output folder in your path's wd if it hasn't been made already
320+
qc_subfolder = os.path.join(output_folder, 'quality_control_flags')
321+
if not os.path.exists(qc_subfolder):
322+
os.makedirs(qc_subfolder)
323+
277324
# initialize a list to collect all quality control checks
278325
QC_lines = []
279326

@@ -332,7 +379,6 @@
332379
# apply ntc_check to the t13_hit_output df to generate a list of all ntc positive assays
333380
assigned_signal_norm_2 = pd.DataFrame(assigned_norms['signal_norm_raw']).copy() # make a copy of assigned_signal_norm dataframe
334381

335-
336382
high_raw_ntc_signal = qual_checks.ntc_check(assigned_signal_norm_2)
337383
QC_lines.append("4. Evaluation of No Target Control (NTC) Contamination \n")
338384
if high_raw_ntc_signal:
@@ -348,7 +394,7 @@
348394
coinfection_df = qual_checks.coinf_check(t13_hit_binary_output_copy1)
349395
QC_lines.append("5. Evaluation of Potential Co-Infected Samples\n")
350396

351-
coinfection_df_file_path = os.path.join(output_folder, f'Coinfection_Check_{barcode_assignment}.csv')
397+
coinfection_df_file_path = os.path.join(qc_subfolder, f'Coinfection_Check_{barcode_assignment}.csv')
352398
coinfection_df.to_csv(coinfection_df_file_path, index=True) # output needs to be csv of coinfection check
353399

354400
# in Qual_Check text file, add message saying "see coinf check csv" and "if any samples except CPC are flagged as being coinfected, there is risk of these samples being coinfected"
@@ -359,13 +405,12 @@
359405
QC_lines.append(" - All other flagged samples should be further evaluated for potential co-infection.\n")
360406
QC_lines.append("Please be advised to check the output files as well.")
361407

362-
363408
# create and save an output text file containing the quality control checks
364-
QCs_file_path = os.path.join(output_folder, f'Quality_Control_Flags_{barcode_assignment}.txt')
409+
QCs_file_path = os.path.join(qc_subfolder, f'Quality_Control_Flags_{barcode_assignment}.txt')
365410
with open(QCs_file_path, 'w') as f:
366411
for line in QC_lines:
367412
f.write(line + '\n')
368413

369-
print(f"The quality control checks are complete and saved to the folder, {output_folder}")
414+
print(f"The quality control checks are complete and saved to the folder, {qc_subfolder}")
370415

371416

ntc_con_check.py

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,29 +12,33 @@ def ntc_cont(self, assigned_sig_norm):
1212

1313
# filter sample for anything that contains NTC (case-insensitive)
1414
ntc_filtered_df = assigned_sig_norm[assigned_sig_norm['sample'].str.contains('NTC', case=False, na=False)]
15+
# filter all other samples and save it in a new df
1516
all_else_filtered_df = assigned_sig_norm[~assigned_sig_norm['sample'].str.contains('NTC', case=False, na=False)]
16-
17-
ntc_assay_dfs = []
17+
# initiate list of ntc_assay_dfs
18+
ntc_assay_dfs = []
1819
# check per assay_df
1920
for assay in ntc_filtered_df['assay'].unique():
2021
# filter the df for the current assay
2122
assay_df = ntc_filtered_df[ntc_filtered_df['assay'] == assay]
2223

2324
# Check if t13 value is greater than 0.5 for any sample
24-
for _, row in assay_df.iterrows():
25+
for index, row in assay_df.iterrows():
2526
if row['t13'] > 0.5:
2627
# Check if there is still more than one row left in assay_df
2728
if len(assay_df) > 1:
2829
# Remove the row where t13 is greater than 0.5
29-
assay_df = assay_df[assay_df['t13'] != row['t13']]
30+
assay_df = assay_df.drop(index)
3031
else:
3132
# If there is only one row left, set row['t13'] to 0.5
32-
assay_df.at[row.name, 't13'] = 0.5
33-
33+
assay_df.at[index, 't13'] = 0.5
34+
# add assay_df back to list of ntc_assay_dfs
3435
ntc_assay_dfs.append(assay_df)
3536

37+
# convert ntc_assay_dfs list into a df
3638
combined_ntc_assay_dfs = pd.concat(ntc_assay_dfs, ignore_index=True)
3739

3840
assigned_sig_norm = pd.concat([combined_ntc_assay_dfs, all_else_filtered_df], ignore_index=True)
41+
42+
# pull the samples list
3943

4044
return assigned_sig_norm

ntcnorm.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ def normalizr(self, t13_df):
2828
ntc_mean = ntc_mean_df.loc['NTC Mean', col_name]
2929
# Divide the value by the NTC mean per assay
3030
t13_df.at[index, col_name] = value/ntc_mean
31+
32+
t13_df = t13_df.apply(pd.to_numeric, errors = 'coerce')
3133

3234
return t13_df
3335

plotting.py

Lines changed: 74 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,77 @@ def plt_heatmap(self, tgap, barcode_number, df_dict, sample_list, assay_list, tp
9292
axes.vlines(v_lines, colors = 'silver',alpha=0.9,linewidths = 0.35,*axes.get_ylim())
9393
fig_timepoints[i] = fig
9494

95-
return fig_timepoints
95+
return fig_timepoints
96+
97+
def t13_plt_heatmap(self, tgap, barcode_number, df, sample_list, assay_list, tp):
    """Plot a heatmap of t13 data normalized against each assay's NTC mean.

    Parameters:
        tgap: minutes elapsed before the first read; used to compute
            timepoint labels (first read at tgap + 3, then every 5 min).
        barcode_number: IFC barcode identifier, used in plot titles.
        df: t13 normalized dataframe (samples as rows, assays as columns;
            transposed internally) -- assumed, confirm against caller.
        sample_list: ordered sample names selecting/ordering the columns.
        assay_list: ordered assay names selecting/ordering the rows.
        tp: list of timepoints; only its length is used here.

    Returns:
        The matplotlib Figure containing the heatmap(s).

    NOTE(review): relies on module-level imports plt (matplotlib.pyplot),
    sns (seaborn) and np (numpy) -- confirm they exist in plotting.py.
    """
    # Map each timepoint label ("t1", "t2", ...) to its elapsed minutes.
    time_assign = {}
    for cycle in range(1, len(tp) + 1):
        tpoint = "t" + str(cycle)
        time_assign[tpoint] = tgap + 3 + (cycle - 1) * 5
    # t13 corresponds to the last timepoint key.
    last_key = list(time_assign.keys())[-1]

    half_samples = int(len(sample_list) / 2)

    if len(sample_list) == 192:
        # A full 192-sample chip is split across two stacked heatmaps.
        first_half_samples = sample_list[:half_samples]
        second_half_samples = sample_list[half_samples:]

        fig, axes = plt.subplots(2, 1, figsize=(len(first_half_samples) * 0.5, len(assay_list) * 0.5 * 2))

        # Transpose so rows are assays and columns are samples.
        df = df.transpose()

        # First heatmap (first 96 samples)
        frame1 = df[first_half_samples].reindex(assay_list)
        ax1 = sns.heatmap(frame1, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot_kws={"size": 20}, ax=axes[0])
        ax1.set_title(f'Heatmap for data normalized against assay-specific NTC mean, for {barcode_number} at {time_assign[last_key]} minutes (First {half_samples} Samples)', size=28)
        ax1.set_xlabel('Samples', size=14)
        ax1.set_ylabel('Assays', size=14)
        # Expand y-limits by half a cell to avoid matplotlib clipping the
        # top/bottom rows of the heatmap.
        bottom, top = ax1.get_ylim()
        ax1.set_ylim(bottom + 0.5, top - 0.5)
        ax1.tick_params(axis="y", labelsize=16)
        ax1.tick_params(axis="x", labelsize=16)
        plt.yticks(rotation=0)

        # Second heatmap (next 96 samples)
        frame2 = df[second_half_samples].reindex(assay_list)
        ax2 = sns.heatmap(frame2, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot_kws={"size": 20}, ax=axes[1])
        ax2.set_title(f'Heatmap for data normalized against assay-specific NTC mean, for {barcode_number} at {time_assign[last_key]} minutes (Next {half_samples} Samples)', size=28)
        ax2.set_xlabel('Samples', size=14)
        ax2.set_ylabel('Assays', size=14)
        bottom, top = ax2.get_ylim()
        ax2.set_ylim(bottom + 0.5, top - 0.5)
        ax2.tick_params(axis="y", labelsize=16)
        ax2.tick_params(axis="x", labelsize=16)
        plt.yticks(rotation=0)

        # Adjust layout
        plt.tight_layout()

    else:
        # Single heatmap for chips with fewer than 192 samples.
        df = df.transpose()
        frame = df[sample_list].reindex(assay_list)
        fig, axes = plt.subplots(1, 1, figsize=(len(frame.columns.values) * 0.5, len(frame.index.values) * 0.5))
        ax = sns.heatmap(frame, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot_kws={"size": 20})

        # BUG FIX: the original titled with time_assign[-1], but time_assign
        # is keyed by strings ("t1"..."tN"), so -1 raises KeyError on every
        # non-192-sample run. Use the last timepoint key, matching both the
        # 192-sample branch above and plt_heatmap.
        plt.title(f'Heatmap for data normalized against assay-specific NTC mean, for {barcode_number} at {time_assign[last_key]} minutes', size=28)
        plt.xlabel('Samples', size=14)
        plt.ylabel('Assays', size=14)
        bottom, top = ax.get_ylim()
        ax.set_ylim(bottom + 0.5, top - 0.5)
        ax.tick_params(axis="y", labelsize=16)
        ax.tick_params(axis="x", labelsize=16)
        plt.yticks(rotation=0)

        # Draw faint grid lines every 3 assays/samples to ease reading.
        # (Kept inside the else branch: here `axes` is a single Axes; in the
        # 192-sample branch it is an ndarray and has no hlines/vlines.)
        tgt_num = len(sample_list)
        gd_num = len(assay_list)
        h_lines = np.arange(3, gd_num, 3)
        v_lines = np.arange(3, tgt_num, 3)
        axes.hlines(h_lines, colors='silver', alpha=0.9, linewidths=0.35, *axes.get_xlim())
        axes.vlines(v_lines, colors='silver', alpha=0.9, linewidths=0.35, *axes.get_ylim())

    return fig

0 commit comments

Comments
 (0)