
Commit 7042fe5

v5.4.0
Resolved issues with 3_SD runs, where assays were erroneously called invalid because a threshold failed to be generated. Resolved issues with RVP and BBP assays run on the same chip.
1 parent e6fd0a6 commit 7042fe5

File tree: 6 files changed, +44 −25 lines

README.md

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 CARMEN is a diagnostic tool designed for surveillance purposes. Below are the instructions to complete your CARMEN analysis.

 ## Software Version
-When cloning this repository, you will be using software version 5.3.0.
+When cloning this repository, you will be using software version 5.4.0.

 ## Overview
 At this point, you have ran the $Standard\ BioTools\ Dynamic\ Array^{TM}$ IFC (integrated fluidic circuit) on the $Standard\ BioTools\ Biomark^{TM}$ instrument and have completed the experimental portion of CARMEN. In running this code, you will be able to complete the data analysis portion of CARMEN and generate both binary positive/negative and quantitative signal output of your diagnostic assay.

analyze_run.py

Lines changed: 8 additions & 5 deletions

@@ -52,7 +52,7 @@
 ######################################################################################################################################################
 # assign software version
-software_version = '5.3.0'
+software_version = '5.4.0'

 ######################################################################################################################################################
 # data loading
@@ -173,7 +173,7 @@
 # collect the assays/samples from the layout assays/samples in the assignment sheet (this extraction is done in matcher.py)
 crRNA_assays = assigned_lists['assay_list']
-samples_list = assigned_lists['samples_list']
+#samples_list = assigned_lists['samples_list']

 ######################################################################################################################################################
 # instantiate ntcContaminationChecker from ntc_con_check.py
@@ -184,6 +184,9 @@
 # create df of filtered assigned_signal_norm by applying the NTC check to remove any NTCs whose raw signal suggests contamination
 assigned_signal_norm_with_NTC_check = ntcCheck.ntc_cont(assigned_signal_norm) # feed this into MedianSort

+# collect the samples_list after running NTC Contamination Check, in case NTCs were removed
+samples_list = assigned_signal_norm_with_NTC_check['sample'].unique()
+
 # temporarily save assigned_signal_norm_with_NTC_check
 assigned_signal_norm_with_NTC_check.to_csv(os.path.join(rd_subfolder, 'assigned_signal_norm_with_NTC_check.csv'), index=True)

@@ -865,7 +868,7 @@
 fl_t13_hit_binary_output
 ]

-output_file_path = os.path.join(res_subfolder, f"RESULTS_{barcode_assignment}.xlsx") #
+output_file_path = os.path.join(res_subfolder, f"RESULTS_{barcode_assignment}_{CLI_arg[1]}.xlsx") #

 try:
 # save all DataFrames to a single Excel file
@@ -974,7 +977,7 @@
 # tp = list of timepoints (t1, t2, etc)
 #unique_crRNA_assays = list(set(crRNA_assays))
 unique_crRNA_assays = list(OrderedDict.fromkeys(crRNA_assays))
-heatmap, frame2, second_half_samples = heatmap_generator.plt_heatmap(tgap, barcode_assignment,final_med_frames, samples_list, unique_crRNA_assays, timepoints)
+heatmap = heatmap_generator.plt_heatmap(tgap, barcode_assignment,final_med_frames, samples_list, unique_crRNA_assays, timepoints)

 # Make subfolder in the output folder in your path's wd if it hasn't been made already
 heatmaps_subfolder = os.path.join(rd_subfolder, f'Heatmaps_by_Timepoint_{barcode_assignment}')
@@ -1014,7 +1017,7 @@

 # apply redcapper to fl_t13_hit_binary_output_2 df
 threshold = CLI_arg[1]
-redcap_t13_hit_binary_output = redcapper.build_redcap(fl_t13_hit_binary_output_2, date, barcode_assignment,threshold, software_version)
+redcap_t13_hit_binary_output, samplesDF = redcapper.build_redcap(fl_t13_hit_binary_output_2, date, barcode_assignment,threshold, software_version)

 # save REDCAP file
 redcap_t13_hit_binary_output_file_path = os.path.join(res_subfolder, f'REDCAP_{barcode_assignment}.csv')
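
Note on the samples_list change above: the sample list is now rebuilt from the NTC-checked DataFrame instead of the assignment sheet, so samples (and NTCs) dropped by the contamination check can no longer be requested by downstream plotting. A minimal sketch of the idea, with an invented toy DataFrame standing in for the output of ntcCheck.ntc_cont():

import pandas as pd

# toy stand-in for assigned_signal_norm after the NTC contamination check;
# in the pipeline this comes from ntcCheck.ntc_cont(assigned_signal_norm)
assigned_signal_norm_with_NTC_check = pd.DataFrame({
    'sample': ['S01', 'S01', 'S02', 'NTC_2'],   # pretend NTC_1 was removed by the check
    'assay':  ['RVP_A', 'RVP_B', 'RVP_A', 'RVP_A'],
    'signal': [1.2, 0.9, 3.4, 0.4],
})

# rebuild the sample list from what survived filtering, as the diff now does,
# so heatmap indexing never asks for a sample that no longer exists
samples_list = assigned_signal_norm_with_NTC_check['sample'].unique()
print(samples_list)  # ['S01' 'S02' 'NTC_2']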

plotting.py

Lines changed: 11 additions & 5 deletions

@@ -155,17 +155,23 @@ def plt_heatmap(self, tgap, barcode_number, df_dict, sample_list, assay_list, tp
 else:
 for i in tqdm(tp):
 df_dict[i] = df_dict[i].transpose()

 # Do not split heatmap into two subplots (2-row, 1-column layout)
-fig, axes = plt.subplots(1, 1, figsize=(len(frame.columns.values)*0.5,len(frame.index.values)*0.5 * 2))
+fig, axes = plt.subplots(1, 1, figsize=(len(sample_list)*0.5,len(assay_list)*0.5)) # fig, axes = plt.subplots(1, 1, figsize=(len(sample_list)*0.5,len(sample_list)*0.5 * 2))
 # Add space between the two subplots (vertical spacing)
 plt.subplots_adjust(hspace=1)
+# add space to the bottom of the figure (adjust the bottom margin)
+plt.subplots_adjust(top=0.8, bottom=0.3)

 # Plot heatmap (all samples)
 frame = df_dict[i][sample_list].reindex(assay_list)
 annot1 = frame.map(lambda x: 'X' if (pd.isna(x) or x == 'NaN' or x is None) else '')
-ax = sns.heatmap(frame, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot = None, fmt='', annot_kws={"size": 1000, "color": "black"}, ax=axes[0],
+ax = sns.heatmap(frame, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot = None, fmt='', annot_kws={"size": 1000, "color": "black"},
 linewidths = 1, linecolor = "black")
+# set colorbar format
+cbar = ax.collections[0].colorbar
+cbar.outline.set_edgecolor('black') # Set the color of the edge (outline)
+cbar.outline.set_linewidth(2)

 # calculate the real timing of the image
 rt = time_assign[i]
@@ -199,7 +205,7 @@ def plt_heatmap(self, tgap, barcode_number, df_dict, sample_list, assay_list, tp
 # Place the legend below the first heatmap
 left, right = ax.get_xlim()
 top, bottom = ax.get_ylim()
-ax.text(left, top + 7,
+ax.text(left, top + 10,
 '†: The NTC sample for this assay was removed from the analysis due to potential contamination.',
 ha='left', fontsize=12, style='italic')

@@ -223,6 +229,6 @@ def plt_heatmap(self, tgap, barcode_number, df_dict, sample_list, assay_list, tp
 # Save the figure to the dictionary
 fig_timepoints[i] = fig

-return fig_timepoints, frame2, second_half_samples
+return fig_timepoints
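Context for dropping ax=axes[0] above: with plt.subplots(1, 1), Matplotlib returns a single Axes object rather than an array, so indexing it fails. A small standalone sketch of the two behaviours (not the repository's code):

import matplotlib
matplotlib.use('Agg')  # headless backend, just for this sketch
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 1)
print(type(axes))    # a single matplotlib.axes.Axes object
# axes[0]            # would raise TypeError: 'Axes' object is not subscriptable

fig2, axes2 = plt.subplots(2, 1)
print(type(axes2))   # numpy.ndarray of Axes -- only here is axes2[0] valid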
redcap_builder.py

Lines changed: 2 additions & 2 deletions

@@ -42,7 +42,7 @@ def merge_group(group, bbp_P1_assays, bbp_P2_assays, rvp_assays):
 merged_row[col] = p1_value #p1_value[0]
 elif col in bbp_P2_assays and len(p2_value) > 0:
 merged_row[col] = p2_value #p2_value[0]
-elif col in rvp_assays and len(p2_value) > 0:
+elif col in rvp_assays and len(rvp_value) > 0:
 merged_row[col] = rvp_value #rvp_value[0]
 """
 else:
@@ -204,5 +204,5 @@ def merge_group(group, bbp_P1_assays, bbp_P2_assays, rvp_assays):
 ### reset index
 redcap_t13_hit_binary_output = redcap_t13_hit_binary_output.reset_index(drop=True)

-return redcap_t13_hit_binary_output
+return redcap_t13_hit_binary_output, samplesDF
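The one-line fix above is a copy-paste guard: the RVP branch was testing the length of the BBP P2 value instead of the RVP value, so RVP calls could be dropped when RVP and BBP assays shared a chip. A hedged, self-contained illustration of the corrected priority logic (the helper name and data below are invented, not the repository's merge_group):

from typing import Sequence

def pick_value(col: str,
               bbp_p1_assays: Sequence[str], bbp_p2_assays: Sequence[str], rvp_assays: Sequence[str],
               p1_value: list, p2_value: list, rvp_value: list):
    """Toy version of the merge guard: a panel's value is only used when that
    panel actually produced something for this column."""
    if col in bbp_p1_assays and len(p1_value) > 0:
        return p1_value
    elif col in bbp_p2_assays and len(p2_value) > 0:
        return p2_value
    elif col in rvp_assays and len(rvp_value) > 0:   # was len(p2_value) before the fix
        return rvp_value
    return None

# an RVP assay with an empty BBP P2 list: the old guard returned None,
# the corrected guard returns the RVP call
print(pick_value('FLUAV', [], [], ['FLUAV'], [], [], ['positive']))  # ['positive']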
t13_plotting.py

Lines changed: 14 additions & 12 deletions

@@ -308,19 +308,21 @@ def t13_plt_heatmap(self, tgap, barcode_number, df, sample_list, assay_list, tp,

 else:
 # Do not split heatmap into two subplots (2-row, 1-column layout)
-fig, axes = plt.subplots(1, 1, figsize=(len(frame.columns.values)*0.5,len(frame.index.values)*0.5 * 2))
+fig, axes = plt.subplots(1, 1, figsize=(len(sample_list)*0.5,len(assay_list)*0.5))
 # Add space between the two subplots (vertical spacing)
-plt.subplots_adjust(hspace=1)
+#plt.subplots_adjust(hspace=1)
+plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1)

 # Plot heatmap (all samples)
 df = df.transpose()
 frame = df[sample_list].reindex(assay_list)
 annot1 = frame.map(lambda x: 'X' if (pd.isna(x) or x == 'NaN' or x is None) else '')
-ax = sns.heatmap(frame, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot = None, fmt='', annot_kws={"size": 1000, "color": "black"}, ax=axes[0],
+ax = sns.heatmap(frame, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot = None, fmt='', annot_kws={"size": 1000, "color": "black"},
 linewidths = 1, linecolor = "black")
-
-# calculate the real timing of the image
-rt = time_assign[i]
+# set colorbar format
+cbar = ax.collections[0].colorbar
+cbar.outline.set_edgecolor('black') # Set the color of the edge (outline)
+cbar.outline.set_linewidth(2)

 # Track x-axis labels that need a dagger
 dagger_labels = set()
@@ -351,29 +353,29 @@ def t13_plt_heatmap(self, tgap, barcode_number, df, sample_list, assay_list, tp,
 # Place the legend below the first heatmap
 left, right = ax.get_xlim()
 top, bottom = ax.get_ylim()
-ax.text(left, top + 7,
+ax.text(left, top + 10,
 '†: The NTC sample for this assay was removed from the analysis due to potential contamination.',
 ha='left', fontsize=12, style='italic')

 # plot * on y-axis that contains Invalid Assays
 if invalid_assays:
 invalid_assays = [assay.upper() for assay in invalid_assays]
-asterisk1_labels = [label + '*' if label in invalid_assays else label for label in frame1.index]
+asterisk1_labels = [label + '*' if label in invalid_assays else label for label in frame.index]
 ax.set_yticklabels(asterisk1_labels, rotation=0)

 ## add legend for * below the '†: ...' legend
-ax.text(left1, top1 + 9,
+ax.text(left, top + 11,
 '*: This assay is considered invalid due to failing Quality Control Test #3, which evaluates performance of the Combined Positive Control sample.',
 ha='left', fontsize=12, style='italic')

 # plot *** on x-axis that contains Invalid Samples
 if any(invalid_samples): # invalid_samples.size > 0
 invalid_samples = [sample.upper() for sample in invalid_samples]
-asterisk3_labels = [label + '***' if label in invalid_samples else label for label in frame1.columns]
+asterisk3_labels = [label + '***' if label in invalid_samples else label for label in frame.columns]
 ax.set_xticklabels(asterisk3_labels, rotation=90, ha='right')

 ## add legend for * below the '†: ...' legend
-ax.text(left1, top1 + 10,
+ax.text(left, top + 12,
 '***: This sample is invalid due to testing positive against the no-crRNA assay, an included negative assay control.',
 ha='left', fontsize=12, style='italic')

@@ -403,7 +405,7 @@ def t13_plt_heatmap(self, tgap, barcode_number, df, sample_list, assay_list, tp,
 ax.add_patch(rect)

 # Adjust layout
-ax.set_title(f'NTC Normalized Heatmap for {barcode_number} at '+str(rt)+' minutes', size=28)
+ax.set_title(f'NTC Normalized Heatmap for {barcode_number} at {time_assign[last_key]} minutes', size=28)
 ax.set_xlabel('Samples', size=18)
 ax.set_ylabel('Assays', size=18)
 top, bottom = ax.get_ylim()
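
On the title change above: the single-plot branch has no loop variable i, so rt = time_assign[i] was removed and the title now reads the timepoint directly from time_assign[last_key]. A tiny sketch of that lookup, assuming (as the diff implies but does not show) that time_assign maps timepoint keys to minutes and last_key is the final key:

# hypothetical shapes for time_assign and last_key; the real values are built elsewhere in t13_plotting.py
time_assign = {'t1': 13, 't2': 18, 't3': 23}
last_key = list(time_assign)[-1]      # 't3' -- assumed to be the final timepoint

barcode_number = '1234567890'         # placeholder barcode
title = f'NTC Normalized Heatmap for {barcode_number} at {time_assign[last_key]} minutes'
print(title)  # NTC Normalized Heatmap for 1234567890 at 23 minutes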

threshold.py

Lines changed: 8 additions & 0 deletions

@@ -76,10 +76,18 @@ def raw_thresholder(self, unique_crRNA_assays, assigned_only, t13_df, CLI_thresh
 raw_thresholds_df = pd.concat([ntc_mean_df, ntc_sd_df, ntc_3sd_df, raw_thresholds_df], ignore_index=True, axis=0)
 raw_thresholds_df.index = ['NTC Mean', 'NTC Standard Deviation', 'NTC 3*SD', 'NTC Threshold']

+# If there is only 1 NTC for an assay, we cannot take the std dev and these rows remain blank in raw_thresholds_df
+# So we resort to using the 1.8_Mean for these instances.
+for col in raw_thresholds_df.columns:
+    if pd.isna(raw_thresholds_df.at['NTC Threshold', col]):
+        ntc_mean = raw_thresholds_df.at['NTC Mean', col]
+        raw_thresholds_df.at['NTC Threshold', col] = 1.8 * ntc_mean
+
 # Calculate the Normalized NTC threshold too
 norm_thresholds_df = raw_thresholds_df.loc['NTC Threshold'] / ntc_mean_df
 raw_thresholds_df = pd.concat([raw_thresholds_df, norm_thresholds_df], ignore_index=True, axis=0)
 raw_thresholds_df.index = ['NTC Mean', 'NTC Standard Deviation', 'NTC 3*SD', 'NTC Threshold', 'Normalized NTC Threshold']
+

 else:
 print("Consult ReadME and input appropriate command-line arguments to specify thresholding method.")
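
This is the fix behind the 3_SD note in the commit message: with a single NTC replicate, the standard deviation is NaN, so the mean + 3*SD threshold stayed blank and the assay was flagged invalid. The new loop backfills those columns with 1.8 x the NTC mean. A minimal sketch of the same idea on an invented two-assay table (not the repository's raw_thresholder):

import numpy as np
import pandas as pd

# invented example: assay_A has 3 NTC replicates, assay_B only 1
ntc_raw = {'assay_A': [0.50, 0.55, 0.60], 'assay_B': [0.40]}

ntc_mean = pd.Series({k: np.mean(v) for k, v in ntc_raw.items()})
ntc_sd = pd.Series({k: pd.Series(v).std() for k, v in ntc_raw.items()})  # NaN for a single replicate
thresholds = ntc_mean + 3 * ntc_sd                                       # NaN propagates for assay_B

raw_thresholds_df = pd.DataFrame([ntc_mean, ntc_sd, thresholds],
                                 index=['NTC Mean', 'NTC Standard Deviation', 'NTC Threshold'])

# same backfill as the diff: where the 3*SD threshold could not be computed,
# fall back to 1.8 x the NTC mean
for col in raw_thresholds_df.columns:
    if pd.isna(raw_thresholds_df.at['NTC Threshold', col]):
        raw_thresholds_df.at['NTC Threshold', col] = 1.8 * raw_thresholds_df.at['NTC Mean', col]

print(raw_thresholds_df.round(3))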
