
Commit 7042fe5

v5.4.0
Resolved issues with 3_SD runs, where assays were erroneously called invalid because a threshold failed to be generated. Resolved issues with RVP and BBP assays run on the same chip.
1 parent e6fd0a6 commit 7042fe5

File tree: 6 files changed, +44 −25 lines

README.md

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 CARMEN is a diagnostic tool designed for surveillance purposes. Below are the instructions to complete your CARMEN analysis.

 ## Software Version
-When cloning this repository, you will be using software version 5.3.0.
+When cloning this repository, you will be using software version 5.4.0.

 ## Overview
 At this point, you have ran the $Standard\ BioTools\ Dynamic\ Array^{TM}$ IFC (integrated fluidic circuit) on the $Standard\ BioTools\ Biomark^{TM}$ instrument and have completed the experimental portion of CARMEN. In running this code, you will be able to complete the data analysis portion of CARMEN and generate both binary positive/negative and quantitative signal output of your diagnostic assay.

analyze_run.py

Lines changed: 8 additions & 5 deletions

@@ -52,7 +52,7 @@
 ######################################################################################################################################################
 # assign software version
-software_version = '5.3.0'
+software_version = '5.4.0'

 ######################################################################################################################################################
 # data loading
@@ -173,7 +173,7 @@
 # collect the assays/samples from the layout assays/samples in the assignment sheet (this extraction is done in matcher.py)
 crRNA_assays = assigned_lists['assay_list']
-samples_list = assigned_lists['samples_list']
+#samples_list = assigned_lists['samples_list']

 ######################################################################################################################################################
 # instantiate ntcContaminationChecker from ntc_con_check.py
@@ -184,6 +184,9 @@
 # create df of filtered assigned_signal_norm by applying the NTC check to remove any NTCs whose raw signal suggests contamination
 assigned_signal_norm_with_NTC_check = ntcCheck.ntc_cont(assigned_signal_norm) # feed this into MedianSort

+# collect the samples_list after running NTC Contamination Check, in case NTCs were removed
+samples_list = assigned_signal_norm_with_NTC_check['sample'].unique()
+
 # temporarily save assigned_signal_norm_with_NTC_check
 assigned_signal_norm_with_NTC_check.to_csv(os.path.join(rd_subfolder, 'assigned_signal_norm_with_NTC_check.csv'), index=True)

@@ -865,7 +868,7 @@
 fl_t13_hit_binary_output
 ]

-output_file_path = os.path.join(res_subfolder, f"RESULTS_{barcode_assignment}.xlsx") #
+output_file_path = os.path.join(res_subfolder, f"RESULTS_{barcode_assignment}_{CLI_arg[1]}.xlsx") #

 try:
 # save all DataFrames to a single Excel file
@@ -974,7 +977,7 @@
 # tp = list of timepoints (t1, t2, etc)
 #unique_crRNA_assays = list(set(crRNA_assays))
 unique_crRNA_assays = list(OrderedDict.fromkeys(crRNA_assays))
-heatmap, frame2, second_half_samples = heatmap_generator.plt_heatmap(tgap, barcode_assignment,final_med_frames, samples_list, unique_crRNA_assays, timepoints)
+heatmap = heatmap_generator.plt_heatmap(tgap, barcode_assignment,final_med_frames, samples_list, unique_crRNA_assays, timepoints)

 # Make subfolder in the output folder in your path's wd if it hasn't been made already
 heatmaps_subfolder = os.path.join(rd_subfolder, f'Heatmaps_by_Timepoint_{barcode_assignment}')
@@ -1014,7 +1017,7 @@

 # apply redcapper to fl_t13_hit_binary_output_2 df
 threshold = CLI_arg[1]
-redcap_t13_hit_binary_output = redcapper.build_redcap(fl_t13_hit_binary_output_2, date, barcode_assignment,threshold, software_version)
+redcap_t13_hit_binary_output, samplesDF = redcapper.build_redcap(fl_t13_hit_binary_output_2, date, barcode_assignment,threshold, software_version)

 # save REDCAP file
 redcap_t13_hit_binary_output_file_path = os.path.join(res_subfolder, f'REDCAP_{barcode_assignment}.csv')
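
Note on the samples_list change above: the sample list is now rebuilt from the NTC-checked DataFrame instead of the assignment sheet, so samples (and NTCs) dropped by the contamination check can no longer be requested by downstream plotting. A minimal sketch of the idea, with an invented toy DataFrame standing in for the output of ntcCheck.ntc_cont():

import pandas as pd

# toy stand-in for assigned_signal_norm after the NTC contamination check;
# in the pipeline this comes from ntcCheck.ntc_cont(assigned_signal_norm)
assigned_signal_norm_with_NTC_check = pd.DataFrame({
    'sample': ['S01', 'S01', 'S02', 'NTC_2'],   # pretend NTC_1 was removed by the check
    'assay':  ['RVP_A', 'RVP_B', 'RVP_A', 'RVP_A'],
    'signal': [1.2, 0.9, 3.4, 0.4],
})

# rebuild the sample list from what survived filtering, as the diff now does,
# so heatmap indexing never asks for a sample that no longer exists
samples_list = assigned_signal_norm_with_NTC_check['sample'].unique()
print(samples_list)  # ['S01' 'S02' 'NTC_2']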

plotting.py

Lines changed: 11 additions & 5 deletions

@@ -155,17 +155,23 @@ def plt_heatmap(self, tgap, barcode_number, df_dict, sample_list, assay_list, tp
 else:
 for i in tqdm(tp):
 df_dict[i] = df_dict[i].transpose()

 # Do not split heatmap into two subplots (2-row, 1-column layout)
-fig, axes = plt.subplots(1, 1, figsize=(len(frame.columns.values)*0.5,len(frame.index.values)*0.5 * 2))
+fig, axes = plt.subplots(1, 1, figsize=(len(sample_list)*0.5,len(assay_list)*0.5)) # fig, axes = plt.subplots(1, 1, figsize=(len(sample_list)*0.5,len(sample_list)*0.5 * 2))
 # Add space between the two subplots (vertical spacing)
 plt.subplots_adjust(hspace=1)
+# add space to the bottom of the figure (adjust the bottom margin)
+plt.subplots_adjust(top=0.8, bottom=0.3)

 # Plot heatmap (all samples)
 frame = df_dict[i][sample_list].reindex(assay_list)
 annot1 = frame.map(lambda x: 'X' if (pd.isna(x) or x == 'NaN' or x is None) else '')
-ax = sns.heatmap(frame, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot = None, fmt='', annot_kws={"size": 1000, "color": "black"}, ax=axes[0],
+ax = sns.heatmap(frame, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot = None, fmt='', annot_kws={"size": 1000, "color": "black"},
 linewidths = 1, linecolor = "black")
+# set colorbar format
+cbar = ax.collections[0].colorbar
+cbar.outline.set_edgecolor('black') # Set the color of the edge (outline)
+cbar.outline.set_linewidth(2)

 # calculate the real timing of the image
 rt = time_assign[i]
@@ -199,7 +205,7 @@ def plt_heatmap(self, tgap, barcode_number, df_dict, sample_list, assay_list, tp
 # Place the legend below the first heatmap
 left, right = ax.get_xlim()
 top, bottom = ax.get_ylim()
-ax.text(left, top + 7,
+ax.text(left, top + 10,
 '†: The NTC sample for this assay was removed from the analysis due to potential contamination.',
 ha='left', fontsize=12, style='italic')

@@ -223,6 +229,6 @@ def plt_heatmap(self, tgap, barcode_number, df_dict, sample_list, assay_list, tp
 # Save the figure to the dictionary
 fig_timepoints[i] = fig

-return fig_timepoints, frame2, second_half_samples
+return fig_timepoints
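Context for dropping ax=axes[0] above: with plt.subplots(1, 1), Matplotlib returns a single Axes object rather than an array, so indexing it fails. A small standalone sketch of the two behaviours (not the repository's code):

import matplotlib
matplotlib.use('Agg')  # headless backend, just for this sketch
import matplotlib.pyplot as plt

fig, axes = plt.subplots(1, 1)
print(type(axes))    # a single matplotlib.axes.Axes object
# axes[0]            # would raise TypeError: 'Axes' object is not subscriptable

fig2, axes2 = plt.subplots(2, 1)
print(type(axes2))   # numpy.ndarray of Axes -- only here is axes2[0] valid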
redcap_builder.py

Lines changed: 2 additions & 2 deletions

@@ -42,7 +42,7 @@ def merge_group(group, bbp_P1_assays, bbp_P2_assays, rvp_assays):
 merged_row[col] = p1_value #p1_value[0]
 elif col in bbp_P2_assays and len(p2_value) > 0:
 merged_row[col] = p2_value #p2_value[0]
-elif col in rvp_assays and len(p2_value) > 0:
+elif col in rvp_assays and len(rvp_value) > 0:
 merged_row[col] = rvp_value #rvp_value[0]
 """
 else:
@@ -204,5 +204,5 @@ def merge_group(group, bbp_P1_assays, bbp_P2_assays, rvp_assays):
 ### reset index
 redcap_t13_hit_binary_output = redcap_t13_hit_binary_output.reset_index(drop=True)

-return redcap_t13_hit_binary_output
+return redcap_t13_hit_binary_output, samplesDF
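The one-line fix above is a copy-paste guard: the RVP branch was testing the length of the BBP P2 value instead of the RVP value, so RVP calls could be dropped when RVP and BBP assays shared a chip. A hedged, self-contained illustration of the corrected priority logic (the helper name and data below are invented, not the repository's merge_group):

from typing import Sequence

def pick_value(col: str,
               bbp_p1_assays: Sequence[str], bbp_p2_assays: Sequence[str], rvp_assays: Sequence[str],
               p1_value: list, p2_value: list, rvp_value: list):
    """Toy version of the merge guard: a panel's value is only used when that
    panel actually produced something for this column."""
    if col in bbp_p1_assays and len(p1_value) > 0:
        return p1_value
    elif col in bbp_p2_assays and len(p2_value) > 0:
        return p2_value
    elif col in rvp_assays and len(rvp_value) > 0:   # was len(p2_value) before the fix
        return rvp_value
    return None

# an RVP assay with an empty BBP P2 list: the old guard returned None,
# the corrected guard returns the RVP call
print(pick_value('FLUAV', [], [], ['FLUAV'], [], [], ['positive']))  # ['positive']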
t13_plotting.py

Lines changed: 14 additions & 12 deletions

@@ -308,19 +308,21 @@ def t13_plt_heatmap(self, tgap, barcode_number, df, sample_list, assay_list, tp,

 else:
 # Do not split heatmap into two subplots (2-row, 1-column layout)
-fig, axes = plt.subplots(1, 1, figsize=(len(frame.columns.values)*0.5,len(frame.index.values)*0.5 * 2))
+fig, axes = plt.subplots(1, 1, figsize=(len(sample_list)*0.5,len(assay_list)*0.5))
 # Add space between the two subplots (vertical spacing)
-plt.subplots_adjust(hspace=1)
+#plt.subplots_adjust(hspace=1)
+plt.subplots_adjust(left=0.1, right=0.9, top=0.85, bottom=0.1)

 # Plot heatmap (all samples)
 df = df.transpose()
 frame = df[sample_list].reindex(assay_list)
 annot1 = frame.map(lambda x: 'X' if (pd.isna(x) or x == 'NaN' or x is None) else '')
-ax = sns.heatmap(frame, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot = None, fmt='', annot_kws={"size": 1000, "color": "black"}, ax=axes[0],
+ax = sns.heatmap(frame, cmap='Reds', square=True, cbar_kws={'pad': 0.002}, annot = None, fmt='', annot_kws={"size": 1000, "color": "black"},
 linewidths = 1, linecolor = "black")
-
-# calculate the real timing of the image
-rt = time_assign[i]
+# set colorbar format
+cbar = ax.collections[0].colorbar
+cbar.outline.set_edgecolor('black') # Set the color of the edge (outline)
+cbar.outline.set_linewidth(2)

 # Track x-axis labels that need a dagger
 dagger_labels = set()
@@ -351,29 +353,29 @@ def t13_plt_heatmap(self, tgap, barcode_number, df, sample_list, assay_list, tp,
 # Place the legend below the first heatmap
 left, right = ax.get_xlim()
 top, bottom = ax.get_ylim()
-ax.text(left, top + 7,
+ax.text(left, top + 10,
 '†: The NTC sample for this assay was removed from the analysis due to potential contamination.',
 ha='left', fontsize=12, style='italic')

 # plot * on y-axis that contains Invalid Assays
 if invalid_assays:
 invalid_assays = [assay.upper() for assay in invalid_assays]
-asterisk1_labels = [label + '*' if label in invalid_assays else label for label in frame1.index]
+asterisk1_labels = [label + '*' if label in invalid_assays else label for label in frame.index]
 ax.set_yticklabels(asterisk1_labels, rotation=0)

 ## add legend for * below the '†: ...' legend
-ax.text(left1, top1 + 9,
+ax.text(left, top + 11,
 '*: This assay is considered invalid due to failing Quality Control Test #3, which evaluates performance of the Combined Positive Control sample.',
 ha='left', fontsize=12, style='italic')

 # plot *** on x-axis that contains Invalid Samples
 if any(invalid_samples): # invalid_samples.size > 0
 invalid_samples = [sample.upper() for sample in invalid_samples]
-asterisk3_labels = [label + '***' if label in invalid_samples else label for label in frame1.columns]
+asterisk3_labels = [label + '***' if label in invalid_samples else label for label in frame.columns]
 ax.set_xticklabels(asterisk3_labels, rotation=90, ha='right')

 ## add legend for * below the '†: ...' legend
-ax.text(left1, top1 + 10,
+ax.text(left, top + 12,
 '***: This sample is invalid due to testing positive against the no-crRNA assay, an included negative assay control.',
 ha='left', fontsize=12, style='italic')

@@ -403,7 +405,7 @@ def t13_plt_heatmap(self, tgap, barcode_number, df, sample_list, assay_list, tp,
 ax.add_patch(rect)

 # Adjust layout
-ax.set_title(f'NTC Normalized Heatmap for {barcode_number} at '+str(rt)+' minutes', size=28)
+ax.set_title(f'NTC Normalized Heatmap for {barcode_number} at {time_assign[last_key]} minutes', size=28)
 ax.set_xlabel('Samples', size=18)
 ax.set_ylabel('Assays', size=18)
 top, bottom = ax.get_ylim()
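
On the title change above: the single-plot branch has no loop variable i, so rt = time_assign[i] was removed and the title now reads the timepoint directly from time_assign[last_key]. A tiny sketch of that lookup, assuming (as the diff implies but does not show) that time_assign maps timepoint keys to minutes and last_key is the final key:

# hypothetical shapes for time_assign and last_key; the real values are built elsewhere in t13_plotting.py
time_assign = {'t1': 13, 't2': 18, 't3': 23}
last_key = list(time_assign)[-1]      # 't3' -- assumed to be the final timepoint

barcode_number = '1234567890'         # placeholder barcode
title = f'NTC Normalized Heatmap for {barcode_number} at {time_assign[last_key]} minutes'
print(title)  # NTC Normalized Heatmap for 1234567890 at 23 minutes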

threshold.py

Lines changed: 8 additions & 0 deletions

@@ -76,10 +76,18 @@ def raw_thresholder(self, unique_crRNA_assays, assigned_only, t13_df, CLI_thresh
 raw_thresholds_df = pd.concat([ntc_mean_df, ntc_sd_df, ntc_3sd_df, raw_thresholds_df], ignore_index=True, axis=0)
 raw_thresholds_df.index = ['NTC Mean', 'NTC Standard Deviation', 'NTC 3*SD', 'NTC Threshold']

+# If there is only 1 NTC for an assay, we cannot take the std dev and these rows remain blank in raw_thresholds_df
+# So we resort to using the 1.8_Mean for these instances.
+for col in raw_thresholds_df.columns:
+    if pd.isna(raw_thresholds_df.at['NTC Threshold', col]):
+        ntc_mean = raw_thresholds_df.at['NTC Mean', col]
+        raw_thresholds_df.at['NTC Threshold', col] = 1.8 * ntc_mean
+
 # Calculate the Normalized NTC threshold too
 norm_thresholds_df = raw_thresholds_df.loc['NTC Threshold'] / ntc_mean_df
 raw_thresholds_df = pd.concat([raw_thresholds_df, norm_thresholds_df], ignore_index=True, axis=0)
 raw_thresholds_df.index = ['NTC Mean', 'NTC Standard Deviation', 'NTC 3*SD', 'NTC Threshold', 'Normalized NTC Threshold']
+

 else:
 print("Consult ReadME and input appropriate command-line arguments to specify thresholding method.")
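
This is the fix behind the 3_SD note in the commit message: with a single NTC replicate, the standard deviation is NaN, so the mean + 3*SD threshold stayed blank and the assay was flagged invalid. The new loop backfills those columns with 1.8 x the NTC mean. A minimal sketch of the same idea on an invented two-assay table (not the repository's raw_thresholder):

import numpy as np
import pandas as pd

# invented example: assay_A has 3 NTC replicates, assay_B only 1
ntc_raw = {'assay_A': [0.50, 0.55, 0.60], 'assay_B': [0.40]}

ntc_mean = pd.Series({k: np.mean(v) for k, v in ntc_raw.items()})
ntc_sd = pd.Series({k: pd.Series(v).std() for k, v in ntc_raw.items()})  # NaN for a single replicate
thresholds = ntc_mean + 3 * ntc_sd                                       # NaN propagates for assay_B

raw_thresholds_df = pd.DataFrame([ntc_mean, ntc_sd, thresholds],
                                 index=['NTC Mean', 'NTC Standard Deviation', 'NTC Threshold'])

# same backfill as the diff: where the 3*SD threshold could not be computed,
# fall back to 1.8 x the NTC mean
for col in raw_thresholds_df.columns:
    if pd.isna(raw_thresholds_df.at['NTC Threshold', col]):
        raw_thresholds_df.at['NTC Threshold', col] = 1.8 * raw_thresholds_df.at['NTC Mean', col]

print(raw_thresholds_df.round(3))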
