v5.0.0

fsalbeez · fsalbeez · commit f5d51085e0f7 · 2025-02-04T19:32:20.000-05:00
RedCap integration
diff --git a/README.md b/README.md
@@ -2,7 +2,7 @@
 CARMEN is a diagnostic tool designed for surveillance purposes. Below are the instructions to complete your CARMEN analysis. 
 
 ## Software Version
-When cloning this repository, you will be using software version 4.4.4.
+When cloning this repository, you will be using software version 5.0.0.
 
 ## Overview
 At this point, you have ran the $Standard\ BioTools\ Dynamic\ Array^{TM}$ IFC (integrated fluidic circuit) on the $Standard\ BioTools\ Biomark^{TM}$ instrument and have completed the experimental portion of CARMEN. In running this code, you will be able to complete the data analysis portion of CARMEN and generate both binary positive/negative and quantitative signal output of your diagnostic assay. 
diff --git a/analyze_run.py b/analyze_run.py
@@ -44,12 +44,14 @@
 # annotations and flags imports
 from flags import Flagger
 import xlsxwriter
+# generate redcap
+from redcap_builder import RedCapper
 
 
 
 ######################################################################################################################################################
 # assign software version
-software_version = '4.4.4'
+software_version = '5.0.0'
 
 ######################################################################################################################################################
 # data loading
@@ -963,49 +965,7 @@
     t13_hit_binary_output_file_path = os.path.join(rd_subfolder, f't13__{barcode_assignment}_hit_binary.csv')
     fl_t13_hit_binary_output.to_csv(t13_hit_binary_output_file_path, index=True)
         
-           
-"""  
-### FILE 1: t13_hit_output as Results_Summary
-# convert the t13 hit output to an excel file with green/red conditional formatting for NEG/POS results
-t13_hit_output_file_path = os.path.join(res_subfolder, f'Results_Summary_{barcode_assignment}.xlsx')
-
-# create the Excel writer
-with pd.ExcelWriter(t13_hit_output_file_path, engine="openpyxl") as writer:
-    # write the DataFrame to an Excel sheet
-    fl_t13_hit_output.to_excel(writer, sheet_name="Sheet1", index=True)
-    workbook = writer.book
-    worksheet = writer.sheets["Sheet1"]
-
-    # define font colors for POSITIVE and NEGATIVE
-    red_font = Font(color="FF0000", bold=True)  # Red for POSITIVE
-    green_font = Font(color="008000")  # Green for NEGATIVE
-
-    # apply text color formatting
-    for row_idx, row in enumerate(t13_hit_output.values, start=3):  # Start from row 3 (after header and INVALID ASSAY label)
-        for col_idx, cell_value in enumerate(row, start=2):
-            cell = worksheet.cell(row=row_idx, column=col_idx)
-            if cell_value == "POSITIVE":
-                cell.font = red_font
-            elif cell_value == "NEGATIVE":
-                cell.font = green_font
-
-### FILE 2: rounded_t13_quant_norm as NTC_Quant_Normalized_Results
-quant_output_ntcNorm_file_path = os.path.join(res_subfolder, f'NTC_Normalized_Quantitative_Results_Summary_{barcode_assignment}.csv')
-fl_rounded_t13_quant_norm.to_csv(quant_output_ntcNorm_file_path, index=True)
-
-### File 3: summary_samples_df as Positives_Summary
-summary_pos_samples_file_path = os.path.join(res_subfolder, f'Positives_Summary_{barcode_assignment}.csv')
-fl_summary_samples_df.to_csv(summary_pos_samples_file_path, index=True)
-
-### File 4: ntc_thresholds_output as NTC_Thresholds
-ntc_thresholds_output_file_path = os.path.join(res_subfolder, f'NTC_thresholds_{barcode_assignment}.csv')
-fl_rounded_ntc_thresholds_output.to_csv(ntc_thresholds_output_file_path, index=True)
-
-### File 5: t13_hit_binary_output as t13_hit_Binary 
-t13_hit_binary_output_file_path = os.path.join(rd_subfolder, f't13__{barcode_assignment}_hit_binary.csv')
-fl_t13_hit_binary_output.to_csv(t13_hit_binary_output_file_path, index=True)
-
-"""
+ 
 ######################################################################################################################################################   
 # instantiate Plotter from plotting.py
 heatmap_generator = Plotter()
@@ -1042,3 +1002,23 @@
 
 print("Operation complete.")
 
+
+######################################################################################################################################################   
+# RedCap Integration
+# TODO: gate this section behind a command-line argument so the REDCap export only runs when requested
+
+# if CLI[1]
+
+
+# instantiate RedCapper from redcap_builder.py
+redcapper = RedCapper()
+
+# make copy of binary output file from RESULTS Excel sheet
+fl_t13_hit_binary_output_2 = fl_t13_hit_binary_output.copy()
+
+redcap_t13_hit_binary_output = redcapper.build_redcap(fl_t13_hit_binary_output_2)
+
+redcap_t13_hit_binary_output_file_path = os.path.join(res_subfolder, f'REDCAP_{barcode_assignment}.csv')
+redcap_t13_hit_binary_output.to_csv(redcap_t13_hit_binary_output_file_path, index=True)
+
+
diff --git a/flags.py b/flags.py
@@ -39,7 +39,8 @@ def assign_flags(self, fail_nocrRNA_check_df, high_raw_ntc_signal_df, rnasep_df,
                     if col.rstrip('*').lower() in [assay.lower() for assay in invalid_assays]:
                         invalid_row.append('INVALID ASSAY')  # mark invalid assays with this label
                     else:
-                        invalid_row.append('') # this way, invalid_row has same dimensions as flagged_file's cols
+                        invalid_row.append('VALID ASSAY') # this way, invalid_row has same dimensions as flagged_file's cols
+
                 invalid_row_df = pd.DataFrame([invalid_row], columns=flagged_file.columns)
                 invalid_row_df.index = ["Assay Valid?"]
                 data = flagged_file.iloc[0:]  
@@ -49,8 +50,18 @@ def assign_flags(self, fail_nocrRNA_check_df, high_raw_ntc_signal_df, rnasep_df,
                 label = 'This assay is considered invalid due to failing Quality Control Test #3, which evaluates performance of the Combined Positive Control sample.'
                 invalid_legend_label = pd.DataFrame(data=[[label] + [pd.NA]*(len(flagged_file.columns) - 1)], columns=flagged_file.columns, index=["Legend for *:"])
                 invalid_legend_label_filled = invalid_legend_label.fillna('')
+
                 # concatenate the invalid_legend label df to file df 
                 flagged_file = pd.concat([flagged_file, invalid_legend_label_filled], ignore_index=False) # concatenate
+            else: 
+                # create the "Assay Valid?" row with default value 'VALID ASSAY'
+                valid_assay_row = ['VALID ASSAY'] * len(flagged_file.columns)
+                valid_assay_row_df = pd.DataFrame([valid_assay_row], columns=flagged_file.columns)
+                valid_assay_row_df.index = ["Assay Valid?"]
+                # concatenate valid_assay_row_df to file_df
+                data = flagged_file.iloc[0:]  
+                flagged_file = pd.concat([valid_assay_row_df, data], ignore_index=False) # concatenate all
+            
 
 
             if i == 2: # summary_samples_df
diff --git a/redcap_builder.py b/redcap_builder.py
@@ -0,0 +1,81 @@
+import numpy as np 
+import pandas as pd 
+import matplotlib.pyplot as plt
+import seaborn as sns
+from tqdm import tqdm
+import re
+
+class RedCapper:
+    """Reformat the flagged t13 binary hit table into the column layout REDCap expects.
+
+    Codes written by build_redcap: 2 = negative, 4 = assay not run, 5 = invalid sample or
+    assay, 6 = NTC contaminated. Other cell values are left unchanged.
+    """
+
+    # REDCap needs a header for every assay of both panels, in this fixed order
+    BBP_ASSAYS = ['CCHFV', 'CHI', 'DENV', 'EBOV', 'HBV_DNA', 'HCV', 'HIV_1', 'HIV_2', 'HTV', 'LASV', 'MBV', 'MMV',
+                  'MPOX_DNA', 'ONN', 'PF_3_DNA', 'RBV', 'RVFV', 'SYPH_DNA', 'WNV', 'YFV', 'ZIKV']
+    RVP_ASSAYS = ['SARS-COV-2', 'HCOV-HKU1', 'HCOV-NL63', 'HCOV-OC43', 'FLUAV', 'FLUBV', 'HMPV', 'HRSV', 'HPIV-3']
+    COLUMN_ORDER = BBP_ASSAYS + RVP_ASSAYS + ['RNASEP_P1', 'RNASEP_P2']
+
+    def __init__(self):
+        pass
+
+    @classmethod
+    def _clean_column_name(cls, col):
+        """Return the REDCap header for one assay column: drop '*' flags and any trailing '_suffix'."""
+        if re.search(r'rnasep|no_crrna', col, re.IGNORECASE):
+            return col.split('*')[0]  # rnasep/no_crrna columns keep their underscore suffix (e.g. 'RNASEP_P1')
+        name = col.split('*')[0]
+        # Try known assay names first (longest first): cutting at the first '_' would turn
+        # 'HIV_1'/'HIV_2' into a duplicated 'HIV' and 'HBV_DNA' into 'HBV', so those assays
+        # would always be reported as 4 (not run).
+        for assay in sorted(cls.COLUMN_ORDER, key=len, reverse=True):
+            if name == assay or name.startswith(assay + '_'):
+                return assay
+        return name.split('_')[0]
+
+    def build_redcap(self, fl_t13_hit_binary_output_2):
+        """Build the REDCap table from the flagged binary hit DataFrame.
+
+        The input is not modified. It must contain a 'SAMPLE VALID? Y/N' column and an
+        'Assay Valid?' row; a KeyError is raised otherwise.
+
+        Returns a new DataFrame with the remaining rows and exactly the COLUMN_ORDER columns.
+        """
+        ### convert 0 to 2 (negative)
+        redcap = fl_t13_hit_binary_output_2.replace(0, 2)
+
+        ### drop any rows incl and below 'Summary' row
+        if 'Summary' in redcap.index:
+            redcap = redcap.iloc[:redcap.index.get_loc('Summary')]
+
+        ### convert any cell val with a dagger † to 6 (NTC contaminated)
+        redcap = redcap.replace(r'.*†.*', 6, regex=True)
+
+        ### convert all vals of invalid samples (rows) and invalid assays (cols) to 5 (invalid)
+        redcap.loc[redcap['SAMPLE VALID? Y/N'] == 'N***', :] = 5
+        invalid_assay_cols = redcap.columns[redcap.loc['Assay Valid?'] == 'INVALID ASSAY']
+        for col in invalid_assay_cols:
+            redcap[col] = 5
+
+        ### drop the bookkeeping 'SAMPLE VALID? Y/N' col and 'Assay Valid?' row
+        redcap = redcap.drop('SAMPLE VALID? Y/N', axis=1)
+        redcap = redcap.drop('Assay Valid?', axis=0)
+
+        ### drop any columns containing no_crRNA
+        redcap = redcap.loc[:, ~redcap.columns.str.lower().str.contains('no_crrna')].copy()
+
+        ### normalise the column names (assign a new Index; never mutate columns.values in place)
+        redcap.columns = [self._clean_column_name(col) for col in redcap.columns]
+
+        ### add every assay that was not run (REDCap needs all BBP and RVP headers) with value 4
+        for col in self.COLUMN_ORDER:
+            if col not in redcap.columns:
+                redcap[col] = 4
+
+        ### reorder cols
+        redcap = redcap[self.COLUMN_ORDER]
+
+        return redcap
+