Skip to content

Commit f5d5108

Browse files
committed
v5.0.0
RedCap integration
1 parent 0958cd0 commit f5d5108

File tree

4 files changed

+118
-46
lines changed

4 files changed

+118
-46
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
CARMEN is a diagnostic tool designed for surveillance purposes. Below are the instructions to complete your CARMEN analysis.
33

44
## Software Version
5-
When cloning this repository, you will be using software version 4.4.4.
5+
When cloning this repository, you will be using software version 5.0.0.
66

77
## Overview
88
At this point, you have run the $Standard\ BioTools\ Dynamic\ Array^{TM}$ IFC (integrated fluidic circuit) on the $Standard\ BioTools\ Biomark^{TM}$ instrument and have completed the experimental portion of CARMEN. By running this code, you will complete the data analysis portion of CARMEN and generate both binary positive/negative and quantitative signal output for your diagnostic assay.

analyze_run.py

Lines changed: 24 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,14 @@
4444
# annotations and flags imports
4545
from flags import Flagger
4646
import xlsxwriter
47+
# generate redcap
48+
from redcap_builder import RedCapper
4749

4850

4951

5052
######################################################################################################################################################
5153
# assign software version
52-
software_version = '4.4.4'
54+
software_version = '5.0.0'
5355

5456
######################################################################################################################################################
5557
# data loading
@@ -963,49 +965,7 @@
963965
t13_hit_binary_output_file_path = os.path.join(rd_subfolder, f't13__{barcode_assignment}_hit_binary.csv')
964966
fl_t13_hit_binary_output.to_csv(t13_hit_binary_output_file_path, index=True)
965967

966-
967-
"""
968-
### FILE 1: t13_hit_output as Results_Summary
969-
# convert the t13 hit output to an excel file with green/red conditional formatting for NEG/POS results
970-
t13_hit_output_file_path = os.path.join(res_subfolder, f'Results_Summary_{barcode_assignment}.xlsx')
971-
972-
# create the Excel writer
973-
with pd.ExcelWriter(t13_hit_output_file_path, engine="openpyxl") as writer:
974-
# write the DataFrame to an Excel sheet
975-
fl_t13_hit_output.to_excel(writer, sheet_name="Sheet1", index=True)
976-
workbook = writer.book
977-
worksheet = writer.sheets["Sheet1"]
978-
979-
# define font colors for POSITIVE and NEGATIVE
980-
red_font = Font(color="FF0000", bold=True) # Red for POSITIVE
981-
green_font = Font(color="008000") # Green for NEGATIVE
982-
983-
# apply text color formatting
984-
for row_idx, row in enumerate(t13_hit_output.values, start=3): # Start from row 3 (after header and INVALID ASSAY label)
985-
for col_idx, cell_value in enumerate(row, start=2):
986-
cell = worksheet.cell(row=row_idx, column=col_idx)
987-
if cell_value == "POSITIVE":
988-
cell.font = red_font
989-
elif cell_value == "NEGATIVE":
990-
cell.font = green_font
991-
992-
### FILE 2: rounded_t13_quant_norm as NTC_Quant_Normalized_Results
993-
quant_output_ntcNorm_file_path = os.path.join(res_subfolder, f'NTC_Normalized_Quantitative_Results_Summary_{barcode_assignment}.csv')
994-
fl_rounded_t13_quant_norm.to_csv(quant_output_ntcNorm_file_path, index=True)
995-
996-
### File 3: summary_samples_df as Positives_Summary
997-
summary_pos_samples_file_path = os.path.join(res_subfolder, f'Positives_Summary_{barcode_assignment}.csv')
998-
fl_summary_samples_df.to_csv(summary_pos_samples_file_path, index=True)
999-
1000-
### File 4: ntc_thresholds_output as NTC_Thresholds
1001-
ntc_thresholds_output_file_path = os.path.join(res_subfolder, f'NTC_thresholds_{barcode_assignment}.csv')
1002-
fl_rounded_ntc_thresholds_output.to_csv(ntc_thresholds_output_file_path, index=True)
1003-
1004-
### File 5: t13_hit_binary_output as t13_hit_Binary
1005-
t13_hit_binary_output_file_path = os.path.join(rd_subfolder, f't13__{barcode_assignment}_hit_binary.csv')
1006-
fl_t13_hit_binary_output.to_csv(t13_hit_binary_output_file_path, index=True)
1007-
1008-
"""
968+
1009969
######################################################################################################################################################
1010970
# instantiate Plotter from plotting.py
1011971
heatmap_generator = Plotter()
@@ -1042,3 +1002,23 @@
10421002

10431003
print("Operation complete.")
10441004

1005+
1006+
######################################################################################################################################################
1007+
# RedCap Integration
1008+
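# intended to run only when a command-line argument requests REDCap output; the gate below is still a commented-out placeholder, so this section currently always runs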
1009+
1010+
# if CLI[1]
1011+
1012+
1013+
# instantiate RedCapper from redcap_builder.py
1014+
redcapper = RedCapper()
1015+
1016+
# make copy of binary output file from RESULTS Excel sheet
1017+
fl_t13_hit_binary_output_2 = fl_t13_hit_binary_output.copy()
1018+
1019+
redcap_t13_hit_binary_output = redcapper.build_redcap(fl_t13_hit_binary_output_2)
1020+
1021+
redcap_t13_hit_binary_output_file_path = os.path.join(res_subfolder, f'REDCAP_{barcode_assignment}.csv')
1022+
redcap_t13_hit_binary_output.to_csv(redcap_t13_hit_binary_output_file_path, index=True)
1023+
1024+

flags.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,8 @@ def assign_flags(self, fail_nocrRNA_check_df, high_raw_ntc_signal_df, rnasep_df,
3939
if col.rstrip('*').lower() in [assay.lower() for assay in invalid_assays]:
4040
invalid_row.append('INVALID ASSAY') # mark invalid assays with this label
4141
else:
42-
invalid_row.append('') # this way, invalid_row has same dimensions as flagged_file's cols
42+
invalid_row.append('VALID ASSAY') # this way, invalid_row has same dimensions as flagged_file's cols
43+
4344
invalid_row_df = pd.DataFrame([invalid_row], columns=flagged_file.columns)
4445
invalid_row_df.index = ["Assay Valid?"]
4546
data = flagged_file.iloc[0:]
@@ -49,8 +50,18 @@ def assign_flags(self, fail_nocrRNA_check_df, high_raw_ntc_signal_df, rnasep_df,
4950
label = 'This assay is considered invalid due to failing Quality Control Test #3, which evaluates performance of the Combined Positive Control sample.'
5051
invalid_legend_label = pd.DataFrame(data=[[label] + [pd.NA]*(len(flagged_file.columns) - 1)], columns=flagged_file.columns, index=["Legend for *:"])
5152
invalid_legend_label_filled = invalid_legend_label.fillna('')
53+
5254
# concatenate the invalid_legend label df to file df
5355
flagged_file = pd.concat([flagged_file, invalid_legend_label_filled], ignore_index=False) # concatenate
56+
else:
57+
# create the "Assay Valid?" row with default value 'VALID ASSAY'
58+
valid_assay_row = ['VALID ASSAY'] * len(flagged_file.columns)
59+
valid_assay_row_df = pd.DataFrame([valid_assay_row], columns=flagged_file.columns)
60+
valid_assay_row_df.index = ["Assay Valid?"]
61+
# concatenate valid_assay_row_df to file_df
62+
data = flagged_file.iloc[0:]
63+
flagged_file = pd.concat([valid_assay_row_df, data], ignore_index=False) # concatenate all
64+
5465

5566

5667
if i == 2: # summary_samples_df

redcap_builder.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import numpy as np
2+
import pandas as pd
3+
import matplotlib.pyplot as plt
4+
import seaborn as sns
5+
from tqdm import tqdm
6+
import re
7+
8+
class RedCapper:
    """Translate the flagged binary hit table into the numeric REDCap layout.

    Codes written by this class: 2 = negative, 4 = assay not run,
    5 = invalid sample or invalid assay, 6 = NTC contaminated.
    Any other cell value is passed through untouched (presumably 1 marks a
    positive hit -- confirm against the caller).
    """

    # REDCap needs a header for every assay of both panels (BBP and RVP),
    # whether or not the panel was part of this run.
    BBP_ASSAYS = ['CCHFV', 'CHI', 'DENV', 'EBOV', 'HBV_DNA', 'HCV', 'HIV_1', 'HIV_2', 'HTV', 'LASV', 'MBV', 'MMV',
                  'MPOX_DNA', 'ONN', 'PF_3_DNA', 'RBV', 'RVFV', 'SYPH_DNA', 'WNV', 'YFV', 'ZIKV']
    RVP_ASSAYS = ['SARS-COV-2', 'HCOV-HKU1', 'HCOV-NL63', 'HCOV-OC43', 'FLUAV', 'FLUBV', 'HMPV', 'HRSV', 'HPIV-3']
    RNASEP_COLUMNS = ['RNASEP_P1', 'RNASEP_P2']

    def __init__(self):
        pass

    @staticmethod
    def _canonical_assay_name(col, known_assays):
        """Return the REDCap header for one input column name.

        Everything from the first asterisk onwards is discarded. RNaseP
        (and no_crRNA) names keep their underscores. For any other column the
        longest entry of ``known_assays`` that equals the name, or is followed
        by ``_`` in it, wins; this keeps assays whose own name contains an
        underscore (e.g. ``HIV_1``, ``HBV_DNA``) intact. Names matching no
        known assay are cut at the first underscore.
        """
        base = col.split('*')[0]
        if re.search(r'rnasep|no_crrna', col, re.IGNORECASE):
            return base
        candidates = [assay for assay in known_assays
                      if base == assay or base.startswith(assay + '_')]
        if candidates:
            return max(candidates, key=len)
        return base.split('_')[0]

    # method
    def build_redcap(self, fl_t13_hit_binary_output_2):
        """Build the REDCap table from the flagged binary hit output.

        Parameters
        ----------
        fl_t13_hit_binary_output_2 : pandas.DataFrame
            Flagged binary hit table. It must contain a
            'SAMPLE VALID? Y/N' column and an 'Assay Valid?' row; a
            'Summary' row is optional. Column labels must be strings.

        Returns
        -------
        pandas.DataFrame
            One row per row above 'Summary' (minus 'Assay Valid?'), with
            exactly the BBP, RVP and RNaseP columns, in that order.

        Raises
        ------
        KeyError
            If the 'SAMPLE VALID? Y/N' column or the 'Assay Valid?' row
            is missing.
        """
        ### convert 0 to 2 (negative); replace() returns a new frame, the input is not modified
        redcap = fl_t13_hit_binary_output_2.replace(0, 2)

        ### drop the 'Summary' row and everything below it
        if 'Summary' in redcap.index:
            summary_pos = redcap.index.get_loc('Summary')
            redcap = redcap.iloc[:summary_pos]

        ### convert any cell val with a dagger † to 6 (NTC contaminated)
        redcap = redcap.replace(r'.*†.*', 6, regex=True)

        ### convert invalid samples and invalid assays to 5 (invalid)
        # whole row for every sample flagged 'N***'
        invalid_samples = redcap['SAMPLE VALID? Y/N'] == 'N***'
        redcap.loc[invalid_samples, :] = 5

        # whole column for every assay flagged 'INVALID ASSAY'; this runs last,
        # so it also overwrites any 6 written above in those columns
        invalid_assays = (redcap.loc['Assay Valid?'] == 'INVALID ASSAY').to_numpy()
        for col in redcap.columns[invalid_assays]:
            redcap[col] = 5

        ### the validity column and row have served their purpose
        redcap = redcap.drop('SAMPLE VALID? Y/N', axis=1)
        redcap = redcap.drop('Assay Valid?', axis=0)

        ### drop any columns containing no_crRNA
        is_no_crrna = redcap.columns.str.lower().str.contains('no_crrna')
        redcap = redcap.loc[:, ~is_no_crrna].copy()

        ### normalise the column names to the REDCap headers
        # A fresh list is assigned instead of writing into columns.values:
        # an Index is meant to be immutable and in-place writes bypass that.
        known_assays = self.BBP_ASSAYS + self.RVP_ASSAYS
        redcap.columns = [self._canonical_assay_name(col, known_assays)
                          for col in redcap.columns]
        # NOTE(review): two input columns that normalise to the same header
        # yield duplicate columns in the result -- confirm this cannot occur.

        ### add a column for every assay that was not run, filled with 4 (not run)
        column_order = known_assays + self.RNASEP_COLUMNS
        for col in column_order:
            if col not in redcap.columns:
                redcap[col] = 4

        # keep only the REDCap columns, in the REDCap order
        redcap = redcap[column_order]

        return redcap
81+

0 commit comments

Comments
 (0)