Commit 8df49fc (v5.1.0)
1 parent: f5d5108

File tree: 5 files changed, +138 -33 lines changed

README.md
Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@
 CARMEN is a diagnostic tool designed for surveillance purposes. Below are the instructions to complete your CARMEN analysis.

 ## Software Version
-When cloning this repository, you will be using software version 5.0.0.
+When cloning this repository, you will be using software version 5.1.0.

 ## Overview
 At this point, you have run the $Standard\ BioTools\ Dynamic\ Array^{TM}$ IFC (integrated fluidic circuit) on the $Standard\ BioTools\ Biomark^{TM}$ instrument and have completed the experimental portion of CARMEN. By running this code, you will complete the data analysis portion of CARMEN and generate both binary positive/negative and quantitative signal outputs of your diagnostic assay.

analyze_run.py
Lines changed: 43 additions & 15 deletions

@@ -9,6 +9,7 @@
 import seaborn as sns
 #file imports
 from io import BytesIO
+from datetime import datetime
 import base64
 from pathlib import Path
 import os
@@ -51,7 +52,7 @@

 ######################################################################################################################################################
 # assign software version
-software_version = '5.0.0'
+software_version = '5.1.0'

 ######################################################################################################################################################
 # data loading
@@ -102,7 +103,7 @@
 file_like_object.seek(0)

 # Extract dataframes from each CSV file
-read_dataframes = reader.extract_dataframes_from_csv(file_like_object, phrases_to_find)
+read_dataframes, date = reader.extract_dataframes_from_csv(file_like_object, phrases_to_find)

 # at this point, we have loaded the assignment sheet and have sorted through the loaded data file to create a dict of dataframes

@@ -831,13 +832,38 @@
 fail_nocrRNA_check_df.to_csv(fail_nocrRNA_check_df_file_path, index=True)
 print(f"CSV created with data at {fail_nocrRNA_check_df_file_path}")

+"""
+flagged_file = t13_hit_output.copy()
+processed_samples = set()
+for _, row in high_raw_ntc_signal_df.iterrows():
+    for col in high_raw_ntc_signal_df.columns: # cols are Sample, Assay, t13
+        cont_ntc_sample = row['Sample'] # NEG NTC sample
+        cont_ntc_assay = row['Assay'] # NTC assay
+        # now iterate over the flagged file
+        for idx, sample_row in flagged_file.iterrows():
+            if cont_ntc_sample == idx:
+                # add † to each cell value
+                for assay_col in flagged_file.columns:
+                    if assay_col.upper() == cont_ntc_assay.upper():
+                        # check whether the sample-assay pair has already been processed
+                        if (cont_ntc_sample, cont_ntc_assay) not in processed_samples:
+                            processed_samples.add((cont_ntc_sample, cont_ntc_assay))
+                            # check if the value is NA (NaN)
+                            if pd.isna(sample_row[assay_col]) or sample_row[assay_col] == '':
+                                flagged_file.loc[idx, assay_col] = '†' # only dagger if value is NA
+                            else:
+                                flagged_file[assay_col] = flagged_file[assay_col].astype(str)
+                                flagged_file.at[idx, assay_col] = f"{sample_row[assay_col]}†" # add dagger to the value
+


+"""
 ######################################################################################################################################################
 # instantiate Flagger from flags.py
 flagger = Flagger()

-invalid_assays, invalid_samples, flagged_files = flagger.assign_flags(fail_nocrRNA_check_df, high_raw_ntc_signal_df, rnasep_df_heatmap, QC_score_per_assay_df, t13_hit_output, rounded_t13_quant_norm, summary_samples_df, rounded_ntc_thresholds_output, t13_hit_binary_output)
+invalid_assays, invalid_samples, flagged_files, processed_samples, cont_ntc_sample, cont_ntc_assay = flagger.assign_flags(fail_nocrRNA_check_df, high_raw_ntc_signal_df, rnasep_df_heatmap, QC_score_per_assay_df, t13_hit_output, rounded_t13_quant_norm, summary_samples_df, rounded_ntc_thresholds_output, t13_hit_binary_output)

 fl_t13_hit_output = flagged_files[0] # Results_Summary
 fl_rounded_t13_quant_norm = flagged_files[1] # NTC_Normalized_Quantitative_Results_Summary
@@ -1000,25 +1026,27 @@
 fig = heatmap_t13_quant_norm.savefig(heatmap_t13_quant_norm_filename, bbox_inches = 'tight', dpi=80)
 plt.close(fig)

-print("Operation complete.")
-

 ######################################################################################################################################################
 # RedCap Integration
-# set it as you have to enter a CLI for Redcap to run this code
+# the user has to enter a CLI arg for the RedCap code to run

-# if CLI[1]
+if len(CLI_arg) > 2 and CLI_arg[2] == 'REDCAP':
+    # instantiate RedCapper from redcap_builder.py
+    redcapper = RedCapper()

+    # make copy of binary output file from RESULTS Excel sheet
+    fl_t13_hit_binary_output_2 = fl_t13_hit_binary_output.copy()

-# instantiate RedCapper from flags.py
-redcapper = RedCapper()
+    redcap_t13_hit_binary_output = redcapper.build_redcap(fl_t13_hit_binary_output_2, date, barcode_assignment)

-# make copy of binary output file from RESULTS Excel sheet
-fl_t13_hit_binary_output_2 = fl_t13_hit_binary_output.copy()
+    redcap_t13_hit_binary_output_file_path = os.path.join(res_subfolder, f'REDCAP_{barcode_assignment}.csv')
+    redcap_t13_hit_binary_output.to_csv(redcap_t13_hit_binary_output_file_path, index=True)
+    print("REDCAP file generated.")
+    print("Operation complete.")

-redcap_t13_hit_binary_output = redcapper.build_redcap(fl_t13_hit_binary_output_2)
-
-redcap_t13_hit_binary_output_file_path = os.path.join(res_subfolder, f'REDCAP_{barcode_assignment}.csv')
-redcap_t13_hit_binary_output.to_csv(redcap_t13_hit_binary_output_file_path, index=True)
+else:
+    print("User did not specify REDCAP as a command line argument, so the REDCAP file was not generated.")
+    print("Operation complete.")
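The REDCAP export above is gated on a positional command-line argument rather than a named flag. A minimal sketch of how that gate behaves, assuming argv[1] is the run data file and argv[2] is the optional switch (the diff only confirms the CLI_arg[2] == 'REDCAP' check itself):

import sys

# Hypothetical invocation: python analyze_run.py <run_data.csv> REDCAP
CLI_arg = sys.argv

if len(CLI_arg) > 2 and CLI_arg[2] == 'REDCAP':
    # third positional token is the REDCAP switch, as in analyze_run.py
    print("REDCAP export will be generated.")
else:
    print("REDCAP export skipped; pass REDCAP as the second positional argument to enable it.")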

flags.py
Lines changed: 4 additions & 4 deletions

@@ -89,12 +89,12 @@ def assign_flags(self, fail_nocrRNA_check_df, high_raw_ntc_signal_df, rnasep_df,
                                if (cont_ntc_sample, cont_ntc_assay) not in processed_samples:
                                    processed_samples.add((cont_ntc_sample, cont_ntc_assay))
                                    # check if the value is NA (NaN)
-                                   if pd.isna(sample_row[assay_col]):
+                                   if pd.isna(sample_row[assay_col]) or sample_row[assay_col] == '':
                                        flagged_file.loc[idx, assay_col] = '†' # only dagger if value is NA
                                    else:
                                        flagged_file[assay_col] = flagged_file[assay_col].astype(str)
                                        #flagged_file.at[idx, assay_col] = str(flagged_file.at[idx, assay_col])
-                                       flagged_file.loc[idx, assay_col] = f"{sample_row[assay_col]}†" # add dagger to the value
+                                       flagged_file.at[idx, assay_col] = f"{sample_row[assay_col]}†" # add dagger to the value

        for _, row in high_raw_ntc_signal_df.iterrows():
            for col in high_raw_ntc_signal_df.columns:
@@ -141,7 +141,7 @@ def assign_flags(self, fail_nocrRNA_check_df, high_raw_ntc_signal_df, rnasep_df,
                    flagged_file = pd.concat([flagged_file, cont_NTC_thresh_legend_label_filled], ignore_index=False) # concatenate
                    legend_added = True
                    break
-
+
        ### no-crRNA flags
        ## need to be added to t13_hit, t13_quant_norm, pos_samples_df, t13_hit_binary
        if i in {0,1,4}: # t13_hit, t13_quant_norm,t13_hit_binary
@@ -280,4 +280,4 @@ def assign_flags(self, fail_nocrRNA_check_df, high_raw_ntc_signal_df, rnasep_df,

            flagged_files.append(flagged_file) # add flagged file to the list

-       return invalid_assays, invalid_samples, flagged_files
+       return invalid_assays, invalid_samples, flagged_files, processed_samples, cont_ntc_sample, cont_ntc_assay
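The two fixes above work together: the column is first cast to str, and the dagger is then written with .at, which sets a single scalar cell and avoids dtype issues on a mixed numeric/string column. A simplified, self-contained sketch of the same pattern with hypothetical sample and assay names (the snapshot-then-write ordering is a restructuring for clarity, not the library code verbatim):

import pandas as pd

# Toy stand-in for t13_hit_output: rows = samples, cols = assays (hypothetical names)
flagged_file = pd.DataFrame({'CCHFV': [1.0, None]}, index=['NTC_A', 'NTC_B'])

# Snapshot the original values before mutating, then flag each cell
original = flagged_file['CCHFV'].copy()
flagged_file['CCHFV'] = flagged_file['CCHFV'].astype(str)  # cast column so daggers can be appended

for idx, val in original.items():
    if pd.isna(val) or val == '':
        flagged_file.at[idx, 'CCHFV'] = '†'        # dagger alone when the cell was empty
    else:
        flagged_file.at[idx, 'CCHFV'] = f"{val}†"  # .at writes one scalar cell

print(flagged_file)
#        CCHFV
# NTC_A  1.0†
# NTC_B     †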

reader.py
Lines changed: 11 additions & 3 deletions

@@ -1,5 +1,6 @@
 import pandas as pd
 import io
+from datetime import datetime

 class DataReader:
     def __init__(self):
@@ -27,7 +28,7 @@ def extract_dataframes_from_csv(self, file_like_object, phrases_to_find):

        # Read the content of the file-like object (only once)
        file_like_object.seek(0)
-       content = file_like_object.read().decode('utf-8')
+       content = file_like_object.read().decode('utf-8') # bytesIO obj

        new_phrase = [
            "ref_raw",
@@ -62,6 +63,13 @@ def extract_dataframes_from_csv(self, file_like_object, phrases_to_find):
            df = pd.read_csv(content_io, nrows=section_rows[phrase], skiprows=start_index)
            dataframes[phrase] = self.clean_dataframe(df) # Clean and store the dataframe
            #print(phrase, len(df))
-       # Return a dictionary of the dataframes

-       return dataframes
+       # Collect the date from the run file's header row
+       df = pd.read_csv(io.StringIO(content), nrows=0)
+       date_header = df.columns[7]
+       date_str = date_header.split(' ')[0] # split breaks the header at the first space; keep only the date portion
+       date_obj = datetime.strptime(date_str, "%d/%m/%Y")
+       date = date_obj.strftime("%m/%d/%Y")
+
+       # Return the dictionary of dataframes and the run date
+       return dataframes, date
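The new date logic assumes the eighth header cell of the raw CSV begins with a day-first date, which is reformatted month-first. A standalone sketch of just that conversion, with an invented header string:

from datetime import datetime

# Hypothetical header cell as it might appear in column 8 of the raw CSV
date_header = "17/03/2025 14:02:11"

date_str = date_header.split(' ')[0]                # keep only the date portion, '17/03/2025'
date_obj = datetime.strptime(date_str, "%d/%m/%Y")  # parse day-first
date = date_obj.strftime("%m/%d/%Y")                # reformat month-first

print(date)  # 03/17/2025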

redcap_builder.py
Lines changed: 79 additions & 10 deletions

@@ -10,8 +10,7 @@ def __init__(self):
        pass

    # method
-   def build_redcap(self, fl_t13_hit_binary_output_2):
-
+   def build_redcap(self, fl_t13_hit_binary_output_2, date, barcode_assignment):

        ### convert 0 to 2 (negative)
        redcap_t13_hit_binary_output = fl_t13_hit_binary_output_2.replace(0, 2)
@@ -62,20 +61,90 @@ def build_redcap(self, fl_t13_hit_binary_output_2):
            if col not in redcap_t13_hit_binary_output.columns:
                redcap_t13_hit_binary_output[col] = 4

-       # reorder cols
+       ### reorder cols
        redcap_t13_hit_binary_output = redcap_t13_hit_binary_output[column_order]
-
-
-
-
-
-

+       ### add in the metadata columns
+       sampleid = []
+       for idx in redcap_t13_hit_binary_output.index:
+           cleaned_idx = re.sub(r'[\*\|†\s]', '', idx)
+           sampleid.append(cleaned_idx)

+       redcap_t13_hit_binary_output.insert(0, "date", date)
+       redcap_t13_hit_binary_output.insert(1, "ifc", barcode_assignment)
+       redcap_t13_hit_binary_output.insert(2, "sampleid", sampleid)

+       record_id = []
+       for row in redcap_t13_hit_binary_output.itertuples():
+           samp_id = row.sampleid
+           record_id_val = barcode_assignment + '_' + samp_id
+           record_id.append(record_id_val)

+       redcap_t13_hit_binary_output.insert(0, "record_id", record_id)

-
+       ### merge same samples run on different panels
+       # extract sampleid before panel
+       redcap_t13_hit_binary_output['sampleid_prefix'] = redcap_t13_hit_binary_output['sampleid'].str.replace(r'(_P1|_P2|_RVP)$', '', regex=True)
+
+       # merge rows with the same sampleid_prefix - keep assay results unique, combine record_ids, update sampleid
+       def merge_group(group):
+           # select first row in subset of redcap_t13_hit_binary_output grouped by sampleid_prefix
+           merged_row = pd.DataFrame(columns=group.columns)
+           merged_row.loc[0] = group.iloc[0]
+
+           # the group is the unique sampleid_prefix - each group should have max 2 rows
+           for col in group.columns:
+               if col not in ["record_id", "date", "ifc", "sampleid", "sampleid_prefix"]:
+                   # if group['cchfv'] = [5,4], filtering out the 4 then dropna leaves [5]
+                   # .unique ensures that only unique vals are retained
+                   if all(group[col] == 4):
+                       merged_row[col] = 4
+                   else: # if group[col] = [5,4] or [3,4] - there's no world where it would be [5,3]
+                       filtered_values = group.loc[group[col] != 4, col].dropna().unique()
+                       merged_row[col] = filtered_values[0]
+
+           # each record_id is split and the panel suffixes are joined - .unique ensures all distinct suffixes are added together
+           merged_row['record_id'] = '_'.join(group['record_id'].apply(lambda x: x.split('_')[-1]).unique())
+
+           # assign a sampleid to the merged_row (doesn't matter as sampleid col will be dropped later)
+           merged_row['sampleid'] = group['sampleid'].iloc[0]
+
+           return merged_row
+
+       # apply the merge_group function to each group in the groupby obj (which is a df)
+       redcap_t13_hit_binary_output = redcap_t13_hit_binary_output.groupby('sampleid_prefix').apply(merge_group).reset_index(drop=True)
+
+       # make record_id col be ifc + sampleid_prefix + record_id (which is just the panel suffixes, e.g. P1_P2)
+       record_id_fix = []
+       for row in redcap_t13_hit_binary_output.itertuples():
+           record_id = row.record_id
+           sampleid_prefix = row.sampleid_prefix
+           sampleid = row.sampleid
+           if not any(control in sampleid_prefix for control in ['NTC', 'CPC', 'NDC']):
+               record_id_val = barcode_assignment + '_' + sampleid_prefix + '_' + record_id
+           else:
+               record_id_val = barcode_assignment + '_' + sampleid + '_' + record_id
+           record_id_fix.append(record_id_val)
+
+       redcap_t13_hit_binary_output['record_id'] = record_id_fix
+
+       ### drop sampleid
+       redcap_t13_hit_binary_output = redcap_t13_hit_binary_output.drop(columns=['sampleid'])
+
+       ### rename sampleid_prefix as sampleid and insert it as the 4th col
+       redcap_t13_hit_binary_output = redcap_t13_hit_binary_output.rename(columns={'sampleid_prefix': 'sampleid'})
+       cols = list(redcap_t13_hit_binary_output.columns)
+       cols.remove('sampleid')
+       cols.insert(3, 'sampleid')
+       redcap_t13_hit_binary_output = redcap_t13_hit_binary_output[cols]
+
+       ### lowercase all columns in redcap_t13_hit_binary_output for REDCAP data entry
+       redcap_t13_hit_binary_output.columns = redcap_t13_hit_binary_output.columns.str.lower()
+
+       ### reset index
+       redcap_t13_hit_binary_output = redcap_t13_hit_binary_output.reset_index(drop=True)

        return redcap_t13_hit_binary_output
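In merge_group, code 4 marks an assay that was not run on a given panel, so when the same sample appears on two panels the 4 is overridden by whichever panel actually ran the assay. A toy illustration of that core rule, with invented assay columns and barcode:

import pandas as pd

# Two rows for the same sample run on panels P1 and P2 (all names hypothetical);
# 4 = assay not on that panel, 2 = negative, 5 = positive
group = pd.DataFrame({
    'record_id': ['IFC123_S01_P1', 'IFC123_S01_P2'],
    'sampleid':  ['S01_P1', 'S01_P2'],
    'sampleid_prefix': ['S01', 'S01'],
    'cchfv':   [5, 4],  # only P1 carries this assay
    'rvp_flu': [4, 2],  # only P2 carries this assay
})

merged = group.iloc[[0]].copy()
for col in ['cchfv', 'rvp_flu']:
    if all(group[col] == 4):
        merged[col] = 4  # assay absent from every panel
    else:
        # keep the value from the panel that actually ran the assay
        merged[col] = group.loc[group[col] != 4, col].dropna().unique()[0]

print(merged[['sampleid_prefix', 'cchfv', 'rvp_flu']])
#   sampleid_prefix  cchfv  rvp_flu
# 0             S01      5        2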
