Skip to content

Commit b562282

Browse files
LEBC authored and LEBC committed
adding changes to accommodate rmlst in joined tsv
1 parent 4fd25cf commit b562282

File tree

2 files changed: +26 -0 lines changed

bifrost_bridge/bifrost.py

+13
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ def process_qc_data(
4747
bracken_path: str = None,
4848
amrfinder_path: str = None,
4949
pmlst_path: str = None,
50+
rmlst_path: str = None,
5051
combine_output: bool = True,
5152
output_path: str = "./output.tsv",
5253
):
@@ -125,6 +126,16 @@ def process_qc_data(
125126
raise FileNotFoundError(f"File not found: {pmlst_path}")
126127
process_pmlst_data(input_path=pmlst_path, output_path="parsed_pmlst.tsv")
127128

129+
if rmlst_path is not None:
130+
if not os.path.exists(rmlst_path):
131+
raise FileNotFoundError(f"File not found: {rmlst_path}")
132+
process_rmlst_data(
133+
input_path=rmlst_path,
134+
output_path="parsed_rmlst.tsv",
135+
filter_columns="taxon,species,rank,support",
136+
replace_header="match,species,rank,percentage",
137+
)
138+
128139
if combine_output:
129140
# List of output files that were actually created
130141
output_files = []
@@ -143,6 +154,8 @@ def process_qc_data(
143154
output_files.append("parsed_bracken.tsv")
144155
if pmlst_path is not None:
145156
output_files.append("parsed_pmlst.tsv")
157+
if rmlst_path is not None:
158+
output_files.append("parsed_rmlst.tsv")
146159

147160
# Read and concatenate all output files
148161
combined_df = pd.concat(

nbs/99_bifrost.ipynb

+13
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@
9595
" bracken_path:str = None,\n",
9696
" amrfinder_path:str = None,\n",
9797
" pmlst_path:str = None,\n",
98+
" rmlst_path:str = None,\n",
9899
" combine_output:bool = True,\n",
99100
" output_path:str = './output.tsv'):\n",
100101
"\n",
@@ -176,6 +177,16 @@
176177
" input_path=pmlst_path, \n",
177178
" output_path='parsed_pmlst.tsv'\n",
178179
" )\n",
180+
"\n",
181+
" if rmlst_path is not None:\n",
182+
" if not os.path.exists(rmlst_path):\n",
183+
" raise FileNotFoundError(f\"File not found: {rmlst_path}\")\n",
184+
" process_rmlst_data(\n",
185+
" input_path=rmlst_path, \n",
186+
" output_path='parsed_rmlst.tsv',\n",
187+
" filter_columns=\"taxon,species,rank,support\",\n",
188+
" replace_header=\"match,species,rank,percentage\"\n",
189+
" )\n",
179190
" \n",
180191
" if combine_output:\n",
181192
" # List of output files that were actually created\n",
@@ -195,6 +206,8 @@
195206
" output_files.append('parsed_bracken.tsv')\n",
196207
" if pmlst_path is not None:\n",
197208
" output_files.append('parsed_pmlst.tsv')\n",
209+
" if rmlst_path is not None:\n",
210+
" output_files.append('parsed_rmlst.tsv')\n",
198211
"\n",
199212
" # Read and concatenate all output files\n",
200213
" combined_df = pd.concat([pd.read_csv(file, sep='\\t') for file in output_files], axis=1)\n",

0 commit comments

Comments
 (0)