Add rmlst_path to process_qc_data and include parsed rMLST output in the joined TSV

LEBC · LEBC · commit b562282069bb · 2025-03-07T14:35:27.000+01:00
diff --git a/bifrost_bridge/bifrost.py b/bifrost_bridge/bifrost.py
@@ -47,6 +47,7 @@ def process_qc_data(
     bracken_path: str = None,
     amrfinder_path: str = None,
     pmlst_path: str = None,
+    rmlst_path: str = None,
     combine_output: bool = True,
     output_path: str = "./output.tsv",
 ):
@@ -125,6 +126,16 @@ def process_qc_data(
             raise FileNotFoundError(f"File not found: {pmlst_path}")
         process_pmlst_data(input_path=pmlst_path, output_path="parsed_pmlst.tsv")
 
+    if rmlst_path is not None:
+        if not os.path.exists(rmlst_path):
+            raise FileNotFoundError(f"File not found: {rmlst_path}")
+        process_rmlst_data(
+            input_path=rmlst_path,
+            output_path="parsed_rmlst.tsv",
+            filter_columns="taxon,species,rank,support",
+            replace_header="match,species,rank,percentage",
+        )
+
     if combine_output:
         # List of output files that were actually created
         output_files = []
@@ -143,6 +154,8 @@ def process_qc_data(
             output_files.append("parsed_bracken.tsv")
         if pmlst_path is not None:
             output_files.append("parsed_pmlst.tsv")
+        if rmlst_path is not None:
+            output_files.append("parsed_rmlst.tsv")
 
         # Read and concatenate all output files
         combined_df = pd.concat(
diff --git a/nbs/99_bifrost.ipynb b/nbs/99_bifrost.ipynb
@@ -95,6 +95,7 @@
     "    bracken_path:str = None,\n",
     "    amrfinder_path:str = None,\n",
     "    pmlst_path:str = None,\n",
+    "    rmlst_path:str = None,\n",
     "    combine_output:bool = True,\n",
     "    output_path:str = './output.tsv'):\n",
     "\n",
@@ -176,6 +177,16 @@
     "            input_path=pmlst_path, \n",
     "            output_path='parsed_pmlst.tsv'\n",
     "        )\n",
+    "\n",
+    "    if rmlst_path is not None:\n",
+    "        if not os.path.exists(rmlst_path):\n",
+    "            raise FileNotFoundError(f\"File not found: {rmlst_path}\")\n",
+    "        process_rmlst_data(\n",
+    "            input_path=rmlst_path, \n",
+    "            output_path='parsed_rmlst.tsv',\n",
+    "            filter_columns=\"taxon,species,rank,support\",\n",
+    "            replace_header=\"match,species,rank,percentage\"\n",
+    "        )\n",
     "    \n",
     "    if combine_output:\n",
     "        # List of output files that were actually created\n",
@@ -195,6 +206,8 @@
     "            output_files.append('parsed_bracken.tsv')\n",
     "        if pmlst_path is not None:\n",
     "            output_files.append('parsed_pmlst.tsv')\n",
+    "        if rmlst_path is not None:\n",
+    "            output_files.append('parsed_rmlst.tsv')\n",
     "\n",
     "        # Read and concatenate all output files\n",
     "        combined_df = pd.concat([pd.read_csv(file, sep='\\t') for file in output_files], axis=1)\n",