Skip to content

Commit dec9a99

Browse files
LEBC
LEBC
authored and
LEBC
committed Jan 13, 2025 ·
sdfg
1 parent a38012c commit dec9a99

File tree

4 files changed

+117
-69
lines changed

4 files changed

+117
-69
lines changed
 

‎bifrost_bridge/core.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -392,30 +392,32 @@ def import_data(self, file_path, file_type="csv", add_header=0):
392392
:param delimiter: Delimiter used in the file (default is comma for CSV).
393393
"""
394394
# Check if add_header is a string and split it into a list
395-
if add_header == 0:
396-
new_columns = None
397395
if isinstance(add_header, str):
398-
add_header = add_header.replace(" ", "")
399-
new_columns = add_header.split(",")
400-
if isinstance(add_header, list):
401-
new_columns = add_header
396+
if len(add_header) > 0:
397+
add_header = add_header.replace(" ", "").split(",")
398+
# elif isinstance(add_header, list) and len(add_header > 0):
402399

403400
if file_type == "csv":
404-
self.df = pd.read_csv(file_path, delimiter=",", names=new_columns)
405-
if isinstance(add_header, str):
406-
if len(new_columns) != len(self.df.columns):
401+
self.df = pd.read_csv(
402+
file_path, delimiter=",", names=add_header if add_header else None
403+
)
404+
# if isinstance(add_header, str):
405+
if add_header:
406+
if len(add_header) != len(self.df.columns):
407407
raise ValueError(
408-
f"Error: Number of new column names ({len(new_columns)}) must match the number of columns in the DataFrame ({len(self.df.columns)})."
408+
f"Error: Number of new column names ({len(add_header)}) must match the number of columns in the DataFrame ({len(self.df.columns)})."
409409
)
410-
self.df.columns = new_columns
410+
# self.df.columns = new_columns
411411
elif file_type == "tsv":
412-
self.df = pd.read_csv(file_path, delimiter="\t", names=new_columns)
413-
if isinstance(add_header, str):
414-
if len(new_columns) != len(self.df.columns):
412+
self.df = pd.read_csv(
413+
file_path, delimiter="\t", names=add_header if add_header else None
414+
)
415+
if add_header:
416+
if len(add_header) != len(self.df.columns):
415417
raise ValueError(
416-
f"Error: Number of new column names ({len(new_columns)}) must match the number of columns in the DataFrame ({len(self.df.columns)})."
418+
f"Error: Number of new column names ({len(add_header)}) must match the number of columns in the DataFrame ({len(self.df.columns)})."
417419
)
418-
self.df.columns = new_columns
420+
# self.df.columns = new_columns
419421
elif file_type == "json":
420422
self.import_nested_json_data(file_path)
421423
elif file_type == "yaml":

‎bifrost_bridge/quast.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import os
1111
import re
1212

13-
# Common to template
13+
# Common to template´
1414
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
1515
import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
1616
import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
@@ -31,6 +31,7 @@
3131
def process_quast_data(
3232
input_path: str,
3333
output_path: str = "./output.tsv",
34+
add_header: str = "",
3435
replace_header: str = None,
3536
filter_columns: str = None,
3637
transpose: bool = True,
@@ -50,31 +51,31 @@ def process_quast_data(
5051
"""
5152

5253
df = core.DataFrame()
53-
df.import_data(input_path, file_type="csv", add_header="placeholder")
54-
5554
# df.print_header()
5655
# df.show()
5756

58-
if replace_header:
59-
df.rename_header(replace_header)
60-
6157
if transpose:
62-
df_df = df.df
63-
df_df[["column_names", "values"]] = df_df["placeholder"].str.split(
64-
"\t", expand=True
58+
df.import_data(
59+
input_path, file_type="tsv", add_header=["column_names", "values"]
6560
)
66-
df_df.drop("placeholder", axis=1, inplace=True)
61+
df_df = df.df
6762
df_df = df_df.T
6863
df_df = df_df.rename(columns=df_df.loc["column_names"])
6964
df_df.drop("column_names", axis=0, inplace=True)
70-
# print(df_df, df_df.shape)
65+
# print(df_df, df_df.shape, df_df.columns)
7166
df.df = df_df
7267
# df_df.columns = df_df['column_names']
68+
else:
69+
df.import_data(input_path, file_type="tsv", add_header=add_header)
70+
# print(df.df)
71+
72+
if replace_header:
73+
df.rename_header(replace_header)
7374

7475
if filter_columns:
7576
df.filter_columns(filter_columns)
7677

77-
# print(df.df)
78+
# df.show()
7879
# print(type(df.df))
7980

8081
df.export_data(output_path, file_type="tsv")

‎nbs/00_core.ipynb

+29-18
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
},
1313
{
1414
"cell_type": "code",
15-
"execution_count": null,
15+
"execution_count": 1,
1616
"metadata": {},
1717
"outputs": [],
1818
"source": [
@@ -780,26 +780,25 @@
780780
" :param delimiter: Delimiter used in the file (default is comma for CSV).\n",
781781
" \"\"\"\n",
782782
" # Check if add_header is a string and split it into a list\n",
783-
" if add_header == 0:\n",
784-
" new_columns = None\n",
785783
" if isinstance(add_header, str):\n",
786-
" add_header = add_header.replace(\" \", \"\")\n",
787-
" new_columns = add_header.split(',')\n",
788-
" if isinstance(add_header, list):\n",
789-
" new_columns = add_header\n",
784+
" if len(add_header) > 0:\n",
785+
" add_header = add_header.replace(\" \", \"\").split(',')\n",
786+
" #elif isinstance(add_header, list) and len(add_header > 0):\n",
787+
"\n",
790788
"\n",
791789
" if file_type == 'csv':\n",
792-
" self.df = pd.read_csv(file_path, delimiter=',', names=new_columns)\n",
793-
" if isinstance(add_header, str):\n",
794-
" if len(new_columns) != len(self.df.columns):\n",
795-
" raise ValueError(f\"Error: Number of new column names ({len(new_columns)}) must match the number of columns in the DataFrame ({len(self.df.columns)}).\")\n",
796-
" self.df.columns = new_columns\n",
790+
" self.df = pd.read_csv(file_path, delimiter=',', names=add_header if add_header else None)\n",
791+
" #if isinstance(add_header, str):\n",
792+
" if add_header:\n",
793+
" if len(add_header) != len(self.df.columns):\n",
794+
" raise ValueError(f\"Error: Number of new column names ({len(add_header)}) must match the number of columns in the DataFrame ({len(self.df.columns)}).\")\n",
795+
" #self.df.columns = new_columns\n",
797796
" elif file_type == 'tsv':\n",
798-
" self.df = pd.read_csv(file_path, delimiter='\\t', names=new_columns)\n",
799-
" if isinstance(add_header, str):\n",
800-
" if len(new_columns) != len(self.df.columns):\n",
801-
" raise ValueError(f\"Error: Number of new column names ({len(new_columns)}) must match the number of columns in the DataFrame ({len(self.df.columns)}).\")\n",
802-
" self.df.columns = new_columns\n",
797+
" self.df = pd.read_csv(file_path, delimiter='\\t', names=add_header if add_header else None)\n",
798+
" if add_header:\n",
799+
" if len(add_header) != len(self.df.columns):\n",
800+
" raise ValueError(f\"Error: Number of new column names ({len(add_header)}) must match the number of columns in the DataFrame ({len(self.df.columns)}).\")\n",
801+
" #self.df.columns = new_columns\n",
803802
" elif file_type == 'json':\n",
804803
" self.import_nested_json_data(file_path)\n",
805804
" elif file_type == 'yaml':\n",
@@ -1095,9 +1094,21 @@
10951094
],
10961095
"metadata": {
10971096
"kernelspec": {
1098-
"display_name": "python3",
1097+
"display_name": "base",
10991098
"language": "python",
11001099
"name": "python3"
1100+
},
1101+
"language_info": {
1102+
"codemirror_mode": {
1103+
"name": "ipython",
1104+
"version": 3
1105+
},
1106+
"file_extension": ".py",
1107+
"mimetype": "text/x-python",
1108+
"name": "python",
1109+
"nbconvert_exporter": "python",
1110+
"pygments_lexer": "ipython3",
1111+
"version": "3.11.10"
11011112
}
11021113
},
11031114
"nbformat": 4,

‎nbs/04_quast.ipynb

+57-23
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 23,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -11,7 +11,7 @@
1111
},
1212
{
1313
"cell_type": "code",
14-
"execution_count": null,
14+
"execution_count": 24,
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
@@ -23,7 +23,7 @@
2323
},
2424
{
2525
"cell_type": "code",
26-
"execution_count": null,
26+
"execution_count": 25,
2727
"metadata": {},
2828
"outputs": [],
2929
"source": [
@@ -34,7 +34,7 @@
3434
"import os\n",
3535
"import re\n",
3636
"\n",
37-
"# Common to template\n",
37+
"# Common to template´\n",
3838
"# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`\n",
3939
"import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/\n",
4040
"import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml\n",
@@ -54,7 +54,7 @@
5454
},
5555
{
5656
"cell_type": "code",
57-
"execution_count": null,
57+
"execution_count": 26,
5858
"metadata": {},
5959
"outputs": [],
6060
"source": [
@@ -72,7 +72,7 @@
7272
},
7373
{
7474
"cell_type": "code",
75-
"execution_count": null,
75+
"execution_count": 27,
7676
"metadata": {},
7777
"outputs": [],
7878
"source": [
@@ -81,6 +81,7 @@
8181
"def process_quast_data(\n",
8282
" input_path:str,\n",
8383
" output_path:str = './output.tsv',\n",
84+
" add_header:str = '',\n",
8485
" replace_header:str = None,\n",
8586
" filter_columns:str = None,\n",
8687
" transpose:bool = True):\n",
@@ -100,31 +101,30 @@
100101
" \"\"\"\n",
101102
"\n",
102103
" df = core.DataFrame()\n",
103-
" df.import_data(input_path, file_type='csv', add_header='placeholder')\n",
104-
"\n",
105104
" #df.print_header()\n",
106105
" #df.show()\n",
107106
"\n",
108-
" if replace_header:\n",
109-
" df.rename_header(replace_header)\n",
110-
"\n",
111107
" if transpose:\n",
108+
" df.import_data(input_path, file_type='tsv', add_header=['column_names', 'values'])\n",
112109
" df_df = df.df\n",
113-
" df_df[['column_names','values']] = df_df['placeholder'].str.split('\\t',expand=True)\n",
114-
" df_df.drop('placeholder', axis = 1, inplace=True)\n",
115110
" df_df = df_df.T\n",
116111
" df_df = df_df.rename(columns=df_df.loc['column_names'])\n",
117112
" df_df.drop('column_names', axis=0, inplace=True)\n",
118-
" #print(df_df, df_df.shape)\n",
113+
" #print(df_df, df_df.shape, df_df.columns)\n",
119114
" df.df = df_df\n",
120115
" #df_df.columns = df_df['column_names']\n",
116+
" else:\n",
117+
" df.import_data(input_path, file_type='tsv', add_header=add_header)\n",
118+
" #print(df.df)\n",
121119
"\n",
120+
" if replace_header:\n",
121+
" df.rename_header(replace_header)\n",
122122
"\n",
123123
" if filter_columns:\n",
124124
" df.filter_columns(filter_columns)\n",
125125
"\n",
126126
"\n",
127-
" #print(df.df)\n",
127+
" #df.show()\n",
128128
" #print(type(df.df))\n",
129129
"\n",
130130
" df.export_data(output_path, file_type='tsv')\n",
@@ -142,18 +142,40 @@
142142
},
143143
{
144144
"cell_type": "code",
145-
"execution_count": null,
145+
"execution_count": 30,
146146
"metadata": {},
147147
"outputs": [],
148148
"source": [
149149
"# |hide\n",
150150
"# Example usage of the function\n",
151-
"#process_quast_data(\n",
152-
"# input_path='/Users/B246654/vscode_storage/ssi-dk/bifrost_bridge/test_data/quast.tsv', \n",
153-
"# output_path='/Users/B246654/vscode_storage/ssi-dk/bifrost_bridge/test_data/quast_test_out.tsv',\n",
154-
"# filter_columns='Assembly,# contigs (>= 0 bp), N50',\n",
155-
"# transpose=True\n",
156-
"#)"
151+
"process_quast_data(\n",
152+
" input_path='/Users/B246357/ssi_stuff/bifrost_bridge/test_data/quast.tsv', \n",
153+
" output_path='/Users/B246357/ssi_stuff/bifrost_bridge/test_data/quast_test_out.tsv',\n",
154+
" #filter_columns='Assembly,# contigs (>= 0 bp), N50',\n",
155+
" add_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',\n",
156+
" #replace_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',\n",
157+
" transpose=True\n",
158+
")"
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": 36,
164+
"metadata": {},
165+
"outputs": [],
166+
"source": [
167+
"# |hide\n",
168+
"# Example usage of the function\n",
169+
"process_quast_data(\n",
170+
" input_path='/Users/B246357/ssi_stuff/bifrost_bridge/test_data/quast_transposed.tsv', \n",
171+
" output_path='/Users/B246357/ssi_stuff/bifrost_bridge/test_data/quast_test_out.tsv',\n",
172+
" #filter_columns='Assembly,# contigs (>= 0 bp), N50',\n",
173+
" #add_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',\n",
174+
" add_header = '1',\n",
175+
" #replace_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',\n",
176+
" #filter_columns = '1,2,3',\n",
177+
" transpose=False\n",
178+
")"
157179
]
158180
},
159181
{
@@ -178,9 +200,21 @@
178200
],
179201
"metadata": {
180202
"kernelspec": {
181-
"display_name": "python3",
203+
"display_name": "base",
182204
"language": "python",
183205
"name": "python3"
206+
},
207+
"language_info": {
208+
"codemirror_mode": {
209+
"name": "ipython",
210+
"version": 3
211+
},
212+
"file_extension": ".py",
213+
"mimetype": "text/x-python",
214+
"name": "python",
215+
"nbconvert_exporter": "python",
216+
"pygments_lexer": "ipython3",
217+
"version": "3.11.10"
184218
}
185219
},
186220
"nbformat": 4,

0 commit comments

Comments (0)
Please sign in to comment.