Skip to content

Commit dec9a99

Browse files
LEBC
LEBC
authored and
LEBC
committed Jan 13, 2025 ·
sdfg
1 parent a38012c commit dec9a99

File tree

4 files changed

+117
-69
lines changed

4 files changed

+117
-69
lines changed
 

‎bifrost_bridge/core.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -392,30 +392,32 @@ def import_data(self, file_path, file_type="csv", add_header=0):
392392
:param delimiter: Delimiter used in the file (default is comma for CSV).
393393
"""
394394
# Check if add_header is a string and split it into a list
395-
if add_header == 0:
396-
new_columns = None
397395
if isinstance(add_header, str):
398-
add_header = add_header.replace(" ", "")
399-
new_columns = add_header.split(",")
400-
if isinstance(add_header, list):
401-
new_columns = add_header
396+
if len(add_header) > 0:
397+
add_header = add_header.replace(" ", "").split(",")
398+
# elif isinstance(add_header, list) and len(add_header > 0):
402399

403400
if file_type == "csv":
404-
self.df = pd.read_csv(file_path, delimiter=",", names=new_columns)
405-
if isinstance(add_header, str):
406-
if len(new_columns) != len(self.df.columns):
401+
self.df = pd.read_csv(
402+
file_path, delimiter=",", names=add_header if add_header else None
403+
)
404+
# if isinstance(add_header, str):
405+
if add_header:
406+
if len(add_header) != len(self.df.columns):
407407
raise ValueError(
408-
f"Error: Number of new column names ({len(new_columns)}) must match the number of columns in the DataFrame ({len(self.df.columns)})."
408+
f"Error: Number of new column names ({len(add_header)}) must match the number of columns in the DataFrame ({len(self.df.columns)})."
409409
)
410-
self.df.columns = new_columns
410+
# self.df.columns = new_columns
411411
elif file_type == "tsv":
412-
self.df = pd.read_csv(file_path, delimiter="\t", names=new_columns)
413-
if isinstance(add_header, str):
414-
if len(new_columns) != len(self.df.columns):
412+
self.df = pd.read_csv(
413+
file_path, delimiter="\t", names=add_header if add_header else None
414+
)
415+
if add_header:
416+
if len(add_header) != len(self.df.columns):
415417
raise ValueError(
416-
f"Error: Number of new column names ({len(new_columns)}) must match the number of columns in the DataFrame ({len(self.df.columns)})."
418+
f"Error: Number of new column names ({len(add_header)}) must match the number of columns in the DataFrame ({len(self.df.columns)})."
417419
)
418-
self.df.columns = new_columns
420+
# self.df.columns = new_columns
419421
elif file_type == "json":
420422
self.import_nested_json_data(file_path)
421423
elif file_type == "yaml":

‎bifrost_bridge/quast.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import os
1111
import re
1212

13-
# Common to template
13+
# Common to template´
1414
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
1515
import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
1616
import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml
@@ -31,6 +31,7 @@
3131
def process_quast_data(
3232
input_path: str,
3333
output_path: str = "./output.tsv",
34+
add_header: str = "",
3435
replace_header: str = None,
3536
filter_columns: str = None,
3637
transpose: bool = True,
@@ -50,31 +51,31 @@ def process_quast_data(
5051
"""
5152

5253
df = core.DataFrame()
53-
df.import_data(input_path, file_type="csv", add_header="placeholder")
54-
5554
# df.print_header()
5655
# df.show()
5756

58-
if replace_header:
59-
df.rename_header(replace_header)
60-
6157
if transpose:
62-
df_df = df.df
63-
df_df[["column_names", "values"]] = df_df["placeholder"].str.split(
64-
"\t", expand=True
58+
df.import_data(
59+
input_path, file_type="tsv", add_header=["column_names", "values"]
6560
)
66-
df_df.drop("placeholder", axis=1, inplace=True)
61+
df_df = df.df
6762
df_df = df_df.T
6863
df_df = df_df.rename(columns=df_df.loc["column_names"])
6964
df_df.drop("column_names", axis=0, inplace=True)
70-
# print(df_df, df_df.shape)
65+
# print(df_df, df_df.shape, df_df.columns)
7166
df.df = df_df
7267
# df_df.columns = df_df['column_names']
68+
else:
69+
df.import_data(input_path, file_type="tsv", add_header=add_header)
70+
# print(df.df)
71+
72+
if replace_header:
73+
df.rename_header(replace_header)
7374

7475
if filter_columns:
7576
df.filter_columns(filter_columns)
7677

77-
# print(df.df)
78+
# df.show()
7879
# print(type(df.df))
7980

8081
df.export_data(output_path, file_type="tsv")

‎nbs/00_core.ipynb

+29-18
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
},
1313
{
1414
"cell_type": "code",
15-
"execution_count": null,
15+
"execution_count": 1,
1616
"metadata": {},
1717
"outputs": [],
1818
"source": [
@@ -780,26 +780,25 @@
780780
" :param delimiter: Delimiter used in the file (default is comma for CSV).\n",
781781
" \"\"\"\n",
782782
" # Check if add_header is a string and split it into a list\n",
783-
" if add_header == 0:\n",
784-
" new_columns = None\n",
785783
" if isinstance(add_header, str):\n",
786-
" add_header = add_header.replace(\" \", \"\")\n",
787-
" new_columns = add_header.split(',')\n",
788-
" if isinstance(add_header, list):\n",
789-
" new_columns = add_header\n",
784+
" if len(add_header) > 0:\n",
785+
" add_header = add_header.replace(\" \", \"\").split(',')\n",
786+
" #elif isinstance(add_header, list) and len(add_header > 0):\n",
787+
"\n",
790788
"\n",
791789
" if file_type == 'csv':\n",
792-
" self.df = pd.read_csv(file_path, delimiter=',', names=new_columns)\n",
793-
" if isinstance(add_header, str):\n",
794-
" if len(new_columns) != len(self.df.columns):\n",
795-
" raise ValueError(f\"Error: Number of new column names ({len(new_columns)}) must match the number of columns in the DataFrame ({len(self.df.columns)}).\")\n",
796-
" self.df.columns = new_columns\n",
790+
" self.df = pd.read_csv(file_path, delimiter=',', names=add_header if add_header else None)\n",
791+
" #if isinstance(add_header, str):\n",
792+
" if add_header:\n",
793+
" if len(add_header) != len(self.df.columns):\n",
794+
" raise ValueError(f\"Error: Number of new column names ({len(add_header)}) must match the number of columns in the DataFrame ({len(self.df.columns)}).\")\n",
795+
" #self.df.columns = new_columns\n",
797796
" elif file_type == 'tsv':\n",
798-
" self.df = pd.read_csv(file_path, delimiter='\\t', names=new_columns)\n",
799-
" if isinstance(add_header, str):\n",
800-
" if len(new_columns) != len(self.df.columns):\n",
801-
" raise ValueError(f\"Error: Number of new column names ({len(new_columns)}) must match the number of columns in the DataFrame ({len(self.df.columns)}).\")\n",
802-
" self.df.columns = new_columns\n",
797+
" self.df = pd.read_csv(file_path, delimiter='\\t', names=add_header if add_header else None)\n",
798+
" if add_header:\n",
799+
" if len(add_header) != len(self.df.columns):\n",
800+
" raise ValueError(f\"Error: Number of new column names ({len(add_header)}) must match the number of columns in the DataFrame ({len(self.df.columns)}).\")\n",
801+
" #self.df.columns = new_columns\n",
803802
" elif file_type == 'json':\n",
804803
" self.import_nested_json_data(file_path)\n",
805804
" elif file_type == 'yaml':\n",
@@ -1095,9 +1094,21 @@
10951094
],
10961095
"metadata": {
10971096
"kernelspec": {
1098-
"display_name": "python3",
1097+
"display_name": "base",
10991098
"language": "python",
11001099
"name": "python3"
1100+
},
1101+
"language_info": {
1102+
"codemirror_mode": {
1103+
"name": "ipython",
1104+
"version": 3
1105+
},
1106+
"file_extension": ".py",
1107+
"mimetype": "text/x-python",
1108+
"name": "python",
1109+
"nbconvert_exporter": "python",
1110+
"pygments_lexer": "ipython3",
1111+
"version": "3.11.10"
11011112
}
11021113
},
11031114
"nbformat": 4,

‎nbs/04_quast.ipynb

+57-23
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": null,
5+
"execution_count": 23,
66
"metadata": {},
77
"outputs": [],
88
"source": [
@@ -11,7 +11,7 @@
1111
},
1212
{
1313
"cell_type": "code",
14-
"execution_count": null,
14+
"execution_count": 24,
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
@@ -23,7 +23,7 @@
2323
},
2424
{
2525
"cell_type": "code",
26-
"execution_count": null,
26+
"execution_count": 25,
2727
"metadata": {},
2828
"outputs": [],
2929
"source": [
@@ -34,7 +34,7 @@
3434
"import os\n",
3535
"import re\n",
3636
"\n",
37-
"# Common to template\n",
37+
"# Common to template´\n",
3838
"# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`\n",
3939
"import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/\n",
4040
"import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml\n",
@@ -54,7 +54,7 @@
5454
},
5555
{
5656
"cell_type": "code",
57-
"execution_count": null,
57+
"execution_count": 26,
5858
"metadata": {},
5959
"outputs": [],
6060
"source": [
@@ -72,7 +72,7 @@
7272
},
7373
{
7474
"cell_type": "code",
75-
"execution_count": null,
75+
"execution_count": 27,
7676
"metadata": {},
7777
"outputs": [],
7878
"source": [
@@ -81,6 +81,7 @@
8181
"def process_quast_data(\n",
8282
" input_path:str,\n",
8383
" output_path:str = './output.tsv',\n",
84+
" add_header:str = '',\n",
8485
" replace_header:str = None,\n",
8586
" filter_columns:str = None,\n",
8687
" transpose:bool = True):\n",
@@ -100,31 +101,30 @@
100101
" \"\"\"\n",
101102
"\n",
102103
" df = core.DataFrame()\n",
103-
" df.import_data(input_path, file_type='csv', add_header='placeholder')\n",
104-
"\n",
105104
" #df.print_header()\n",
106105
" #df.show()\n",
107106
"\n",
108-
" if replace_header:\n",
109-
" df.rename_header(replace_header)\n",
110-
"\n",
111107
" if transpose:\n",
108+
" df.import_data(input_path, file_type='tsv', add_header=['column_names', 'values'])\n",
112109
" df_df = df.df\n",
113-
" df_df[['column_names','values']] = df_df['placeholder'].str.split('\\t',expand=True)\n",
114-
" df_df.drop('placeholder', axis = 1, inplace=True)\n",
115110
" df_df = df_df.T\n",
116111
" df_df = df_df.rename(columns=df_df.loc['column_names'])\n",
117112
" df_df.drop('column_names', axis=0, inplace=True)\n",
118-
" #print(df_df, df_df.shape)\n",
113+
" #print(df_df, df_df.shape, df_df.columns)\n",
119114
" df.df = df_df\n",
120115
" #df_df.columns = df_df['column_names']\n",
116+
" else:\n",
117+
" df.import_data(input_path, file_type='tsv', add_header=add_header)\n",
118+
" #print(df.df)\n",
121119
"\n",
120+
" if replace_header:\n",
121+
" df.rename_header(replace_header)\n",
122122
"\n",
123123
" if filter_columns:\n",
124124
" df.filter_columns(filter_columns)\n",
125125
"\n",
126126
"\n",
127-
" #print(df.df)\n",
127+
" #df.show()\n",
128128
" #print(type(df.df))\n",
129129
"\n",
130130
" df.export_data(output_path, file_type='tsv')\n",
@@ -142,18 +142,40 @@
142142
},
143143
{
144144
"cell_type": "code",
145-
"execution_count": null,
145+
"execution_count": 30,
146146
"metadata": {},
147147
"outputs": [],
148148
"source": [
149149
"# |hide\n",
150150
"# Example usage of the function\n",
151-
"#process_quast_data(\n",
152-
"# input_path='/Users/B246654/vscode_storage/ssi-dk/bifrost_bridge/test_data/quast.tsv', \n",
153-
"# output_path='/Users/B246654/vscode_storage/ssi-dk/bifrost_bridge/test_data/quast_test_out.tsv',\n",
154-
"# filter_columns='Assembly,# contigs (>= 0 bp), N50',\n",
155-
"# transpose=True\n",
156-
"#)"
151+
"process_quast_data(\n",
152+
" input_path='/Users/B246357/ssi_stuff/bifrost_bridge/test_data/quast.tsv', \n",
153+
" output_path='/Users/B246357/ssi_stuff/bifrost_bridge/test_data/quast_test_out.tsv',\n",
154+
" #filter_columns='Assembly,# contigs (>= 0 bp), N50',\n",
155+
" add_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',\n",
156+
" #replace_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',\n",
157+
" transpose=True\n",
158+
")"
159+
]
160+
},
161+
{
162+
"cell_type": "code",
163+
"execution_count": 36,
164+
"metadata": {},
165+
"outputs": [],
166+
"source": [
167+
"# |hide\n",
168+
"# Example usage of the function\n",
169+
"process_quast_data(\n",
170+
" input_path='/Users/B246357/ssi_stuff/bifrost_bridge/test_data/quast_transposed.tsv', \n",
171+
" output_path='/Users/B246357/ssi_stuff/bifrost_bridge/test_data/quast_test_out.tsv',\n",
172+
" #filter_columns='Assembly,# contigs (>= 0 bp), N50',\n",
173+
" #add_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',\n",
174+
" add_header = '1',\n",
175+
" #replace_header = '1,2,3,4,5,6,7,8,9,10,11,12,13,14,15',\n",
176+
" #filter_columns = '1,2,3',\n",
177+
" transpose=False\n",
178+
")"
157179
]
158180
},
159181
{
@@ -178,9 +200,21 @@
178200
],
179201
"metadata": {
180202
"kernelspec": {
181-
"display_name": "python3",
203+
"display_name": "base",
182204
"language": "python",
183205
"name": "python3"
206+
},
207+
"language_info": {
208+
"codemirror_mode": {
209+
"name": "ipython",
210+
"version": 3
211+
},
212+
"file_extension": ".py",
213+
"mimetype": "text/x-python",
214+
"name": "python",
215+
"nbconvert_exporter": "python",
216+
"pygments_lexer": "ipython3",
217+
"version": "3.11.10"
184218
}
185219
},
186220
"nbformat": 4,

0 commit comments

Comments (0)
Please sign in to comment.