Skip to content

Commit 8e9550b

Browse files
committed
bifrost function added
1 parent d9347c7 commit 8e9550b

14 files changed

+328
-38
lines changed

Diff for: bifrost_bridge/_modidx.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
'doc_host': 'https://$GIT_USER_NAME.github.io',
66
'git_url': 'https://github.com/$GIT_USER_NAME/bifrost_bridge',
77
'lib_path': 'bifrost_bridge'},
8-
'syms': { 'bifrost_bridge.cli': {'bifrost_bridge.cli.cli': ('cli.html#cli', 'bifrost_bridge/cli.py')},
8+
'syms': { 'bifrost_bridge.bifrost': { 'bifrost_bridge.bifrost.process_qc_data': ( 'bifrost.html#process_qc_data',
9+
'bifrost_bridge/bifrost.py')},
10+
'bifrost_bridge.cli': {'bifrost_bridge.cli.cli': ('cli.html#cli', 'bifrost_bridge/cli.py')},
911
'bifrost_bridge.core': { 'bifrost_bridge.core.DataFrame': ('core.html#dataframe', 'bifrost_bridge/core.py'),
1012
'bifrost_bridge.core.DataFrame.__init__': ('core.html#dataframe.__init__', 'bifrost_bridge/core.py'),
1113
'bifrost_bridge.core.cli': ('core.html#cli', 'bifrost_bridge/core.py'),
@@ -31,5 +33,9 @@
3133
'bifrost_bridge.core.show_project_env_vars': ( 'core.html#show_project_env_vars',
3234
'bifrost_bridge/core.py')},
3335
'bifrost_bridge.fastp': { 'bifrost_bridge.fastp.process_fastp_data': ( 'fastp.html#process_fastp_data',
34-
'bifrost_bridge/fastp.py')},
35-
'bifrost_bridge.mlst': {'bifrost_bridge.mlst.process_mlst_data': ('mlst.html#process_mlst_data', 'bifrost_bridge/mlst.py')}}}
36+
'bifrost_bridge/fastp.py'),
37+
'bifrost_bridge.fastp.process_fastp_data_from_cli': ( 'fastp.html#process_fastp_data_from_cli',
38+
'bifrost_bridge/fastp.py')},
39+
'bifrost_bridge.mlst': { 'bifrost_bridge.mlst.process_mlst_data': ('mlst.html#process_mlst_data', 'bifrost_bridge/mlst.py'),
40+
'bifrost_bridge.mlst.process_mlst_data_from_cli': ( 'mlst.html#process_mlst_data_from_cli',
41+
'bifrost_bridge/mlst.py')}}}

Diff for: bifrost_bridge/bifrost.py

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/99_bifrost.ipynb.
2+
3+
# %% auto 0
4+
__all__ = ['process_qc_data']
5+
6+
# %% ../nbs/99_bifrost.ipynb 2
7+
# That export there, it makes sure this code goes into the module.
8+
9+
# standard libs
10+
import os
11+
import re
12+
13+
# Common to template
14+
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
15+
import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
16+
import envyaml # Allows loading env vars into a YAML file, https://github.com/thesimj/envyaml
17+
import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
18+
from fastcore import (
19+
test,
20+
)
21+
from fastcore.script import (
22+
call_parse,
23+
) # for @call_parse, https://fastcore.fast.ai/script
24+
import json # for nicely printing json and yaml
25+
from fastcore import test
26+
27+
#!export
28+
from . import core
29+
30+
# %% ../nbs/99_bifrost.ipynb 5
31+
from .mlst import process_mlst_data
32+
from .fastp import process_fastp_data
33+
34+
35+
@call_parse
def process_qc_data(
    mlst_path: str = None, fastp_path: str = None, output_path: str = "./output.tsv"
):
    """
    Command-line interface for processing QC data.

    Runs the MLST and/or FASTP parsers on the supplied input files. Either
    input may be omitted (left as None), in which case that parser is skipped.
    Every supplied path is checked for existence before any parser runs, so a
    missing file never leaves a partially written set of outputs behind.

    Arguments:
        mlst_path (str): Path to the MLST input file, or None to skip MLST.
        fastp_path (str): Path to the FASTP input file, or None to skip FASTP.
        output_path (str): Accepted for the CLI (default: './output.tsv') but
            not read by this function; results are written to
            './parsed_mlst.tsv' and './parsed_fastp.tsv' respectively.

    Raises:
        FileNotFoundError: If a supplied input path does not exist.
    """
    # Echo the received arguments (kept from the original implementation).
    print(mlst_path)
    print(fastp_path)

    # Fail fast: validate all supplied inputs up front, in the same order the
    # parsers run, so the first missing file is the one that gets reported.
    for candidate in (mlst_path, fastp_path):
        if candidate is not None and not os.path.exists(candidate):
            raise FileNotFoundError(f"File not found: {candidate}")

    if mlst_path is not None:
        # header_exists=0 together with add_header: the header row is supplied
        # here rather than taken from the input file.
        process_mlst_data(
            input_path=str(mlst_path),
            output_path="./parsed_mlst.tsv",
            replace_header=None,
            filter_columns="SampleID, Species, ST",
            header_exists=0,
            add_header="SampleID, Species, ST, 1, 2, 3, 4, 5, 6, 7",
        )

    if fastp_path is not None:
        # The three filtered columns are renamed to the short names given in
        # replace_header.
        process_fastp_data(
            input_path=fastp_path,
            output_path="./parsed_fastp.tsv",
            filter_columns="summary£fastp_version, summary£sequencing, summary£before_filtering£total_reads",
            replace_header="fastp_version, sequencing, total_reads",
        )

Diff for: bifrost_bridge/fastp.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/03_fastp.ipynb.
22

33
# %% auto 0
4-
__all__ = ['process_fastp_data']
4+
__all__ = ['process_fastp_data', 'process_fastp_data_from_cli']
55

66
# %% ../nbs/03_fastp.ipynb 2
77
# That export there, it makes sure this code goes into the module.
@@ -28,7 +28,6 @@
2828
from . import core
2929

3030
# %% ../nbs/03_fastp.ipynb 5
31-
@call_parse
3231
def process_fastp_data(
3332
input_path: str,
3433
output_path: str = "./output.tsv",
@@ -62,3 +61,13 @@ def process_fastp_data(
6261
df.show()
6362

6463
df.export_data(output_path, file_type="tsv")
64+
65+
66+
@call_parse
def process_fastp_data_from_cli(
    input_path: str,
    output_path: str = "./output.tsv",
    replace_header: str = None,
    filter_columns: str = None,
):
    """
    Command-line entry point for process_fastp_data.

    Forwards every argument, unchanged and in declaration order, to
    process_fastp_data. The return value of that call is discarded.

    Arguments:
        input_path (str): Passed through as the first positional argument.
        output_path (str): Passed through as the second positional argument
            (default: './output.tsv').
        replace_header (str): Passed through as the third positional argument.
        filter_columns (str): Passed through as the fourth positional argument.
    """
    # Positional forwarding is kept deliberately: the callee receives the
    # values in exactly this order.
    forwarded = (input_path, output_path, replace_header, filter_columns)
    process_fastp_data(*forwarded)

Diff for: bifrost_bridge/mlst.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/02_mlst.ipynb.
22

33
# %% auto 0
4-
__all__ = ['process_mlst_data']
4+
__all__ = ['process_mlst_data', 'process_mlst_data_from_cli']
55

66
# %% ../nbs/02_mlst.ipynb 2
77
# That export there, it makes sure this code goes into the module.
@@ -26,7 +26,6 @@
2626
from . import core
2727

2828
# %% ../nbs/02_mlst.ipynb 6
29-
@call_parse
3029
def process_mlst_data(
3130
input_path: str,
3231
output_path: str = "./output.tsv",
@@ -67,3 +66,22 @@ def process_mlst_data(
6766
df.show()
6867

6968
df.export_data(output_path, file_type="tsv")
69+
70+
71+
@call_parse
def process_mlst_data_from_cli(
    input_path: str,
    output_path: str = "./output.tsv",
    replace_header: str = None,
    filter_columns: str = None,
    header_exists: int = 1,
    add_header: str = None,
):
    """
    Command-line entry point for process_mlst_data.

    Forwards every argument, unchanged and in declaration order, to
    process_mlst_data. The return value of that call is discarded.

    Arguments:
        input_path (str): Passed through as the first positional argument.
        output_path (str): Passed through as the second positional argument
            (default: './output.tsv').
        replace_header (str): Passed through as the third positional argument.
        filter_columns (str): Passed through as the fourth positional argument.
        header_exists (int): Passed through as the fifth positional argument
            (default: 1).
        add_header (str): Passed through as the sixth positional argument.
    """
    # Positional forwarding is kept deliberately: the callee receives the
    # values in exactly this order.
    forwarded = (
        input_path,
        output_path,
        replace_header,
        filter_columns,
        header_exists,
        add_header,
    )
    process_mlst_data(*forwarded)

Diff for: nbs/00_core.ipynb

-1
Original file line numberDiff line numberDiff line change
@@ -915,7 +915,6 @@
915915
"name": "stdout",
916916
"output_type": "stream",
917917
"text": [
918-
"SampleID, Species, ST\n",
919918
" SampleID Species ST\n",
920919
"0 TestSample2 campylobacter 22\n"
921920
]

Diff for: nbs/02_mlst.ipynb

+12-13
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@
8282
"source": [
8383
"# |export\n",
8484
"\n",
85-
"@call_parse\n",
8685
"def process_mlst_data(\n",
8786
" input_path:str,\n",
8887
" output_path:str = './output.tsv',\n",
@@ -120,24 +119,24 @@
120119
" \n",
121120
" df.show()\n",
122121
"\n",
123-
" df.export_data(output_path, file_type='tsv')"
122+
" df.export_data(output_path, file_type='tsv')\n",
123+
"\n",
124+
"@call_parse\n",
125+
"def process_mlst_data_from_cli(\n",
126+
" input_path:str,\n",
127+
" output_path:str = './output.tsv',\n",
128+
" replace_header:str = None,\n",
129+
" filter_columns:str = None,\n",
130+
" header_exists:int = 1,\n",
131+
" add_header:str = None):\n",
132+
" process_mlst_data(input_path, output_path, replace_header, filter_columns, header_exists, add_header)"
124133
]
125134
},
126135
{
127136
"cell_type": "code",
128137
"execution_count": null,
129138
"metadata": {},
130-
"outputs": [
131-
{
132-
"name": "stdout",
133-
"output_type": "stream",
134-
"text": [
135-
"SampleID, Species, ST\n",
136-
" SampleID Species ST\n",
137-
"0 TestSample2 campylobacter 22\n"
138-
]
139-
}
140-
],
139+
"outputs": [],
141140
"source": [
142141
"# |hide\n",
143142
"# Example usage of the function\n",

Diff for: nbs/03_fastp.ipynb

+11-12
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@
7878
"source": [
7979
"# |export\n",
8080
"\n",
81-
"@call_parse\n",
8281
"def process_fastp_data(\n",
8382
" input_path:str,\n",
8483
" output_path:str = './output.tsv',\n",
@@ -111,23 +110,23 @@
111110
"\n",
112111
" df.show()\n",
113112
"\n",
114-
" df.export_data(output_path, file_type='tsv')"
113+
" df.export_data(output_path, file_type='tsv')\n",
114+
"\n",
115+
"\n",
116+
"@call_parse\n",
117+
"def process_fastp_data_from_cli(\n",
118+
" input_path:str,\n",
119+
" output_path:str = './output.tsv',\n",
120+
" replace_header:str = None,\n",
121+
" filter_columns:str = None):\n",
122+
" process_fastp_data(input_path, output_path, replace_header, filter_columns)"
115123
]
116124
},
117125
{
118126
"cell_type": "code",
119127
"execution_count": null,
120128
"metadata": {},
121-
"outputs": [
122-
{
123-
"name": "stdout",
124-
"output_type": "stream",
125-
"text": [
126-
" fastp_version sequencing total_reads\n",
127-
"0 0.23.4 paired end (151 cycles + 151 cycles) 4369610\n"
128-
]
129-
}
130-
],
129+
"outputs": [],
131130
"source": [
132131
"# |hide\n",
133132
"# Example usage of the function\n",

0 commit comments

Comments
 (0)