Skip to content

Commit d35c495

Browse files
LEBCLEBC
LEBC
authored and
LEBC
committedFeb 25, 2025·
adding rmlst stuff
1 parent 3784da9 commit d35c495

File tree

4 files changed

+359
-0
lines changed

4 files changed

+359
-0
lines changed
 

‎bifrost_bridge/rmlst.py

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/09_rmlst.ipynb.
2+
3+
# %% auto 0
4+
__all__ = ['process_rmlst_data', 'process_rmlst_data_from_cli']
5+
6+
# %% ../nbs/09_rmlst.ipynb 2
7+
# That export there, it makes sure this code goes into the module.
8+
9+
# standard libs
10+
import os
11+
import re
12+
13+
# Common to template
14+
# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`
15+
import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/
16+
import envyaml # Allows loading env vars into a yaml file, https://github.com/thesimj/envyaml
17+
import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/
18+
from fastcore import (
19+
test,
20+
)
21+
from fastcore.script import (
22+
call_parse,
23+
) # for @call_parse, https://fastcore.fast.ai/script
24+
import json # for nicely printing json and yaml
25+
from fastcore import test
26+
from . import core
27+
import pandas as pd
28+
29+
# %% ../nbs/09_rmlst.ipynb 6
30+
def _flatten_rmlst_result(rmlst_dict: dict, sep: str = ",") -> pd.DataFrame:
    """Collapse a parsed rMLST JSON result into a single-row DataFrame.

    The row holds the entries of the ``fields`` section followed by the
    columns of the ``taxon_prediction`` list. When several predictions are
    present, the values of each column are joined with ``sep`` so that the
    result still fits on one line. A missing or empty section contributes
    no columns instead of raising ``KeyError``.
    """
    fields_df = pd.Series(rmlst_dict.get("fields") or {}, dtype=object).to_frame().T

    predictions = rmlst_dict.get("taxon_prediction") or []
    if not predictions:
        return fields_df

    # `taxon_prediction` is a list and may contain several entries:
    # one row per entry, then each column is joined into a single cell.
    prediction_df = pd.json_normalize(predictions)
    joined_df = (
        prediction_df.apply(
            lambda column: sep.join(str(value) for value in column), axis=0
        )
        .to_frame()
        .T
    )
    return pd.concat((fields_df, joined_df), axis=1)


def process_rmlst_data(
    input_path: str,
    output_path: str = "./output.tsv",
    replace_header: str = None,
    filter_columns: str = None,
    add_header: str = None,
):
    """
    Convert an rMLST JSON result file into a one-line TSV summary.

    The per-contig ``exact_matches`` section is discarded; the ``fields`` and
    ``taxon_prediction`` sections are flattened into a single row, optionally
    filtered and renamed, and written to ``output_path``.

    If the input file is empty (zero bytes), only a tab-separated header line
    built from ``filter_columns`` is written (an empty header when
    ``filter_columns`` is not given).

    Arguments:
        input_path (str): Path to the rMLST JSON input file.
        output_path (str): Path to the output file (default: './output.tsv').
        replace_header (str): Passed to ``core.DataFrame.rename_header`` when set;
            presumably a comma-separated list of new column names (default: None).
        filter_columns (str): Comma-separated column names. Passed to
            ``core.DataFrame.filter_columns`` when set, and used as the header
            of the output written for an empty input file (default: None).
        add_header (str): Accepted for interface compatibility with the other
            bridge functions; currently unused (default: None).

    Raises:
        FileNotFoundError: If ``input_path`` does not exist.
    """
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"The input file {input_path} does not exist.")

    if os.stat(input_path).st_size == 0:
        # Nothing to parse: emit a header-only file. `filter_columns` defaults
        # to None, so guard before splitting it.
        columns = (
            [col.strip() for col in filter_columns.split(",")]
            if filter_columns
            else []
        )
        # Tab-separated, to match the TSV written for non-empty input.
        pd.DataFrame(columns=columns).to_csv(output_path, sep="\t", index=False)
        return

    with open(input_path, encoding="utf-8") as rmlst_json:
        rmlst_dict = json.load(rmlst_json)
    # Remove the big section with data per contig; tolerate its absence.
    rmlst_dict.pop("exact_matches", None)

    df = core.DataFrame()
    df.df = _flatten_rmlst_result(rmlst_dict)

    if filter_columns:
        df.filter_columns(filter_columns)

    if replace_header:
        df.rename_header(replace_header)

    df.export_data(output_path, file_type="tsv")
94+
95+
96+
@call_parse
def process_rmlst_data_from_cli(
    input_path: str,  # Path to the rMLST JSON input file
    output_path: str = "./output.tsv",  # Path to the output file
    replace_header: str = None,  # Header to replace the existing header
    filter_columns: str = None,  # Columns to keep in the output
    add_header: str = None,  # Forwarded unchanged to process_rmlst_data
):
    """Console-script entry point: forward the parsed arguments to `process_rmlst_data`."""
    process_rmlst_data(
        input_path=input_path,
        output_path=output_path,
        replace_header=replace_header,
        filter_columns=filter_columns,
        add_header=add_header,
    )

‎nbs/09_rmlst.ipynb

+251
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"# |default_exp rmlst"
10+
]
11+
},
12+
{
13+
"cell_type": "code",
14+
"execution_count": null,
15+
"metadata": {},
16+
"outputs": [],
17+
"source": [
18+
"# |hide\n",
19+
"# See above? this hides these blocks, meaning these blocks aren't in the module and aren't in the documentation\n",
20+
"import nbdev\n",
21+
"from nbdev.showdoc import * # ignore this Pylance warning in favor of following nbdev docs"
22+
]
23+
},
24+
{
25+
"cell_type": "code",
26+
"execution_count": null,
27+
"metadata": {},
28+
"outputs": [],
29+
"source": [
30+
"# |export\n",
31+
"# That export there, it makes sure this code goes into the module.\n",
32+
"\n",
33+
"# standard libs\n",
34+
"import os\n",
35+
"import re\n",
36+
"\n",
37+
"# Common to template\n",
38+
"# add into settings.ini, requirements, package name is python-dotenv, for conda build ensure `conda config --add channels conda-forge`\n",
39+
"import dotenv # for loading config from .env files, https://pypi.org/project/python-dotenv/\n",
40+
"import envyaml # Allows to loads env vars into a yaml file, https://github.com/thesimj/envyaml\n",
41+
"import fastcore # To add functionality related to nbdev development, https://github.com/fastai/fastcore/\n",
42+
"from fastcore import (\n",
43+
" test,\n",
44+
")\n",
45+
"from fastcore.script import (\n",
46+
" call_parse,\n",
47+
") # for @call_parse, https://fastcore.fast.ai/script\n",
48+
"import json # for nicely printing json and yaml\n",
49+
"from fastcore import test\n",
50+
"from bifrost_bridge import core\n",
51+
"import pandas as pd\n"
52+
]
53+
},
54+
{
55+
"cell_type": "markdown",
56+
"metadata": {},
57+
"source": [
58+
"Because the notebooks are now located in the `nbs` folder, we need to change Python's working directory (`wd`) for the notebook to the project folder. Keep this cell in all notebooks, but don't export it to the package. "
59+
]
60+
},
61+
{
62+
"cell_type": "code",
63+
"execution_count": null,
64+
"metadata": {},
65+
"outputs": [],
66+
"source": [
67+
"# This block should never be exported. It is to have python running in the project (and not the nbs) dir, and to initiate the package using pip.\n",
68+
"os.chdir(core.PROJECT_DIR)"
69+
]
70+
},
71+
{
72+
"cell_type": "markdown",
73+
"metadata": {},
74+
"source": [
75+
"##################################################CODE_SEGMENT###########################################"
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"execution_count": null,
81+
"metadata": {},
82+
"outputs": [],
83+
"source": [
84+
"# |export\n",
85+
"\n",
86+
"def process_rmlst_data(\n",
87+
" input_path:str,\n",
88+
" output_path:str = './output.tsv',\n",
89+
" replace_header:str = None,\n",
90+
" filter_columns:str = None,\n",
91+
" add_header:str = None):\n",
92+
"\n",
93+
" \"\"\"\n",
94+
" Command-line interface for processing rmlst data.\n",
95+
"\n",
96+
" This function sets up an argument parser to handle command-line arguments for processing rmlst data files.\n",
97+
" It supports specifying input and output file paths, replacing headers, filtering columns, and handling the presence or absence of headers in the input file.\n",
98+
"\n",
99+
" Arguments:\n",
100+
" input_path (str): Path to the input file.\n",
101+
" output_path (str): Path to the output file (default: './output.tsv').\n",
102+
" replace_header (str): Header to replace the existing header (default: None).\n",
103+
" filter_columns (str): Columns to filter from the header (default: None).\n",
104+
" header_exists (int): Indicates if the header exists in the input file (default: 1).\n",
105+
" add_header (str): Header to add if the header does not exist in the input file (default: None).\n",
106+
" \"\"\"\n",
107+
"\n",
108+
"\n",
109+
" df = core.DataFrame()\n",
110+
"\n",
111+
" if not os.path.exists(input_path):\n",
112+
" raise FileNotFoundError(f\"The input file {input_path} does not exist.\")\n",
113+
" if os.stat(input_path).st_size > 0:\n",
114+
" #df.import_data(input_path, file_type='tsv', add_header=add_header)\n",
115+
" with open(input_path) as rmlst_json:\n",
116+
" rmlst_dict = json.load(rmlst_json)\n",
117+
" rmlst_dict.pop('exact_matches') # remove the big section with data per contig\n",
118+
" taxon_prediction_df = pd.json_normalize(rmlst_dict['taxon_prediction']) # this dict is a list and may contain several entries\n",
119+
" #print(taxon_prediction_df)\n",
120+
" def concatenate_vector(vector: list, sep=',') -> str:\n",
121+
" return ','.join([str(i) for i in vector])\n",
122+
"\n",
123+
" taxon_prediction_df = taxon_prediction_df.apply(concatenate_vector, axis=0).to_frame().T # concatenate the list into one line\n",
124+
" #print(rmlst_dict['taxon_prediction'])\n",
125+
" fields_df = pd.Series(rmlst_dict['fields']).to_frame().T\n",
126+
" # TODO: concatenate df, put it in to the scuffed povilas object and then carry on\n",
127+
" #print(fields_df)\n",
128+
" #print(taxon_prediction_df)\n",
129+
" conc_df = pd.concat((fields_df, taxon_prediction_df), axis=1)\n",
130+
" df.df = conc_df\n",
131+
"\n",
132+
" if filter_columns:\n",
133+
" df.filter_columns(filter_columns)\n",
134+
"\n",
135+
" if replace_header:\n",
136+
" df.rename_header(replace_header)\n",
137+
" \n",
138+
" #df.show()\n",
139+
"\n",
140+
" df.export_data(output_path, file_type='tsv')\n",
141+
" else:\n",
142+
" empty_df = pd.DataFrame(columns = [col.strip() for col in filter_columns.split(',')])\n",
143+
" empty_df.to_csv(output_path, index=False)\n",
144+
"\n",
145+
"@call_parse\n",
146+
"def process_rmlst_data_from_cli(\n",
147+
" input_path:str,\n",
148+
" output_path:str = './output.tsv',\n",
149+
" replace_header:str = None,\n",
150+
" filter_columns:str = None,\n",
151+
" add_header:str = None):\n",
152+
" process_rmlst_data(input_path, output_path, replace_header, filter_columns, add_header)"
153+
]
154+
},
155+
{
156+
"cell_type": "code",
157+
"execution_count": null,
158+
"metadata": {},
159+
"outputs": [],
160+
"source": [
161+
"#|hide\n",
162+
"#Example usage of the function\n",
163+
"process_rmlst_data(\n",
164+
" input_path='test_data/rmlst.json', \n",
165+
" output_path='test_data/rmlst_testout.tsv'#,\n",
166+
" #filter_columns=\"Query / Template length\"\n",
167+
")"
168+
]
169+
},
170+
{
171+
"cell_type": "code",
172+
"execution_count": null,
173+
"metadata": {},
174+
"outputs": [
175+
{
176+
"name": "stdout",
177+
"output_type": "stream",
178+
"text": [
179+
"Error: Number of new column names must match the number of columns in the DataFrame.\n",
180+
"Current header: ['genus', 'species', 'rST', 'rank', 'taxon', 'support', 'taxonomy']\n"
181+
]
182+
}
183+
],
184+
"source": [
185+
"#|hide\n",
186+
"#Example usage of the function\n",
187+
"process_rmlst_data(\n",
188+
" input_path='test_data/rmlst.json', \n",
189+
" output_path='test_data/rmlst_testout_custom_header.tsv',\n",
190+
" filter_columns=\"taxon,species,rank,support\",\n",
191+
" replace_header=\"match,species,rank,percentage\"\n",
192+
")"
193+
]
194+
},
195+
{
196+
"cell_type": "code",
197+
"execution_count": null,
198+
"metadata": {},
199+
"outputs": [
200+
{
201+
"name": "stdout",
202+
"output_type": "stream",
203+
"text": [
204+
" Database Plasmid Identity Query / Template length Contig Position in contig \\\n",
205+
"0 \n",
206+
"\n",
207+
" Note Accession number \n",
208+
"0 \n"
209+
]
210+
}
211+
],
212+
"source": [
213+
"#|hide\n",
214+
"#Example usage of the function\n",
215+
"#process_rmlst_data(\n",
216+
"# input_path='test_data/rmlst_empty.tsv', \n",
217+
"# output_path='test_data/rmlst_empty_testout.tsv',\n",
218+
"# filter_columns=\"Query / Template length\"\n",
219+
"#)"
220+
]
221+
},
222+
{
223+
"cell_type": "markdown",
224+
"metadata": {},
225+
"source": [
226+
"##################################################CODE_SEGMENT###########################################"
227+
]
228+
},
229+
{
230+
"cell_type": "code",
231+
"execution_count": null,
232+
"metadata": {},
233+
"outputs": [],
234+
"source": [
235+
"#| hide\n",
236+
"# This is included at the end to ensure when you run through your notebook the code is also transferred to the associated python package\n",
237+
"\n",
238+
"nbdev.nbdev_export()"
239+
]
240+
}
241+
],
242+
"metadata": {
243+
"kernelspec": {
244+
"display_name": "python3",
245+
"language": "python",
246+
"name": "python3"
247+
}
248+
},
249+
"nbformat": 4,
250+
"nbformat_minor": 2
251+
}

‎settings.ini

+1
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,5 @@ console_scripts =
3838
bridge_amrfinderplus=bifrost_bridge.amrfinderplus:process_amrfinderplus_data_from_cli
3939
bridge_bracken=bifrost_bridge.bracken:process_bracken_data_from_cli
4040
bridge_pmlst=bifrost_bridge.pmlst:process_pmlst_data_from_cli
41+
bridge_rmlst=bifrost_bridge.rmlst:process_rmlst_data_from_cli
4142
bridge_qc=bifrost_bridge.bifrost:process_qc_data

‎test_data/rmlst.json

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"fields": {"genus": "Clostridioides", "species": "Clostridioides difficile", "rST": "24672"}, "taxon_prediction": [{"rank": "SPECIES", "taxon": "Clostridioides difficile", "support": 100, "taxonomy": "Bacillota > Clostridia > Peptostreptococcales > Peptostreptococcaceae > Clostridioides > Clostridioides difficile"}], "exact_matches": {"BACT000031": [{"contig": "Contig_30_225.862", "start": 2913, "length": 831, "allele_id": "1019", "orientation": "forward", "end": 3743, "linked_data": {"rMLST genome database": {"species": [{"frequency": 1912, "value": "Clostridioides difficile"}]}}}], "BACT000059": [{"allele_id": "217", "length": 186, "start": 9666, "contig": "Contig_30_225.862", "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 19533}, {"frequency": 5, "value": "Clostridioides sp."}]}}, "end": 9851, "orientation": "forward"}], "BACT000032": [{"contig": "Contig_30_225.862", "start": 1311, "length": 630, "allele_id": "1003", "orientation": "forward", "end": 1940, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1906}]}}}], "BACT000058": [{"end": 5937, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 19788}, {"value": "Clostridioides sp.", "frequency": 31}]}}, "orientation": "forward", "length": 204, "allele_id": "201", "contig": "Contig_30_225.862", "start": 5734}], "BACT000049": [{"contig": "Contig_37_165.545", "start": 19747, "allele_id": "937", "length": 357, "orientation": "forward", "end": 20103, "linked_data": {"rMLST genome database": {"species": [{"frequency": 1926, "value": "Clostridioides difficile"}]}}}], "BACT000048": [{"orientation": "reverse", "linked_data": {"rMLST genome database": {"species": [{"frequency": 18546, "value": "Clostridioides difficile"}]}}, "end": 83827, "start": 83477, "contig": "Contig_22_162.775", "length": 351, "allele_id": "280"}], "BACT000033": [{"contig": 
"Contig_30_225.862", "start": 1970, "allele_id": "887", "length": 624, "orientation": "forward", "end": 2593, "linked_data": {"rMLST genome database": {"species": [{"frequency": 1897, "value": "Clostridioides difficile"}]}}}], "BACT000065": [{"linked_data": {"rMLST genome database": {"species": [{"frequency": 19545, "value": "Clostridioides difficile"}, {"value": "Clostridioides sp.", "frequency": 8}]}}, "end": 13749, "orientation": "forward", "length": 114, "allele_id": "205", "start": 13636, "contig": "Contig_30_225.862"}], "BACT000007": [{"linked_data": {"rMLST genome database": {"species": [{"frequency": 1903, "value": "Clostridioides difficile"}]}}, "end": 14320, "orientation": "forward", "length": 471, "allele_id": "960", "start": 13850, "contig": "Contig_29_219.314"}], "BACT000030": [{"allele_id": "1054", "length": 699, "start": 2292, "contig": "Contig_29_219.314", "linked_data": {"rMLST genome database": {"species": [{"frequency": 1923, "value": "Clostridioides difficile"}]}}, "end": 2990, "orientation": "forward"}], "BACT000064": [{"length": 195, "allele_id": "790", "start": 19497, "contig": "Contig_37_165.545", "linked_data": {"rMLST genome database": {"species": [{"frequency": 1930, "value": "Clostridioides difficile"}, {"value": "Clostridioides sp.", "frequency": 2}]}}, "end": 19691, "orientation": "forward"}], "BACT000017": [{"orientation": "forward", "end": 6215, "linked_data": {"rMLST genome database": {"species": [{"frequency": 19487, "value": "Clostridioides difficile"}, {"frequency": 7, "value": "Clostridioides sp."}]}}, "contig": "Contig_30_225.862", "start": 5961, "length": 255, "allele_id": "468"}], "BACT000004": [{"length": 624, "allele_id": "930", "start": 14739, "contig": "Contig_30_225.862", "linked_data": {"rMLST genome database": {"species": [{"frequency": 1929, "value": "Clostridioides difficile"}]}}, "end": 15362, "orientation": "forward"}], "BACT000038": [{"start": 120901, "contig": "Contig_14_199.888", "allele_id": "885", "length": 
450, "orientation": "reverse", "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1908}]}}, "end": 121350}], "BACT000006": [{"start": 125500, "contig": "Contig_14_199.888", "allele_id": "416", "length": 279, "orientation": "reverse", "linked_data": {"rMLST genome database": {"species": [{"frequency": 16147, "value": "Clostridioides difficile"}]}}, "end": 125778}], "BACT000052": [{"end": 2883, "linked_data": {"rMLST genome database": {"species": [{"frequency": 6536, "value": "Clostridioides difficile"}]}}, "orientation": "forward", "length": 291, "allele_id": "23", "contig": "Contig_30_225.862", "start": 2593}], "BACT000039": [{"start": 3213, "contig": "Contig_29_219.314", "length": 507, "allele_id": "916", "orientation": "forward", "linked_data": {"rMLST genome database": {"species": [{"frequency": 1903, "value": "Clostridioides difficile"}]}}, "end": 3719}], "BACT000014": [{"start": 7528, "contig": "Contig_30_225.862", "allele_id": "873", "length": 186, "orientation": "forward", "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 3111}, {"value": "Clostridioides sp.", "frequency": 33}]}}, "end": 7713}], "BACT000051": [{"length": 336, "allele_id": "884", "contig": "Contig_30_225.862", "start": 4091, "end": 4426, "linked_data": {"rMLST genome database": {"species": [{"frequency": 1913, "value": "Clostridioides difficile"}]}}, "orientation": "forward"}], "BACT000016": [{"contig": "Contig_22_162.775", "start": 85620, "length": 273, "allele_id": "900", "orientation": "reverse", "end": 85892, "linked_data": {"rMLST genome database": {"species": [{"frequency": 1924, "value": "Clostridioides difficile"}]}}}], "BACT000005": [{"orientation": "forward", "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1923}]}}, "end": 9651, "start": 9142, "contig": "Contig_30_225.862", "length": 510, "allele_id": "956"}], 
"BACT000042": [{"orientation": "forward", "end": 20667, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1902}]}}, "contig": "Contig_30_225.862", "start": 20236, "allele_id": "920", "length": 432}], "BACT000015": [{"allele_id": "853", "length": 258, "start": 19312, "contig": "Contig_22_162.775", "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1925}]}}, "end": 19569, "orientation": "reverse"}], "BACT000040": [{"length": 426, "allele_id": "957", "contig": "Contig_29_219.314", "start": 1796, "end": 2221, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1926}]}}, "orientation": "forward"}], "BACT000053": [{"start": 6630, "contig": "Contig_30_225.862", "allele_id": "13", "length": 309, "orientation": "forward", "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 19756}, {"frequency": 5, "value": "Clostridioides sp."}]}}, "end": 6938}], "BACT000043": [{"orientation": "forward", "end": 6609, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 19768}, {"value": "Clostridioides sp.", "frequency": 33}]}}, "contig": "Contig_30_225.862", "start": 6241, "length": 369, "allele_id": "338"}], "BACT000050": [{"contig": "Contig_22_162.775", "start": 177353, "allele_id": "236", "length": 312, "orientation": "reverse", "end": 177664, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 17480}]}}}], "BACT000010": [{"orientation": "forward", "end": 1219, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 18411}]}}, "contig": "Contig_30_225.862", "start": 908, "allele_id": "386", "length": 312}], "BACT000013": [{"start": 13902, "contig": "Contig_30_225.862", "allele_id": "937", "length": 372, "orientation": 
"forward", "linked_data": {"rMLST genome database": {"species": [{"frequency": 1928, "value": "Clostridioides difficile"}]}}, "end": 14273}], "BACT000003": [{"orientation": "forward", "linked_data": {"rMLST genome database": {"species": [{"frequency": 1896, "value": "Clostridioides difficile"}]}}, "end": 5265, "start": 4450, "contig": "Contig_30_225.862", "length": 816, "allele_id": "910"}], "BACT000046": [{"end": 16749, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1930}]}}, "orientation": "forward", "allele_id": "880", "length": 342, "contig": "Contig_30_225.862", "start": 16408}], "BACT000044": [{"length": 444, "allele_id": "936", "contig": "Contig_30_225.862", "start": 9884, "end": 10327, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 2340}]}}, "orientation": "forward"}], "BACT000001": [{"length": 1266, "allele_id": "1995", "contig": "Contig_25_165.135", "start": 38307, "end": 39572, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1905}]}}, "orientation": "forward"}], "BACT000012": [{"length": 423, "allele_id": "419", "start": 13305, "contig": "Contig_29_219.314", "linked_data": {"rMLST genome database": {"species": [{"frequency": 7008, "value": "Clostridioides difficile"}]}}, "end": 13727, "orientation": "forward"}], "BACT000011": [{"orientation": "forward", "end": 14707, "linked_data": {"rMLST genome database": {"species": [{"frequency": 1928, "value": "Clostridioides difficile"}]}}, "contig": "Contig_30_225.862", "start": 14309, "length": 399, "allele_id": "928"}], "BACT000056": [{"linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 2896}, {"frequency": 2, "value": "Clostridioides sp."}]}}, "end": 177007, "orientation": "reverse", "allele_id": "941", "length": 291, "start": 176717, "contig": "Contig_22_162.775"}], "BACT000045": [{"end": 
5732, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1925}]}}, "orientation": "forward", "length": 432, "allele_id": "895", "contig": "Contig_30_225.862", "start": 5301}], "BACT000002": [{"start": 57751, "contig": "Contig_9_145.921", "allele_id": "995", "length": 714, "orientation": "forward", "linked_data": {"rMLST genome database": {"species": [{"frequency": 1895, "value": "Clostridioides difficile"}]}}, "end": 58464}], "BACT000062": [{"linked_data": {"rMLST genome database": {"species": [{"frequency": 12238, "value": "Clostridioides difficile"}, {"frequency": 25, "value": "Clostridioides sp."}]}}, "end": 939, "orientation": "forward", "length": 150, "allele_id": "351", "start": 790, "contig": "Contig_29_219.314"}], "BACT000061": [{"end": 161606, "linked_data": {"rMLST genome database": {"species": [{"frequency": 19793, "value": "Clostridioides difficile"}, {"frequency": 3, "value": "Clostridioides sp."}]}}, "orientation": "reverse", "length": 177, "allele_id": "20", "contig": "Contig_22_162.775", "start": 161430}], "BACT000057": [{"orientation": "forward", "end": 10751, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 16629}]}}, "contig": "Contig_5_168.137", "start": 10563, "allele_id": "20", "length": 189}], "BACT000047": [{"end": 9121, "linked_data": {"rMLST genome database": {"species": [{"frequency": 1922, "value": "Clostridioides difficile"}]}}, "orientation": "forward", "length": 369, "allele_id": "839", "contig": "Contig_30_225.862", "start": 8753}], "BACT000020": [{"orientation": "reverse", "end": 69227, "linked_data": {"rMLST genome database": {"species": [{"frequency": 16472, "value": "Clostridioides difficile"}]}}, "contig": "Contig_47_156.181", "start": 68961, "allele_id": "252", "length": 267}], "BACT000063": [{"end": 141065, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 19797}, 
{"value": "Clostridioides sp.", "frequency": 8}]}}, "orientation": "reverse", "allele_id": "257", "length": 138, "contig": "Contig_14_199.888", "start": 140928}], "BACT000035": [{"allele_id": "954", "length": 543, "start": 8175, "contig": "Contig_30_225.862", "linked_data": {"rMLST genome database": {"species": [{"frequency": 1929, "value": "Clostridioides difficile"}]}}, "end": 8717, "orientation": "forward"}], "BACT000021": [{"end": 101160, "linked_data": {"rMLST genome database": {"species": [{"frequency": 19792, "value": "Clostridioides difficile"}, {"value": "Clostridioides sp.", "frequency": 6}]}}, "orientation": "forward", "length": 180, "allele_id": "212", "contig": "Contig_47_156.181", "start": 100981}], "BACT000034": [{"start": 6968, "contig": "Contig_30_225.862", "allele_id": "965", "length": 543, "orientation": "forward", "linked_data": {"rMLST genome database": {"species": [{"frequency": 1922, "value": "Clostridioides difficile"}]}}, "end": 7510}], "BACT000060": [{"orientation": "forward", "end": 62288, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 19779}]}}, "contig": "Contig_59_193.646", "start": 62088, "allele_id": "29", "length": 201}], "BACT000019": [{"start": 3778, "contig": "Contig_30_225.862", "allele_id": "843", "length": 282, "orientation": "forward", "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1927}]}}, "end": 4059}], "BACT000008": [{"contig": "Contig_30_225.862", "start": 7746, "length": 399, "allele_id": "337", "orientation": "forward", "end": 8144, "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 16898}]}}}], "BACT000036": [{"allele_id": "946", "length": 366, "start": 3777, "contig": "Contig_29_219.314", "linked_data": {"rMLST genome database": {"species": [{"frequency": 3410, "value": "Clostridioides difficile"}, {"frequency": 5, "value": "Clostridioides sp."}]}}, 
"end": 4142, "orientation": "forward"}], "BACT000009": [{"allele_id": "928", "length": 393, "start": 20696, "contig": "Contig_30_225.862", "linked_data": {"rMLST genome database": {"species": [{"value": "Clostridioides difficile", "frequency": 1923}]}}, "end": 21088, "orientation": "forward"}], "BACT000018": [{"orientation": "reverse", "linked_data": {"rMLST genome database": {"species": [{"frequency": 10510, "value": "Clostridioides difficile"}, {"value": "Clostridioides sp.", "frequency": 7}]}}, "end": 125010, "start": 124783, "contig": "Contig_14_199.888", "length": 228, "allele_id": "859"}]}}

0 commit comments

Comments
 (0)
Please sign in to comment.