From b26ca98e2592334d9816db77afb9d97ec6bba3f4 Mon Sep 17 00:00:00 2001 From: Samuel Bunga Date: Wed, 10 Nov 2021 09:31:30 -0500 Subject: [PATCH 1/8] bigg initial push --- src/pyobo/sources/bigg.py | 68 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 src/pyobo/sources/bigg.py diff --git a/src/pyobo/sources/bigg.py b/src/pyobo/sources/bigg.py new file mode 100644 index 00000000..51f86629 --- /dev/null +++ b/src/pyobo/sources/bigg.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- + +"""Converter for bigg.""" +import click +import bioversions +from typing import Iterable +from more_click import verbose_option +from pyobo.path_utils import ensure_df, ensure_tar_df +from pyobo.struct import Obo, Reference, Synonym, SynonymTypeDef, Term, from_species, TypeDef + +HEADER = ['bigg_id', 'universal_bigg_id', 'name', 'model_list', 'database_links', + 'old_bigg_ids'] +PREFIX = 'bigg' + +URL = 'http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt' + +alias_type = SynonymTypeDef(id="alias", name="alias") +has_role = TypeDef(reference=Reference(prefix="bigg", identifier="has_role")) + + +def get_obo(force: bool = False) -> Obo: + """Get bigg as OBO.""" + version = bioversions.get_version("bigg") + #version = '1.2' + return Obo( + ontology=PREFIX, + name="bigg models metabolites database", + iter_terms=get_terms, + iter_terms_kwargs=dict(force=False), + typedefs=[has_role], + synonym_typedefs=[alias_type], + auto_generated_by=f"bio2obo:{PREFIX}", + data_version=version, + ) + + +def get_terms(force: bool = False) -> Iterable[Term]: + bigg_df = ensure_df( + prefix=PREFIX, + url=URL, + sep="\t", + skiprows=18, + header=None, + names=HEADER, + ) + + for r, c in bigg_df.iterrows(): + bigg_id = c[0] + name = c[2] + synonyms = [] + term = Term( + reference=Reference(prefix=PREFIX, identifier=bigg_id, name=name), + definition=[], + synonyms=synonyms, + ) + yield term + + +@click.command() +@verbose_option +def _main(): + obo = get_obo(force=True) + obo.write_default(force=True, write_obo=True) + + +if __name__ == "__main__": + #get_obo(force=True).write_default(write_obo=True, write_obograph=True, force=True) + _main() \ No newline at end of file From 4615e74d2a30e5e2c7b29399b246cb7302b4c917 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 10 Nov 2021 16:19:58 +0100 Subject: [PATCH 2/8] Add autogenerated CLI --- src/pyobo/sources/bigg.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/src/pyobo/sources/bigg.py b/src/pyobo/sources/bigg.py index 51f86629..ed9eb527 100644 --- a/src/pyobo/sources/bigg.py +++ b/src/pyobo/sources/bigg.py @@ -1,10 +1,9 @@ # -*- coding: utf-8 -*- """Converter for bigg.""" -import click + import bioversions from typing import Iterable -from more_click import verbose_option from pyobo.path_utils import ensure_df, ensure_tar_df from pyobo.struct import Obo, Reference, Synonym, SynonymTypeDef, Term, from_species, TypeDef @@ -56,13 +55,5 @@ def get_terms(force: bool = False) -> Iterable[Term]: yield term -@click.command() -@verbose_option -def _main(): - obo = get_obo(force=True) - obo.write_default(force=True, write_obo=True) - - if __name__ == "__main__": - #get_obo(force=True).write_default(write_obo=True, write_obograph=True, force=True) - _main() \ No newline at end of file + get_obo(force=True).cli() From 94eb3652a2e29942d2c2f8b3b1e42beb45b1d468 Mon Sep 17 00:00:00 2001 From: Samuel Bunga Date: Wed, 10 Nov 2021 12:22:06 -0500 Subject: [PATCH 3/8] more changes --- src/pyobo/sources/bigg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pyobo/sources/bigg.py b/src/pyobo/sources/bigg.py index 51f86629..a03dfc81 100644 --- a/src/pyobo/sources/bigg.py +++ b/src/pyobo/sources/bigg.py @@ -5,8 +5,10 @@ import bioversions from typing import Iterable from more_click import verbose_option -from pyobo.path_utils import ensure_df, ensure_tar_df +from pyobo.path.utils import ensure_df, ensure_tar_df from pyobo.struct import Obo, Reference, Synonym, SynonymTypeDef, Term, from_species, TypeDef +from ..utils.path import ensure_df, ensure_tar_df + HEADER = ['bigg_id', 'universal_bigg_id', 'name', 'model_list', 'database_links', 'old_bigg_ids'] From e1a2e5a13ccb00aa29e1aa30abdbd45d4cbf0ece Mon Sep 17 00:00:00 2001 From: Samuel Bunga Date: Thu, 11 Nov 2021 22:37:39 -0500 Subject: [PATCH 4/8] initial working version of bigg --- src/pyobo/sources/bigg.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/pyobo/sources/bigg.py b/src/pyobo/sources/bigg.py index 28f1010f..f780f3b2 100644 --- a/src/pyobo/sources/bigg.py +++ b/src/pyobo/sources/bigg.py @@ -2,17 +2,24 @@ """Converter for bigg.""" +from typing import Iterable, Optional + import bioversions -from typing import Iterable -from pyobo.path_utils import ensure_df, ensure_tar_df -from pyobo.struct import Obo, Reference, Synonym, SynonymTypeDef, Term, from_species, TypeDef +from pyobo.struct import ( + Obo, + Reference, + SynonymTypeDef, + Term, + TypeDef +) + +from ..utils.path import ensure_df -HEADER = ['bigg_id', 'universal_bigg_id', 'name', 'model_list', 'database_links', - 'old_bigg_ids'] -PREFIX = 'bigg' +HEADER = ["bigg_id", "universal_bigg_id", "name", "model_list", "database_links", "old_bigg_ids"] +PREFIX = "bigg.metabolite" -URL = 'http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt' +URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt" alias_type = SynonymTypeDef(id="alias", name="alias") has_role = TypeDef(reference=Reference(prefix="bigg", identifier="has_role")) @@ -21,12 +28,12 @@ def get_obo(force: bool = False) -> Obo: """Get bigg as OBO.""" version = bioversions.get_version("bigg") - #version = '1.2' + # version = '1.2' return Obo( ontology=PREFIX, name="bigg models metabolites database", iter_terms=get_terms, - iter_terms_kwargs=dict(force=False), + iter_terms_kwargs=dict(force=force, version=version), typedefs=[has_role], synonym_typedefs=[alias_type], auto_generated_by=f"bio2obo:{PREFIX}", @@ -34,7 +41,7 @@ def get_obo(force: bool = False) -> Obo: ) -def get_terms(force: bool = False) -> Iterable[Term]: +def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Term]: bigg_df = ensure_df( prefix=PREFIX, url=URL, @@ -42,7 +49,9 @@ def get_terms(force: bool = False) -> Iterable[Term]: skiprows=18, header=None, names=HEADER, - ) + force=force, + version=version, + ) for r, c in bigg_df.iterrows(): bigg_id = c[0] @@ -50,7 +59,6 @@ def get_terms(force: bool = False) -> Iterable[Term]: synonyms = [] term = Term( reference=Reference(prefix=PREFIX, identifier=bigg_id, name=name), - definition=[], synonyms=synonyms, ) yield term From 5e0f9fb8a01690256154d08ef2efb85dd308ea61 Mon Sep 17 00:00:00 2001 From: Samuel Bunga Date: Thu, 11 Nov 2021 22:56:06 -0500 Subject: [PATCH 5/8] changed iterrows -> values for efficiency and getting only columns needed from ensure_df() --- src/pyobo/sources/bigg.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/pyobo/sources/bigg.py b/src/pyobo/sources/bigg.py index f780f3b2..d1d8ca13 100644 --- a/src/pyobo/sources/bigg.py +++ b/src/pyobo/sources/bigg.py @@ -6,17 +6,12 @@ import bioversions -from pyobo.struct import ( - Obo, - Reference, - SynonymTypeDef, - Term, - TypeDef -) +from pyobo.struct import Obo, Reference, SynonymTypeDef, Term, TypeDef from ..utils.path import ensure_df -HEADER = ["bigg_id", "universal_bigg_id", "name", "model_list", "database_links", "old_bigg_ids"] +# HEADER = ["bigg_id", "universal_bigg_id", "name", "model_list", "database_links", "old_bigg_ids"] +HEADER = ["bigg_id", "name"] PREFIX = "bigg.metabolite" URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt" @@ -53,9 +48,9 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te version=version, ) - for r, c in bigg_df.iterrows(): - bigg_id = c[0] - name = c[2] + for v in bigg_df.values: + bigg_id = v[0] + name = v[1] synonyms = [] term = Term( reference=Reference(prefix=PREFIX, identifier=bigg_id, name=name), From d5895b71a196875f3c5621484e461b8a53e44eb5 Mon Sep 17 00:00:00 2001 From: Samuel Bunga Date: Thu, 11 Nov 2021 23:00:06 -0500 Subject: [PATCH 6/8] changed to usecols --- src/pyobo/sources/bigg.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pyobo/sources/bigg.py b/src/pyobo/sources/bigg.py index d1d8ca13..f1d90f28 100644 --- a/src/pyobo/sources/bigg.py +++ b/src/pyobo/sources/bigg.py @@ -10,8 +10,7 @@ from ..utils.path import ensure_df -# HEADER = ["bigg_id", "universal_bigg_id", "name", "model_list", "database_links", "old_bigg_ids"] -HEADER = ["bigg_id", "name"] +HEADER = ["bigg_id", "universal_bigg_id", "name", "model_list", "database_links", "old_bigg_ids"] PREFIX = "bigg.metabolite" URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt" @@ -44,6 +43,7 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te skiprows=18, header=None, names=HEADER, + usecols=['bigg_id', 'name'], force=force, version=version, ) From e89f33c0bbc9b5e320c809e72d739d8a9e18a70f Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Fri, 12 Nov 2021 14:16:36 +0100 Subject: [PATCH 7/8] Update bigg.py --- src/pyobo/sources/bigg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyobo/sources/bigg.py b/src/pyobo/sources/bigg.py index f1d90f28..168bf73a 100644 --- a/src/pyobo/sources/bigg.py +++ b/src/pyobo/sources/bigg.py @@ -43,7 +43,7 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te skiprows=18, header=None, names=HEADER, - usecols=['bigg_id', 'name'], + usecols=["bigg_id", "name"], force=force, version=version, ) From be366cffb6e865960b2ba39e790f34073e3cb5eb Mon Sep 17 00:00:00 2001 From: Samuel Bunga Date: Fri, 12 Nov 2021 11:26:57 -0500 Subject: [PATCH 8/8] Removed empty Synonym list --- src/pyobo/sources/bigg.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pyobo/sources/bigg.py b/src/pyobo/sources/bigg.py index f1d90f28..25eaaa9d 100644 --- a/src/pyobo/sources/bigg.py +++ b/src/pyobo/sources/bigg.py @@ -51,10 +51,8 @@ def get_terms(force: bool = False, version: Optional[str] = None) -> Iterable[Te for v in bigg_df.values: bigg_id = v[0] name = v[1] - synonyms = [] term = Term( reference=Reference(prefix=PREFIX, identifier=bigg_id, name=name), - synonyms=synonyms, ) yield term