diff --git a/src/pyobo/getters.py b/src/pyobo/getters.py
index 360db59a..53dc8392 100644
--- a/src/pyobo/getters.py
+++ b/src/pyobo/getters.py
@@ -117,7 +117,7 @@ def get_ontology(
 
     ontology_format, path = _ensure_ontology_path(prefix, force=force, version=version)
     if path is None:
-        raise NoBuild
+        raise NoBuild(prefix)
     elif ontology_format == "obo":
         pass  # all gucci
     elif ontology_format == "owl":
diff --git a/src/pyobo/sources/__init__.py b/src/pyobo/sources/__init__.py
index ec3d7658..79756d61 100644
--- a/src/pyobo/sources/__init__.py
+++ b/src/pyobo/sources/__init__.py
@@ -8,6 +8,7 @@
 from .ccle import CCLEGetter
 from .cgnc import CGNCGetter
 from .chembl import ChEMBLCompoundGetter
+from .civic_gene import CIVICGeneGetter
 from .complexportal import ComplexPortalGetter
 from .conso import CONSOGetter
 from .cpt import CPTGetter
@@ -38,6 +39,7 @@
 from .msigdb import MSigDBGetter
 from .ncbigene import NCBIGeneGetter
 from .npass import NPASSGetter
+from .omim_ps import OMIMPSGetter
 from .pathbank import PathBankGetter
 from .pfam import PfamGetter
 from .pfam_clan import PfamClanGetter
@@ -61,6 +63,7 @@
     "AntibodyRegistryGetter",
     "CCLEGetter",
     "CGNCGetter",
+    "CIVICGeneGetter",
     "CONSOGetter",
     "CPTGetter",
     "CVXGetter",
@@ -94,6 +97,7 @@
     "MiRBaseMatureGetter",
     "NCBIGeneGetter",
     "NPASSGetter",
+    "OMIMPSGetter",
     "PIDGetter",
     "PathBankGetter",
     "PfamClanGetter",
diff --git a/src/pyobo/sources/civic_gene.py b/src/pyobo/sources/civic_gene.py
new file mode 100644
index 00000000..2df55c2b
--- /dev/null
+++ b/src/pyobo/sources/civic_gene.py
@@ -0,0 +1,63 @@
+# -*- coding: utf-8 -*-
+
+"""Converter for CiVIC Genes."""
+
+from typing import Iterable, Optional
+
+import pandas as pd
+
+from pyobo.struct import Obo, Reference, Term
+from pyobo.utils.path import ensure_df
+
+__all__ = [
+    "CIVICGeneGetter",
+]
+
+PREFIX = "civic.gid"
+#: Nightly export, used as the download location when no version is given
+URL = "https://civicdb.org/downloads/nightly/nightly-GeneSummaries.tsv"
+
+
+def _sort(_o, t):
+    """Return the term's identifier as an integer, for use as a sort key."""
+    return int(t.identifier)
+
+
+class CIVICGeneGetter(Obo):
+    """An ontology representation of CiVIC's gene nomenclature."""
+
+    bioversions_key = ontology = PREFIX
+    term_sort_key = _sort
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over gene terms for CiVIC."""
+        yield from get_terms(self.data_version, force=force)
+
+
+def get_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
+    """Get CIVIC terms.
+
+    :param version: The CiVIC release to download, formatted like ``01-Feb-2024``.
+        If none is given, the nightly export at :data:`URL` is used instead.
+    :param force: Passed through to :func:`ensure_df`.
+    :yields: One term per row of the gene summaries table.
+    """
+    if version is None:
+        # Formatting ``None`` into the versioned URL would request ".../None/None-..."
+        url = URL
+    else:
+        url = f"https://civicdb.org/downloads/{version}/{version}-GeneSummaries.tsv"
+    df = ensure_df(prefix=PREFIX, url=url, sep="\t", force=force, dtype=str, version=version)
+    for identifier, _, name, entrez_id, description, _last_review, _flag in df.values:
+        term = Term(
+            reference=Reference(prefix=PREFIX, identifier=identifier, name=name),
+            definition=description if pd.notna(description) else None,
+        )
+        # Same guard as for the description: a missing cell is not a usable identifier
+        if pd.notna(entrez_id):
+            term.append_exact_match(Reference(prefix="ncbigene", identifier=entrez_id))
+        yield term
+
+
+if __name__ == "__main__":
+    CIVICGeneGetter.cli()
diff --git a/src/pyobo/sources/omim_ps.py b/src/pyobo/sources/omim_ps.py
new file mode 100644
index 00000000..b809a235
--- /dev/null
+++ b/src/pyobo/sources/omim_ps.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+
+"""Converter for OMIM Phenotypic Series."""
+
+import logging
+from typing import Iterable
+
+from bioversions.utils import get_soup
+
+from pyobo.struct import Obo, Term
+
+__all__ = [
+    "OMIMPSGetter",
+]
+
+
+logger = logging.getLogger(__name__)
+PREFIX = "omim.ps"
+URL = "https://omim.org/phenotypicSeriesTitles/all"
+#: The part of each row's link target that precedes the identifier
+HREF_PREFIX = "/phenotypicSeries/"
+
+
+class OMIMPSGetter(Obo):
+    """An ontology representation of OMIM Phenotypic Series."""
+
+    ontology = bioversions_key = PREFIX
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over terms in the ontology.
+
+        :param force: Not used by this implementation.
+        :yields: One term per table row that links to a phenotypic series.
+        """
+        soup = get_soup(URL, user_agent="Mozilla/5.0")
+        rows = soup.find(id="mimContent").find("table").find("tbody").find_all("tr")
+        for row in rows:
+            cell = row.find("td")
+            anchor = cell.find("a") if cell is not None else None
+            href = anchor.attrs.get("href", "") if anchor is not None else ""
+            if not href.startswith(HREF_PREFIX):
+                # Slicing a link of any other shape would yield a bogus identifier
+                logger.warning("[%s] skipping row without a phenotypic series link", PREFIX)
+                continue
+            name = anchor.text.strip()
+            identifier = href[len(HREF_PREFIX) :]
+            yield Term.from_triple(PREFIX, identifier, name)
+
+
+if __name__ == "__main__":
+    OMIMPSGetter.cli()
diff --git a/src/pyobo/xrefdb/sources/wikidata.py b/src/pyobo/xrefdb/sources/wikidata.py
index 0df8841b..a497a667 100644
--- a/src/pyobo/xrefdb/sources/wikidata.py
+++ b/src/pyobo/xrefdb/sources/wikidata.py
@@ -24,7 +24,7 @@
 #: WikiData SPARQL endpoint. See https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service#Interfacing
 URL = "https://query.wikidata.org/bigdata/namespace/wdq/sparql"
 
-WIKIDATA_MAPPING_DIRECTORY = RAW_MODULE.submodule("wikidata", "mappings")
+WIKIDATA_MAPPING_DIRECTORY = RAW_MODULE.module("wikidata", "mappings")
 
 
 def get_wikidata_xrefs_df(*, use_tqdm: bool = True) -> pd.DataFrame: