From db77d3ae8fa6c0bc86f457035d0db1dd64031cb4 Mon Sep 17 00:00:00 2001 From: "Benjamin M. Gyori" Date: Mon, 27 Nov 2023 04:11:05 -0500 Subject: [PATCH] Make Python 3.9 compatible (#8) This PR makes this code usable in the environment I'm working in at least to the extent immediately necessary to make progress. Co-authored-by: Charles Tapley Hoyt --- src/semra/pipeline.py | 28 ++++++++++++++-------------- src/semra/sources/chembl.py | 6 ++++-- src/semra/sources/pubchem.py | 3 ++- src/semra/struct.py | 22 +++++++++++----------- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/semra/pipeline.py b/src/semra/pipeline.py index 54fe00b..9b35e80 100644 --- a/src/semra/pipeline.py +++ b/src/semra/pipeline.py @@ -5,7 +5,7 @@ import logging import time from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Optional from pydantic import BaseModel, Field, root_validator from tqdm.autonotebook import tqdm @@ -60,7 +60,7 @@ class Input(BaseModel): """Represents the input to a mapping assembly.""" source: Literal["pyobo", "bioontologies", "biomappings", "custom", "sssom", "gilda"] - prefix: str | None = None + prefix: Optional[str] = None confidence: float = 1.0 extras: dict[str, Any] = Field(default_factory=dict) @@ -89,8 +89,8 @@ class Configuration(BaseModel): description="A list of pairs of prefixes. Remove all mappings whose source " "prefix is the first in a pair and target prefix is second in a pair. Order matters.", ) - remove_prefixes: list[str] | None = None - keep_prefixes: list[str] | None = None + remove_prefixes: Optional[list[str]] = None + keep_prefixes: Optional[list[str]] = None remove_imprecise: bool = True validate_raw: bool = Field( default=False, @@ -98,18 +98,18 @@ class Configuration(BaseModel): "prefixes and local unique identifier regular expressions (when available)?", ) - raw_pickle_path: Path | None = None - raw_sssom_path: Path | None = None - raw_neo4j_path: Path | None = Field(default=None, description="Directory in which Neo4j stuff goes") - raw_neo4j_name: str | None = Field(default=None, description="Directory for docker tag for Neo4j") + raw_pickle_path: Optional[Path] = None + raw_sssom_path: Optional[Path] = None + raw_neo4j_path: Optional[Path] = Field(default=None, description="Directory in which Neo4j stuff goes") + raw_neo4j_name: Optional[str] = Field(default=None, description="Directory for docker tag for Neo4j") - processed_pickle_path: Path | None = None - processed_sssom_path: Path | None = None - processed_neo4j_path: Path | None = Field(default=None, description="Directory in which Neo4j stuff goes") - processed_neo4j_name: str | None = Field(default=None, description="Directory for docker tag for Neo4j") + processed_pickle_path: Optional[Path] = None + processed_sssom_path: Optional[Path] = None + processed_neo4j_path: Optional[Path] = Field(default=None, description="Directory in which Neo4j stuff goes") + processed_neo4j_name: Optional[str] = Field(default=None, description="Directory for docker tag for Neo4j") - priority_pickle_path: Path | None = None - priority_sssom_path: Path | None = None + priority_pickle_path: Optional[Path] = None + priority_sssom_path: Optional[Path] = None # note that making a priority neo4j doesn't make sense add_labels: bool = Field(default=False, description="Should PyOBO be used to look up labels for SSSOM output?") diff --git a/src/semra/sources/chembl.py b/src/semra/sources/chembl.py index 47706e5..f4fec39 100644 --- a/src/semra/sources/chembl.py +++ b/src/semra/sources/chembl.py @@ -1,4 +1,6 @@ """Get mappings from ChEMBL.""" +from typing import Optional + import bioregistry from semra import EXACT_MATCH, UNSPECIFIED_MAPPING, Mapping, MappingSet, Reference, SimpleEvidence @@ -9,7 +11,7 @@ ] -def get_chembl_compound_mappings(version: str | None = None) -> list[Mapping]: +def get_chembl_compound_mappings(version: Optional[str] = None) -> list[Mapping]: """Get ChEMBL chemical equivalences.""" import chembl_downloader @@ -36,7 +38,7 @@ def get_chembl_compound_mappings(version: str | None = None) -> list[Mapping]: return rows -def get_chembl_protein_mappings(version: str | None = None) -> list[Mapping]: +def get_chembl_protein_mappings(version: Optional[str] = None) -> list[Mapping]: """Get ChEMBL to protein mappings.""" import chembl_downloader diff --git a/src/semra/sources/pubchem.py b/src/semra/sources/pubchem.py index e313492..135cf8e 100644 --- a/src/semra/sources/pubchem.py +++ b/src/semra/sources/pubchem.py @@ -1,6 +1,7 @@ """Get mappings from PubChem.""" import logging +from typing import Optional import bioversions import pandas as pd @@ -16,7 +17,7 @@ logger = logging.getLogger(__name__) -def get_pubchem_mesh_mappings(version: str | None = None) -> list[Mapping]: +def get_pubchem_mesh_mappings(version: Optional[str] = None) -> list[Mapping]: """Get a mapping from PubChem compound identifiers to their equivalent MeSH terms.""" if version is None: version = bioversions.get_version("pubchem") diff --git a/src/semra/struct.py b/src/semra/struct.py index 2e1cdf2..9289c63 100644 --- a/src/semra/struct.py +++ b/src/semra/struct.py @@ -8,7 +8,7 @@ from collections.abc import Iterable from hashlib import md5 from itertools import islice -from typing import Annotated, Literal +from typing import Annotated, Literal, Optional, Union import pydantic from curies import Reference @@ -65,9 +65,9 @@ def curie(self) -> str: class MappingSet(pydantic.BaseModel): name: str = Field(..., description="Name of the mapping set") - version: str | None = Field(default=None, description="The version of the dataset from which the mapping comes") - license: str | None = Field(default=None, description="License name or URL for mapping set") - confidence: float | None = Field(default=None, description="Mapping set level confidence") + version: Optional[str] = Field(default=None, description="The version of the dataset from which the mapping comes") + license: Optional[str] = Field(default=None, description="License name or URL for mapping set") + confidence: Optional[float] = Field(default=None, description="Mapping set level confidence") def key(self): return self.name, self.version or "", self.license or "", 1.0 if self.confidence is None else self.confidence @@ -100,7 +100,7 @@ class Config: description="A SSSOM-compliant justification", ) mapping_set: MappingSet = Field(..., description="The name of the dataset from which the mapping comes") - author: Reference | None = Field( + author: Optional[Reference] = Field( default=None, description="A reference to the author of the mapping (e.g. with ORCID)", examples=[ @@ -123,7 +123,7 @@ def mapping_set_names(self) -> set[str]: return {self.mapping_set.name} @property - def confidence(self) -> float | None: + def confidence(self) -> Optional[float]: return self.mapping_set.confidence @property @@ -144,7 +144,7 @@ class Config: mappings: list[Mapping] = Field( ..., description="A list of mappings and their evidences consumed to create this evidence" ) - author: Reference | None = None + author: Optional[Reference] = None confidence_factor: float = 1.0 def key(self): @@ -155,7 +155,7 @@ def key(self): ) @property - def confidence(self) -> float | None: + def confidence(self) -> Optional[float]: confidences = [mapping.confidence for mapping in self.mappings] nn_confidences = [c for c in confidences if c is not None] if not nn_confidences: @@ -178,7 +178,7 @@ def explanation(self) -> str: Evidence = Annotated[ - ReasonedEvidence | SimpleEvidence, + Union[ReasonedEvidence, SimpleEvidence], Field(discriminator="evidence_type"), ] @@ -202,13 +202,13 @@ def triple(self) -> Triple: return self.s, self.p, self.o @classmethod - def from_triple(cls, triple: Triple, evidence: list[Evidence] | None = None) -> Mapping: + def from_triple(cls, triple: Triple, evidence: Union[list[Evidence], None] = None) -> Mapping: """Instantiate a mapping from a triple.""" s, p, o = triple return cls(s=s, p=p, o=o, evidence=evidence or []) @property - def confidence(self) -> float | None: + def confidence(self) -> Optional[float]: if not self.evidence: return None confidences = [e.confidence for e in self.evidence]