Skip to content

Commit

Permalink
Merge branch 'main' into add39test
Browse files: browse the repository at this point in the history
  • Loading branch information
cthoyt authored Nov 27, 2023
2 parents (0517e7f + db77d3a); commit d4f5cfa
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 28 deletions.
28 changes: 14 additions & 14 deletions src/semra/pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
import time
from pathlib import Path
from typing import Any, Literal
from typing import Any, Literal, Optional

from pydantic import BaseModel, Field, root_validator
from tqdm.autonotebook import tqdm
Expand Down Expand Up @@ -60,7 +60,7 @@ class Input(BaseModel):
"""Represents the input to a mapping assembly."""

source: Literal["pyobo", "bioontologies", "biomappings", "custom", "sssom", "gilda"]
prefix: str | None = None
prefix: Optional[str] = None
confidence: float = 1.0
extras: dict[str, Any] = Field(default_factory=dict)

Expand Down Expand Up @@ -89,27 +89,27 @@ class Configuration(BaseModel):
description="A list of pairs of prefixes. Remove all mappings whose source "
"prefix is the first in a pair and target prefix is second in a pair. Order matters.",
)
remove_prefixes: list[str] | None = None
keep_prefixes: list[str] | None = None
remove_prefixes: Optional[list[str]] = None
keep_prefixes: Optional[list[str]] = None
remove_imprecise: bool = True
validate_raw: bool = Field(
default=False,
description="Should the raw mappings be validated against Bioregistry "
"prefixes and local unique identifier regular expressions (when available)?",
)

raw_pickle_path: Path | None = None
raw_sssom_path: Path | None = None
raw_neo4j_path: Path | None = Field(default=None, description="Directory in which Neo4j stuff goes")
raw_neo4j_name: str | None = Field(default=None, description="Directory for docker tag for Neo4j")
raw_pickle_path: Optional[Path] = None
raw_sssom_path: Optional[Path] = None
raw_neo4j_path: Optional[Path] = Field(default=None, description="Directory in which Neo4j stuff goes")
raw_neo4j_name: Optional[str] = Field(default=None, description="Directory for docker tag for Neo4j")

processed_pickle_path: Path | None = None
processed_sssom_path: Path | None = None
processed_neo4j_path: Path | None = Field(default=None, description="Directory in which Neo4j stuff goes")
processed_neo4j_name: str | None = Field(default=None, description="Directory for docker tag for Neo4j")
processed_pickle_path: Optional[Path] = None
processed_sssom_path: Optional[Path] = None
processed_neo4j_path: Optional[Path] = Field(default=None, description="Directory in which Neo4j stuff goes")
processed_neo4j_name: Optional[str] = Field(default=None, description="Directory for docker tag for Neo4j")

priority_pickle_path: Path | None = None
priority_sssom_path: Path | None = None
priority_pickle_path: Optional[Path] = None
priority_sssom_path: Optional[Path] = None
# note that making a priority neo4j doesn't make sense

add_labels: bool = Field(default=False, description="Should PyOBO be used to look up labels for SSSOM output?")
Expand Down
6 changes: 4 additions & 2 deletions src/semra/sources/chembl.py
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
"""Get mappings from ChEMBL."""
from typing import Optional

import bioregistry

from semra import EXACT_MATCH, UNSPECIFIED_MAPPING, Mapping, MappingSet, Reference, SimpleEvidence
Expand All @@ -9,7 +11,7 @@
]


def get_chembl_compound_mappings(version: str | None = None) -> list[Mapping]:
def get_chembl_compound_mappings(version: Optional[str] = None) -> list[Mapping]:
"""Get ChEMBL chemical equivalences."""
import chembl_downloader

Expand All @@ -36,7 +38,7 @@ def get_chembl_compound_mappings(version: str | None = None) -> list[Mapping]:
return rows


def get_chembl_protein_mappings(version: str | None = None) -> list[Mapping]:
def get_chembl_protein_mappings(version: Optional[str] = None) -> list[Mapping]:
"""Get ChEMBL to protein mappings."""
import chembl_downloader

Expand Down
3 changes: 2 additions & 1 deletion src/semra/sources/pubchem.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
"""Get mappings from PubChem."""

import logging
from typing import Optional

import bioversions
import pandas as pd
Expand All @@ -16,7 +17,7 @@
logger = logging.getLogger(__name__)


def get_pubchem_mesh_mappings(version: str | None = None) -> list[Mapping]:
def get_pubchem_mesh_mappings(version: Optional[str] = None) -> list[Mapping]:
"""Get a mapping from PubChem compound identifiers to their equivalent MeSH terms."""
if version is None:
version = bioversions.get_version("pubchem")
Expand Down
22 changes: 11 additions & 11 deletions src/semra/struct.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@
from collections.abc import Iterable
from hashlib import md5
from itertools import islice
from typing import Annotated, Literal
from typing import Annotated, Literal, Optional, Union

import pydantic
from curies import Reference
Expand Down Expand Up @@ -65,9 +65,9 @@ def curie(self) -> str:

class MappingSet(pydantic.BaseModel):
name: str = Field(..., description="Name of the mapping set")
version: str | None = Field(default=None, description="The version of the dataset from which the mapping comes")
license: str | None = Field(default=None, description="License name or URL for mapping set")
confidence: float | None = Field(default=None, description="Mapping set level confidence")
version: Optional[str] = Field(default=None, description="The version of the dataset from which the mapping comes")
license: Optional[str] = Field(default=None, description="License name or URL for mapping set")
confidence: Optional[float] = Field(default=None, description="Mapping set level confidence")

def key(self):
return self.name, self.version or "", self.license or "", 1.0 if self.confidence is None else self.confidence
Expand Down Expand Up @@ -100,7 +100,7 @@ class Config:
description="A SSSOM-compliant justification",
)
mapping_set: MappingSet = Field(..., description="The name of the dataset from which the mapping comes")
author: Reference | None = Field(
author: Optional[Reference] = Field(
default=None,
description="A reference to the author of the mapping (e.g. with ORCID)",
examples=[
Expand All @@ -123,7 +123,7 @@ def mapping_set_names(self) -> set[str]:
return {self.mapping_set.name}

@property
def confidence(self) -> float | None:
def confidence(self) -> Optional[float]:
return self.mapping_set.confidence

@property
Expand All @@ -144,7 +144,7 @@ class Config:
mappings: list[Mapping] = Field(
..., description="A list of mappings and their evidences consumed to create this evidence"
)
author: Reference | None = None
author: Optional[Reference] = None
confidence_factor: float = 1.0

def key(self):
Expand All @@ -155,7 +155,7 @@ def key(self):
)

@property
def confidence(self) -> float | None:
def confidence(self) -> Optional[float]:
confidences = [mapping.confidence for mapping in self.mappings]
nn_confidences = [c for c in confidences if c is not None]
if not nn_confidences:
Expand All @@ -178,7 +178,7 @@ def explanation(self) -> str:


Evidence = Annotated[
ReasonedEvidence | SimpleEvidence,
Union[ReasonedEvidence, SimpleEvidence],
Field(discriminator="evidence_type"),
]

Expand All @@ -202,13 +202,13 @@ def triple(self) -> Triple:
return self.s, self.p, self.o

@classmethod
def from_triple(cls, triple: Triple, evidence: list[Evidence] | None = None) -> Mapping:
def from_triple(cls, triple: Triple, evidence: Union[list[Evidence], None] = None) -> Mapping:
"""Instantiate a mapping from a triple."""
s, p, o = triple
return cls(s=s, p=p, o=o, evidence=evidence or [])

@property
def confidence(self) -> float | None:
def confidence(self) -> Optional[float]:
if not self.evidence:
return None
confidences = [e.confidence for e in self.evidence]
Expand Down

0 comments on commit d4f5cfa

Please sign in to comment.