-
Notifications
You must be signed in to change notification settings - Fork 1
feat: testing centralized lazy imports #168
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| import importlib | ||
| import typing as t | ||
|
|
||
| if t.TYPE_CHECKING: | ||
| from types import ModuleType | ||
|
|
||
|
|
||
| class LazyImportError(ImportError): | ||
| def __init__(self, module_name: str, extras: str, package_name: str | None = None) -> None: | ||
| super().__init__( | ||
| f"Module '{module_name}' is not installed. Please install it with `pip install {package_name or module_name}` or `dreadnode[{extras}]` extras." | ||
| ) | ||
|
|
||
|
|
||
| class LazyImport: | ||
| def __init__(self, module_name: str, extras: str, package_name: str | None = None) -> None: | ||
| self._name = module_name | ||
| self._extras = extras | ||
| self._mod: ModuleType | None = None | ||
| self.package_name = package_name | ||
|
|
||
| def _load(self) -> t.Any: | ||
| if self._mod is None: | ||
| try: | ||
| self._mod = importlib.import_module(self._name) | ||
| except ModuleNotFoundError as e: | ||
| if e.name == self._name: | ||
| raise LazyImportError( | ||
| self._name, self._extras, package_name=self.package_name | ||
| ) from None | ||
| raise | ||
| return self._mod | ||
|
|
||
| def __getattr__(self, item: str) -> t.Any: | ||
| return getattr(self._load(), item) | ||
|
|
||
|
|
||
| class LazyAttr: | ||
| def __init__( | ||
| self, module_name: str, attr: str, extras: str, package_name: str | None = None | ||
| ) -> None: | ||
| self._module_name = module_name | ||
| self._attr = attr | ||
| self._extras = extras | ||
| self._value = None | ||
| self.package_name = package_name | ||
|
|
||
| def _load(self) -> t.Any: | ||
| if self._value is None: | ||
| try: | ||
| mod = importlib.import_module(self._module_name) | ||
| self._value = getattr(mod, self._attr) | ||
| except ModuleNotFoundError: | ||
| raise LazyImportError( | ||
| self._module_name, self._extras, package_name=self.package_name | ||
| ) from None | ||
| return self._value | ||
|
|
||
| def __getattr__(self, item: str) -> t.Any: | ||
| return getattr(self._load(), item) | ||
|
|
||
| def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any: | ||
| return self._load()(*args, **kwargs) | ||
|
|
||
| def __repr__(self) -> str: | ||
| status = "loaded" if self._value is not None else "unloaded" | ||
| return f"<LazyAttr {self._module_name}.{self._attr} ({status})>" | ||
|
|
||
| def __dir__(self) -> list[str]: | ||
| try: | ||
| return sorted(set(dir(self._load()))) | ||
| except LazyImportError: | ||
| return [ | ||
| "__call__", | ||
| "__getattr__", | ||
| "_load", | ||
| "_module_name", | ||
| "_attr", | ||
| "_extras", | ||
| "_value", | ||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| import typing as t | ||
|
|
||
| from dreadnode.lazy.core import LazyAttr, LazyImport | ||
|
|
||
| if t.TYPE_CHECKING: | ||
| import litellm as litellm # type: ignore[import-not-found] | ||
| import nltk as nltk # type: ignore[import-not-found] | ||
| from nltk.tokenize import ( # type: ignore[import-not-found] | ||
| word_tokenize as word_tokenize, | ||
| ) | ||
| from nltk.translate.bleu_score import ( # type: ignore[import-not-found] | ||
| sentence_bleu as sentence_bleu, | ||
| ) | ||
| from rapidfuzz import distance as distance # type: ignore[import-not-found] | ||
| from rapidfuzz import fuzz as fuzz # type: ignore[import-not-found] | ||
| from rapidfuzz import utils as utils # type: ignore[import-not-found] | ||
| from sentence_transformers import ( # type: ignore[import-not-found] | ||
| SentenceTransformer as SentenceTransformer, | ||
| ) | ||
| from sentence_transformers import ( # type: ignore[import-not-found] | ||
| util as util, | ||
| ) | ||
| from sklearn.feature_extraction.text import ( # type: ignore[import-not-found] | ||
| TfidfVectorizer as TfidfVectorizer, | ||
| ) | ||
| from sklearn.metrics.pairwise import ( # type: ignore[import-not-found] | ||
| cosine_similarity as cosine_similarity, | ||
| ) | ||
| else: | ||
| fuzz = LazyAttr("rapidfuzz", "fuzz", "text") | ||
| utils = LazyAttr("rapidfuzz", "utils", "text") | ||
| distance = LazyAttr("rapidfuzz", "distance", "text") | ||
| litellm = LazyImport("litellm", "llm") | ||
| util = LazyAttr("sentence_transformers", "util", "text", package_name="sentence-transformers") | ||
| TfidfVectorizer = LazyAttr( | ||
| "sklearn.feature_extraction.text", "TfidfVectorizer", "text", package_name="scikit-learn" | ||
| ) | ||
| SentenceTransformer = LazyAttr( | ||
| "sentence_transformers", "SentenceTransformer", "text", package_name="sentence-transformers" | ||
| ) | ||
| cosine_similarity = LazyAttr( | ||
| "sklearn.metrics.pairwise", "cosine_similarity", "text", package_name="scikit-learn" | ||
|
Comment on lines
+5
to
+42
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks, Brian, for tackling this.
Example: if TYPE_CHECKING:
from sklearn.metrics.pairwise import cosine_similarity as cosine_similarity # Place 1
from rapidfuzz import fuzz as fuzz # Place 1
....
else:
cosine_similarity = LazyAttr("sklearn.metrics.pairwise", "cosine_similarity", "text") # Place 2
fuzz = LazyAttr("rapidfuzz", "fuzz", "text") # Place 2
...
For instance, if we wanted to add a new sklearn import, we would need to update both blocks. The same applies if any method or API changes: it has to be synced in 2 places. If the two get out of sync, we might encounter runtime errors.
for text in large_dataset:
score = fuzz.ratio(ref, text) # __getattr__ overhead * 10,000 calls if we have that many examples in the large dataset
However, the calls are minimal, not expensive. Proposed solution: use a pattern similar to what you have now, pandas style: ref link
# dreadnode/utils/imports.py
def import_optional_dependency(
name: str,
extra: str = "",
package_name: str = None
) -> Any:
"""Import optional dependency with helpful error message."""
try:
return importlib.import_module(name)
except ImportError:
pkg = package_name or name
raise ImportError(
f"Missing dependency '{name}'. "
f"Install with: pip install {pkg} or dreadnode[{extra}]"
) from None
Then in scorers/similarity.py
def similarity_with_rapidfuzz(
reference: str,
method: str = "ratio"
) -> Scorer:
"""RapidFuzz similarity scorer."""
def evaluate(data: Any) -> Metric:
# Import exactly when needed - no globals, no dual maintenance
fuzz = import_optional_dependency("rapidfuzz.fuzz", extra="text", package_name="rapidfuzz")
candidate_text = str(data)
score = getattr(fuzz, method)(reference, candidate_text)
return Metric(value=score / 100.0)
return Scorer(evaluate, name=f"rapidfuzz_{method}")
def similarity_with_sentence_transformers(
reference: str,
model_name: str = "all-MiniLM-L6-v2"
) -> Scorer:
"""Sentence Transformers similarity scorer."""
def evaluate(data: Any) -> Metric:
# Complex package - import both modules cleanly
st = import_optional_dependency("sentence_transformers", extra="text", package_name="sentence-transformers")
torch = import_optional_dependency("torch", extra="text")
model = st.SentenceTransformer(model_name)
# ... rest of implementation
return Metric(value=similarity_score)
return Scorer(evaluate, name="sentence_transformers")
I think with this we could also minimize TYPE_CHECKING/else blocks and perf overhead. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thank you. Will try it out! There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I was trying to find an existing tidy example. Good find w/ pandas |
||
| ) | ||
| nltk = LazyImport("nltk", "text") | ||
| word_tokenize = LazyAttr("nltk.tokenize", "word_tokenize", "text", package_name="nltk") | ||
| sentence_bleu = LazyAttr( | ||
| "nltk.translate.bleu_score", "sentence_bleu", "text", package_name="nltk" | ||
| ) | ||
Uh oh!
There was an error while loading. Please reload this page.