Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added dreadnode/lazy/__init__.py
Empty file.
81 changes: 81 additions & 0 deletions dreadnode/lazy/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import importlib
import typing as t

if t.TYPE_CHECKING:
from types import ModuleType


class LazyImportError(ImportError):
def __init__(self, module_name: str, extras: str, package_name: str | None = None) -> None:
super().__init__(
f"Module '{module_name}' is not installed. Please install it with `pip install {package_name or module_name}` or `dreadnode[{extras}]` extras."
)


class LazyImport:
def __init__(self, module_name: str, extras: str, package_name: str | None = None) -> None:
self._name = module_name
self._extras = extras
self._mod: ModuleType | None = None
self.package_name = package_name

def _load(self) -> t.Any:
if self._mod is None:
try:
self._mod = importlib.import_module(self._name)
except ModuleNotFoundError as e:
if e.name == self._name:
raise LazyImportError(
self._name, self._extras, package_name=self.package_name
) from None
raise
return self._mod

def __getattr__(self, item: str) -> t.Any:
return getattr(self._load(), item)


class LazyAttr:
def __init__(
self, module_name: str, attr: str, extras: str, package_name: str | None = None
) -> None:
self._module_name = module_name
self._attr = attr
self._extras = extras
self._value = None
self.package_name = package_name

def _load(self) -> t.Any:
if self._value is None:
try:
mod = importlib.import_module(self._module_name)
self._value = getattr(mod, self._attr)
except ModuleNotFoundError:
raise LazyImportError(
self._module_name, self._extras, package_name=self.package_name
) from None
return self._value

def __getattr__(self, item: str) -> t.Any:
return getattr(self._load(), item)

def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
return self._load()(*args, **kwargs)

def __repr__(self) -> str:
status = "loaded" if self._value is not None else "unloaded"
return f"<LazyAttr {self._module_name}.{self._attr} ({status})>"

def __dir__(self) -> list[str]:
try:
return sorted(set(dir(self._load())))
except LazyImportError:
return [
"__call__",
"__getattr__",
"_load",
"_module_name",
"_attr",
"_extras",
"_value",
]
48 changes: 48 additions & 0 deletions dreadnode/lazy/scorers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import typing as t

from dreadnode.lazy.core import LazyAttr, LazyImport

if t.TYPE_CHECKING:
import litellm as litellm # type: ignore[import-not-found]
import nltk as nltk # type: ignore[import-not-found]
from nltk.tokenize import ( # type: ignore[import-not-found]
word_tokenize as word_tokenize,
)
from nltk.translate.bleu_score import ( # type: ignore[import-not-found]
sentence_bleu as sentence_bleu,
)
from rapidfuzz import distance as distance # type: ignore[import-not-found]
from rapidfuzz import fuzz as fuzz # type: ignore[import-not-found]
from rapidfuzz import utils as utils # type: ignore[import-not-found]
from sentence_transformers import ( # type: ignore[import-not-found]
SentenceTransformer as SentenceTransformer,
)
from sentence_transformers import ( # type: ignore[import-not-found]
util as util,
)
from sklearn.feature_extraction.text import ( # type: ignore[import-not-found]
TfidfVectorizer as TfidfVectorizer,
)
from sklearn.metrics.pairwise import ( # type: ignore[import-not-found]
cosine_similarity as cosine_similarity,
)
else:
fuzz = LazyAttr("rapidfuzz", "fuzz", "text")
utils = LazyAttr("rapidfuzz", "utils", "text")
distance = LazyAttr("rapidfuzz", "distance", "text")
litellm = LazyImport("litellm", "llm")
util = LazyAttr("sentence_transformers", "util", "text", package_name="sentence-transformers")
TfidfVectorizer = LazyAttr(
"sklearn.feature_extraction.text", "TfidfVectorizer", "text", package_name="scikit-learn"
)
SentenceTransformer = LazyAttr(
"sentence_transformers", "SentenceTransformer", "text", package_name="sentence-transformers"
)
cosine_similarity = LazyAttr(
"sklearn.metrics.pairwise", "cosine_similarity", "text", package_name="scikit-learn"
Comment on lines +5 to +42
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, Brian, for tackling this. importlib.* is a good pattern for this, but I can see a couple of issues here:

  1. Dual maintenance problem: imports must be maintained in two places.

Example:

if TYPE_CHECKING:
    from sklearn.metrics.pairwise import cosine_similarity as cosine_similarity  # Place 1
    from rapidfuzz import fuzz as fuzz  # Place 1 
    ....
else:
    cosine_similarity = LazyAttr("sklearn.metrics.pairwise", "cosine_similarity", "text")  # Place 2 
    fuzz = LazyAttr("rapidfuzz", "fuzz", "text")  # Place 2 
    ...

For instance, if we wanted to add a new sklearn import, we would need to update both blocks. The same applies to any method or API change: it has to be synced in two places. If the two blocks get out of sync, we might encounter runtime errors.
2. Hidden small perf overhead in calls
Every attribute access goes through a getattr call, for example:

for text in large_dataset:
    score = fuzz.ratio(ref, text) # __getattr__overhead * 10,000 calls if we have those many examples in the large dataset

However, the per-call overhead is minimal, not expensive.

Proposed solution: use a pattern similar to what you have now, but pandas-style: ref link
Something like below:

# dreadnode/utils/imports.py
def import_optional_dependency(
    name: str, 
    extra: str = "", 
    package_name: str = None
) -> Any:
    """Import optional dependency with helpful error message."""
    try:
        return importlib.import_module(name)
    except ImportError:
        pkg = package_name or name
        raise ImportError(
            f"Missing dependency '{name}'. "
            f"Install with: pip install {pkg} or dreadnode[{extra}]"
        ) from None

Then in scorers/similarity.py

def similarity_with_rapidfuzz(
    reference: str, 
    method: str = "ratio"
) -> Scorer:
    """RapidFuzz similarity scorer.

    Builds a Scorer named ``rapidfuzz_<method>`` whose evaluate step compares
    ``str(data)`` against ``reference``, using the attribute of
    ``rapidfuzz.fuzz`` named by ``method``.
    """
    
    def evaluate(data: Any) -> Metric:
        # Import exactly when needed - no globals, no dual maintenance
        fuzz = import_optional_dependency("rapidfuzz.fuzz", extra="text", package_name="rapidfuzz")
        
        candidate_text = str(data)
        # Look the scoring function up by name so one scorer covers every
        # method exposed by the fuzz module.
        score = getattr(fuzz, method)(reference, candidate_text)
        # NOTE(review): presumably the fuzz score is on a 0-100 scale and this
        # rescales it to 0-1 - confirm against the rapidfuzz docs.
        return Metric(value=score / 100.0)
    
    return Scorer(evaluate, name=f"rapidfuzz_{method}")

def similarity_with_sentence_transformers(
    reference: str, 
    model_name: str = "all-MiniLM-L6-v2"
) -> Scorer:
    """Sentence Transformers similarity scorer.

    Illustrative sketch: the similarity computation itself is elided below.
    Returns a Scorer named ``sentence_transformers``.
    """
    
    def evaluate(data: Any) -> Metric:
        # Complex package - import both modules cleanly
        st = import_optional_dependency("sentence_transformers", extra="text", package_name="sentence-transformers")
        # NOTE(review): torch is not used in the lines shown here; presumably
        # the elided implementation needs it - confirm.
        torch = import_optional_dependency("torch", extra="text")
        
        # NOTE(review): the model is constructed on every evaluate() call.
        model = st.SentenceTransformer(model_name)
        # ... rest of implementation
        # NOTE(review): similarity_score is not defined in this sketch; it is
        # expected to come from the elided implementation above.
        return Metric(value=similarity_score)
    
    return Scorer(evaluate, name="sentence_transformers")

I think with this we could also minimize the TYPE_CHECKING/else blocks and the perf overhead.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you. Will try it out!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was trying to find an existing tidy example. Good find w/ pandas

)
nltk = LazyImport("nltk", "text")
word_tokenize = LazyAttr("nltk.tokenize", "word_tokenize", "text", package_name="nltk")
sentence_bleu = LazyAttr(
"nltk.translate.bleu_score", "sentence_bleu", "text", package_name="nltk"
)
Loading
Loading