Added utility function for downloading and loading models results #995

Merged: 2 commits, Jun 28, 2024
2 changes: 2 additions & 0 deletions mteb/__init__.py
@@ -9,6 +9,7 @@
    MTEB_RETRIEVAL_WITH_INSTRUCTIONS,
)
from mteb.evaluation import *
from mteb.load_results import load_results
from mteb.models import get_model, get_model_meta
from mteb.overview import TASKS_REGISTRY, get_task, get_tasks

@@ -25,4 +26,5 @@
"get_task",
"get_model",
"get_model_meta",
"load_results",
]
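
With this change, load_results is re-exported from the top-level package, so it can be used alongside get_model and get_tasks. A minimal usage sketch of the new public entry point (behaviour as documented in mteb/load_results/load_results.py further down this diff):

import mteb

# The first call clones the results repository into the local cache (requires git);
# later calls reuse and, by default, update the cached clone.
results = mteb.load_results()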
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTask.py
@@ -16,7 +16,7 @@
from mteb.encoder_interface import Encoder, EncoderWithQueryCorpusEncode
from mteb.languages import LanguageScripts

from ..MTEBResults import HFSubset, ScoresDict
from ..load_results.mteb_results import HFSubset, ScoresDict

logger = logging.getLogger(__name__)

2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskBitextMining.py
@@ -5,7 +5,7 @@
from datasets import Dataset

from ..evaluation.evaluators import BitextMiningEvaluator
from ..MTEBResults import HFSubset, ScoresDict
from ..load_results.mteb_results import HFSubset, ScoresDict
from .AbsTask import AbsTask

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskClassification.py
@@ -11,7 +11,7 @@
    kNNClassificationEvaluatorPytorch,
    logRegClassificationEvaluator,
)
from ..MTEBResults import HFSubset, ScoresDict
from ..load_results.mteb_results import HFSubset, ScoresDict
from .AbsTask import AbsTask

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskClustering.py
@@ -7,7 +7,7 @@
from datasets import Dataset

from mteb.encoder_interface import Encoder, EncoderWithQueryCorpusEncode
from mteb.MTEBResults import ScoresDict
from mteb.load_results.mteb_results import ScoresDict

from ..evaluation.evaluators import ClusteringEvaluator
from .AbsTask import AbsTask
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskClusteringFast.py
@@ -13,7 +13,7 @@
from sklearn.metrics.cluster import v_measure_score

from ..evaluation.evaluators.model_encode import model_encode
from ..MTEBResults import HFSubset
from ..load_results.mteb_results import HFSubset
from .AbsTask import AbsTask

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskMultilabelClassification.py
@@ -13,7 +13,7 @@
from sklearn.preprocessing import MultiLabelBinarizer

from ..evaluation.evaluators.model_encode import model_encode
from ..MTEBResults import HFSubset, ScoresDict
from ..load_results.mteb_results import HFSubset, ScoresDict
from .AbsTask import AbsTask

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskPairClassification.py
@@ -7,7 +7,7 @@

from ..encoder_interface import Encoder, EncoderWithQueryCorpusEncode
from ..evaluation.evaluators import PairClassificationEvaluator
from ..MTEBResults import ScoresDict
from ..load_results.mteb_results import ScoresDict
from .AbsTask import AbsTask

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskReranking.py
@@ -5,7 +5,7 @@
from datasets import Dataset

from mteb.encoder_interface import Encoder, EncoderWithQueryCorpusEncode
from mteb.MTEBResults import ScoresDict
from mteb.load_results.mteb_results import ScoresDict

from ..evaluation.evaluators import RerankingEvaluator
from .AbsTask import AbsTask
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskRetrieval.py
@@ -12,7 +12,7 @@
from datasets import Features, Value, load_dataset

from ..evaluation.evaluators import RetrievalEvaluator
from ..MTEBResults import ScoresDict
from ..load_results.mteb_results import ScoresDict
from .AbsTask import AbsTask

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskSTS.py
@@ -3,7 +3,7 @@
import logging

from ..evaluation.evaluators import STSEvaluator
from ..MTEBResults import ScoresDict
from ..load_results.mteb_results import ScoresDict
from .AbsTask import AbsTask

logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskSpeedTask.py
@@ -8,7 +8,7 @@
import numpy as np

from mteb.encoder_interface import Encoder, EncoderWithQueryCorpusEncode
from mteb.MTEBResults import ScoresDict
from mteb.load_results.mteb_results import ScoresDict

from .AbsTask import AbsTask

2 changes: 1 addition & 1 deletion mteb/abstasks/AbsTaskSummarization.py
@@ -4,7 +4,7 @@

import numpy as np

from mteb.MTEBResults import ScoresDict
from mteb.load_results.mteb_results import ScoresDict

from ..evaluation.evaluators import SummarizationEvaluator
from .AbsTask import AbsTask
2 changes: 1 addition & 1 deletion mteb/cli.py
@@ -75,7 +75,7 @@
import yaml

import mteb
from mteb.MTEBResults import MTEBResults
from mteb.load_results.mteb_results import MTEBResults

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)
2 changes: 1 addition & 1 deletion mteb/evaluation/MTEB.py
@@ -18,7 +18,7 @@

from ..abstasks import *
from ..abstasks import AbsTask
from ..MTEBResults import MTEBResults
from ..load_results.mteb_results import MTEBResults
from ..tasks import *
from . import LangMapping

4 changes: 4 additions & 0 deletions mteb/load_results/__init__.py
@@ -0,0 +1,4 @@
from .load_results import load_results
from .mteb_results import MTEBResults

__all__ = ["load_results", "MTEBResults"]
135 changes: 135 additions & 0 deletions mteb/load_results/load_results.py
@@ -0,0 +1,135 @@
from __future__ import annotations

import json
import logging
import os
import subprocess
from collections import defaultdict
from pathlib import Path

import mteb

logger = logging.getLogger(__name__)
MODEL_NAME = str
REVISION = str


def download_of_results(
    results_repo: str, cache_directory: Path | None = None, download_latest: bool = True
) -> Path:
    """Downloads the latest version of the results repository from GitHub to a local cache directory. Requires git to be installed.

    Args:
        results_repo: The URL of the results repository on GitHub.
        cache_directory: The directory where the repository should be cached. If None, it will use the MTEB_CACHE environment variable or "~/.cache/mteb" by default.
        download_latest: If True, it will download the latest version of the repository; otherwise it will use the existing local copy without updating it.

    Returns:
        The path to the local cache directory.
    """
    default_cache_directory = Path.home() / ".cache" / "mteb"

    if cache_directory is None:
        _cache_directory = os.environ.get("MTEB_CACHE", None)
        cache_directory = (
            Path(_cache_directory) if _cache_directory else default_cache_directory
        )

    if not cache_directory.exists():
        cache_directory.mkdir(parents=True)

    # if a "results" folder already exists, update it
    results_directory = cache_directory / "results"
    if results_directory.exists():
        if download_latest:
            logger.info(
                f"Results repository already exists in {results_directory}, updating it using git pull"
            )
            subprocess.run(["git", "pull"], cwd=results_directory)
        else:
            logger.info(
                f"Results repository already exists in {results_directory}, skipping update, set download_latest=True to update it"
            )
    else:
        logger.info(
            f"No results repository found in {results_directory}, cloning it from {results_repo}"
        )
        subprocess.run(["git", "clone", results_repo], cwd=cache_directory)

    return results_directory


def _model_name_and_revision(revision_path: Path) -> tuple[MODEL_NAME, REVISION]:
    model_meta = revision_path / "model_meta.json"
    model_path = revision_path.parent
    if not model_meta.exists():
        logger.warning(
            f"model_meta.json not found in {revision_path}, extracting model_name and revision from the path"
        )
        model_name, revision = model_path.name, revision_path.name
    else:
        with model_meta.open("r") as f:
            model_meta_json = json.load(f)
            model_name = model_meta_json["name"]
            revision = model_meta_json["revision"]

    return model_name, revision


def load_results(
    results_repo: str = "https://github.com/embeddings-benchmark/results",
    download_latest: bool = True,
) -> dict[MODEL_NAME, dict[REVISION, list[mteb.MTEBResults]]]:
    """Loads the results from the latest version of the results repository. The results are cached locally in the MTEB_CACHE directory.
    This directory can be set using the MTEB_CACHE environment variable or defaults to "~/.cache/mteb".

    Args:
        results_repo: The URL of the results repository on GitHub. Defaults to "https://github.com/embeddings-benchmark/results".
        download_latest: If True, it will update the existing version of the results cache. Defaults to True.

    Returns:
        A dictionary where the keys are the model names and the values are dictionaries where the keys are the revisions and the values are lists of MTEBResults objects.

    Example:
        >>> results = load_results()
        >>> results
        {'mixedbread-ai/mxbai-embed-large-v1':
            {'990580e27d329c7408b3741ecff85876e128e203': [
                MTEBResults(task_name=TwentyNewsgroupsClustering.v2, scores=...),
                MTEBResults(task_name=MedrxivClusteringP2P, scores=...),
                MTEBResults(task_name=StackExchangeClustering, scores=...),
                MTEBResults(task_name=BiorxivClusteringP2P.v2, scores=...),
                MTEBResults(task_name=MedrxivClusteringS2S.v2, scores=...),
                MTEBResults(task_name=MedrxivClusteringS2S, scores=...),
                ...
            ]},
        'intfloat/multilingual-e5-small':
            {'e4ce9877abf3edfe10b0d82785e83bdcb973e22e': [
                MTEBResults(task_name=IndicGenBenchFloresBitextMining, scores=...),
                MTEBResults(task_name=PpcPC, scores=...),
                MTEBResults(task_name=TwentyNewsgroupsClustering.v2, scores=...),
                ...
            ]},
        ...
    """
    results_directory = download_of_results(
        results_repo, download_latest=download_latest
    )
    models = [p for p in results_directory.glob("*") if p.is_dir() and p.stem != ".git"]

    results = defaultdict(dict)

    for model in models:
        model_revisions = model.glob("*")

        for revision_path in model_revisions:
            model_name, revision = _model_name_and_revision(revision_path)

            task_json_files = [
                f for f in revision_path.glob("*.json") if "model_meta.json" != f.name
            ]
            results[model_name][revision] = [
                mteb.MTEBResults.from_disk(f) for f in task_json_files
            ]

    return dict(results)
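
The returned mapping nests model name -> revision -> list of MTEBResults. A short sketch of how it can be consumed, assuming only that nested structure and the task_name attribute visible in the repr above:

import mteb

# Reuse the cached clone if it already exists; it is cloned on the first call regardless.
results = mteb.load_results(download_latest=False)

for model_name, revisions in results.items():
    for revision, task_results in revisions.items():
        task_names = sorted(str(r.task_name) for r in task_results)
        print(f"{model_name} @ {revision}: {len(task_names)} task results")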
File renamed without changes.
2 changes: 1 addition & 1 deletion mteb/tasks/Reranking/multilingual/MIRACLReranking.py
@@ -8,7 +8,7 @@
from mteb.abstasks.TaskMetadata import TaskMetadata
from mteb.encoder_interface import Encoder, EncoderWithQueryCorpusEncode
from mteb.evaluation.evaluators import RerankingEvaluator
from mteb.MTEBResults import ScoresDict
from mteb.load_results.mteb_results import ScoresDict

from ....abstasks import MultilingualTask
from ....abstasks.AbsTaskReranking import AbsTaskReranking
25 changes: 25 additions & 0 deletions tests/test_load_results/test_mteb_load_results.py
@@ -0,0 +1,25 @@
import os
from pathlib import Path

import mteb


def test_mteb_load_results():
    tests_path = Path(__file__).parent.parent

    os.environ["MTEB_CACHE"] = str(tests_path)

    results = mteb.load_results(download_latest=False)

    assert isinstance(results, dict)
    for model in results:
        assert isinstance(results[model], dict)
        for revision in results[model]:
            assert isinstance(results[model][revision], list)
            for result in results[model][revision]:
                assert isinstance(result, mteb.MTEBResults)

    known_model = "sentence-transformers/average_word_embeddings_levy_dependency"
    known_revision = "6d9c09a789ad5dd126b476323fccfeeafcd90509"
    assert known_model in results
    assert known_revision in results[known_model]
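
The test points MTEB_CACHE at the tests/ directory, so it relies on a checked-in tests/results/ tree that mirrors the layout the loader walks: one directory per model, one per revision, per-task JSON files, and an optional model_meta.json. A small illustrative sketch of that assumption (the paths here follow the loader's glob patterns and are not part of this PR):

from pathlib import Path

# Print the fixtures the loader would pick up: results/<model_dir>/<revision>/<task>.json
results_dir = Path(__file__).parent.parent / "results"
for task_file in sorted(results_dir.glob("*/*/*.json")):
    if task_file.name == "model_meta.json":
        continue
    model_dir, revision_dir = task_file.parts[-3:-1]
    print(model_dir, revision_dir, task_file.stem)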
@@ -9,7 +9,7 @@

import mteb
from mteb import AbsTask
from mteb.MTEBResults import MTEBResults
from mteb.load_results.mteb_results import MTEBResults

tests_folder = Path(__file__).parent
