chore: polish cli commands and descriptions #54

Merged · 4 commits · Apr 16, 2024
82 changes: 57 additions & 25 deletions README.md
@@ -1,40 +1,72 @@
## Goals are:
# clip-eval

- [x] Have a way to load classification datasets from HF. See this [colab](https://colab.research.google.com/drive/1O7PBYHKrk8SELHq40AoH8hehig-WNezS?usp=sharing)
- [x] Have a way to load CLIP models from HF. See this [colab](https://colab.research.google.com/drive/1O7PBYHKrk8SELHq40AoH8hehig-WNezS?usp=sharing)
- [ ] Find relevant datasets for medical domain
- [ ] Find relevant "CLIP" models for medical domain
- [ ] Compute embeddings across datasets and models for medical and store them
- [ ] Evaluate each model on each dataset based on the `evaluation` module in this repo
Welcome to `clip-eval`, a repository for evaluating text-to-image models such as CLIP and SigLIP.

- Repeat for geospatial and sports analytics
Evaluate machine learning models against a benchmark of datasets to assess their performance on the generated embeddings, and visualize changes in embeddings from one model to another within the same dataset.

### Set up the development environment
## Installation

1. Create the virtual environment, add dev dependencies and set up pre-commit hooks.
> `clip-eval` requires [Python 3.11](https://www.python.org/downloads/release/python-3115/) and [Poetry](https://python-poetry.org/docs/#installation).

1. Clone the repository:
```
./dev-setup.sh
git clone https://github.com/encord-team/text-to-image-eval.git
```
2. Add environment variables:
2. Navigate to the project directory:
```
cd text-to-image-eval
```
3. Install the required dependencies:
```
poetry shell
poetry install
```
export CLIP_CACHE_PATH=$PWD/.cache
export OUTPUT_PATH=$PWD/output
4. Add environment variables:
```
export CLIP_EVAL_CACHE_PATH=$PWD/.cache
export CLIP_EVAL_OUTPUT_PATH=$PWD/output
export ENCORD_SSH_KEY_PATH=<path_to_the_encord_ssh_key_file>
export ENCORD_CACHE_DIR=$PWD/.cache/encord
```

### CLI Interface
## Usage

### Embeddings generation

To build embeddings, run the CLI command `clip-eval build`.
This command allows you to interactively select the model and dataset combinations on which to build the embeddings.

Alternatively, you can choose known (model, dataset) pairs using the `--model-dataset` option. For example:
```
clip-eval build --model-dataset clip/plants
```
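Since the `--model-dataset` option can be passed multiple times (per its help text in the CLI changes further down in this diff), several pairs can be built in a single call. A sketch, where the second pair is purely illustrative:
```
clip-eval build --model-dataset clip/plants --model-dataset siglip/plants
```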

### Model evaluation

Basic CLI interface available with:
To evaluate models, use the CLI command `clip-eval evaluate`.
This command enables interactive selection of model and dataset combinations for evaluation.

```shell
clip-eval [command]
Alternatively, you can specify known (model, dataset) pairs using the `--model-dataset` option. For example:
```
clip-eval evaluate --model-dataset clip/plants
```
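The command also accepts `--all`/`-a` to evaluate every cached (model, dataset) pair, and `--save`/`-s` to write the results to a CSV file (see the CLI changes below). A sketch combining both:
```
clip-eval evaluate --all --save
```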

### [DEPRECATED] Commands I used to run different bits of the code
### Embeddings animation

To create a 2D animation of the embeddings, use the CLI command `clip-eval animate`.
This command allows you to visualise the reduction of embeddings from two different models on the same dataset.

The animations will be saved at the location specified by the environment variable `CLIP_EVAL_OUTPUT_PATH`.
By default, this path corresponds to the repository directory.
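A sketch of a possible invocation, using the `--reduction` option from the `animate` command's CLI definition below (pass `--interactive` to get an interactive plot instead of a saved animation):
```
clip-eval animate --reduction pca
```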

0. data models: `PYTHONPATH=$PWD python src/common/data_models.py`
1. knn: `PYTHONPATH=$PWD python src/evaluation/knn.py`
2. zero shot: `PYTHONPATH=$PWD python src/evaluation/zero_shot.py`
3. linear probe: `PYTHONPATH=$PWD python src/evaluation/linear_probe.py`
4. evaluation: `PYTHONPATH=$PWD python src/evaluation/evaluator.py`
## Set up the development environment

1. Create the virtual environment, add dev dependencies and set up pre-commit hooks.
```
./dev-setup.sh
```
2. Add environment variables:
```
export CLIP_EVAL_CACHE_PATH=$PWD/.cache
export CLIP_EVAL_OUTPUT_PATH=$PWD/output
export ENCORD_SSH_KEY_PATH=<path_to_the_encord_ssh_key_file>
```
72 changes: 38 additions & 34 deletions clip_eval/cli/main.py
@@ -3,10 +3,11 @@
import matplotlib.pyplot as plt
from typer import Option, Typer

from clip_eval.common.data_models import EmbeddingDefinition, Split
from clip_eval.dataset import Split
from clip_eval.utils import read_all_cached_embeddings

from .utils import (
parse_raw_embedding_definitions,
select_existing_embedding_definitions,
select_from_all_embedding_definitions,
)
@@ -17,26 +18,30 @@
@cli.command(
"build",
help="""Build embeddings.
If no arguments are given, you will be prompted to select a combination of dataset and model(s).
If no arguments are given, you will be prompted to select the model and dataset combinations for generating embeddings.
You can use [TAB] to select multiple combinations and execute them sequentially.
""",
)
def build_command(
model_dataset: Annotated[str, Option(help="model, dataset pair delimited by model/dataset")] = "",
model_datasets: Annotated[
Optional[list[str]],
Option(
"--model-dataset",
help="Specify a model and dataset combination. Can be used multiple times. "
"(model, dataset) pairs must be presented as 'MODEL/DATASET'.",
),
] = None,
include_existing: Annotated[
bool,
Option(help="Show also options for which the embeddings have been computed already"),
Option(help="Show combinations for which the embeddings have already been computed."),
] = False,
by_dataset: Annotated[
bool,
Option(help="Select dataset first, then model. Will only work if `model_dataset` not specified."),
Option(help="Select dataset first, then model. Will only work if `model_dataset` is not specified."),
] = False,
):
if len(model_dataset) > 0:
if model_dataset.count("/") != 1:
raise ValueError("model dataset must contain only 1 /")
model, dataset = model_dataset.split("/")
definitions = [EmbeddingDefinition(model=model, dataset=dataset)]
if model_datasets:
definitions = parse_raw_embedding_definitions(model_datasets)
else:
definitions = select_from_all_embedding_definitions(
include_existing=include_existing,
@@ -60,17 +65,23 @@ def build_command(

@cli.command(
"evaluate",
help="""Evaluate embedding performance.
For this to work, you should have already run the `build` command for the model/dataset of interest.
help="""Evaluate embeddings performance.
If no arguments are given, you will be prompted to select the model and dataset combinations to evaluate.
Only (model, dataset) pairs whose embeddings have been built will be available for evaluation.
You can use [TAB] to select multiple combinations and execute them sequentially.
""",
)
def evaluate_embeddings(
model_datasets: Annotated[
Optional[list[str]],
Option(help="Specify specific combinations of models and datasets"),
Option(
"--model-dataset",
help="Specify a model and dataset combination. Can be used multiple times. "
"(model, dataset) pairs must be presented as 'MODEL/DATASET'.",
),
] = None,
is_all: Annotated[bool, Option(help="Evaluate all models.")] = False,
save: Annotated[bool, Option(help="Save evaluation results to csv")] = False,
all_: Annotated[bool, Option("--all", "-a", help="Evaluate all models.")] = False,
save: Annotated[bool, Option("--save", "-s", help="Save evaluation results to a CSV file.")] = False,
):
from clip_eval.evaluation import (
I2IRetrievalEvaluator,
@@ -82,24 +93,17 @@ def evaluate_embeddings(

model_datasets = model_datasets or []

if is_all:
defns = read_all_cached_embeddings(as_list=True)
if all_:
definitions = read_all_cached_embeddings(as_list=True)
elif len(model_datasets) > 0:
# Error could be localised better
if not all([model_dataset.count("/") == 1 for model_dataset in model_datasets]):
raise ValueError("All model,dataset pairs must be presented as MODEL/DATASET")
model_dataset_pairs = [model_dataset.split("/") for model_dataset in model_datasets]
defns = [
EmbeddingDefinition(model=model_dataset[0], dataset=model_dataset[1])
for model_dataset in model_dataset_pairs
]
definitions = parse_raw_embedding_definitions(model_datasets)
else:
defns = select_existing_embedding_definitions()
definitions = select_existing_embedding_definitions()

models = [ZeroShotClassifier, LinearProbeClassifier, WeightedKNNClassifier, I2IRetrievalEvaluator]
performances = run_evaluation(models, defns)
performances = run_evaluation(models, definitions)
if save:
export_evaluation_to_csv(defns, performances)
export_evaluation_to_csv(performances)


@cli.command(
@@ -109,8 +113,8 @@ def evaluate_embeddings(
""",
)
def animate_embeddings(
interactive: Annotated[bool, Option(help="Interactive plot instead of animation")] = False,
reduction: Annotated[str, Option(help="Reduction type [pca, tsne, umap (default)")] = "umap",
interactive: Annotated[bool, Option(help="Interactive plot instead of animation.")] = False,
reduction: Annotated[str, Option(help="Reduction type [pca, tsne, umap (default)].")] = "umap",
):
from clip_eval.plotting.animation import build_animation, save_animation_to_file

@@ -125,23 +129,23 @@

@cli.command("list", help="List models and datasets. By default, only cached pairs are listed.")
def list_models_datasets(
all: Annotated[
all_: Annotated[
bool,
Option(help="List all models and dataset that are available via the tool."),
Option("--all", "-a", help="List all models and datasets that are available via the tool."),
] = False,
):
from clip_eval.dataset import DatasetProvider
from clip_eval.models import ModelProvider

if all:
if all_:
datasets = DatasetProvider.list_dataset_titles()
models = ModelProvider.list_model_titles()
print(f"Available datasets are: {', '.join(datasets)}")
print(f"Available models are: {', '.join(models)}")
return

defns = read_all_cached_embeddings(as_list=True)
print(f"Available model_dataset pairs: {', '.join([str(defn) for defn in defns])}")
print(f"Available model_datasets pairs: {', '.join([str(defn) for defn in defns])}")


if __name__ == "__main__":
9 changes: 9 additions & 0 deletions clip_eval/cli/utils.py
@@ -59,6 +59,15 @@ def _by_dataset(defs: list[EmbeddingDefinition] | dict[str, list[EmbeddingDefini
return definitions


def parse_raw_embedding_definitions(raw_embedding_definitions: list[str]) -> list[EmbeddingDefinition]:
if not all([model_dataset.count("/") == 1 for model_dataset in raw_embedding_definitions]):
raise ValueError("All (model, dataset) pairs must be presented as MODEL/DATASET")
model_dataset_pairs = [model_dataset.split("/") for model_dataset in raw_embedding_definitions]
return [
EmbeddingDefinition(model=model_dataset[0], dataset=model_dataset[1]) for model_dataset in model_dataset_pairs
]
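# Illustrative behaviour of the helper above (hypothetical call, not part of this diff):
#   parse_raw_embedding_definitions(["clip/plants", "siglip/plants"])
#   -> [EmbeddingDefinition(model="clip", dataset="plants"),
#       EmbeddingDefinition(model="siglip", dataset="plants")]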


def select_existing_embedding_definitions(
by_dataset: bool = False,
count: int | None = None,
2 changes: 1 addition & 1 deletion clip_eval/dataset/base.py
@@ -19,7 +19,7 @@ class DatasetDefinitionSpec(BaseModel):
dataset_type: str
module_path: Path
title: str
split: Split = Split.ALL
split: Split | None = None
title_in_source: str | None = None
cache_dir: Path | None = None

10 changes: 4 additions & 6 deletions clip_eval/evaluation/evaluator.py
@@ -76,10 +76,7 @@ def run_evaluation(
return embeddings_performance


def export_evaluation_to_csv(
embedding_definitions: list[EmbeddingDefinition],
embeddings_performance: list[dict[str, float]],
) -> None:
def export_evaluation_to_csv(embeddings_performance: dict[EmbeddingDefinition, dict[str, float]]) -> None:
ts = datetime.now()
results_file = OUTPUT_PATH.EVALUATIONS / f"eval_{ts.isoformat()}.csv"
results_file.parent.mkdir(parents=True, exist_ok=True) # Ensure that parent folder exists
@@ -89,16 +86,17 @@ def export_evaluation_to_csv(
writer = csv.writer(csvfile)
writer.writerow(headers)

for def_, perf in zip(embedding_definitions, embeddings_performance, strict=True):
for def_, perf in embeddings_performance.items():
def_: EmbeddingDefinition
for classifier_title, accuracy in perf.items():
writer.writerow([def_.model, def_.dataset, classifier_title, accuracy])
print(f"Evaluation results exported to `{results_file.as_posix()}`")


if __name__ == "__main__":
models = [ZeroShotClassifier, LinearProbeClassifier, WeightedKNNClassifier, I2IRetrievalEvaluator]
defs = read_all_cached_embeddings(as_list=True)
print(defs)
performances = run_evaluation(models, defs)
export_evaluation_to_csv(defs, performances)
export_evaluation_to_csv(performances)
print(performances)