diff --git a/README.md b/README.md
index bac9fce..3a8b379 100644
--- a/README.md
+++ b/README.md
@@ -1,40 +1,72 @@
-## Goals are:
+# clip-eval
 
-- [x] Have a way to load classification datasets from HF. See this [colab](https://colab.research.google.com/drive/1O7PBYHKrk8SELHq40AoH8hehig-WNezS?usp=sharing)
-- [x] Have a way to load clip mod from HF. See this [colab](https://colab.research.google.com/drive/1O7PBYHKrk8SELHq40AoH8hehig-WNezS?usp=sharing)
-- [ ] Find relevant datasets for medical domain
-- [ ] Find relevant "CLIP" models for medical domain
-- [ ] Compute embeddings across datasets and models for medical and store them
-- [ ] Evaluate each model on each dataset based on the `evaluation` module in this repo
+Welcome to `clip-eval`, a repository for evaluating text-to-image models such as CLIP and SigLIP.
 
-- Repeat for geospatial and sports analytics
+Evaluate machine learning models against a benchmark of datasets to assess the performance of the embeddings they generate, and visualize how embeddings change from one model to another on the same dataset.
 
-### Set up the development environment
+## Installation
 
-1. Create the virtual environment, add dev dependencies and set up pre-commit hooks.
+> `clip-eval` requires [Python 3.11](https://www.python.org/downloads/release/python-3115/) and [Poetry](https://python-poetry.org/docs/#installation).
+
+1. Clone the repository:
    ```
-   ./dev-setup.sh
+   git clone https://github.com/encord-team/text-to-image-eval.git
    ```
-2. Add environment variables:
+2. Navigate to the project directory:
+   ```
+   cd text-to-image-eval
+   ```
+3. Install the required dependencies:
+   ```
+   poetry shell
+   poetry install
    ```
-   export CLIP_CACHE_PATH=$PWD/.cache
-   export OUTPUT_PATH=$PWD/output
+4. Add environment variables:
+   ```
+   export CLIP_EVAL_CACHE_PATH=$PWD/.cache
+   export CLIP_EVAL_OUTPUT_PATH=$PWD/output
    export ENCORD_SSH_KEY_PATH=
-   export ENCORD_CACHE_DIR=$PWD/.cache/encord
    ```
 
-### CLI Interface
+## Usage
+
+### Embeddings generation
+
+To build embeddings, run the CLI command `clip-eval build`.
+This command allows you to interactively select the model and dataset combinations on which to build the embeddings.
+
+Alternatively, you can choose known (model, dataset) pairs using the `--model-dataset` option. For example:
+```
+clip-eval build --model-dataset clip/plants
+```
+
+### Model evaluation
 
-Basic CLI interface available with:
+To evaluate models, use the CLI command `clip-eval evaluate`.
+This command enables interactive selection of model and dataset combinations for evaluation.
 
-```shell
-clip-eval [command]
+Alternatively, you can specify known (model, dataset) pairs using the `--model-dataset` option. For example:
+```
+clip-eval evaluate --model-dataset clip/plants
 ```
 
-### [DEPRECATED] Commands I used to run different bits of the code
+### Embeddings animation
+
+To create a 2D animation of the embeddings, use the CLI command `clip-eval animate`.
+This command allows you to visualize the reduction of embeddings from two different models on the same dataset.
+
+The animations will be saved at the location specified by the environment variable `CLIP_EVAL_OUTPUT_PATH`.
+By default, this path corresponds to the repository directory.
 
-0. data models: `PYTHONPATH=$PWD python src/common/data_models.py`
-1. knn: `PYTHONPATH=$PWD python src/evaluation/knn.py`
-2. zero shot: `PYTHONPATH=$PWD python src/evaluation/zero_shot.py`
-3. linear probe: `PYTHONPATH=$PWD python src/evaluation/linear_probe.py`
-4. evaluation: `PYTHONPATH=$PWD python src/evaluation/evaluator.py`
+## Set up the development environment
+
+1. Create the virtual environment, add dev dependencies and set up pre-commit hooks.
+   ```
+   ./dev-setup.sh
+   ```
+2. Add environment variables:
+   ```
+   export CLIP_EVAL_CACHE_PATH=$PWD/.cache
+   export CLIP_EVAL_OUTPUT_PATH=$PWD/output
+   export ENCORD_SSH_KEY_PATH=
+   ```
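
Two commands that the README above does not yet illustrate are `animate` and `list`. Based on the options defined in `clip_eval/cli/main.py` below, possible invocations look like this (the non-default `tsne` reduction is purely illustrative):

```
clip-eval animate --reduction tsne
clip-eval list --all
```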
diff --git a/clip_eval/cli/main.py b/clip_eval/cli/main.py
index 82f3bbd..3369837 100644
--- a/clip_eval/cli/main.py
+++ b/clip_eval/cli/main.py
@@ -3,10 +3,11 @@
 import matplotlib.pyplot as plt
 from typer import Option, Typer
 
-from clip_eval.common.data_models import EmbeddingDefinition, Split
+from clip_eval.dataset import Split
 from clip_eval.utils import read_all_cached_embeddings
 
 from .utils import (
+    parse_raw_embedding_definitions,
     select_existing_embedding_definitions,
     select_from_all_embedding_definitions,
 )
@@ -17,26 +18,30 @@
 @cli.command(
     "build",
     help="""Build embeddings.
-If no arguments are given, you will be prompted to select a combination of dataset and model(s).
+If no arguments are given, you will be prompted to select the model and dataset combinations for generating embeddings.
 You can use [TAB] to select multiple combinations and execute them sequentially.
 """,
 )
 def build_command(
-    model_dataset: Annotated[str, Option(help="model, dataset pair delimited by model/dataset")] = "",
+    model_datasets: Annotated[
+        Optional[list[str]],
+        Option(
+            "--model-dataset",
+            help="Specify a model and dataset combination. Can be used multiple times. "
+            "(model, dataset) pairs must be presented as 'MODEL/DATASET'.",
+        ),
+    ] = None,
     include_existing: Annotated[
         bool,
-        Option(help="Show also options for which the embeddings have been computed already"),
+        Option(help="Show combinations for which the embeddings have already been computed."),
     ] = False,
     by_dataset: Annotated[
         bool,
-        Option(help="Select dataset first, then model. Will only work if `model_dataset` not specified."),
+        Option(help="Select dataset first, then model. Will only work if `--model-dataset` is not specified."),
     ] = False,
 ):
-    if len(model_dataset) > 0:
-        if model_dataset.count("/") != 1:
-            raise ValueError("model dataset must contain only 1 /")
-        model, dataset = model_dataset.split("/")
-        definitions = [EmbeddingDefinition(model=model, dataset=dataset)]
+    if model_datasets:
+        definitions = parse_raw_embedding_definitions(model_datasets)
     else:
         definitions = select_from_all_embedding_definitions(
             include_existing=include_existing,
@@ -60,17 +65,23 @@ def build_command(
 
 @cli.command(
     "evaluate",
-    help="""Evaluate embedding performance.
-For this two work, you should have already run the `build` command for the model/dataset of interest.
+    help="""Evaluate embeddings performance.
+If no arguments are given, you will be prompted to select the model and dataset combinations to evaluate.
+Only (model, dataset) pairs whose embeddings have been built will be available for evaluation.
+You can use [TAB] to select multiple combinations and execute them sequentially.
 """,
 )
 def evaluate_embeddings(
     model_datasets: Annotated[
         Optional[list[str]],
-        Option(help="Specify specific combinations of models and datasets"),
+        Option(
+            "--model-dataset",
+            help="Specify a model and dataset combination. Can be used multiple times. "
+            "(model, dataset) pairs must be presented as 'MODEL/DATASET'.",
+        ),
     ] = None,
-    is_all: Annotated[bool, Option(help="Evaluate all models.")] = False,
-    save: Annotated[bool, Option(help="Save evaluation results to csv")] = False,
+    all_: Annotated[bool, Option("--all", "-a", help="Evaluate all cached (model, dataset) pairs.")] = False,
+    save: Annotated[bool, Option("--save", "-s", help="Save evaluation results to a CSV file.")] = False,
 ):
     from clip_eval.evaluation import (
         I2IRetrievalEvaluator,
@@ -82,24 +93,17 @@ def evaluate_embeddings(
 
     model_datasets = model_datasets or []
 
-    if is_all:
-        defns = read_all_cached_embeddings(as_list=True)
+    if all_:
+        definitions = read_all_cached_embeddings(as_list=True)
     elif len(model_datasets) > 0:
-        # Error could be localised better
-        if not all([model_dataset.count("/") == 1 for model_dataset in model_datasets]):
-            raise ValueError("All model,dataset pairs must be presented as MODEL/DATASET")
-        model_dataset_pairs = [model_dataset.split("/") for model_dataset in model_datasets]
-        defns = [
-            EmbeddingDefinition(model=model_dataset[0], dataset=model_dataset[1])
-            for model_dataset in model_dataset_pairs
-        ]
+        definitions = parse_raw_embedding_definitions(model_datasets)
     else:
-        defns = select_existing_embedding_definitions()
+        definitions = select_existing_embedding_definitions()
 
     models = [ZeroShotClassifier, LinearProbeClassifier, WeightedKNNClassifier, I2IRetrievalEvaluator]
-    performances = run_evaluation(models, defns)
+    performances = run_evaluation(models, definitions)
     if save:
-        export_evaluation_to_csv(defns, performances)
+        export_evaluation_to_csv(performances)
 
 
 @cli.command(
@@ -109,8 +113,8 @@ def evaluate_embeddings(
 """,
 )
 def animate_embeddings(
-    interactive: Annotated[bool, Option(help="Interactive plot instead of animation")] = False,
-    reduction: Annotated[str, Option(help="Reduction type [pca, tsne, umap (default)")] = "umap",
+    interactive: Annotated[bool, Option(help="Interactive plot instead of animation.")] = False,
+    reduction: Annotated[str, Option(help="Reduction type [pca, tsne, umap (default)].")] = "umap",
 ):
     from clip_eval.plotting.animation import build_animation, save_animation_to_file
 
@@ -125,15 +129,15 @@ def animate_embeddings(
 
 @cli.command("list", help="List models and datasets. By default, only cached pairs are listed.")
 def list_models_datasets(
-    all: Annotated[
+    all_: Annotated[
         bool,
-        Option(help="List all models and dataset that are available via the tool."),
+        Option("--all", "-a", help="List all models and datasets that are available via the tool."),
     ] = False,
 ):
     from clip_eval.dataset import DatasetProvider
     from clip_eval.models import ModelProvider
 
-    if all:
+    if all_:
         datasets = DatasetProvider.list_dataset_titles()
         models = ModelProvider.list_model_titles()
         print(f"Available datasets are: {', '.join(datasets)}")
@@ -141,7 +145,7 @@ def list_models_datasets(
         return
 
     defns = read_all_cached_embeddings(as_list=True)
-    print(f"Available model_dataset pairs: {', '.join([str(defn) for defn in defns])}")
+    print(f"Available (model, dataset) pairs: {', '.join([str(defn) for defn in defns])}")
 
 
 if __name__ == "__main__":
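
With the options above, `--model-dataset` can now be passed several times, and `evaluate` gains the short flags `-a`/`--all` and `-s`/`--save`. A sketch of the resulting usage (the `clip/plants` pair comes from the README; `siglip/plants` is a hypothetical second pair):

```
clip-eval evaluate --model-dataset clip/plants --model-dataset siglip/plants --save
clip-eval evaluate --all -s
```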
" + "(model, dataset) pairs must be presented as 'MODEL/DATASET'.", + ), ] = None, - is_all: Annotated[bool, Option(help="Evaluate all models.")] = False, - save: Annotated[bool, Option(help="Save evaluation results to csv")] = False, + all_: Annotated[bool, Option("--all", "-a", help="Evaluate all models.")] = False, + save: Annotated[bool, Option("--save", "-s", help="Save evaluation results to a CSV file.")] = False, ): from clip_eval.evaluation import ( I2IRetrievalEvaluator, @@ -82,24 +93,17 @@ def evaluate_embeddings( model_datasets = model_datasets or [] - if is_all: - defns = read_all_cached_embeddings(as_list=True) + if all_: + definitions = read_all_cached_embeddings(as_list=True) elif len(model_datasets) > 0: - # Error could be localised better - if not all([model_dataset.count("/") == 1 for model_dataset in model_datasets]): - raise ValueError("All model,dataset pairs must be presented as MODEL/DATASET") - model_dataset_pairs = [model_dataset.split("/") for model_dataset in model_datasets] - defns = [ - EmbeddingDefinition(model=model_dataset[0], dataset=model_dataset[1]) - for model_dataset in model_dataset_pairs - ] + definitions = parse_raw_embedding_definitions(model_datasets) else: - defns = select_existing_embedding_definitions() + definitions = select_existing_embedding_definitions() models = [ZeroShotClassifier, LinearProbeClassifier, WeightedKNNClassifier, I2IRetrievalEvaluator] - performances = run_evaluation(models, defns) + performances = run_evaluation(models, definitions) if save: - export_evaluation_to_csv(defns, performances) + export_evaluation_to_csv(performances) @cli.command( @@ -109,8 +113,8 @@ def evaluate_embeddings( """, ) def animate_embeddings( - interactive: Annotated[bool, Option(help="Interactive plot instead of animation")] = False, - reduction: Annotated[str, Option(help="Reduction type [pca, tsne, umap (default)")] = "umap", + interactive: Annotated[bool, Option(help="Interactive plot instead of animation.")] = False, + reduction: Annotated[str, Option(help="Reduction type [pca, tsne, umap (default)].")] = "umap", ): from clip_eval.plotting.animation import build_animation, save_animation_to_file @@ -125,15 +129,15 @@ def animate_embeddings( @cli.command("list", help="List models and datasets. 
diff --git a/clip_eval/dataset/base.py b/clip_eval/dataset/base.py
index 57822b1..ba22b67 100644
--- a/clip_eval/dataset/base.py
+++ b/clip_eval/dataset/base.py
@@ -19,7 +19,7 @@ class DatasetDefinitionSpec(BaseModel):
     dataset_type: str
     module_path: Path
     title: str
-    split: Split = Split.ALL
+    split: Split | None = None
     title_in_source: str | None = None
     cache_dir: Path | None = None
 
diff --git a/clip_eval/evaluation/evaluator.py b/clip_eval/evaluation/evaluator.py
index f501e3f..d23a060 100644
--- a/clip_eval/evaluation/evaluator.py
+++ b/clip_eval/evaluation/evaluator.py
@@ -76,10 +76,7 @@ def run_evaluation(
     return embeddings_performance
 
 
-def export_evaluation_to_csv(
-    embedding_definitions: list[EmbeddingDefinition],
-    embeddings_performance: list[dict[str, float]],
-) -> None:
+def export_evaluation_to_csv(embeddings_performance: dict[EmbeddingDefinition, dict[str, float]]) -> None:
     ts = datetime.now()
     results_file = OUTPUT_PATH.EVALUATIONS / f"eval_{ts.isoformat()}.csv"
     results_file.parent.mkdir(parents=True, exist_ok=True)  # Ensure that parent folder exists
@@ -89,10 +86,11 @@ def export_evaluation_to_csv(
         writer = csv.writer(csvfile)
         writer.writerow(headers)
 
-        for def_, perf in zip(embedding_definitions, embeddings_performance, strict=True):
+        for def_, perf in embeddings_performance.items():
             def_: EmbeddingDefinition
             for classifier_title, accuracy in perf.items():
                 writer.writerow([def_.model, def_.dataset, classifier_title, accuracy])
+    print(f"Evaluation results exported to `{results_file.as_posix()}`")
 
 
 if __name__ == "__main__":
@@ -100,5 +98,5 @@ def export_evaluation_to_csv(
     defs = read_all_cached_embeddings(as_list=True)
     print(defs)
     performances = run_evaluation(models, defs)
-    export_evaluation_to_csv(defs, performances)
+    export_evaluation_to_csv(performances)
     print(performances)
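
The signature change above means `export_evaluation_to_csv` now consumes the mapping returned by `run_evaluation` directly: each `EmbeddingDefinition` maps to a dictionary of per-evaluator accuracies, and each entry becomes one CSV row of `[model, dataset, classifier_title, accuracy]`. A minimal sketch of that shape and of the CSV loop, with stand-in types, hypothetical evaluator titles, and illustrative numbers (the real `headers` value is defined elsewhere in the module):

```python
import csv
import io
from dataclasses import dataclass


@dataclass(frozen=True)
class EmbeddingDefinitionStub:
    """Stand-in for clip_eval's EmbeddingDefinition; frozen so instances can be dict keys."""

    model: str
    dataset: str


# Shape consumed by the new export_evaluation_to_csv: definition -> {evaluator title: accuracy}.
performances = {
    EmbeddingDefinitionStub("clip", "plants"): {"zero_shot": 0.72, "linear_probe": 0.81},
}

buffer = io.StringIO()
writer = csv.writer(buffer)
writer.writerow(["model", "dataset", "classifier", "accuracy"])  # illustrative header names
for definition, scores in performances.items():
    for classifier_title, accuracy in scores.items():
        # Mirrors the row layout written by export_evaluation_to_csv.
        writer.writerow([definition.model, definition.dataset, classifier_title, accuracy])
print(buffer.getvalue())
```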