From 7c9e78be59c63b8640f01cfdb90d51326aa57ee7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eloy=20P=C3=A9rez=20Torres?= <99720527+eloy-encord@users.noreply.github.com> Date: Tue, 21 Mar 2023 15:53:42 +0000 Subject: [PATCH] feat: add acquisition functions (#217) --- docs/docs/active-learning/_category_.json | 5 + .../active-learning/acquisition-functions.md | 38 + .../active-learning-workflow.md | 49 + docs/docs/active-learning/index.mdx | 29 + .../active-learning/active-learning-cycle.svg | 1292 +++++++++++++++++ examples/active learning/.gitignore | 2 + .../active-learning-in-mnist.ipynb | 275 ++++ examples/active learning/example_config.yaml | 8 + .../lib/metrics/acquisition_functions.py | 242 +++ .../lib/metrics/heuristic/random.py | 2 +- 10 files changed, 1941 insertions(+), 1 deletion(-) create mode 100644 docs/docs/active-learning/_category_.json create mode 100644 docs/docs/active-learning/acquisition-functions.md create mode 100644 docs/docs/active-learning/active-learning-workflow.md create mode 100644 docs/docs/active-learning/index.mdx create mode 100644 docs/docs/images/active-learning/active-learning-cycle.svg create mode 100644 examples/active learning/.gitignore create mode 100644 examples/active learning/active-learning-in-mnist.ipynb create mode 100644 examples/active learning/example_config.yaml create mode 100644 src/encord_active/lib/metrics/acquisition_functions.py diff --git a/docs/docs/active-learning/_category_.json b/docs/docs/active-learning/_category_.json new file mode 100644 index 000000000..3806626e7 --- /dev/null +++ b/docs/docs/active-learning/_category_.json @@ -0,0 +1,5 @@ +{ + "label": "Active Learning", + "position": 7, + "collapsible": true +} diff --git a/docs/docs/active-learning/acquisition-functions.md b/docs/docs/active-learning/acquisition-functions.md new file mode 100644 index 000000000..1f977bf00 --- /dev/null +++ b/docs/docs/active-learning/acquisition-functions.md @@ -0,0 +1,38 @@ +# Acquisition Functions + +We want you 
to select the data samples that will be the most informative to your model, so a natural approach would be to score each sample based on its predicted usefulness for training. +Since labeling samples is usually done in batches, you could take the top _k_ scoring samples for annotation. +This type of function, which takes an unlabeled data sample and outputs its score, is called an _acquisition function_. + +## Uncertainty-based acquisition functions + +In **Encord Active**, we employ the _uncertainty sampling_ strategy where we score data samples based on the uncertainty of the model predictions. +The assumption is that samples the model is less confident about are likely to be more informative than samples for which the model is very confident about the label. + +We include the following uncertainty-based acquisition functions: +* Least Confidence $U(x) = 1 - P_\theta(\hat{y}|x)$, where $\hat{y} = \underset{y \in \mathcal{Y}}{\arg\max} P_\theta(y|x)$ +* Margin $U(x) = P_\theta(\hat{y_1}|x) - P_\theta(\hat{y_2}|x)$, where $\hat{y_1}$ and $\hat{y_2}$ are the first and second highest-predicted labels (a lower margin means higher uncertainty) +* Variance $U(x) = Var(P_\theta(y|x)) = \frac{1}{|\mathcal{Y}|} \underset{y \in \mathcal{Y}}{\sum} (P_\theta(y|x) - \mu)^2$, where $\mu = \frac{1}{|\mathcal{Y}|} \underset{y \in \mathcal{Y}}{\sum} P_\theta(y|x)$ (a lower variance means higher uncertainty) + +* Entropy $U(x) = \mathcal{H}(P_\theta(y|x)) = -\underset{y \in \mathcal{Y}}{\sum} P_\theta(y|x) \log P_\theta(y|x)$ + +:::caution +In the following scenarios, uncertainty-based acquisition functions must be used with extra care: +* Softmax outputs from deep networks are often not calibrated and tend to be quite overconfident. +* For convolutional neural networks, small, seemingly meaningless perturbations in the input space can completely change predictions. +::: + + +## Which acquisition function should I use? + +_“Ok, I have this list of acquisition functions now, but which one is the best? 
How do I choose?”_ + +This isn’t an easy question to answer and heavily depends on your problem, your data, your model, your labeling budget, your goals, etc. +This choice can be crucial to your results and comparing multiple acquisition functions during the active learning process is not always feasible. + +This isn’t a question for which we can just give you a good answer. +Simple uncertainty measures like least confident score, margin score and entropy make good first considerations. + +:::tip +If you’d like to talk to an expert on the topic, the Encord ML team can be found in the #general channel in our Encord Active [Slack workspace](https://join.slack.com/t/encordactive/shared_invite/zt-1hc2vqur9-Fzj1EEAHoqu91sZ0CX0A7Q). +::: diff --git a/docs/docs/active-learning/active-learning-workflow.md b/docs/docs/active-learning/active-learning-workflow.md new file mode 100644 index 000000000..c3ffd8601 --- /dev/null +++ b/docs/docs/active-learning/active-learning-workflow.md @@ -0,0 +1,49 @@ +# Getting Started + +To get started with using Encord Active for active learning, you should choose: +1. an Encord Active project, +2. a machine learning model and +3. an acquisition function. + +Also, you need to take into account some basics on **dataset initialization** and **model selection** while you make your choices. +If you already have these principles covered, you can directly advance to #todo. + + +## Dataset initialization + +In the active learning paradigm your model selects examples to be labeled, however, to make these selections you need a model from which you can get useful representations or uncertainty metrics - a model that already “knows” something about the data. + +This is typically accomplished by training an initial model on a random subset of the training data. You would want to use just enough data to get a model that can make the acquisition function useful to kickstart the active learning process. 
+ +Also, **transfer learning** with pre-trained models can further reduce the required size of the seed dataset and accelerate the whole process. + +:::tip +We recommend that initially you separate (not literally) your project data into training, test and validation sets as it’s important to note that the test and validation datasets still need to be selected randomly and annotated in order to have unbiased performance estimates. +::: + + +## Model selection + +Selecting a model for active learning is not a straightforward task. + +Often this is done primarily with domain knowledge rather than validating models with data. +For example, searching over architectures and hyperparameters using the initial seed training set. +However, models that perform better in this limited data setting are not likely to be the best performing once you’ve labeled 10x as many examples. +You should avoid using those models to select your data. + +Instead, you should select data that optimizes the performance of your final model. +So you want to use the type of model that you expect to perform best on your task in general. + + +## Acquisition function selection + + + + +## Plug the model into an acquisition metric + + +## What's next? + + +talk about stopping criterion \ No newline at end of file diff --git a/docs/docs/active-learning/index.mdx b/docs/docs/active-learning/index.mdx new file mode 100644 index 000000000..0111902d9 --- /dev/null +++ b/docs/docs/active-learning/index.mdx @@ -0,0 +1,29 @@ +# Active Learning + +The annotation process can sometimes be extensively time-consuming and expensive. +Images and videos can often be scraped or even taken automatically, however labeling for tasks like segmentation and motion detection is laborious. +Some domains, such as medical imaging, require domain knowledge from experts with limited accessibility. 
+ +When the unlabeled data is abundant, wouldn’t it be nice if you could pick out the 5% of samples most useful to your model, rather than labeling large swathes of redundant data points? +This is the idea behind active learning. + +**Encord Active** provides you with the tools to take advantage of the active learning method, and it's integrated with **Encord Annotate** to deliver the best annotation experience. + +If you are already familiar with the active learning foundation, continue your read with an exploration of **Encord Active**'s acquisition functions and common workflows. + +import DocCardList from "@theme/DocCardList"; + + + +## What is active learning? + +Active learning is an iterative process where a [machine learning model](https://encord.com/blog/introduction-to-building-your-first-machine-learning) is used to select the best examples to be labeled next. +After annotation, the model is retrained on the new, larger dataset, then selects more data to be labeled until reaching a stopping criterion. +This process is illustrated in the figure below. + +![active-learning-cycle.svg](../images/active-learning/active-learning-cycle.svg) + + +Check out our [practical guide to active learning for computer vision](https://encord.com/blog/a-practical-guide-to-active-learning-for-computer-vision/) to learn more about active learning, its tradeoffs, alternatives and a comprehensive explanation on active learning pipelines. 
+ + diff --git a/docs/docs/images/active-learning/active-learning-cycle.svg b/docs/docs/images/active-learning/active-learning-cycle.svg new file mode 100644 index 000000000..b690bea96 --- /dev/null +++ b/docs/docs/images/active-learning/active-learning-cycle.svg @@ -0,0 +1,1292 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/examples/active learning/.gitignore b/examples/active learning/.gitignore new file mode 100644 index 000000000..568660e57 --- /dev/null +++ b/examples/active learning/.gitignore @@ -0,0 +1,2 @@ +# workflow example +config.yaml diff --git a/examples/active learning/active-learning-in-mnist.ipynb b/examples/active learning/active-learning-in-mnist.ipynb new file mode 100644 index 000000000..abff2a7d5 --- /dev/null +++ b/examples/active learning/active-learning-in-mnist.ipynb @@ -0,0 +1,275 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from collections import defaultdict\n", + "from functools import partialmethod\n", + "from pathlib import Path\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import yaml\n", + "from loguru import logger\n", + "from PIL import Image\n", + "from sklearn.linear_model import LogisticRegression\n", + "from 
sklearn.metrics import accuracy_score\n", + "from sklearn.utils import shuffle\n", + "from tqdm.auto import tqdm\n", + "\n", + "from encord_active.lib.common.iterator import DatasetIterator\n", + "from encord_active.lib.metrics.execute import execute_metrics\n", + "from encord_active.lib.project.project_file_structure import ProjectFileStructure\n", + "\n", + "# silence logger\n", + "logger.remove()\n", + "tqdm.__init__ = partialmethod(tqdm.__init__, disable=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Utility functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_data_hashes_from_project(project_fs: ProjectFileStructure, subset_size=None):\n", + " iterator = DatasetIterator(project_fs.project_dir, subset_size)\n", + " data_hashes = [(iterator.label_hash, iterator.du_hash) for data_unit, img_pth in iterator.iterate()]\n", + " return data_hashes\n", + "\n", + "def get_data_from_data_hashes(project_fs: ProjectFileStructure, data_hashes: list[tuple[str, str]]):\n", + " image_arrays, class_labels = zip(*(get_data_sample(project_fs, data_hash) for data_hash in data_hashes))\n", + " return list(image_arrays), list(class_labels)\n", + "\n", + "def get_data_sample(project_fs: ProjectFileStructure, data_hash: tuple[str, str]):\n", + " label_hash, du_hash = data_hash\n", + " lr_struct = project_fs.label_row_structure(label_hash)\n", + " \n", + " # get classification label\n", + " label_row = json.loads(lr_struct.label_row_file.read_text())\n", + " class_label = get_classification_label(label_row, du_hash, class_name=\"digit\")\n", + " \n", + " # get image\n", + " image_path = lr_struct.images_dir / f\"{du_hash}.{label_row['data_units'][du_hash]['data_type'].split('/')[-1]}\"\n", + " image_array = np.asarray(Image.open(image_path)).flatten()\n", + " \n", + " return image_array, class_label\n", + "\n", + "def get_classification_label(label_row, du_hash: str, 
class_name: str):\n", + " data_unit = label_row[\"data_units\"][du_hash]\n", + " filtered_class = [_class for _class in data_unit[\"labels\"][\"classifications\"] if _class[\"name\"] == class_name]\n", + " if len(filtered_class) == 0:\n", + " return None\n", + " class_hash = filtered_class[0][\"classificationHash\"]\n", + " class_label = label_row[\"classification_answers\"][class_hash][\"classifications\"][0][\"answers\"]\n", + " return class_label\n", + "\n", + "def train_model(X_train, y_train, model=None):\n", + " # use logistic regression model as a dummy model example\n", + " if model is None:\n", + " model = LogisticRegression()\n", + " model.fit(X_train, y_train)\n", + " return model\n", + "\n", + "def get_model_accuracy(X_test, y_test, model):\n", + " y_pred = model.predict(X_test)\n", + " accuracy = accuracy_score(y_test, y_pred)\n", + " return accuracy\n", + "\n", + "def get_n_best_ranked_data_samples(project_fs: ProjectFileStructure, data_hashes, n, acq_func_instance, rank_by: str):\n", + " execute_metrics([acq_func_instance], data_dir=project_fs.project_dir)\n", + " unique_acq_func_name = acq_func_instance.metadata.get_unique_name()\n", + " acq_func_results = pd.read_csv(project_fs.metrics / f\"{unique_acq_func_name}.csv\")\n", + " \n", + " # filter acquisition function results to only contain data samples specified in data_hashes\n", + " str_data_hashes = tuple(f\"{label_hash}_{du_hash}\" for label_hash, du_hash in data_hashes)\n", + " filtered_results = acq_func_results[acq_func_results['identifier'].str.startswith(str_data_hashes, na=False)]\n", + " \n", + " if rank_by == \"asc\": # get the first n data samples if they were sorted by ascending score order\n", + " best_n = filtered_results[[\"identifier\", \"score\"]].nsmallest(n, \"score\", keep=\"first\")[\"identifier\"]\n", + " elif rank_by == \"desc\": # get the first n data samples if they were sorted by descending score order\n", + " best_n = filtered_results[[\"identifier\", 
\"score\"]].nlargest(n, \"score\", keep=\"first\")[\"identifier\"]\n", + " else:\n", + " raise ValueError\n", + " return [get_data_hash_from_identifier(identifier) for identifier in best_n]\n", + " \n", + "def get_data_hash_from_identifier(identifier: str):\n", + " return tuple(identifier.split(\"_\", maxsplit=2)[:2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "config = yaml.safe_load(Path(\"config.yaml\").read_text())[\"active-learning-in-mnist\"]\n", + "\n", + "# train\n", + "project_dir_train = Path(config[\"train\"][\"project_dir\"])\n", + "project_fs_train = ProjectFileStructure(project_dir_train)\n", + "data_hashes_train = get_data_hashes_from_project(project_fs_train, subset_size=None)\n", + "# shuffle data hashes\n", + "data_hashes_train = shuffle(data_hashes_train, random_state=42)\n", + "print(f\"Train dataset size: {len(data_hashes_train)}\")\n", + "\n", + "# test\n", + "project_dir_test = Path(config[\"test\"][\"project_dir\"])\n", + "project_fs_test = ProjectFileStructure(project_dir_test)\n", + "data_hashes_test = get_data_hashes_from_project(project_fs_test)\n", + "X_test, y_test = get_data_from_data_hashes(project_fs_test, data_hashes_test)\n", + "print(f\"Test dataset size: {len(data_hashes_test)}\")\n", + "\n", + "# active learning (AL) config variables\n", + "initial_data_amount = config[\"initial_data_amount\"]\n", + "n_iterations = config[\"n_iterations\"]\n", + "batch_size_to_label = config[\"batch_size_to_label\"]\n", + "print(f\"Initial amount of labeled data in the train dataset: {initial_data_amount}\")\n", + "print(f\"Number of iterations in the active learning (AL) workflow: {n_iterations}\")\n", + "print(f\"Number of data samples annotated between AL iterations: {batch_size_to_label}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load 
acquisition functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from encord_active.lib.metrics.acquisition_functions import Entropy, LeastConfidence, Margin, Variance\n", + "from encord_active.lib.metrics.heuristic.random import RandomImageMetric\n", + "\n", + "# use 'asc' (ascending) and 'desc' (descending) ordering for posterior selection of k highest ranked data samples\n", + "acq_funcs = [\n", + " (Entropy, \"desc\"),\n", + " (LeastConfidence, \"desc\"),\n", + " (Margin, \"asc\"),\n", + " (Variance, \"asc\"),\n", + " (RandomImageMetric, \"asc\"),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Run the active learning workflow with each acquisition function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "accuracy_logger = defaultdict(dict)\n", + "for acq_func, rank_order in acq_funcs:\n", + " # mockup of the initial labeling phase\n", + " labeled_data_hashes_train = data_hashes_train[:initial_data_amount]\n", + " unlabeled_data_hashes_train = set(data_hashes_train[initial_data_amount:])\n", + " \n", + " X, y = get_data_from_data_hashes(project_fs_train, labeled_data_hashes_train)\n", + " model = train_model(X, y)\n", + " accuracy_logger[acq_func.__name__][0] = get_model_accuracy(X_test, y_test, model)\n", + " for it in tqdm(range(1, n_iterations + 1), disable=False, desc=f\"Analyzing {acq_func.__name__} performance\"):\n", + " if acq_func.__name__ in [\"RandomImageMetric\"]:\n", + " acq_func_instance = acq_func()\n", + " else:\n", + " acq_func_instance = acq_func(model)\n", + " data_to_label_next = get_n_best_ranked_data_samples(project_fs_train, unlabeled_data_hashes_train, batch_size_to_label, acq_func_instance, rank_by=rank_order)\n", + " \n", + " # mockup of the labeling phase\n", + " X_new, y_new = get_data_from_data_hashes(project_fs_train, data_to_label_next)\n", + " 
unlabeled_data_hashes_train.difference_update(data_to_label_next)\n", + " \n", + " X.extend(X_new)\n", + " y.extend(y_new)\n", + " model = train_model(X, y)\n", + " accuracy_logger[acq_func.__name__][it] = get_model_accuracy(X_test, y_test, model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# misc: beautify function names\n", + "if RandomImageMetric.__name__ in accuracy_logger:\n", + " accuracy_logger[\"Random\"] = accuracy_logger.pop(RandomImageMetric.__name__)\n", + "if LeastConfidence.__name__ in accuracy_logger:\n", + " accuracy_logger[\"Least Confidence\"] = accuracy_logger.pop(LeastConfidence.__name__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Show results of the active learning workflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for acq_func_name, points in accuracy_logger.items():\n", + " xs, ys = zip(*points.items())\n", + " plt.plot(xs, ys, label=acq_func_name)\n", + "\n", + "plt.xlabel(\"Iteration\")\n", + "plt.ylabel(\"Model Accuracy\")\n", + "plt.xticks(range(n_iterations + 1))\n", + "plt.legend()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/examples/active learning/example_config.yaml b/examples/active learning/example_config.yaml new file mode 100644 index 000000000..614ad0f8c --- /dev/null +++ b/examples/active learning/example_config.yaml @@ -0,0 +1,8 @@ +active-learning-in-mnist: + initial_data_amount: 500 + n_iterations: 15 + batch_size_to_label: 300 
class BaseModelWrapper:
    """
    Adapter that gives acquisition functions a uniform interface to a model.

    Subclasses implement `prepare_data()` (turn a file on disk into model input) and `_predict_proba()` (run the
    wrapped model). Callers use `predict_probabilities()`, which validates the model output.

    NOTE(review): this class does not derive from ``abc.ABC``, so the ``@abstractmethod`` markers below are not
    enforced at instantiation time; a subclass that forgets to override them silently inherits the ``None``-returning
    defaults.
    """

    def __init__(self, model):
        """
        Args:
            model: The underlying model object that `_predict_proba()` delegates to.
        """
        self._model = model

    @classmethod
    @abstractmethod
    def prepare_data(cls, data_path: Path) -> Optional[Any]:
        """
        Reads and prepares a data sample from local storage to feed the model with it.

        Args:
            data_path (Path): Path to the data sample.

        Returns:
            Data sample prepared to be used as input of `self.predict_probabilities()` method.
        """
        pass

    def predict_probabilities(self, data) -> Optional[np.ndarray]:
        """
        Calculate the model-predicted class probabilities of the examples in the data sample found by the model.

        Args:
            data: Input data sample.

        Returns:
            An array of shape ``(N, K)`` of model-predicted class probabilities, ``P(label=k|x)``.
            Each row of this matrix corresponds to an example `x` and contains the model-predicted probabilities that
            `x` belongs to each possible class, for each of the K classes.
            In the case the model can't extract any example `x` from the data sample (it returns ``None`` or an empty
            result), the method returns ``None``.

        Raises:
            ValueError: If any model-predicted probability is negative.
        """
        pred_proba = self._predict_proba(data)
        if pred_proba is None:
            return None

        # Accept list-like outputs as well; an existing ndarray is passed through unchanged.
        pred_proba = np.asarray(pred_proba)

        # `min()` has no identity on a zero-size array and would raise; "no examples" is documented as `None`.
        if pred_proba.size == 0:
            return None

        if pred_proba.min() < 0:
            raise ValueError("Model-predicted class probabilities cannot be less than zero.")
        return pred_proba

    @abstractmethod
    def _predict_proba(self, X) -> Optional[np.ndarray]:
        """
        Probability estimates.

        Note that in the multilabel case, each sample can have any number of labels.
        This returns the marginal probability that the given sample has the label in question.

        Args:
            X ({array-like} of shape (n_samples, n_features)): Input data.

        Returns:
            An array of shape (n_samples, n_classes). Probability of the sample for each class in the model.
            In the case the model fails, the method returns ``None``.
        """
        pass
class Entropy(AcquisitionFunction):
    """Acquisition function that scores an image by the mean entropy of its predicted class distributions."""

    def __init__(self, model):
        """
        Args:
            model: Model wrapper handed to `AcquisitionFunction` to produce class probabilities.
        """
        description = (
            "Ranks images by their entropy. \n \n"
            "In information theory, the **entropy** of a random variable is the average level of “information”, "
            "“surprise”, or “uncertainty” inherent to the variable's possible outcomes. "
            "The higher the entropy, the more “uncertain” the variable outcome. \n \n"
            r"The mathematical formula of entropy is: $H(p) = -\sum_{i=1}^{n} p_i \log_{2}{p_i}$"
            " \n \nIt can be employed to define a heuristic that measures a model’s uncertainty about the classes "
            "in an image using the average of the entropies of the model-predicted class probabilities in the "
            "image. Like before, the higher the image's score, the more “confused” the model is. "
            "As a result, data samples with higher entropy score should be offered for annotation."
        )
        super().__init__(
            title="Entropy",
            short_description="Ranks images by their entropy.",
            long_description=description,
            metric_type=MetricType.HEURISTIC,
            data_type=DataType.IMAGE,
            model=model,
            annotation_type=AnnotationType.NONE,
        )

    def score_predicted_class_probabilities(self, pred_proba: np.ndarray) -> float:
        """
        Average the base-2 entropy of every row of `pred_proba`.

        Args:
            pred_proba: Array of model-predicted class probabilities, one row per example.

        Returns:
            Mean over rows of ``-sum(p * log2(p))``.
        """
        # log2(0) yields -inf (divide warning silenced); `nan_to_num` swaps it for a large finite negative value so
        # that the product with the zero probability is zero rather than NaN.
        # Negative probabilities make log2 invalid, which is escalated to an exception instead of a warning.
        with np.errstate(divide="ignore", invalid="raise"):
            log_probs = np.nan_to_num(np.log2(pred_proba))
            weighted_sum_per_example = (pred_proba * log_probs).sum(axis=1)
            return -weighted_sum_per_example.mean()
class Margin(AcquisitionFunction):
    """Acquisition function that scores an image by the mean gap between its two most probable classes."""

    def __init__(self, model):
        """
        Args:
            model: Model wrapper handed to `AcquisitionFunction` to produce class probabilities.
        """
        description = (
            "Ranks images by their margin score. \n \n"
            "**Margin** score of a model's prediction is the difference between the two classes with the highest "
            "probabilities. The lower the margin score, the more “uncertain” the prediction. \n \n"
            "It can be employed to define a heuristic that measures a model’s uncertainty about the classes "
            "in an image using the average of the margin score of the model-predicted class probabilities in the"
            " image. Like before, the lower the image's score, the more “confused” the model is. "
            "As a result, data samples with lower margin score should be offered for annotation."
        )
        super().__init__(
            title="Margin",
            short_description="Ranks images by their margin score.",
            long_description=description,
            metric_type=MetricType.HEURISTIC,
            data_type=DataType.IMAGE,
            model=model,
            annotation_type=AnnotationType.NONE,
        )

    def score_predicted_class_probabilities(self, pred_proba: np.ndarray) -> float:
        """
        Average, over the rows of `pred_proba`, the difference between the largest and second-largest probability.

        Args:
            pred_proba: Array of model-predicted class probabilities, one row per example.

        Returns:
            Mean margin across rows.
        """
        # A partial sort is enough: partitioning at index -2 places the second-largest value in the
        # second-to-last column and the largest value in the last column of every row.
        partitioned = np.partition(pred_proba, -2)
        highest = partitioned[:, -1]
        second_highest = partitioned[:, -2]
        return (highest - second_highest).mean()
\n \n" + "The mathematical formula of variance of a data set is: \n" + r"$Var(X) = \frac{1}{n} \sum_{i=1}^{n}(x_i - \mu)^2, \text{where } \mu = \frac{1}{n} \sum_{i=1}^{n}x_i$" + " \n \nIt can be employed to define a heuristic that measures a model’s uncertainty about the classes " + "in an image using the average of the variance of the model-predicted class probabilities in the " + "image. Like before, the lower the image's score, the more “confused” the model is. " + "As a result, data samples with lower variance score should be offered for annotation." + ), + metric_type=MetricType.HEURISTIC, + data_type=DataType.IMAGE, + annotation_type=AnnotationType.NONE, + model=model, + ) + + def score_predicted_class_probabilities(self, pred_proba: np.ndarray) -> float: + return pred_proba.var(axis=1).mean() diff --git a/src/encord_active/lib/metrics/heuristic/random.py b/src/encord_active/lib/metrics/heuristic/random.py index 8e5554e05..54bd2be56 100644 --- a/src/encord_active/lib/metrics/heuristic/random.py +++ b/src/encord_active/lib/metrics/heuristic/random.py @@ -10,7 +10,7 @@ from encord_active.lib.metrics.writer import CSVMetricWriter -class RandomeImageMetric(Metric): +class RandomImageMetric(Metric): def __init__(self): super().__init__( title="Random Values on Images",