diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7901047
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,393 @@
+# Created by https://www.toptal.com/developers/gitignore/api/vim,osx,python,windows,pycharm,jupyternotebooks
+# Edit at https://www.toptal.com/developers/gitignore?templates=vim,osx,python,windows,pycharm,jupyternotebooks
+
+### JupyterNotebooks ###
+# gitignore template for Jupyter Notebooks
+# website: http://jupyter.org/
+
+.ipynb_checkpoints
+*/.ipynb_checkpoints/*
+
+# local notebook identifiers
+*.ipynb:Zone.Identifier
+
+# IPython
+profile_default/
+ipython_config.py
+
+# Remove previous ipynb_checkpoints
+# git rm -r .ipynb_checkpoints/
+
+### OSX ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+### PyCharm ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# AWS User-specific
+.idea/**/aws.xml
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn. Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# SonarLint plugin
+.idea/sonarlint/
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+### PyCharm Patch ###
+# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
+
+# *.iml
+# modules.xml
+# .idea/misc.xml
+# *.ipr
+
+# Sonarlint plugin
+# https://plugins.jetbrains.com/plugin/7973-sonarlint
+.idea/**/sonarlint/
+
+# SonarQube Plugin
+# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin
+.idea/**/sonarIssues.xml
+
+# Markdown Navigator plugin
+# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced
+.idea/**/markdown-navigator.xml
+.idea/**/markdown-navigator-enh.xml
+.idea/**/markdown-navigator/
+
+# Cache file creation bug
+# See https://youtrack.jetbrains.com/issue/JBR-2257
+.idea/$CACHE_FILE$
+
+# CodeStream plugin
+# https://plugins.jetbrains.com/plugin/12206-codestream
+.idea/codestream.xml
+
+# Azure Toolkit for IntelliJ plugin
+# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij
+.idea/**/azureSettings.xml
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+
+# IPython
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+
+### Vim ###
+# Swap
+[._]*.s[a-v][a-z]
+!*.svg # comment out if you don't need vector files
+[._]*.sw[a-p]
+[._]s[a-rt-v][a-z]
+[._]ss[a-gi-z]
+[._]sw[a-p]
+
+# Session
+Session.vim
+Sessionx.vim
+
+# Temporary
+.netrwhist
+*~
+# Auto-generated tag files
+tags
+# Persistent undo
+[._]*.un~
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# End of https://www.toptal.com/developers/gitignore/api/vim,osx,python,windows,pycharm,jupyternotebooks
+
+.gitignore
+
+*.Identifier
+
+/local-tests/
+
+*.pub
+
+notebooks/download-sandbox-dataset.ipynb
+notebooks/building-a-custom-metric-function_hold.ipynb
+notebooks/getting-started-with-encord-projects.ipynb
+notebooks/ssh_key
+notebooks/getting-started-with-coco-project.ipynb
diff --git a/README.md b/README.md
index cc28778..bd45316 100644
--- a/README.md
+++ b/README.md
@@ -15,8 +15,10 @@
-
-
+
+
+
+
@@ -119,11 +121,11 @@ pip install -r requirements.txt
| **📓 Encord Notebook** | **🚀 Launch Notebook** | **📰 Description** | **📺 Video Explainer** | **💡Other Resources** |
| :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------: | -----------------------------------------------------------------------------------------------------------------------------------------: |
-| [Encord Notebooks - 📥 Explore Encord Active's 0.1.70 Native UI](./notebooks/Encord_Notebooks__demo_ea_native_display.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/notebooks/Encord_Notebooks__demo_ea_native_display.ipynb) | This notebook shows you a quick way to test Encord Active with a sandbox project and without installing anything locally. | | ▶️ [Encord Active Documentation ](https://docs.encord.com/docs/active-getting-started) |
-| [ Encord Active - Download Sandbox Project](./notebooks/01_Encord_Active_Notebooks___Download_sandbox_project.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1l4K-QPOqBC4mv2SGADEe2erd5nhFVjoU?usp=sharing) | This notebook gives you a quick way to test 🟣 Encord Active with a sandbox project and without installing anything locally. | | 📑 [Encord Active Documentation - Touring the Coco Sandbox Dataset](https://docs.encord.com/active/docs/tutorials/touring-the-coco-dataset) |
+| [Encord Notebooks - 📥 Explore Encord Active's 0.1.75 Native UI](./local-notebooks/Encord_Notebooks__demo_ea_native_display.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Notebooks__demo_ea_native_display.ipynb) | This notebook shows you a quick way to test Encord Active with a sandbox project and without installing anything locally. | | ▶️ [Encord Active Documentation ](https://docs.encord.com/docs/active-getting-started) |
+| [ Encord Active - Download Sandbox Project](./local-notebooks/01_Encord_Active_Notebooks___Download_sandbox_project.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/01_Encord_Active_Notebooks_Download_sandbox_project.ipynb) | This notebook gives you a quick way to test 🟣 Encord Active with a sandbox project and without installing anything locally. | | 📑 [Encord Active Documentation - Touring the Coco Sandbox Dataset](https://docs.encord.com/docs/active-touring-coco-dataset) |
| [ Encord Active - Getting Started with Encord Projects](./notebooks/02_Encord_Active___Import_project_(self_hosting).ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1pchKiSZPiu2ENp0pr7iSs3L4JqO6cNAr?usp=sharing) | This 📓 notebook shows you how to import existing Encord projects into Encord Active | ▶️ [How to Create an Annotation Project](https://encord.com/learning-hub/how-to-create-an-annotation-project/) | 📑 [Encord Documentation - Projects Overview](https://docs.encord.com/projects/projects-overview) |
-| [ Encord Active - 🏗️ Building a Custom Metric Function](./notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1oLA-EnKTtrYHJRn1KNjglWDAVUsxD6bq?usp=sharing) | This 📓 notebook will take you through how to write such metric functions and use them with Encord Active | | 📑 [Encord Documentation - Writing Custom Quality Metric](https://docs.encord.com/active/docs/metrics/write-your-own) |
-| [ Encord Active - Add Custom Embeddings](./notebooks/Encord_Active_Add_Custom_Embeddings.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1mYEF2K-5Yp76cRaq-HOKT19UeUcDK3tu?usp=sharing) | In this 📓 notebook, learn about the three different types of embeddings in Encord Active and how to use them | |
+| [ Encord Active - 🏗️ Building a Custom Metric Function](./local-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb) | This 📓 notebook will take you through how to write such metric functions and use them with Encord Active | | 📑 [Encord Documentation - Writing Custom Quality Metric](https://docs.encord.com/active/docs/metrics/write-your-own) |
+| [ Encord Active - Add Custom Embeddings](./local-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb) | In this 📓 notebook, learn about the three different types of embeddings in Encord Active and how to use them | |
| [ Encord Notebooks - 🧵 Generate Encord Active Model Segmentation Masks Using Encord Annotate Micro-Models](./Import-Encord-Active-Model-Predictions/Micromodels-generate-segmentation-predictions.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CBTUgowsUCm9JGMP_mbQ7GkIfbSqRh4z?usp=sharing) | In this short notebook walkthrough, learn how to generate segmentation masks with Encord Active using Micro-Models in Encord Annotate | | ▶️ [Encord Learning Hub - How to Build a Micro-model](https://encord.com/learning-hub/how-to-build-a-micro-model/) |
| [ Encord Notebooks - 📦 Generate Encord Active Model Object Detection Boxes Using Encord Annotate Micro-Models](./Import-Encord-Active-Model-Predictions/Micromodels-generate-detection-predictions.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1nwdSHFonQBEYEhywLfQPkMMya0xTOjdc?usp=sharing) | In this short notebook walkthrough, learn how to generate bounding boxes with Encord Active using Micro-Models in Encord Annotate | | ▶️ [Encord Learning Hub - How to Build a Micro-model](https://encord.com/learning-hub/how-to-build-a-micro-model/) |
@@ -138,8 +140,8 @@ pip install -r requirements.txt
| **📓 Encord Notebook** | **🚀 Launch Notebook** | **📰 Description** | **📺 Video Explainer** | **💡Other Resources** |
| :--------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------: | --------------------------------------------------------------------------------------------------------------------------------------: |
-| [ Encord Active - 🔦 Torchvision Dataset Exploration](./notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1zVIyqsF5fpDNsjFLLKPvI2VXPSMdQ2T5?usp=sharing) | In this notebook, you will use Encord Active to explore the quality of a dataset from the built-in samples in the `torchvision.datasets` module | | [Encord Active Documentation - Exploring data distribution](https://docs.encord.com/active/docs/workflows/understand-data-distribution) |
-| [ Encord Active - 🤗 HuggingFace Dataset Exploration](./notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Ohsd1BrO6s9HuliYdHqMsIblaR9KXbpk?usp=sharing) | In this notebook, you will use Encord Active to explore the quality of a dataset from the Hugging Face Datasets library | | [Encord Active Documentation - Exploring data distribution](https://docs.encord.com/active/docs/workflows/understand-data-distribution) |
+| [ Encord Active - 🔦 Torchvision Dataset Exploration](./local-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb) | In this notebook, you will use Encord Active to explore the quality of a dataset from the built-in samples in the `torchvision.datasets` module | | [Encord Active Documentation - Exploring data distribution](https://docs.encord.com/active/docs/workflows/understand-data-distribution) |
+| [ Encord Active - 🤗 HuggingFace Dataset Exploration](./local-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb) | In this notebook, you will use Encord Active to explore the quality of a dataset from the Hugging Face Datasets library | | [Encord Active Documentation - Exploring data distribution](https://docs.encord.com/docs/active-exploring-data-and-label-distributions) |
@@ -152,7 +154,7 @@ pip install -r requirements.txt
| **📓 Encord Notebook** | **🚀 Launch Notebook** | **📰 Description** | **📺 Video Explainer** | **💡Other Resources** |
| :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------: | ------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-| [ Encord Notebooks - 🆚 Grounding-DINO+SAM vs. Mask-RCNN](./notebooks/Encord_Notebooks_Team_gDINO+SAM_vs_maskrcnn_webinar.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1MKlB0AU8yaCwSvnaAPO3B1sHHP9xcsM1?usp=sharing) | In this notebook file, you will get and evaluate the segmentation predictions of images using Grounding-DINO and Segment Anything Model (SAM) | ▶️ [Encord Learning Hub - Are VFMs on par with SOTA?](https://encord.com/learning-hub/are-vfms-on-par-with-sota/) | • [ Encord Notebooks - 🔧 Zero-Shot Image Segmentation with Grounding-DINO + Segment Anything Model (SAM)](./notebooks/Encord_Notebooks_Zero_shot_image_segmentation_with_grounding_dino_and_sam.ipynb) • 📖 [Encord Blog - Grounding-DINO + Segment Anything Model (SAM) vs Mask-RCNN: A comparison](https://encord.com/blog/grounding-dino-sam-vs-mask-rcnn-comparison/) |
+| [ Encord Notebooks - 🆚 Grounding-DINO+SAM vs. Mask-RCNN](./local-notebooks/Encord_Notebooks_Team_gDINO+SAM_vs_maskrcnn_webinar.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1MKlB0AU8yaCwSvnaAPO3B1sHHP9xcsM1?usp=sharing) | In this notebook file, you will get and evaluate the segmentation predictions of images using Grounding-DINO and Segment Anything Model (SAM) | ▶️ [Encord Learning Hub - Are VFMs on par with SOTA?](https://encord.com/learning-hub/are-vfms-on-par-with-sota/) | • [ Encord Notebooks - 🔧 Zero-Shot Image Segmentation with Grounding-DINO + Segment Anything Model (SAM)](./local-notebooks/Encord_Notebooks_Zero_shot_image_segmentation_with_grounding_dino_and_sam.ipynb) • 📖 [Encord Blog - Grounding-DINO + Segment Anything Model (SAM) vs Mask-RCNN: A comparison](https://encord.com/blog/grounding-dino-sam-vs-mask-rcnn-comparison/) |
@@ -166,8 +168,8 @@ pip install -r requirements.txt
| **📓 Encord Notebook** | **🚀 Launch Notebook** | **📰 Description** | **📺 Video Explainer** | **💡Other Resources** |
| :-------------------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------: | :-------------------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-| [ Encord Notebooks - 🔧 How to fine-tune Segment Anything Model (SAM)](./notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1XeMSjS7F4QTTI0BSo0MJ6oA7Aj9Pz_UD?usp=sharing) | This is the notebook gives you a walkthrough on fine-tuning Segment Anything Model (SAM) to a specific application | | • 📖 [Encord's Blog - How To Fine-Tune Segment Anything](https://encord.com/blog/learn-how-to-fine-tune-the-segment-anything-model-sam/) • ▶️ [Encord Learning Hub - How to use SAM to Automate Data Labeling](https://encord.com/learning-hub/how-to-use-sam-to-automate-data-labeling/) • 📖 [Encord's Blog - Meta AI's New Breakthrough: Segment Anything Model (SAM) Explained](https://encord.com/blog/segment-anything-model-explained/) • 📖 [Segment Anything (SAM) is live in Encord](https://encord.com/blog/segment-anything-live-in-encord/). |
-| [ Encord Notebooks - 🔎 Building Semantic Search for Visual Data](./notebooks/Encord_Notebooks_Building_Semantic_Search_for_Visual_Data.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/13SjdECFWlTZVgXGazxOBcNSjqR5C0b8a?usp=sharing) | In this notebook, you will build a semantic search engine using CLIP and ChatGPT | ▶️ [Encord's YouTube Channel - How to build Semantic Visual Search with ChatGPT and CLIP](https://youtu.be/_thRPX91WLM) | ▶️ [Webinar - Webinar: How to build Semantic Visual Search with ChatGPT and CLIP](https://encord.com/blog/webinar-semantic-visual-search-chatgpt-clip/) |
+| [ Encord Notebooks - 🔧 How to fine-tune Segment Anything Model (SAM)](./local-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb) | This notebook gives you a walkthrough on fine-tuning Segment Anything Model (SAM) to a specific application | | • 📖 [Encord's Blog - How To Fine-Tune Segment Anything](https://encord.com/blog/learn-how-to-fine-tune-the-segment-anything-model-sam/) • ▶️ [Encord Learning Hub - How to use SAM to Automate Data Labeling](https://encord.com/learning-hub/how-to-use-sam-to-automate-data-labeling/) • 📖 [Encord's Blog - Meta AI's New Breakthrough: Segment Anything Model (SAM) Explained](https://encord.com/blog/segment-anything-model-explained/) • 📖 [Segment Anything (SAM) is live in Encord](https://encord.com/blog/segment-anything-live-in-encord/). |
+| [ Encord Notebooks - 🔎 Building Semantic Search for Visual Data](./local-notebooks/Encord_Notebooks_Building_Semantic_Search_for_Visual_Data.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/13SjdECFWlTZVgXGazxOBcNSjqR5C0b8a?usp=sharing) | In this notebook, you will build a semantic search engine using CLIP and ChatGPT | ▶️ [Encord's YouTube Channel - How to build Semantic Visual Search with ChatGPT and CLIP](https://youtu.be/_thRPX91WLM) | ▶️ [Webinar - Webinar: How to build Semantic Visual Search with ChatGPT and CLIP](https://encord.com/blog/webinar-semantic-visual-search-chatgpt-clip/) |
@@ -234,7 +236,7 @@ We follow a [code of conduct](https://github.com/encord-team/encord-active/blob/
* If you plan to work on an issue, mention so in the [issue page](https://github.com/encord-team/encord-notebooks/issues) before you start working on it.
* If you have an idea for a notebook or tutorial, kindly create an issue and share it with other community members/maintainers.
-* Ask for help in our [Discord community](https://discord.gg/TU6yT7Uvx3).
+* Ask for help in the [Active community](https://join.slack.com/t/encordactive/shared_invite/zt-1hc2vqur9-Fzj1EEAHoqu91sZ0CX0A7Q).
* Please include the file name and a brief description of any spelling or text changes. The reviewers may struggle to identify corrections.
Please ensure that your contributions align with the repository's goals and adhere to the project's license.
diff --git a/colab-notebooks/01_Encord_Active_Notebooks_Download_sandbox_project.ipynb b/colab-notebooks/01_Encord_Active_Notebooks_Download_sandbox_project.ipynb
new file mode 100644
index 0000000..1bb67fd
--- /dev/null
+++ b/colab-notebooks/01_Encord_Active_Notebooks_Download_sandbox_project.ipynb
@@ -0,0 +1,442 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oWG683Ze9f5R"
+ },
+ "source": [
+        ""
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 🟣 Encord Active | 🏗️ Building a Custom Metric Function"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 🚀 Overview"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-n4-PmG0Kwxk"
+ },
+ "source": [
+ "Hi there, 👋.\n",
+ "\n",
+ "> ⚠️ **Prerequisites:** you should have `encord-active` [installed](https://docs.encord.com/active/docs/installation).\n",
+ "\n",
+        "Developing machine learning models is often (and should be) based on iterative hypothesis testing. Typically, you get some data and labels and train your first model. Then, you realise that the model is performing worse than you had hoped.\n",
+        "\n",
+        "Now, you start hypothesizing about what might be wrong. Perhaps you suspect that red objects make your model perform worse. So you define a hypothesis like:\n",
+ "\n",
+ "> Red objects have a significant impact on my model performance\n",
+ "\n",
+ "Traditionally, the next thing you would do is to write a script for filtering, ordering, and visualising your validation data as a function of the object colors.\n",
+ "Something like the code below.\n",
+ "\n",
+ "\n",
+ "> ⚠️ DISCLAIMER: The code below is just to show how much code you need to write to test your hypothesis. It's not meant to work or to be copied in any way!\n",
+ "\n",
+ "\n",
+ "Code block that you can safely hide\n",
+ "\n",
+ "\n",
+ "\n",
+ "```python\n",
+ "# DISCLAIMER: This is just to show how much code you need to write to test your hypothesis\n",
+ "# It's not meant to work or to be copied in any way!\n",
+ "\n",
+ "from functools import partial\n",
+ "\n",
+ "color_ordering = [] \n",
+ "acc = [] \n",
+ "\n",
+        "def compute_redness_of_objects(object, image):\n",
+ " # Some code to determine colors\n",
+ " # color_metric = ...\n",
+ " return color_metric\n",
+ "\n",
+ "for batch in validation_loader:\n",
+ " for image, labels in batch:\n",
+        "        predictions = my_model(image)\n",
+        "\n",
+        "        acc += ... # some hard-to-write code for matching predictions with labels\n",
+        "        color_ordering += list(map(partial(compute_redness_of_objects, image=image), predictions))\n",
+        "    \n",
+        "color_ordering = np.array(color_ordering)\n",
+        "sorting = np.argsort(color_ordering)\n",
+        "color_ordering = color_ordering[sorting]\n",
+        "acc = np.array(acc)[sorting]\n",
+ "\n",
+ "# LOONG plotting code section for displaying samples, plots, and what not.\n",
+ "# ...\n",
+ "# ...\n",
+ "# ...\n",
+ "```\n",
+ " \n",
+ "\n",
+ "\n",
+ "When you're finally done writing code and plotting things, hopefully you can reach a conclusion regarding your hypothesis.\n",
+        "When you reach this point, you will most likely have many more hypotheses that you want to test and eventually also more models to evaluate.\n",
+        "Do we need to mention how painful it will be to extend the code above with new use cases, plots, etc.?\n",
+        "What if you, for example, wanted to know the same thing, not only for your predictions but also for the labels? What about false negatives? ... and so on.\n",
+ "\n",
+ "Encord Active solves this problem with a couple of points in focus:\n",
+ "\n",
+        "1. **Reusability:** You define your metric function once and then you can reuse it again and again.\n",
+ "2. **Isolation of functionality:** Since the metric function is defined in isolation from other metrics, you won't accidentally introduce errors in other functions, plots, etc.\n",
+ "3. **Iteration speed:** We've made it easy to implement your own metric function such that you can iterate faster.\n",
+        "4. **It's built from experience:** We have felt this pain many times and we have seen many of the common hypotheses that come up. We're building Encord Active to deal with all these common scenarios while being extensible enough to be tailored to your custom use case.\n",
+ "\n",
+        "Other points that we want to highlight:\n",
+ "\n",
+ "1. Encord Active ships with a bunch of [pre-defined metrics](https://docs.encord.com/active/docs/category/metrics) that will automatically be run on your data when you import it.\n",
+ "2. When you've [imported your model predictions](https://docs.encord.com/active/docs/workflows/import-predictions), Encord Active will _automatically_ identify those metrics that are more important for your model performance.\n",
+ "\n",
+ "This 📓 notebook will take you through how to write such metric functions and use them with Encord Active.\n",
+ "\n",
+ " \n",
+ "\n",
+ "> 💡 Learn more about 🟣 Encord Active: \n",
+ "* [GitHub](https://github.com/encord-team/encord-active) \n",
+ "* [Docs](https://docs.encord.com/docs/active-overview)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 📏 Defining a `Metric` sub-class"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ESEt8WweThsO"
+ },
+ "source": [
+ "\n",
+ "\n",
+ "Here, we'll give some detailed information on how a quality metric is defined.\n",
+ "\n",
+ "> **🌟 Info**: If you don't like abstract talk, you can skip directly to [the example below](#concrete-example) to see how to implement a specific metric.\n",
+ "\n",
+ "We have listed the entire stub below for defining a metric. Following right after is a breakdown of the different components."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+    "from typing import List, Optional, Union\n",
+    "\n",
+    "from encord_active.lib.common.iterator import Iterator\n",
+    "from encord_active.lib.metrics.metric import Metric\n",
+    "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n",
+    "from encord_active.lib.metrics.writer import CSVMetricWriter\n",
+    "\n",
+    "class ExampleMetric(Metric):\n",
+    "    # === SECTION 1 === #\n",
+    "    def __init__(self):\n",
+    "        super().__init__(\n",
+    "            title=\"[the-name-of-your-metric]\",\n",
+    "            short_description=\"A short description of your metric.\",\n",
+    "            long_description=\"A longer and more detailed description. \" \\\n",
+    "                \"I can use Markdown to _format_ the text.\",\n",
+    "            metric_type=MetricType.GEOMETRIC,\n",
+    "            data_type=DataType.IMAGE,\n",
+    "            annotation_type=[AnnotationType.OBJECT.BOUNDING_BOX, AnnotationType.OBJECT.POLYGON],\n",
+    "        )\n",
+    "\n",
+    "    def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n",
+    "        valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n",
+    "\n",
+    "        for data_unit, image in iterator.iterate(desc=\"Progress bar description\"):\n",
+    "            # === SECTION 2 === #\n",
+    "            # Write a score for the image itself (data quality)\n",
+    "            writer.write(1337, description=\"Your description for the frame [can be omitted]\")\n",
+    "\n",
+    "            for obj in data_unit[\"labels\"].get(\"objects\", []):\n",
+    "                # === SECTION 3 === #\n",
+    "                # Label (object/classification) level score (label / model prediction quality)\n",
+    "                if not obj[\"shape\"] in valid_annotation_types:\n",
+    "                    continue\n",
+    "\n",
+    "                # Do your thing (inference)\n",
+    "                # ...\n",
+    "                # Then\n",
+    "                writer.write(42, labels=obj, description=\"Your description of the score [can be omitted]\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are a couple of sections in the code above. \n",
+ "\n",
+ "`SECTION 1`: Is used for proper display of the values that the metric produces. The properties being set there are:\n",
+ "\n",
+ "1. `title`: Is the title of your metric. It will be used in data frames and the app to identify the metric.\n",
+    "2. `metric_type`: We distinguish between three categories of metric types:\n",
+ " - `HEURISTIC`: operate on images or individual video frames and are heuristic in the sense that they mostly depend on the image content without labels.\n",
+ " - `GEOMETRIC`: operate on the geometries of objects like bounding boxes, polygons, and polylines.\n",
+ " - `SEMANTIC`: operate with the semantic information of images or individual video frames - for example, metrics based on NN embeddings would go here.\n",
+ "3. `data_type`: The type of data that the metric applies to.\n",
+    "    - `IMAGE`: Individual images without any temporal dependencies.\n",
+ " - `SEQUENCE`: Consecutive frames where order across frames matter.\n",
+ "4. `annotation_type`:\n",
+ " - `NONE`: Doesn't need annotations (Data Quality)\n",
+ " - `OBJECT`: A list of object types like polygon or bounding box that the metric works for.\n",
+ " - `CLASSIFICATION`: A list of classification types like radio buttons and checkboxes that the metric works for.\n",
+ " - `ALL`: All objects and classification types. Could, for example, be used for annotation time.\n",
+    "5. `short_description`: Used in the UI.\n",
+    "6. `long_description`: Used in the UI.\n",
+ "\n",
+ "\n",
+ "`SECTION 2`: Is used for metric functions that yield one score for each frame. Note how the `writer.write(...)` specifies no objects.\n",
+ "\n",
+    "`SECTION 3`: Is used for metric functions that yield a score for each object / classification. For these metrics, `writer.write(...)` should contain a list of objects or classifications that should be associated with a given score.\n",
+ "\n",
+ "> _Note:_ You should stick to either writing scores with or without the `writer.write(..., labels=obj)` argument. Mixing them up will confuse the app.\n",
+ "\n",
+ "### Using the iterator\n",
+ "When you call `iterator.iterate(...)`, you will get an iterator over all the data in a given dataset (see how to execute the metric [below](#execute)). Each item in the iterator is a tuple of a `data_unit` dictionary and a `pathlib.Path` to where the image can be loaded from. \n",
+ "\n",
+ "The `data_unit` dictionary has the following structure (there may be more or less `\"objects\"` and `\"labels\"`):\n",
+ "\n",
+ "\n",
+ "data_unit example structure\n",
+ "\n",
+ "```python\n",
+ "{\n",
+ " \"data_hash\": \"595d9721-913b-45c9-8645-c3ebf8a6ae0b\",\n",
+ " \"data_title\": \"231822\",\n",
+ " \"data_type\": \"image/jpeg\",\n",
+ " \"data_sequence\": 0,\n",
+ " \"labels\": {\n",
+ " \"objects\": [\n",
+ " { # Example polygon\n",
+ " \"name\": \"Bottle\",\n",
+ " \"color\": \"#68BC00\",\n",
+ " \"shape\": \"polygon\",\n",
+ " \"value\": \"bottle\",\n",
+ " \"polygon\": {\n",
+ " \"0\": {\"x\": 0.9559, \"y\": 0.0038},\n",
+ " \"1\": {\"x\": 0.9356, \"y\": 0.1399},\n",
+ " \"2\": {\"x\": 0.9216, \"y\": 0.1982},\n",
+ " # ...\n",
+ " },\n",
+ " \"createdAt\": \"Thu, 25 Aug 2022 15:45:31 GMT\",\n",
+ " \"createdBy\": \"robot@cord.tech\",\n",
+ " \"confidence\": 1,\n",
+ " \"objectHash\": \"9728826c\",\n",
+ " \"featureHash\": \"671c61d7\",\n",
+ " \"lastEditedAt\": \"Thu, 25 Aug 2022 15:45:31 GMT\",\n",
+ " \"lastEditedBy\": \"robot@encord.com\",\n",
+ " \"manualAnnotation\": False,\n",
+ " },\n",
+ " { # Example bounding box\n",
+ " \"name\": \"Cyclist\",\n",
+ " \"color\": \"#DBDF00\",\n",
+ " \"shape\": \"bounding_box\",\n",
+ " \"value\": \"Cyclist\",\n",
+ " \"createdAt\": \"Wed, 23 Nov 2022 10:05:22 GMT\",\n",
+ " \"createdBy\": \"robot@encord.com\",\n",
+ " \"confidence\": 1.0,\n",
+ " \"objectHash\": \"t2KUSWgj\",\n",
+ " \"featureHash\": \"yJ+hgd0r\",\n",
+ " \"lastEditedAt\": \"Wed, 23 Nov 2022 10:05:22 GMT\",\n",
+ " \"lastEditedBy\": \"robot@encord.com\",\n",
+ " \"manualAnnotation\": True,\n",
+ " \"boundingBox\": {\n",
+ " \"h\": 0.2810061626666667,\n",
+ " \"w\": 0.0897509331723027,\n",
+ " \"x\": 0.4464461135265701,\n",
+ " \"y\": 0.443804288,\n",
+ " },\n",
+ " \"reviews\": [],\n",
+ " },\n",
+ " ],\n",
+ " \"classifications\": [\n",
+ " { # Example classification\n",
+ " \"name\": \"Classification Question\",\n",
+ " \"value\": \"classification-question\",\n",
+ " \"createdAt\": \"Fri, 11 Nov 2022 09:41:21 GMT\",\n",
+ " \"createdBy\": \"robot@cord.tech\",\n",
+ " \"confidence\": 1,\n",
+ " \"featureHash\": \"MTYzMTkx\",\n",
+ " \"classificationHash\": \"sHNoiYPw\",\n",
+ " \"manualAnnotation\": True,\n",
+ " \"reviews\": [],\n",
+ " }, \n",
+ " # ...\n",
+ " ],\n",
+ " },\n",
+ " \"data_link\": \"...\",\n",
+ " \"width\": 500,\n",
+ " \"height\": 361,\n",
+ "}\n",
+ "```\n",
+ "\n",
+ "\n",
+    "> _💡 Hint:_ You can inspect the entire structure by looking in the file `/path/to/project/data//label_row.json`.\n",
+ "\n",
+ "> 📝 _Note:_ To find the actual answers to classification questions, you access `iterator.label_rows[iterator.label_hash][\"classification_answers\"][]`.\n",
+ "\n",
+ "> 📝 _Note:_ If you are computing metrics based on temporal aspects, the `iterator.frame` will tell you what frame of a sequence you are currently looking at and the `iterator.label_hash` will give you the unique id of the sequence."
+ ]
+ },
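+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To make the temporal and classification notes above concrete, here is a minimal, hypothetical sketch of an `execute(...)` body. Only `iterator.iterate`, `iterator.frame`, `iterator.label_hash`, `iterator.label_rows`, and `writer.write` come from the stub above; the variable names and the use of the frame number as a score are illustrative assumptions.\n",
+    "\n",
+    "```python\n",
+    "# Hypothetical sketch (not part of Encord Active) illustrating the two notes above.\n",
+    "def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n",
+    "    for data_unit, image in iterator.iterate(desc=\"Example\"):\n",
+    "        # Temporal context: which sequence we are in and which frame we are on.\n",
+    "        sequence_id = iterator.label_hash\n",
+    "        frame_number = iterator.frame\n",
+    "\n",
+    "        # Classification answers live on the label row, keyed as described above.\n",
+    "        answers = iterator.label_rows[sequence_id][\"classification_answers\"]\n",
+    "\n",
+    "        # Write one score per frame; the frame number is a stand-in score purely for illustration.\n",
+    "        writer.write(float(frame_number), description=f\"Sequence {sequence_id} has {len(answers)} classification answers\")\n",
+    "```"
+   ]
+  },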
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### 👟 Executing a metric\n",
+ "\n",
+ "When you have implemented a metric function, you can run it using the following code snippet:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "from encord_active.lib.metrics.execute import execute_metrics\n",
+ "from encord_active.lib.model_predictions.iterator import PredictionIterator\n",
+ "\n",
+ "target = Path(\"/path/to/your/project\") # TODO UPDATE\n",
+ "\n",
+ "execute_metrics([ExampleMetric()], data_dir=target, use_cache_only=True) # for labels\n",
+ "execute_metrics([ExampleMetric()], data_dir=target, iterator_cls=PredictionIterator, use_cache_only=True) # for predictions (only makes sense to do if your metric applies to labels)\n",
+ "\n",
+ "# Wrap this entire code block in a \n",
+ "# `if __name__ == \"__main__\":`\n",
+ "# and put it in the bottom of your metric file if you want to be able to run\n",
+ "# python your_metric.py"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "The `target` variable points to the directory that contains the Encord Active project that you want to run the metric on.\n",
+ "This directory should, for example, contain a `project-meta.yaml`.\n",
+ "\n",
+ "> Info: The `use_cache_only` argument tells Encord Active to not try and download more data via the Encord SDK.\n",
+ "\n",
+ "Having covered the overall structure, let's dive into a concrete example."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ESEt8WweThsO",
+ "tags": []
+ },
+ "source": [
+ "\n",
+ "# 🦮 Concrete walkthrough Example\n",
+ "\n",
+ "> 💡 Hint: We refer to line numbers. In most notebooks, you can enable line numbers in the \"View\" options.\n",
+ "\n",
+ "In this example, you'll continue the idea of testing the model performance as a function of the \"redness\" of individual objects. \n",
+    "Specifically, you will use the annotations/predictions to extract the image patches that contain an object and compute the mean Hue value of that patch.\n",
+ "\n",
+ "To get started, let's have a look at the [HSV color space](https://en.wikipedia.org/wiki/HSL_and_HSV), which is great for color filtering.\n",
+ "The following code indicates how different Hue (the H from HSV) values correspond to different colors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import cv2\n",
+ "import numpy as np\n",
+ "\n",
+ "def get_img(H: int):\n",
+ " \"\"\"\n",
+ " Make image with specific Hue color and convert it to RGB for plotting.\n",
+ " \"\"\"\n",
+ " img = np.ones((20, 20, 3), dtype=np.uint8)\n",
+ " img[..., 0] = H\n",
+ " img[..., 1] = 255\n",
+ " img[..., 2] = 150 \n",
+ " return cv2.cvtColor(img, cv2.COLOR_HSV2RGB)\n",
+ "\n",
+ "# Hue ranges from 0 to 180 and \"wraps\" around.\n",
+ "hues = np.linspace(0, 179, 18, dtype=np.uint8)\n",
+ "imgs = [get_img(i) for i in hues]\n",
+ "\n",
+ "fig, ax = plt.subplots(2, 9, figsize=(10, 3))\n",
+ "ax = ax.reshape(-1)\n",
+ "\n",
+ "# Plot the colors\n",
+ "for img, a, h in zip(imgs, ax, hues):\n",
+ " a.set_title(f\"Hue: {h}\")\n",
+ " a.axis('off')\n",
+ " a.imshow(img)\n",
+ "\n",
+ "fig.tight_layout()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "👉 Note how the first and the last images are very red but have very different hue values. \n",
+ "This is because of the \"circular\" / \"wrap-around\" nature of the color space. \n",
+    "Let's account for that by computing a value that makes red colors close to zero and others closer to one."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def transform_hue(H: int, offset=0):\n",
+ " return (90 - np.abs(H - 90)) / 90\n",
+ "\n",
+ "# Plotting\n",
+ "fig, ax = plt.subplots(2, 9, figsize=(10, 3))\n",
+ "ax = ax.reshape(-1)\n",
+ "\n",
+ "for img, a, h in zip(imgs, ax, hues):\n",
+ " t = transform_hue(h)\n",
+ " a.set_title(f\"Transf.: {t:.2f}\")\n",
+ " a.imshow(img)\n",
+ " a.axis('off')\n",
+ "fig.tight_layout()"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "Alright, this looks better.\n",
+ "The transformed value is a better candidate for our metric function.\n",
+ "\n",
+ "Next, let's use this to crop out the relevant parts of polygon annotations and compute their mean (transformed) hue values.\n",
+ "\n",
+ "We define a `Metric` subclass and compute the transformed hue value for each object to see how red it is."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "cItsCLacS2Gx",
+ "outputId": "39d039b2-03b9-4b2b-e9ce-d8b3dbf61745",
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+    "from typing import Optional\n",
+    "\n",
+    "import cv2\n",
+    "import numpy as np\n",
+ "from encord_active.lib.common import utils\n",
+ "from encord_active.lib.common.iterator import Iterator\n",
+ "from encord_active.lib.metrics.metric import Metric\n",
+ "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n",
+ "from encord_active.lib.metrics.writer import CSVMetricWriter\n",
+ "from loguru import logger\n",
+ "\n",
+ "\n",
+ "class ObjectRedness(Metric):\n",
+ " def __init__(self):\n",
+ " super().__init__(\n",
+ " title=\"Polygon Average Hue\",\n",
+ " short_description=\"Compute the average Hue value of the pixels contained within each polygon.\",\n",
+    "            long_description=r\"\"\"Crops out the pixels associated with each object and computes the (transformed)\n",
+ "Hue value of each object.\n",
+ "\n",
+ "The transform \"breaks\" the wrap-around of the Hue color space, so Hue values in range [0, 180] becomes [0, 1] as follows:\n",
+ "\n",
+ "```\n",
+ "H: [0, 45, 90, 135, 179]\n",
+ "t(H): [0, 0.5, 1, 0.5, 0+e]\n",
+ "```\n",
+ "\"\"\",\n",
+    "            metric_type=MetricType.SEMANTIC,\n",
+ " data_type=DataType.IMAGE,\n",
+ " annotation_type=[AnnotationType.OBJECT.POLYGON],\n",
+ " )\n",
+ "\n",
+ " def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n",
+ " valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n",
+ "\n",
+    "        # Iterate over every data unit (image / individual video frame) in the project\n",
+ " for data_unit, image in iterator.iterate(desc=\"Custom progress description\"):\n",
+ " # Convert image to the HSV color space\n",
+ " full_image = np.array(image)\n",
+ " full_hsv_image = cv2.cvtColor(full_image, cv2.COLOR_RGB2HSV)[...,0] # Take only the hue channel\n",
+ " img_h, img_w = full_hsv_image.shape[:2]\n",
+ " \n",
+ " for obj in data_unit[\"labels\"].get(\"objects\", []):\n",
+ " if not obj[\"shape\"] in valid_annotation_types:\n",
+ " continue # Only use polygons\n",
+ " \n",
+ " # The `get_geometry_from_encord_object` function will get us a numpy array of xy coordinates.\n",
+ " poly: Optional[np.ndarray] = utils.get_geometry_from_encord_object(obj, w=img_w, h=img_h) # [n, d]\n",
+ " if poly is None:\n",
+ " continue\n",
+ " \n",
+ " # Check that the polygon takes up at least one pixel\n",
+ " ymi, xmi = poly.min(0)\n",
+ " yma, xma = poly.max(0)\n",
+ " \n",
+ " if ymi == yma or xmi == xma:\n",
+ " continue # Empty polygon\n",
+ " \n",
+ " # Draw mask from polygon\n",
+ " mask = np.zeros((img_h, img_w), dtype=np.uint8)\n",
+ " mask = cv2.fillPoly(mask, [poly], 1)\n",
+ " \n",
+ " polygon_pixels = full_hsv_image[mask==1] # Take only pixels within polygon\n",
+ " transformed_mean_hue = transform_hue(polygon_pixels.mean())\n",
+ " writer.write(transformed_mean_hue.item(), labels=obj)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Okay, so you have defined your metric which extracts the pixels of each polygon and computes the average (transformed) hue value of those pixels.\n",
+ "The next step will then be to apply the metric to your data.\n",
+ "\n",
+ "In the next code cell, you'll download one of the sandbox datasets, but you can also point the metric to your own dataset by setting the `target` path below to point to the root of your project directory."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 📩 Download the \"quickstart\" sandbox dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "!encord-active download --project-name quickstart"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You should now be able to see the quickstart directory in the `File Browser`. \n",
+ "Apply your metric to that project."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "from encord_active.lib.metrics.execute import execute_metrics\n",
+ "\n",
+ "target = Path(\"quickstart\")\n",
+ "\n",
+ "# Apply metric to labels\n",
+ "execute_metrics([ObjectRedness()], data_dir=target, use_cache_only=True)\n",
+ "\n",
+ "# For predictions (only makes sense to do if your metric applies to labels)\n",
+ "from encord_active.lib.model_predictions.iterator import PredictionIterator\n",
+ "from encord_active.lib.model_predictions.writer import MainPredictionType\n",
+ "execute_metrics([ObjectRedness()], data_dir=target, iterator_cls=PredictionIterator, use_cache_only=True, prediction_type=MainPredictionType.OBJECT)\n",
+ "\n",
+ "# Wrap this entire code block in a \n",
+ "# `if __name__ == \"__main__\":`\n",
+ "# and put it in the bottom of your metric file if you want to be able to run\n",
+ "# python your_metric.py"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To see the results, you can run the app with the project as the target:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!encord-active start -t \"quickstart\""
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For the quickstart dataset, the \"Polygon Average Hue\" metric that we just defined seems to have little or no influence on the model performance - based on the \"Metric Importance\" chart on the \"Model Quality -> Metrics\" page.\n",
+    "However, if you filter by the person class in the settings panel at the top, you will see that the redness of objects does seem to have an effect on the model performance.\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# ✅ Wrap Up: Next Steps"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The next steps from here could be many.\n",
+ "You have seen that the redness of objects is generally not extremely important for the model performance - which is a good thing - one less thing to worry about.\n",
+ "\n",
+ "From here, one could go on to define a new custom metric function to test the next hypothesis.\n",
+    "Some of the things that would be simple to test now that you have your first custom metric in place are, e.g., the standard deviation of the colors within an object, the saturation, other colors, etc. These metrics would only require changing line 57 in the metric definition above (see the sketch below).\n",
+ "\n",
+    "Of course, you should keep all the metrics that we define to make sure that the redness of objects doesn't turn into a problem at a later stage of model development."
+ ]
+ },
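+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here is a minimal sketch of the standard-deviation variant mentioned above. It is hypothetical and only changes the last lines of `ObjectRedness.execute`; the `hue_std` name and the `/ 90.0` normalisation are illustrative assumptions, not part of Encord Active.\n",
+    "\n",
+    "```python\n",
+    "# Hypothetical variation of the end of ObjectRedness.execute: score each polygon\n",
+    "# by how varied its hue is rather than by its mean redness.\n",
+    "polygon_pixels = full_hsv_image[mask == 1]  # unchanged: hue values inside the polygon\n",
+    "hue_std = polygon_pixels.std() / 90.0       # hue spans [0, 180], so the std is at most ~90\n",
+    "writer.write(float(hue_std), labels=obj)\n",
+    "```"
+   ]
+  },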
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "Thanks for now!"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# ⏭️ Next: Learn how to add custom embeddings to 🟣 Encord Active"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "What should you check out next? 👀 Learn how to add custom embeddings to Encord Active. The Colab notebook will cover:\n",
+ "\n",
+ "* Example code for **adding custom image and object embeddings** to your Encord Active project.\n",
+ "\n",
+ "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### ⬅️ [*Previous Notebook*](./Encord_Active_HuggingFace_Dataset_Exploration.ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./Encord_Active_Add_Custom_Embeddings.ipynb) *➡️*"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/colab-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb b/colab-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb
new file mode 100644
index 0000000..7052351
--- /dev/null
+++ b/colab-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb
@@ -0,0 +1,740 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Ix90mmYg-S_f"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 🟣 Encord Active | 🏗️ Building a Custom Metric Function"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## 🚀 Overview"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-n4-PmG0Kwxk"
+ },
+ "source": [
+ "Hi there, 👋.\n",
+ "\n",
+ "> ⚠️ **Prerequisites:** you should have `encord-active` [installed](https://docs.encord.com/active/docs/installation).\n",
+ "\n",
+ "Developing machine learning models is often (and should be) based on iterative hypothesis testing. Typically, you get some data and labels and train your first model. Then, you realise that the model is performing worse than you had hoped.\n",
+ "\n",
+ "Now, you start hypothesizing about what might be wrong. Perhaps you suspect that red objects make your model perform worse. So you define a hypothesis like:\n",
+ "\n",
+ "> Red objects have a significant impact on my model performance\n",
+ "\n",
+ "Traditionally, the next thing you would do is to write a script for filtering, ordering, and visualising your validation data as a function of the object colors.\n",
+ "Something like the code below.\n",
+ "\n",
+ "\n",
+ "> ⚠️ DISCLAIMER: The code below is just to show how much code you need to write to test your hypothesis. It's not meant to work or to be copied in any way!\n",
+ "\n",
+ "\n",
+ "Code block that you can safely hide\n",
+ "\n",
+ "\n",
+ "\n",
+ "```python\n",
+ "# DISCLAIMER: This is just to show how much code you need to write to test your hypothesis\n",
+ "# It's not meant to work or to be copied in any way!\n",
+ "\n",
+ "from functools import partial\n",
+ "\n",
+ "color_ordering = [] \n",
+ "acc = [] \n",
+ "\n",
+ "def compute_redness_of_objects(image, obj):\n",
+ "    # Some code to determine colors\n",
+ "    # color_metric = ...\n",
+ "    return color_metric\n",
+ "\n",
+ "for batch in validation_loader:\n",
+ "    for image, labels in batch:\n",
+ "        predictions = my_model(image)\n",
+ "\n",
+ "        acc += ...  # some hard-to-write code for matching predictions with labels\n",
+ "        color_ordering += list(map(partial(compute_redness_of_objects, image), predictions))\n",
+ "\n",
+ "color_ordering = np.array(color_ordering)\n",
+ "sorting = np.argsort(color_ordering)\n",
+ "color_ordering = color_ordering[sorting]\n",
+ "acc = np.array(acc)[sorting]\n",
+ "\n",
+ "# LOONG plotting code section for displaying samples, plots, and what not.\n",
+ "# ...\n",
+ "# ...\n",
+ "# ...\n",
+ "```\n",
+ " \n",
+ "\n",
+ "\n",
+ "When you're finally done writing code and plotting things, hopefully you can reach a conclusion regarding your hypothesis.\n",
+ "When you reach this point, you will most likely have many more hypotheses that you want to test and eventually also more models to evaluate.\n",
+ "Do we need to mention how painful it will be to extend the code above with new use cases, plots, etc.?\n",
+ "What if you, for example, wanted to know the same thing, not only for your predictions but also for the labels? What about false negatives? .. and so on.\n",
+ "\n",
+ "Encord Active solves this problem with a couple of points in focus:\n",
+ "\n",
+ "1. **Reusability:** You define your metric function once and then you can reuse it again and again.\n",
+ "2. **Isolation of functionality:** Since the metric function is defined in isolation from other metrics, you won't accidentally introduce errors in other functions, plots, etc.\n",
+ "3. **Iteration speed:** We've made it easy to implement your own metric function so that you can iterate faster.\n",
+ "4. **It's built from experience:** We have felt this pain many times and we have seen many of the common hypotheses that come up. We're building Encord Active to deal with all these common scenarios while being extensible enough to be tailored to your custom use case.\n",
+ "\n",
+ "Other points that we want to highlight are:\n",
+ "\n",
+ "1. Encord Active ships with a bunch of [pre-defined metrics](https://docs.encord.com/active/docs/category/metrics) that will automatically be run on your data when you import it.\n",
+ "2. When you've [imported your model predictions](https://docs.encord.com/active/docs/workflows/import-predictions), Encord Active will _automatically_ identify those metrics that are more important for your model performance.\n",
+ "\n",
+ "This 📓 notebook will take you through how to write such metric functions and use them with Encord Active.\n",
+ "\n",
+ " \n",
+ "\n",
+ "> 💡 Learn more about 🟣 Encord Active: \n",
+ "* [GitHub](https://github.com/encord-team/encord-active) \n",
+ "* [Docs](https://docs.encord.com/docs/active-overview)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# 📏 Defining a `Metric` sub-class"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ESEt8WweThsO"
+ },
+ "source": [
+ "\n",
+ "\n",
+ "Here, we'll give some detailed information on how a quality metric is defined.\n",
+ "\n",
+ "> **🌟 Info**: If you don't like abstract talk, you can skip directly to [the example below](#concrete-example) to see how to implement a specific metric.\n",
+ "\n",
+ "We have listed the entire stub below for defining a metric. Following right after is a breakdown of the different components."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from typing import List, Optional, Union\n",
+ "\n",
+ "from encord_active.lib.common.iterator import Iterator\n",
+ "from encord_active.lib.metrics.metric import Metric\n",
+ "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n",
+ "from encord_active.lib.metrics.writer import CSVMetricWriter\n",
+ "\n",
+ "class ExampleMetric(Metric):\n",
+ "    # === SECTION 1 === #\n",
+ "    def __init__(self):\n",
+ "        super().__init__(\n",
+ "            title=\"[the-name-of-your-metric]\",\n",
+ "            short_description=\"A short description of your metric.\",\n",
+ "            long_description=\"A longer and more detailed description. \" \\\n",
+ "                \"I can use Markdown to _format_ the text.\",\n",
+ "            metric_type=MetricType.GEOMETRIC,\n",
+ "            data_type=DataType.IMAGE,\n",
+ "            annotation_type=[AnnotationType.OBJECT.BOUNDING_BOX, AnnotationType.OBJECT.POLYGON],\n",
+ "        )\n",
+ "\n",
+ "    def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n",
+ "        valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n",
+ "\n",
+ "        for data_unit, image in iterator.iterate(desc=\"Progress bar description\"):\n",
+ "            # === SECTION 2 === #\n",
+ "            # Write a score for the image itself (data quality)\n",
+ "            writer.write(1337, description=\"Your description for the frame [can be omitted]\")\n",
+ "\n",
+ "            for obj in data_unit[\"labels\"].get(\"objects\", []):\n",
+ "                # === SECTION 3 === #\n",
+ "                # Label (object/classification) level score (label / model prediction quality)\n",
+ "                if not obj[\"shape\"] in valid_annotation_types:\n",
+ "                    continue\n",
+ "\n",
+ "                # Do your thing (inference)\n",
+ "                # ...\n",
+ "                # Then\n",
+ "                writer.write(42, labels=obj, description=\"Your description of the score [can be omitted]\")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "There are a couple of sections in the code above. \n",
+ "\n",
+ "`SECTION 1`: Is used for proper display of the values that the metric produces. The properties being set there are:\n",
+ "\n",
+ "1. `title`: The title of your metric. It will be used in data frames and the app to identify the metric.\n",
+ "2. `metric_type`: We distinguish between three categories of metrics:\n",
+ "    - `HEURISTIC`: operate on images or individual video frames and are heuristic in the sense that they mostly depend on the image content without labels.\n",
+ "    - `GEOMETRIC`: operate on the geometries of objects like bounding boxes, polygons, and polylines.\n",
+ "    - `SEMANTIC`: operate on the semantic information of images or individual video frames - for example, metrics based on NN embeddings would go here.\n",
+ "3. `data_type`: The type of data that the metric applies to.\n",
+ "    - `IMAGE`: Individual images without any temporal dependencies.\n",
+ "    - `SEQUENCE`: Consecutive frames where the order across frames matters.\n",
+ "4. `annotation_type`:\n",
+ "    - `NONE`: Doesn't need annotations (Data Quality).\n",
+ "    - `OBJECT`: A list of object types like polygon or bounding box that the metric works for.\n",
+ "    - `CLASSIFICATION`: A list of classification types like radio buttons and checkboxes that the metric works for.\n",
+ "    - `ALL`: All object and classification types. Could, for example, be used for annotation time.\n",
+ "5. `short_description`: Used in the UI.\n",
+ "6. `long_description`: Used in the UI.\n",
+ "\n",
+ "\n",
+ "`SECTION 2`: Is used for metric functions that yield one score for each frame. Note how the `writer.write(...)` specifies no objects.\n",
+ "\n",
+ "`SECTION 3`: Is used for metric functions that yield a score for each object / classification. For these metrics, `writer.write(...)` should be given the object or classification (via the `labels` argument) that should be associated with a given score.\n",
+ "\n",
+ "> _Note:_ Within one metric, stick to either writing scores with or without the `labels` argument in `writer.write(...)`. Mixing them up will confuse the app.\n",
+ "\n",
+ "### Using the iterator\n",
+ "When you call `iterator.iterate(...)`, you will get an iterator over all the data in a given dataset (see how to execute the metric [below](#execute)). Each item in the iterator is a tuple of a `data_unit` dictionary and a `pathlib.Path` to where the image can be loaded from. \n",
+ "\n",
+ "The `data_unit` dictionary has the following structure (there may be more or fewer `\"objects\"` and `\"classifications\"`):\n",
+ "\n",
+ "\n",
+ "data_unit example structure\n",
+ "\n",
+ "```python\n",
+ "{\n",
+ " \"data_hash\": \"595d9721-913b-45c9-8645-c3ebf8a6ae0b\",\n",
+ " \"data_title\": \"231822\",\n",
+ " \"data_type\": \"image/jpeg\",\n",
+ " \"data_sequence\": 0,\n",
+ " \"labels\": {\n",
+ " \"objects\": [\n",
+ " { # Example polygon\n",
+ " \"name\": \"Bottle\",\n",
+ " \"color\": \"#68BC00\",\n",
+ " \"shape\": \"polygon\",\n",
+ " \"value\": \"bottle\",\n",
+ " \"polygon\": {\n",
+ " \"0\": {\"x\": 0.9559, \"y\": 0.0038},\n",
+ " \"1\": {\"x\": 0.9356, \"y\": 0.1399},\n",
+ " \"2\": {\"x\": 0.9216, \"y\": 0.1982},\n",
+ " # ...\n",
+ " },\n",
+ " \"createdAt\": \"Thu, 25 Aug 2022 15:45:31 GMT\",\n",
+ " \"createdBy\": \"robot@cord.tech\",\n",
+ " \"confidence\": 1,\n",
+ " \"objectHash\": \"9728826c\",\n",
+ " \"featureHash\": \"671c61d7\",\n",
+ " \"lastEditedAt\": \"Thu, 25 Aug 2022 15:45:31 GMT\",\n",
+ " \"lastEditedBy\": \"robot@encord.com\",\n",
+ " \"manualAnnotation\": False,\n",
+ " },\n",
+ " { # Example bounding box\n",
+ " \"name\": \"Cyclist\",\n",
+ " \"color\": \"#DBDF00\",\n",
+ " \"shape\": \"bounding_box\",\n",
+ " \"value\": \"Cyclist\",\n",
+ " \"createdAt\": \"Wed, 23 Nov 2022 10:05:22 GMT\",\n",
+ " \"createdBy\": \"robot@encord.com\",\n",
+ " \"confidence\": 1.0,\n",
+ " \"objectHash\": \"t2KUSWgj\",\n",
+ " \"featureHash\": \"yJ+hgd0r\",\n",
+ " \"lastEditedAt\": \"Wed, 23 Nov 2022 10:05:22 GMT\",\n",
+ " \"lastEditedBy\": \"robot@encord.com\",\n",
+ " \"manualAnnotation\": True,\n",
+ " \"boundingBox\": {\n",
+ " \"h\": 0.2810061626666667,\n",
+ " \"w\": 0.0897509331723027,\n",
+ " \"x\": 0.4464461135265701,\n",
+ " \"y\": 0.443804288,\n",
+ " },\n",
+ " \"reviews\": [],\n",
+ " },\n",
+ " ],\n",
+ " \"classifications\": [\n",
+ " { # Example classification\n",
+ " \"name\": \"Classification Question\",\n",
+ " \"value\": \"classification-question\",\n",
+ " \"createdAt\": \"Fri, 11 Nov 2022 09:41:21 GMT\",\n",
+ " \"createdBy\": \"robot@cord.tech\",\n",
+ " \"confidence\": 1,\n",
+ " \"featureHash\": \"MTYzMTkx\",\n",
+ " \"classificationHash\": \"sHNoiYPw\",\n",
+ " \"manualAnnotation\": True,\n",
+ " \"reviews\": [],\n",
+ " }, \n",
+ " # ...\n",
+ " ],\n",
+ " },\n",
+ " \"data_link\": \"...\",\n",
+ " \"width\": 500,\n",
+ " \"height\": 361,\n",
+ "}\n",
+ "```\n",
+ "\n",
+ "\n",
+ "> _💡 Hint:_ You can inspect the entire structure by looking in the file `/path/to/project/data//label_row.json`.\n",
+ "\n",
+ "> 📝 _Note:_ To find the actual answers to classification questions, you access `iterator.label_rows[iterator.label_hash][\"classification_answers\"][]`.\n",
+ "\n",
+ "> 📝 _Note:_ If you are computing metrics based on temporal aspects, the `iterator.frame` will tell you what frame of a sequence you are currently looking at and the `iterator.label_hash` will give you the unique id of the sequence."
+ ]
+ },
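+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To make the pieces above concrete, here is a minimal, hypothetical frame-level metric sketch. The class name `FrameAspectRatio` and the score it writes are made up for illustration; it only relies on the `width` and `height` fields shown in the `data_unit` example above and writes one score per frame, as described for `SECTION 2` (no `labels` argument). The exact enum member spellings (e.g. `AnnotationType.NONE`) follow the option list above and are worth double-checking against your installed version:\n",
+ "\n",
+ "```python\n",
+ "from encord_active.lib.common.iterator import Iterator\n",
+ "from encord_active.lib.metrics.metric import Metric\n",
+ "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n",
+ "from encord_active.lib.metrics.writer import CSVMetricWriter\n",
+ "\n",
+ "\n",
+ "class FrameAspectRatio(Metric):\n",
+ "    def __init__(self):\n",
+ "        super().__init__(\n",
+ "            title=\"Frame Aspect Ratio\",\n",
+ "            short_description=\"Width divided by height for each frame.\",\n",
+ "            long_description=\"Width divided by height for each frame.\",\n",
+ "            metric_type=MetricType.HEURISTIC,\n",
+ "            data_type=DataType.IMAGE,\n",
+ "            annotation_type=AnnotationType.NONE,  # no annotations needed (data quality)\n",
+ "        )\n",
+ "\n",
+ "    def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n",
+ "        for data_unit, image in iterator.iterate(desc=\"Computing aspect ratios\"):\n",
+ "            # iterator.frame and iterator.label_hash are also available here\n",
+ "            # if you need temporal or sequence-level information.\n",
+ "            writer.write(data_unit[\"width\"] / data_unit[\"height\"])\n",
+ "```"
+ ]
+ },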
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "\n",
+ "### 👟 Executing a metric\n",
+ "\n",
+ "When you have implemented a metric function, you can run it using the following code snippet:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "from encord_active.lib.metrics.execute import execute_metrics\n",
+ "from encord_active.lib.model_predictions.iterator import PredictionIterator\n",
+ "\n",
+ "target = Path(\"/path/to/your/project\") # TODO UPDATE\n",
+ "\n",
+ "execute_metrics([ExampleMetric()], data_dir=target, use_cache_only=True) # for labels\n",
+ "execute_metrics([ExampleMetric()], data_dir=target, iterator_cls=PredictionIterator, use_cache_only=True) # for predictions (only makes sense to do if your metric applies to labels)\n",
+ "\n",
+ "# Wrap this entire code block in a \n",
+ "# `if __name__ == \"__main__\":`\n",
+ "# and put it in the bottom of your metric file if you want to be able to run\n",
+ "# python your_metric.py"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The `target` variable points to the directory that contains the Encord Active project that you want to run the metric on.\n",
+ "This directory should, for example, contain a `project-meta.yaml`.\n",
+ "\n",
+ "> Info: The `use_cache_only` argument tells Encord Active to not try and download more data via the Encord SDK.\n",
+ "\n",
+ "Having covered the overall structure, let's dive into a concrete example."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ESEt8WweThsO",
+ "tags": []
+ },
+ "source": [
+ "\n",
+ "# 🦮 Concrete walkthrough Example\n",
+ "\n",
+ "> 💡 Hint: We refer to line numbers. In most notebooks, you can enable line numbers in the \"View\" options.\n",
+ "\n",
+ "In this example, you'll continue the idea of testing the model performance as a function of the \"redness\" of individual objects. \n",
+ "Specifically, you will use the annotations/predictions to extract the image patches that contain an object and compute the mean Hue value of each patch.\n",
+ "\n",
+ "To get started, let's have a look at the [HSV color space](https://en.wikipedia.org/wiki/HSL_and_HSV), which is great for color filtering.\n",
+ "The following code indicates how different Hue (the H from HSV) values correspond to different colors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "import matplotlib.pyplot as plt\n",
+ "import cv2\n",
+ "import numpy as np\n",
+ "\n",
+ "def get_img(H: int):\n",
+ " \"\"\"\n",
+ " Make image with specific Hue color and convert it to RGB for plotting.\n",
+ " \"\"\"\n",
+ " img = np.ones((20, 20, 3), dtype=np.uint8)\n",
+ " img[..., 0] = H\n",
+ " img[..., 1] = 255\n",
+ " img[..., 2] = 150 \n",
+ " return cv2.cvtColor(img, cv2.COLOR_HSV2RGB)\n",
+ "\n",
+ "# Hue ranges from 0 to 180 and \"wraps\" around.\n",
+ "hues = np.linspace(0, 179, 18, dtype=np.uint8)\n",
+ "imgs = [get_img(i) for i in hues]\n",
+ "\n",
+ "fig, ax = plt.subplots(2, 9, figsize=(10, 3))\n",
+ "ax = ax.reshape(-1)\n",
+ "\n",
+ "# Plot the colors\n",
+ "for img, a, h in zip(imgs, ax, hues):\n",
+ " a.set_title(f\"Hue: {h}\")\n",
+ " a.axis('off')\n",
+ " a.imshow(img)\n",
+ "\n",
+ "fig.tight_layout()\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "👉 Note how the first and the last images are very red but have very different hue values. \n",
+ "This is because of the \"circular\" / \"wrap-around\" nature of the color space. \n",
+ "Let's account for that by computing a value that maps red colors close to zero and other colors closer to one."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "def transform_hue(H: int):\n",
+ " return (90 - np.abs(H - 90)) / 90\n",
+ "\n",
+ "# Plotting\n",
+ "fig, ax = plt.subplots(2, 9, figsize=(10, 3))\n",
+ "ax = ax.reshape(-1)\n",
+ "\n",
+ "for img, a, h in zip(imgs, ax, hues):\n",
+ " t = transform_hue(h)\n",
+ " a.set_title(f\"Transf.: {t:.2f}\")\n",
+ " a.imshow(img)\n",
+ " a.axis('off')\n",
+ "fig.tight_layout()"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "Alright, this looks better.\n",
+ "The transformed value is a better candidate for our metric function.\n",
+ "\n",
+ "Next, let's use this to crop out the relevant parts of polygon annotations and compute their mean (transformed) hue values.\n",
+ "\n",
+ "We define a `Metric` subclass and compute the transformed hue value for each object to see how red it is."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "cItsCLacS2Gx",
+ "outputId": "39d039b2-03b9-4b2b-e9ce-d8b3dbf61745",
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from typing import Optional\n",
+ "\n",
+ "import cv2\n",
+ "import numpy as np\n",
+ "from encord_active.lib.common import utils\n",
+ "from encord_active.lib.common.iterator import Iterator\n",
+ "from encord_active.lib.metrics.metric import Metric\n",
+ "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n",
+ "from encord_active.lib.metrics.writer import CSVMetricWriter\n",
+ "from loguru import logger\n",
+ "\n",
+ "\n",
+ "class ObjectRedness(Metric):\n",
+ " def __init__(self):\n",
+ " super().__init__(\n",
+ " title=\"Polygon Average Hue\",\n",
+ " short_description=\"Compute the average Hue value of the pixels contained within each polygon.\",\n",
+ " long_description=r\"\"\"Crops out the pixels associated to each object and computes the (transformed)\n",
+ "Hue value of each object.\n",
+ "\n",
+ "The transform \"breaks\" the wrap-around of the Hue color space, so Hue values in the range [0, 180] map to [0, 1] as follows:\n",
+ "\n",
+ "```\n",
+ "H: [0, 45, 90, 135, 179]\n",
+ "t(H): [0, 0.5, 1, 0.5, 0+e]\n",
+ "```\n",
+ "\"\"\",\n",
+ "            metric_type=MetricType.SEMANTIC,\n",
+ " data_type=DataType.IMAGE,\n",
+ " annotation_type=[AnnotationType.OBJECT.POLYGON],\n",
+ " )\n",
+ "\n",
+ " def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n",
+ " valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n",
+ "\n",
+ "        # Iterate over all images in the project\n",
+ " for data_unit, image in iterator.iterate(desc=\"Custom progress description\"):\n",
+ " # Convert image to the HSV color space\n",
+ " full_image = np.array(image)\n",
+ " full_hsv_image = cv2.cvtColor(full_image, cv2.COLOR_RGB2HSV)[...,0] # Take only the hue channel\n",
+ " img_h, img_w = full_hsv_image.shape[:2]\n",
+ " \n",
+ " for obj in data_unit[\"labels\"].get(\"objects\", []):\n",
+ " if not obj[\"shape\"] in valid_annotation_types:\n",
+ " continue # Only use polygons\n",
+ " \n",
+ " # The `get_geometry_from_encord_object` function will get us a numpy array of xy coordinates.\n",
+ " poly: Optional[np.ndarray] = utils.get_geometry_from_encord_object(obj, w=img_w, h=img_h) # [n, d]\n",
+ " if poly is None:\n",
+ " continue\n",
+ " \n",
+ " # Check that the polygon takes up at least one pixel\n",
+ " ymi, xmi = poly.min(0)\n",
+ " yma, xma = poly.max(0)\n",
+ " \n",
+ " if ymi == yma or xmi == xma:\n",
+ " continue # Empty polygon\n",
+ " \n",
+ " # Draw mask from polygon\n",
+ " mask = np.zeros((img_h, img_w), dtype=np.uint8)\n",
+ " mask = cv2.fillPoly(mask, [poly], 1)\n",
+ " \n",
+ " polygon_pixels = full_hsv_image[mask==1] # Take only pixels within polygon\n",
+ " transformed_mean_hue = transform_hue(polygon_pixels.mean())\n",
+ " writer.write(transformed_mean_hue.item(), labels=obj)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Okay, so you have defined your metric which extracts the pixels of each polygon and computes the average (transformed) hue value of those pixels.\n",
+ "The next step will then be to apply the metric to your data.\n",
+ "\n",
+ "In the next code cell, you'll download one of the sandbox datasets, but you can also point the metric to your own dataset by setting the `target` path below to point to the root of your project directory."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 📩 Download the \"quickstart\" sandbox dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "!encord-active download --project-name quickstart"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You should now be able to see the quickstart directory in the `File Browser`. \n",
+ "Apply your metric to that project."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "from encord_active.lib.metrics.execute import execute_metrics\n",
+ "\n",
+ "target = Path(\"quickstart\")\n",
+ "\n",
+ "# Apply metric to labels\n",
+ "execute_metrics([ObjectRedness()], data_dir=target, use_cache_only=True)\n",
+ "\n",
+ "# For predictions (only makes sense to do if your metric applies to labels)\n",
+ "from encord_active.lib.model_predictions.iterator import PredictionIterator\n",
+ "from encord_active.lib.model_predictions.writer import MainPredictionType\n",
+ "execute_metrics([ObjectRedness()], data_dir=target, iterator_cls=PredictionIterator, use_cache_only=True, prediction_type=MainPredictionType.OBJECT)\n",
+ "\n",
+ "# Wrap this entire code block in a \n",
+ "# `if __name__ == \"__main__\":`\n",
+ "# and put it in the bottom of your metric file if you want to be able to run\n",
+ "# python your_metric.py"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To see the results, you can run the app with the project as the target:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!encord-active start -t \"quickstart\""
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For the quickstart dataset, the \"Polygon Average Hue\" metric that we just defined seems to have little or no influence on the model performance - based on the \"Metric Importance\" chart on the \"Model Quality -> Metrics\" page.\n",
+ "However, if you filter by the person class in the settings panel at the top, you will see that the redness of objects does seem to have an effect on the model performance.\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# ✅ Wrap Up: Next Steps"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The next steps from here could be many.\n",
+ "You have seen that the redness of objects is generally not very important for the model performance - which is a good thing: one less thing to worry about.\n",
+ "\n",
+ "From here, you could go on to define a new custom metric function to test your next hypothesis.\n",
+ "Some things that would be simple to test now that you have your first custom metric in place are, e.g., the standard deviation of the colors within an object, the saturation, or other colors. Each of these metrics would only require changing the line that computes `transformed_mean_hue` in the metric definition above.\n",
+ "\n",
+ "Of course, you should keep all the metrics that you define to make sure that the redness of objects doesn't turn into a problem at a later stage of model development."
+ ]
+ },
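+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a hedged sketch of one such follow-up (the helper name `mean_saturation` is made up for illustration, and `full_image`, `mask`, and `obj` are the variables from `ObjectRedness.execute` above), scoring each object by its mean saturation instead of its hue could look like this:\n",
+ "\n",
+ "```python\n",
+ "import cv2\n",
+ "import numpy as np\n",
+ "\n",
+ "def mean_saturation(rgb_image: np.ndarray, mask: np.ndarray) -> float:\n",
+ "    \"\"\"Mean saturation (normalised to [0, 1]) of the pixels where mask == 1.\"\"\"\n",
+ "    saturation = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2HSV)[..., 1]\n",
+ "    return float(saturation[mask == 1].mean() / 255)\n",
+ "\n",
+ "# In ObjectRedness.execute you would then replace the hue computation with:\n",
+ "# writer.write(mean_saturation(full_image, mask), labels=obj)\n",
+ "```"
+ ]
+ },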
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active) and leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something missing.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "Thanks for now!"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# ⏭️ Next: Learn how to add custom embeddings to 🟣 Encord Active"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "What should you check out next? 👀 Learn how to add custom embeddings to Encord Active. The Colab notebook will cover:\n",
+ "\n",
+ "* Example code for **adding custom image and object embeddings** to your Encord Active project.\n",
+ "\n",
+ "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### ⬅️ [*Previous Notebook*](./Encord_Active_HuggingFace_Dataset_Exploration.ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./Encord_Active_Add_Custom_Embeddings.ipynb) *➡️*"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/local-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb b/local-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb
new file mode 100644
index 0000000..fcaafda
--- /dev/null
+++ b/local-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb
@@ -0,0 +1,725 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Ix90mmYg-S_f"
+ },
+ "source": [
+ "