diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7901047 --- /dev/null +++ b/.gitignore @@ -0,0 +1,393 @@ +# Created by https://www.toptal.com/developers/gitignore/api/vim,osx,python,windows,pycharm,jupyternotebooks +# Edit at https://www.toptal.com/developers/gitignore?templates=vim,osx,python,windows,pycharm,jupyternotebooks + +### JupyterNotebooks ### +# gitignore template for Jupyter Notebooks +# website: http://jupyter.org/ + +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# local notebook identifiers +*.ipynb:Zone.Identifier + +# IPython +profile_default/ +ipython_config.py + +# Remove previous ipynb_checkpoints +# git rm -r .ipynb_checkpoints/ + +### OSX ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### PyCharm ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### PyCharm Patch ### +# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721 + +# *.iml +# modules.xml +# .idea/misc.xml +# *.ipr + +# Sonarlint plugin +# https://plugins.jetbrains.com/plugin/7973-sonarlint +.idea/**/sonarlint/ + +# SonarQube Plugin +# https://plugins.jetbrains.com/plugin/7238-sonarqube-community-plugin +.idea/**/sonarIssues.xml + +# Markdown Navigator plugin +# https://plugins.jetbrains.com/plugin/7896-markdown-navigator-enhanced +.idea/**/markdown-navigator.xml +.idea/**/markdown-navigator-enh.xml +.idea/**/markdown-navigator/ + +# Cache file creation bug +# See https://youtrack.jetbrains.com/issue/JBR-2257 +.idea/$CACHE_FILE$ + +# CodeStream plugin +# https://plugins.jetbrains.com/plugin/12206-codestream +.idea/codestream.xml + +# Azure Toolkit for IntelliJ plugin +# https://plugins.jetbrains.com/plugin/8053-azure-toolkit-for-intellij +.idea/**/azureSettings.xml + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook + +# IPython + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### Vim ### +# Swap +[._]*.s[a-v][a-z] +!*.svg # comment out if you don't need vector files +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim +Sessionx.vim + +# Temporary +.netrwhist +*~ +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +# End of https://www.toptal.com/developers/gitignore/api/vim,osx,python,windows,pycharm,jupyternotebooks + +.gitignore + +*.Identifier + +/local-tests/ + +*.pub + +notebooks/download-sandbox-dataset.ipynb +notebooks/building-a-custom-metric-function_hold.ipynb +notebooks/getting-started-with-encord-projects.ipynb +notebooks/ssh_key +notebooks/getting-started-with-coco-project.ipynb diff --git a/README.md b/README.md index cc28778..bd45316 100644 --- a/README.md +++ b/README.md @@ -15,8 +15,10 @@ PyPi project PyPi version -docs -"Join us on Discord" +docs + +Join us on Slack + "Encord Notebooks; @@ -119,11 +121,11 @@ pip install -r requirements.txt | **📓 Encord Notebook** | **🚀 Launch Notebook** | **📰 Description** | **📺 Video Explainer** | **💡Other Resources** | | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------: | 
:-----------------------------------------------------------------------------------------------------------: | -----------------------------------------------------------------------------------------------------------------------------------------: |
-| [Encord Notebooks - 📥 Explore Encord Active's 0.1.70 Native UI](./notebooks/Encord_Notebooks__demo_ea_native_display.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/notebooks/Encord_Notebooks__demo_ea_native_display.ipynb) | This notebook shows you a quick way to test Encord Active with a sandbox project and without installing anything locally. | | ▶️ [Encord Active Documentation ](https://docs.encord.com/docs/active-getting-started) |
-| [ Encord Active - Download Sandbox Project](./notebooks/01_Encord_Active_Notebooks___Download_sandbox_project.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1l4K-QPOqBC4mv2SGADEe2erd5nhFVjoU?usp=sharing) | This notebook gives you a quick way to test 🟣 Encord Active with a sandbox project and without installing anything locally. | | 📑 [Encord Active Documentation - Touring the Coco Sandbox Dataset](https://docs.encord.com/active/docs/tutorials/touring-the-coco-dataset) |
+| [Encord Notebooks - 📥 Explore Encord Active's 0.1.75 Native UI](./local-notebooks/Encord_Notebooks__demo_ea_native_display.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Notebooks__demo_ea_native_display.ipynb) | This notebook shows you a quick way to test Encord Active with a sandbox project and without installing anything locally. | | ▶️ [Encord Active Documentation ](https://docs.encord.com/docs/active-getting-started) |
+| [ Encord Active - Download Sandbox Project](./local-notebooks/01_Encord_Active_Notebooks___Download_sandbox_project.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/01_Encord_Active_Notebooks_Download_sandbox_project.ipynb) | This notebook gives you a quick way to test 🟣 Encord Active with a sandbox project and without installing anything locally. 
| | 📑 [Encord Active Documentation - Touring the Coco Sandbox Dataset](https://docs.encord.com/docs/active-touring-coco-dataset) | | [ Encord Active - Getting Started with Encord Projects](./notebooks/02_Encord_Active___Import_project_(self_hosting).ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1pchKiSZPiu2ENp0pr7iSs3L4JqO6cNAr?usp=sharing) | This 📓 notebook shows you how to import existing Encord projects into Encord Active | ▶️ [How to Create an Annotation Project](https://encord.com/learning-hub/how-to-create-an-annotation-project/) | 📑 [Encord Documentation - Projects Overview](https://docs.encord.com/projects/projects-overview) | -| [ Encord Active - 🏗️ Building a Custom Metric Function](./notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1oLA-EnKTtrYHJRn1KNjglWDAVUsxD6bq?usp=sharing) | This 📓 notebook will take you through how to write such metric functions and use them with Encord Active | | 📑 [Encord Documentation - Writing Custom Quality Metric](https://docs.encord.com/active/docs/metrics/write-your-own) | -| [ Encord Active - Add Custom Embeddings](./notebooks/Encord_Active_Add_Custom_Embeddings.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1mYEF2K-5Yp76cRaq-HOKT19UeUcDK3tu?usp=sharing) | In this 📓 notebook, learn about the three different types of embeddings in Encord Active and how to use them | | +| [ Encord Active - 🏗️ Building a Custom Metric Function](./local-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb) | This 📓 notebook will take you through how to write such metric functions and use them with Encord Active | | 📑 [Encord Documentation - Writing Custom Quality Metric](https://docs.encord.com/active/docs/metrics/write-your-own) | +| [ Encord Active - Add Custom Embeddings](./local-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb) | In this 📓 notebook, learn about the three different types of embeddings in Encord Active and how to use them | | | [ Encord Notebooks - 🧵 Generate Encord Active Model Segmentation Masks Using Encord Annotate Micro-Models](./Import-Encord-Active-Model-Predictions/Micromodels-generate-segmentation-predictions.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CBTUgowsUCm9JGMP_mbQ7GkIfbSqRh4z?usp=sharing) | In this short notebook walkthrough, learn how to generate segmentation masks with Encord Active using Micro-Models in Encord Annotate | | ▶️ [Encord Learning Hub - How to Build a Micro-model](https://encord.com/learning-hub/how-to-build-a-micro-model/) | | [ Encord Notebooks - 📦 Generate Encord Active Model Object Detection Boxes Using Encord Annotate Micro-Models](./Import-Encord-Active-Model-Predictions/Micromodels-generate-detection-predictions.ipynb) | 
[![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1nwdSHFonQBEYEhywLfQPkMMya0xTOjdc?usp=sharing) | In this short notebook walkthrough, learn how to generate bounding boxes with Encord Active using Micro-Models in Encord Annotate | | ▶️ [Encord Learning Hub - How to Build a Micro-model](https://encord.com/learning-hub/how-to-build-a-micro-model/) | @@ -138,8 +140,8 @@ pip install -r requirements.txt | **📓 Encord Notebook** | **🚀 Launch Notebook** | **📰 Description** | **📺 Video Explainer** | **💡Other Resources** | | :--------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------: | --------------------------------------------------------------------------------------------------------------------------------------: | -| [ Encord Active - 🔦 Torchvision Dataset Exploration](./notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1zVIyqsF5fpDNsjFLLKPvI2VXPSMdQ2T5?usp=sharing) | In this notebook, you will use Encord Active to explore the quality of a dataset from the built-in samples in the `torchvision.datasets` module | | [Encord Active Documentation - Exploring data distribution](https://docs.encord.com/active/docs/workflows/understand-data-distribution) | -| [ Encord Active - 🤗 HuggingFace Dataset Exploration](./notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Ohsd1BrO6s9HuliYdHqMsIblaR9KXbpk?usp=sharing) | In this notebook, you will use Encord Active to explore the quality of a dataset from the Hugging Face Datasets library | | [Encord Active Documentation - Exploring data distribution](https://docs.encord.com/active/docs/workflows/understand-data-distribution) | +| [ Encord Active - 🔦 Torchvision Dataset Exploration](./local-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb) | In this notebook, you will use Encord Active to explore the quality of a dataset from the built-in samples in the `torchvision.datasets` module | | [Encord Active Documentation - Exploring data distribution](https://docs.encord.com/active/docs/workflows/understand-data-distribution) | +| [ Encord Active - 🤗 HuggingFace Dataset Exploration](./local-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb) | In this notebook, you will use Encord Active to explore the quality of a dataset from the Hugging Face Datasets library | | [Encord Active Documentation - Exploring data distribution](https://docs.encord.com/docs/active-exploring-data-and-label-distributions) |
@@ -152,7 +154,7 @@ pip install -r requirements.txt | **📓 Encord Notebook** | **🚀 Launch Notebook** | **📰 Description** | **📺 Video Explainer** | **💡Other Resources** | | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------: | ------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| [ Encord Notebooks - 🆚 Grounding-DINO+SAM vs. Mask-RCNN](./notebooks/Encord_Notebooks_Team_gDINO+SAM_vs_maskrcnn_webinar.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1MKlB0AU8yaCwSvnaAPO3B1sHHP9xcsM1?usp=sharing) | In this notebook file, you will get and evaluate the segmentation predictions of images using Grounding-DINO and Segment Anything Model (SAM) | ▶️ [Encord Learning Hub - Are VFMs on par with SOTA?](https://encord.com/learning-hub/are-vfms-on-par-with-sota/) | • [ Encord Notebooks - 🔧 Zero-Shot Image Segmentation with Grounding-DINO + Segment Anything Model (SAM)](./notebooks/Encord_Notebooks_Zero_shot_image_segmentation_with_grounding_dino_and_sam.ipynb)
• 📖 [Encord Blog - Grounding-DINO + Segment Anything Model (SAM) vs Mask-RCNN: A comparison](https://encord.com/blog/grounding-dino-sam-vs-mask-rcnn-comparison/) | +| [ Encord Notebooks - 🆚 Grounding-DINO+SAM vs. Mask-RCNN](./local-notebooks/Encord_Notebooks_Team_gDINO+SAM_vs_maskrcnn_webinar.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1MKlB0AU8yaCwSvnaAPO3B1sHHP9xcsM1?usp=sharing) | In this notebook file, you will get and evaluate the segmentation predictions of images using Grounding-DINO and Segment Anything Model (SAM) | ▶️ [Encord Learning Hub - Are VFMs on par with SOTA?](https://encord.com/learning-hub/are-vfms-on-par-with-sota/) | • [ Encord Notebooks - 🔧 Zero-Shot Image Segmentation with Grounding-DINO + Segment Anything Model (SAM)](./local-notebooks/Encord_Notebooks_Zero_shot_image_segmentation_with_grounding_dino_and_sam.ipynb)
• 📖 [Encord Blog - Grounding-DINO + Segment Anything Model (SAM) vs Mask-RCNN: A comparison](https://encord.com/blog/grounding-dino-sam-vs-mask-rcnn-comparison/) |
@@ -166,8 +168,8 @@ pip install -r requirements.txt | **📓 Encord Notebook** | **🚀 Launch Notebook** | **📰 Description** | **📺 Video Explainer** | **💡Other Resources** | | :-------------------------------------------------------------------------------------------------------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------: | :-------------------: | -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| [ Encord Notebooks - 🔧 How to fine-tune Segment Anything Model (SAM)](./notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1XeMSjS7F4QTTI0BSo0MJ6oA7Aj9Pz_UD?usp=sharing) | This is the notebook gives you a walkthrough on fine-tuning Segment Anything Model (SAM) to a specific application | | • 📖 [Encord's Blog - How To Fine-Tune Segment Anything](https://encord.com/blog/learn-how-to-fine-tune-the-segment-anything-model-sam/)
• ▶️ [Encord Learning Hub - How to use SAM to Automate Data Labeling](https://encord.com/learning-hub/how-to-use-sam-to-automate-data-labeling/)
• 📖 [Encord's Blog - Meta AI's New Breakthrough: Segment Anything Model (SAM) Explained](https://encord.com/blog/segment-anything-model-explained/)
• 📖 [Segment Anything (SAM) is live in Encord](https://encord.com/blog/segment-anything-live-in-encord/). |
-| [ Encord Notebooks - 🔎 Building Semantic Search for Visual Data](./notebooks/Encord_Notebooks_Building_Semantic_Search_for_Visual_Data.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/13SjdECFWlTZVgXGazxOBcNSjqR5C0b8a?usp=sharing) | In this notebook, you will build a semantic search engine using CLIP and ChatGPT | ▶️ [Encord's YouTube Channel - How to build Semantic Visual Search with ChatGPT and CLIP](https://youtu.be/_thRPX91WLM) | ▶️ [Webinar - Webinar: How to build Semantic Visual Search with ChatGPT and CLIP](https://encord.com/blog/webinar-semantic-visual-search-chatgpt-clip/) |
+| [ Encord Notebooks - 🔧 How to fine-tune Segment Anything Model (SAM)](./local-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/encord-team/encord-notebooks/blob/main/colab-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb) | This notebook gives you a walkthrough on fine-tuning the Segment Anything Model (SAM) for a specific application | | • 📖 [Encord's Blog - How To Fine-Tune Segment Anything](https://encord.com/blog/learn-how-to-fine-tune-the-segment-anything-model-sam/)
• ▶️ [Encord Learning Hub - How to use SAM to Automate Data Labeling](https://encord.com/learning-hub/how-to-use-sam-to-automate-data-labeling/)
• 📖 [Encord's Blog - Meta AI's New Breakthrough: Segment Anything Model (SAM) Explained](https://encord.com/blog/segment-anything-model-explained/)
• 📖 [Segment Anything (SAM) is live in Encord](https://encord.com/blog/segment-anything-live-in-encord/). | +| [ Encord Notebooks - 🔎 Building Semantic Search for Visual Data](./local-notebooks/Encord_Notebooks_Building_Semantic_Search_for_Visual_Data.ipynb) | [![Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/13SjdECFWlTZVgXGazxOBcNSjqR5C0b8a?usp=sharing) | In this notebook, you will build a semantic search engine using CLIP and ChatGPT | ▶️ [Encord's YouTube Channel - How to build Semantic Visual Search with ChatGPT and CLIP](https://youtu.be/_thRPX91WLM) | ▶️ [Webinar - Webinar: How to build Semantic Visual Search with ChatGPT and CLIP](https://encord.com/blog/webinar-semantic-visual-search-chatgpt-clip/) | @@ -234,7 +236,7 @@ We follow a [code of conduct](https://github.com/encord-team/encord-active/blob/ * If you plan to work on an issue, mention so in the [issue page](https://github.com/encord-team/encord-notebooks/issues) before you start working on it. * If you have an idea for a notebook or tutorial, kindly create an issue and share it with other community members/maintainers. -* Ask for help in our [Discord community](https://discord.gg/TU6yT7Uvx3). +* Ask for help in the [Active community](https://join.slack.com/t/encordactive/shared_invite/zt-1hc2vqur9-Fzj1EEAHoqu91sZ0CX0A7Q). * Please include the file name and a brief description of any spelling or text changes. The reviewers may struggle to identify corrections. Please ensure that your contributions align with the repository's goals and adhere to the project's license. diff --git a/colab-notebooks/01_Encord_Active_Notebooks_Download_sandbox_project.ipynb b/colab-notebooks/01_Encord_Active_Notebooks_Download_sandbox_project.ipynb new file mode 100644 index 0000000..1bb67fd --- /dev/null +++ b/colab-notebooks/01_Encord_Active_Notebooks_Download_sandbox_project.ipynb @@ -0,0 +1,442 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "oWG683Ze9f5R" + }, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "4eF7Fn8lBiN8" + }, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TxZTJQyoeK6m" + }, + "source": [ + "# 🟣 Encord Active | 📥 Download Sandbox Project\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "OPYaECRBKYig" + }, + "source": [ + "## 🏁 Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "VrrgT0Ka9jk_" + }, + "source": [ + "👋 Hi there! This notebook gives you a quick way to test Encord Active with a sandbox project and without installing anything locally.\n", + "\n", + "This 📒 notebook will cover:\n", + "* Installing Encord Active\n", + "* Choosing a sandbox project\n", + "* Starting the Encord Active app with the sandbox project\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active: \n", + "* [GitHub](https://github.com/encord-team/encord-active) \n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "WO8IUmYu47a0" + }, + "source": [ + "## 📥 Install 🟣 Encord-Active\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ByT_wNp6CYEr" + }, + "source": [ + "👟 Run the following script to install [Encord Active](https://docs.encord.com/active/docs/).\n", + "\n", + "\n", + "📌 `python3.9`, `python3.10`, and `python3.11` are the version requirements to run Encord Active." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r6AfzNEESQAX" + }, + "outputs": [], + "source": [ + "# Assert that python is 3.9 or 3.10 instead\n", + "import sys\n", + "assert sys.version_info.minor in [9, 10, 11], \"Encord Active only supported for python 3.9, 3.10, and 3.11.\"\n", + "%pip install -qq encord-active" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "zLjGzp9fmBqE" + }, + "source": [ + "# 🗂️ Import libraries and other utilities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "from encord_active.lib.project.sandbox_projects import fetch_prebuilt_project" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "r37LQkresX2w" + }, + "source": [ + "# 🗃️ Choose a sandbox project\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "aS7qqy8s-2UB" + }, + "source": [ + "⏩ You can choose between four different sandbox projects with open datasets:\n", + "\n", + "* [COCO-2017 Validation](https://paperswithcode.com/dataset/coco) (~5000 samples, ~41k annotations)\n", + "* [BDD Dataset Validation](https://bdd-data.berkeley.edu/) (~1000 samples, ~13k annotations)\n", + "* [Covid-19 Segmentation](https://paperswithcode.com/task/covid-19-image-segmentation) (~100 samples, ~600 annotations)\n", + "* [Limuc Ulcerative Classification](https://paperswithcode.com/dataset/limuc) (~1686 samples, no annotations)\n", + "\n", + "
\n", + "\n", + "🙋 If you're here for the first time, we recommend you to start with the COCO-2017 Validation dataset." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 🗃️ Choose a sandbox project you want to get started with" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sPBamQLfEt2P", + "outputId": "c9a5570e-faa7-4207-9228-1b3fc9f515b5" + }, + "outputs": [], + "source": [ + "from IPython.display import display, Markdown\n", + "\n", + "project = 'COCO-2017 Validation' \n", + "\n", + "project_names = {\n", + " 'COCO-2017 Validation': \"[open-source][validation]-coco-2017-dataset\",\n", + " 'Limuc Ulcerative Classification': \"[open-source][test]-limuc-ulcerative-colitis-classification\",\n", + " 'Covid-19 Segmentation': \"[open-source]-covid-19-segmentations\",\n", + " 'BDD Dataset Validation': \"[open-source][validation]-bdd-dataset\"\n", + "}\n", + "\n", + "if project in project_names:\n", + " project_name = project_names[project]\n", + " print(\"Great! You chose the\", project, \"dataset.\")\n", + "else:\n", + " print(\"Invalid project selection.\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "37f1HUrZ5k8B" + }, + "source": [ + "## 📩 Download sandbox project and start the app\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "💪 Lastly, download the sandbox project, including the dataset, and start the application.\n", + "\n", + "💡 The Encord Active UI is made with Streamlit. Encord Active fetches the dataset and pre-built image, downloads it locally, and the `start` command starts the Streamlit app on the backend." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Choose where to store the data\n", + "project_path = Path.cwd() / project_name\n", + "\n", + "# Download the dataset\n", + "fetch_prebuilt_project(project_name, project_path)\n", + "\n", + "# Open the app\n", + "#!encord-active start -t \"$project_path\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ♾️ Open the app inside Colab" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from google.colab.output import eval_js\n", + "from IPython.display import Javascript\n", + "\n", + "# Trick to be able to show the FE in a cell output.\n", + "def show_url(url: str, height=400):\n", + " display(Javascript(\"\"\"\n", + " (async ()=>{{\n", + " fm = document.createElement('iframe')\n", + " fm.src = '%s'\n", + " fm.width = '95%%'\n", + " fm.height = '%d'\n", + " fm.frameBorder = 0\n", + " document.body.append(fm)\n", + " }})();\n", + " \"\"\" % (url, height) ))\n", + "\n", + "# Proxy for FE and BE\n", + "fe_url = eval_js(\"google.colab.kernel.proxyPort(8000)\")\n", + "be_url = eval_js(\"google.colab.kernel.proxyPort(8001)\")\n", + "\n", + "# Start encord active in the background\n", + "get_ipython().system_raw(f\"ENV=packaged API_URL='{be_url}' ALLOWED_ORIGIN='{fe_url}' encord-active start &\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Show EA in a cell\n", + "show_url(fe_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!echo \"Alternatively use this link: {fe_url} to open the fronted in a new tab\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "👉 Click on the `Network URL: ` to access the application in your browser.\n", + "\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "YrDh8Re6EIT3" + }, + "source": [ + "# 👏 Success! Welcome to 🟣 Encord Active!" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "4P2v4lOEAShb" + }, + "source": [ + "🏆 Congratulations, you should be able to see your Encord Active dashboard running 🔽." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "FBD5CIWlwKo6" + }, + "source": [ + "![Encord Active welcome resized.png.jpeg](https://storage.googleapis.com/encord-notebooks/local-notebooks/encord_active_welcome_page.jpeg)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "VCCnbuD8EQvu" + }, + "source": [ + "# ✅ Wrap up" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ebB0Y1AroE6n" + }, + "source": [ + "\n", + "📓 This Colab notebook showed you how to download a sandbox project with Encord Active. If you would like to learn more, check out our [documentation](https://docs.encord.com/docs/active-overview) to find more concrete workflow and guides.\n", + "\n", + "> ⚠️ Remember to stop the running cell above to close the app when you are done exploring Encord Active. You may also want to delete the project folder too.\n", + "\n", + "---\n", + "\n", + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. 
We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "RDzrYNahKnX9" + }, + "source": [ + "# ⏭️ Next: Learn how to import your Encord Project" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "oD-Wv0xGKymr" + }, + "source": [ + "What should you check out next? 👀 Learn how to import an existing Encord project. The Colab notebook will cover:\n", + "\n", + "- Generating SSH key pairs to connect to the Encord platform and fetch a project.\n", + "- Installing Encord Active and import the selected project.\n", + "- Launching the Encord Active app containing the project.\n", + "\n", + "\n", + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "yWlE1NtaWZPx" + }, + "source": [ + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./02_Encord_Active___Import_project_(self_hosting).ipynb) *➡️*\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/colab-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb b/colab-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb new file mode 100644 index 0000000..a904873 --- /dev/null +++ b/colab-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb @@ -0,0 +1,338 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "5f49161b", + "metadata": {}, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4d5f8ff9", + "metadata": {}, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "99cd4d8b", + "metadata": {}, + "source": [ + "# 🟣 Encord Active | Add Custom Embeddings" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "616ca378", + "metadata": {}, + "source": [ + "## 🚀 Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f735c771", + "metadata": {}, + "source": [ + "Hi there, 👋.\n", + "\n", + "Encord Active has three different types of embeddings.\n", + "\n", + "1. _Image embeddings:_ are general for each image / frame in the dataset\n", + "2. _Classification embeddings:_ are associated to specific frame level classifications\n", + "3. _Object embeddings:_ are associated to specific objects like polygons of bounding boxes\n", + "\n", + "If you like, you can \"swap out\" these embeddings with your own by following the steps in this notebook.\n", + "\n", + "There are two sections in the notebook. One for the image embeddings and one for the objects.\n", + "If you have classifications in your project, you should run:\n", + "\n", + "```\n", + "encord-active metric run \"Image-level Annotation Quality\"\n", + "```\n", + "\n", + "This will take the image level embeddings that you provided and also associate them to the classification labels.\n", + "\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active: \n", + "* [GitHub](https://github.com/encord-team/encord-active) \n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c74cdeb0", + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "from pathlib import Path\n", + "from typing import List\n", + "\n", + "import torch\n", + "from encord_active.lib.common.iterator import DatasetIterator, Iterator\n", + "from encord_active.lib.embeddings.dimensionality_reduction import (\n", + " generate_2d_embedding_data,\n", + ")\n", + "from encord_active.lib.embeddings.types import LabelEmbedding\n", + "from encord_active.lib.metrics.types import EmbeddingType\n", + "from encord_active.lib.project.project_file_structure import ProjectFileStructure\n", + "from PIL import Image\n", + "from torchvision.transforms import ToTensor\n", + "\n", + "\n", + "def load_my_model() -> torch.nn.Module:\n", + " ... # <- HERE: Edit here to return your model\n", + "\n", + "\n", + "def get_transform():\n", + " return (\n", + " ToTensor()\n", + " ) # <- HERE: If you have any specific transforms to apply to PIL images." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "50e58bc8", + "metadata": {}, + "source": [ + "## 🖼️ Examle of Image Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b5d39aa", + "metadata": {}, + "outputs": [], + "source": [ + "@torch.inference_mode()\n", + "def generate_cnn_image_embeddings(iterator: Iterator) -> List[LabelEmbedding]:\n", + " model = load_my_model()\n", + " transform = get_transform()\n", + "\n", + " collections: List[LabelEmbedding] = []\n", + " for data_unit, image in iterator.iterate(desc=\"Embedding image data.\"):\n", + " if image is None:\n", + " continue\n", + "\n", + " image_pil = image.convert(\"RGB\")\n", + " image = transform(image_pil)\n", + "\n", + " # START Embedding\n", + " embedding = model(image) # <- HERE - your logic for embedding data.\n", + "\n", + " if embedding is None:\n", + " continue\n", + "\n", + " embedding = embedding.flatten().detach().numpy() # <- should be a [d,] array.\n", + " # End Embedding\n", + "\n", + " entry = LabelEmbedding(\n", + " url=data_unit[\"data_link\"],\n", + " label_row=iterator.label_hash,\n", + " data_unit=data_unit[\"data_hash\"],\n", + " frame=iterator.frame,\n", + " dataset_title=iterator.dataset_title,\n", + " embedding=embedding,\n", + " labelHash=None,\n", + " lastEditedBy=None,\n", + " featureHash=None,\n", + " name=None,\n", + " classification_answers=None,\n", + " )\n", + " collections.append(entry)\n", + "\n", + " return collections\n", + "\n", + "\n", + "project = Path(\"/path/to/your/project/root\") # <- HERE: Path to the Encord Project\n", + "pfs = ProjectFileStructure(project)\n", + "\n", + "iterator = DatasetIterator(project)\n", + "embeddings = generate_cnn_image_embeddings(iterator)\n", + "out_file = prfs.get_embeddings_file(EmbeddingType.IMAGE)\n", + "\n", + "with out_file.open(\"wb\") as f:\n", + " pickle.dump(embeddings, f)\n", + "\n", + "generate_2d_embedding_data(EmbeddingType.IMAGE, project)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9d3fde49", + "metadata": {}, + "source": [ + "## 🏷️ Example of Object Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddddaa1e", + "metadata": {}, + "outputs": [], + "source": [ + "from encord_active.lib.common.utils import get_bbox_from_encord_label_object\n", + "\n", + 
"@torch.inference_mode()\n", + "def generate_cnn_object_embeddings(iterator: Iterator) -> List[LabelEmbedding]:\n", + " model = get_model()\n", + " transform = get_transform()\n", + "\n", + " embeddings: List[LabelEmbedding] = []\n", + " for data_unit, image in iterator.iterate(desc=\"Embedding object data.\"):\n", + " if image is None:\n", + " continue\n", + " \n", + " image_pil = image.convert(\"RGB\")\n", + " image = transform(image_pil)\n", + " \n", + " for obj in data_unit[\"labels\"].get(\"objects\", []):\n", + " if obj[\"shape\"] in [\n", + " ObjectShape.POLYGON.value,\n", + " ObjectShape.BOUNDING_BOX.value,\n", + " ObjectShape.ROTATABLE_BOUNDING_BOX.value,\n", + " ]:\n", + " # Crops images tightly around object\n", + " out = get_bbox_from_encord_label_object( \n", + " obj,\n", + " image.shape[2],\n", + " image.shape[1],\n", + " )\n", + "\n", + " if out is None:\n", + " continue\n", + " \n", + " x, y, w, h = out\n", + " img_patch = image[:, y : y + h, x : x + w]\n", + " \n", + " # Compute embeddings\n", + " embedding = model(img_patch)\n", + " embedding = embedding.flatten().detach().numpy() # <- should be a [d,] array.\n", + "\n", + " last_edited_by = obj[\"lastEditedBy\"] if \"lastEditedBy\" in obj.keys() else obj[\"createdBy\"]\n", + " entry = LabelEmbedding(\n", + " url=data_unit[\"data_link\"],\n", + " label_row=iterator.label_hash,\n", + " data_unit=data_unit[\"data_hash\"],\n", + " frame=iterator.frame,\n", + " labelHash=obj[\"objectHash\"],\n", + " lastEditedBy=last_edited_by,\n", + " featureHash=obj[\"featureHash\"],\n", + " name=obj[\"name\"],\n", + " dataset_title=iterator.dataset_title,\n", + " embedding=embedding,\n", + " classification_answers=None,\n", + " )\n", + "\n", + " embeddings.append(entry)\n", + "\n", + "\n", + " return embeddings\n", + "\n", + "embeddings = generate_cnn_object_embeddings(iterator)\n", + "out_file = pfs.get_embeddings_file(EmbeddingType.OBJECT)\n", + "\n", + "with out_file.open(\"wb\") as f:\n", + " pickle.dump(embeddings, f)\n", + "\n", + "generate_2d_embedding_data(EmbeddingType.OBJECT, project)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4744c9dc", + "metadata": {}, + "source": [ + "# ✅ Wrap Up: Next Steps" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "54067a9c", + "metadata": {}, + "source": [ + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n", + "---\n", + "\n", + "Thanks for now!" 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d7587b03", + "metadata": {}, + "source": [ + "### ⬅️ [*Previous Notebook*](./Encord_Active_Building_a_Custom_Metric_Function.ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](https://github.com/encord-team/encord-notebooks) *➡️*" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/colab-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb b/colab-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb new file mode 100644 index 0000000..5ecada1 --- /dev/null +++ b/colab-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb @@ -0,0 +1,827 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🟣 Encord Active | 🏗️ Building a Custom Metric Function" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🚀 Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-n4-PmG0Kwxk" + }, + "source": [ + "Hi there, 👋.\n", + "\n", + "> ⚠️ **Prerequisites:** you should have `encord-active` [installed](https://docs.encord.com/active/docs/installation).\n", + "\n", + "Developing machine learning models are often (and should be) based on iterative hypothesis testing. Typically, you get some data and labels and train your first model. Then, you realise that the model is performing worse than you had hoped.\n", + "\n", + "Now, you starting hypothesizing about what might be wrong. Perhaps you suspect that red objects make your model perform worse. So you define a hypothesis like:\n", + "\n", + "> Red objects have a significant impact on my model performance\n", + "\n", + "Traditionally, the next thing you would do is to write a script for filtering, ordering, and visualising your validation data as a function of the object colors.\n", + "Something like the code below.\n", + "\n", + "\n", + "> ⚠️ DISCLAIMER: The code below is just to show how much code you need to write to test your hypothesis. It's not meant to work or to be copied in any way!\n", + "\n", + "
\n", + "Code block that you can safely hide\n", + "\n", + "\n", + "\n", + "```python\n", + "# DISCLAIMER: This is just to show how much code you need to write to test your hypothesis\n", + "# It's not meant to work or to be copied in any way!\n", + "\n", + "from functools import partial\n", + "\n", + "color_ordering = [] \n", + "acc = [] \n", + "\n", + "def compute_redness_of_objects(image, object):\n", + " # Some code to determine colors\n", + " # color_metric = ...\n", + " return color_metric\n", + "\n", + "for batch in validation_loader:\n", + " for image, labels in batch:\n", + " predictions = my_model(images)\n", + "\n", + " acc += ... # some hard to write code for match predictions with labels\n", + " color_ordering += list(map(partial(get_colors_for_object, image=image), predictions))\n", + " \n", + "color_ordering = np.array(color_ordering)\n", + "sorting = np.argsort(color_ordering)\n", + "color_ordering = color_ordering[ordering]\n", + "acc = np.array(color_ordering)[ordering]\n", + "\n", + "# LOONG plotting code section for displaying samples, plots, and what not.\n", + "# ...\n", + "# ...\n", + "# ...\n", + "```\n", + " \n", + "
\n", + "\n", + "When you're finally done writing code and plotting things, hopefully you can reach a conclusion regarding your hypothesis.\n", + "When you reach this point, you will most likely have many more hypothesis that you want to test and eventually also more models to evaluate.\n", + "Do we need to mention how painful it will be to extend the code above with new use cases, plots, etc.?\n", + "What if you, for example, wanted to know the same thing, not only for your predictions but also for the labels? What about false negatives? .. and so on.\n", + "\n", + "Encord Active solves this problem with a couple of points in focus:\n", + "\n", + "1. **Reusability:** You define your metric function once and then you can reuse again and again.\n", + "2. **Isolation of functionality:** Since the metric function is defined in isolation from other metrics, you won't accidentally introduce errors in other functions, plots, etc.\n", + "3. **Iteration speed:** We've made it easy to implement your own metric function such that you can iterate faster.\n", + "4. **It's built from experience:** We have felt this pain many times and we have seen many of the common hypothesis that come up. We're building Encord Active to deel with all these common scenarios while being extensible enough to be tailored to your custom use case.\n", + "\n", + "Other points that we want to highlight is that \n", + "\n", + "1. Encord Active ships with a bunch of [pre-defined metrics](https://docs.encord.com/active/docs/category/metrics) that will automatically be run on your data when you import it.\n", + "2. When you've [imported your model predictions](https://docs.encord.com/active/docs/workflows/import-predictions), Encord Active will _automatically_ identify those metrics that are more important for your model performance.\n", + "\n", + "This 📓 notebook will take you through how to write such metric functions and use them with Encord Active.\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active: \n", + "* [GitHub](https://github.com/encord-team/encord-active) \n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 📏 Defining a `Metric` sub-class" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ESEt8WweThsO" + }, + "source": [ + "\n", + "\n", + "Here, we'll give some detailed information on how a quality metric is defined.\n", + "\n", + "> **🌟 Info**: If you don't like abstract talk, you can skip directly to [the example below](#concrete-example) to see how to implement a specific metric.\n", + "\n", + "We have listed the entire stub below for defining a metric. Following right after is a breakdown of the different components." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from typing import List, Optional, Union\n", + "\n", + "from encord_active.lib.common.iterator import Iterator\n", + "from encord_active.lib.metrics.metric import Metric\n", + "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n", + "from encord_active.lib.metrics.writer import CSVMetricWriter\n", + "\n", + "class ExampleMetric(Metric):\n", + " # === SECTION 1 === #\n", + " def __init__(self):\n", + " from typing import List, Optional, Union\n", + "\n", + "from encord_active.lib.common.iterator import Iterator\n", + "from encord_active.lib.metrics.metric import Metric\n", + "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n", + "from encord_active.lib.metrics.writer import CSVMetricWriter\n", + "\n", + "class ExampleMetric(Metric):\n", + " # === SECTION 1 === #\n", + " def __init__(self):\n", + " \n", + " super().__init__(\n", + " title=\"[the-name-of-your-metric]\",\n", + " short_description=\"A short description of your metric.\",\n", + " long_description=\"A longer and more detailed description. 
\" \\\n", + " \"I can use Markdown to _format_ the text.\",\n", + " metric_type=MetricType.GEOMETRIC,\n", + " data_type=DataType.IMAGE,\n", + " annotation_type=[AnnotationType.OBJECT.BOUNDING_BOX, AnnotationType.OBJECT.POLYGON],\n", + " )\n", + "\n", + " def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n", + " valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n", + "\n", + " for data_unit, image in iterator.iterate(desc=\"Progress bar description\"):\n", + " # === SECTION 2 === #\n", + " # Write a score for the image itself (data quality)\n", + " writer.write(1337, description=\"Your description for the frame [can be omitted]\")\n", + " \n", + " for obj in data_unit[\"labels\"].get(\"objects\", []):\n", + " # === SECTION 3 === #\n", + " # Label (object/classification) level score (label / model prediction quality)\n", + " if not obj[\"shape\"] in valid_annotation_types:\n", + " continue\n", + "\n", + " # Do your thing (inference)\n", + " # ...\n", + " # Then\n", + " writer.write(42, labels=obj, description=\"Your description of the score [can be omitted]\")\n", + "\n", + " from typing import List, Optional, Union\n", + "\n", + "from encord_active.lib.common.iterator import Iterator\n", + "from encord_active.lib.metrics.metric import Metric\n", + "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n", + "from encord_active.lib.metrics.writer import CSVMetricWriter\n", + "\n", + "class ExampleMetric(Metric):\n", + " # === SECTION 1 === #\n", + " def __init__(self):\n", + " super().__init__(\n", + " title=\"[the-name-of-your-metric]\",\n", + " short_description=\"A short description of your metric.\",\n", + " long_description=\"A longer and more detailed description. \" \\\n", + " \"I can use Markdown to _format_ the text.\",\n", + " metric_type=MetricType.GEOMETRIC,\n", + " data_type=DataType.IMAGE,\n", + " annotation_type=[AnnotationType.OBJECT.BOUNDING_BOX, AnnotationType.OBJECT.POLYGON],\n", + " )\n", + "\n", + " def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n", + " valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n", + "\n", + " for data_unit, image in iterator.iterate(desc=\"Progress bar description\"):\n", + " # === SECTION 2 === #\n", + " # Write a score for the image itself (data quality)\n", + " writer.write(1337, description=\"Your description for the frame [can be omitted]\")\n", + " \n", + " for obj in data_unit[\"labels\"].get(\"objects\", []):\n", + " # === SECTION 3 === #\n", + " # Label (object/classification) level score (label / model prediction quality)\n", + " if not obj[\"shape\"] in valid_annotation_types:\n", + " continue\n", + "\n", + " # Do your thing (inference)\n", + " # ...\n", + " # Then\n", + " writer.write(42, labels=obj, description=\"Your description of the score [can be omitted]\")\n", + " super().__init__(\n", + " title=\"[the-name-of-your-metric]\",\n", + " short_description=\"A short description of your metric.\",\n", + " long_description=\"A longer and more detailed description. 
\" \\\n", + " \"I can use Markdown to _format_ the text.\",\n", + " metric_type=MetricType.GEOMETRIC,\n", + " data_type=DataType.IMAGE,\n", + " annotation_type=[AnnotationType.OBJECT.BOUNDING_BOX, AnnotationType.OBJECT.POLYGON],\n", + " )\n", + "\n", + " def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n", + " valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n", + "\n", + " for data_unit, image in iterator.iterate(desc=\"Progress bar description\"):\n", + " # === SECTION 2 === #\n", + " # Write a score for the image itself (data quality)\n", + " writer.write(1337, description=\"Your description for the frame [can be omitted]\")\n", + " \n", + " for obj in data_unit[\"labels\"].get(\"objects\", []):\n", + " # === SECTION 3 === #\n", + " # Label (object/classification) level score (label / model prediction quality)\n", + " if not obj[\"shape\"] in valid_annotation_types:\n", + " continue\n", + "\n", + " # Do your thing (inference)\n", + " # ...\n", + " # Then\n", + " writer.write(42, labels=obj, description=\"Your description of the score [can be omitted]\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are a couple of sections in the code above. \n", + "\n", + "`SECTION 1`: Is used for proper display of the values that the metric produces. The properties being set there are:\n", + "\n", + "1. `title`: Is the title of your metric. It will be used in data frames and the app to identify the metric.\n", + "2. `metric_type`: We have distinguished metric types into three categories - `HEURISTIC`\n", + " - `HEURISTIC`: operate on images or individual video frames and are heuristic in the sense that they mostly depend on the image content without labels.\n", + " - `GEOMETRIC`: operate on the geometries of objects like bounding boxes, polygons, and polylines.\n", + " - `SEMANTIC`: operate with the semantic information of images or individual video frames - for example, metrics based on NN embeddings would go here.\n", + "3. `data_type`: The type of data that the metric applies to.\n", + " - `IMAGE`: Individual images witout any temporal dependencies.\n", + " - `SEQUENCE`: Consecutive frames where order across frames matter.\n", + "4. `annotation_type`:\n", + " - `NONE`: Doesn't need annotations (Data Quality)\n", + " - `OBJECT`: A list of object types like polygon or bounding box that the metric works for.\n", + " - `CLASSIFICATION`: A list of classification types like radio buttons and checkboxes that the metric works for.\n", + " - `ALL`: All objects and classification types. Could, for example, be used for annotation time.\n", + "5. `short_description`: Used in the ui\n", + "6. `long_description`: Used in the ui\n", + "\n", + "\n", + "`SECTION 2`: Is used for metric functions that yield one score for each frame. Note how the `writer.write(...)` specifies no objects.\n", + "\n", + "`SECTION 3`: Is used for metric functions that yield a score for each object / classification. For these metrics, `writer.write(...)` should contain a list of objects or classifications that should be associated with a giveen score.\n", + "\n", + "> _Note:_ You should stick to either writing scores with or without the `writer.write(..., labels=obj)` argument. Mixing them up will confuse the app.\n", + "\n", + "### Using the iterator\n", + "When you call `iterator.iterate(...)`, you will get an iterator over all the data in a given dataset (see how to execute the metric [below](#execute)). 
Each item in the iterator is a tuple of a `data_unit` dictionary and a `pathlib.Path` to where the image can be loaded from. \n", + "\n", + "The `data_unit` dictionary has the following structure (there may be more or less `\"objects\"` and `\"labels\"`):\n", + "\n", + "
\n", + "data_unit example structure\n", + "\n", + "```python\n", + "{\n", + " \"data_hash\": \"595d9721-913b-45c9-8645-c3ebf8a6ae0b\",\n", + " \"data_title\": \"231822\",\n", + " \"data_type\": \"image/jpeg\",\n", + " \"data_sequence\": 0,\n", + " \"labels\": {\n", + " \"objects\": [\n", + " { # Example polygon\n", + " \"name\": \"Bottle\",\n", + " \"color\": \"#68BC00\",\n", + " \"shape\": \"polygon\",\n", + " \"value\": \"bottle\",\n", + " \"polygon\": {\n", + " \"0\": {\"x\": 0.9559, \"y\": 0.0038},\n", + " \"1\": {\"x\": 0.9356, \"y\": 0.1399},\n", + " \"2\": {\"x\": 0.9216, \"y\": 0.1982},\n", + " # ...\n", + " },\n", + " \"createdAt\": \"Thu, 25 Aug 2022 15:45:31 GMT\",\n", + " \"createdBy\": \"robot@cord.tech\",\n", + " \"confidence\": 1,\n", + " \"objectHash\": \"9728826c\",\n", + " \"featureHash\": \"671c61d7\",\n", + " \"lastEditedAt\": \"Thu, 25 Aug 2022 15:45:31 GMT\",\n", + " \"lastEditedBy\": \"robot@encord.com\",\n", + " \"manualAnnotation\": False,\n", + " },\n", + " { # Example bounding box\n", + " \"name\": \"Cyclist\",\n", + " \"color\": \"#DBDF00\",\n", + " \"shape\": \"bounding_box\",\n", + " \"value\": \"Cyclist\",\n", + " \"createdAt\": \"Wed, 23 Nov 2022 10:05:22 GMT\",\n", + " \"createdBy\": \"robot@encord.com\",\n", + " \"confidence\": 1.0,\n", + " \"objectHash\": \"t2KUSWgj\",\n", + " \"featureHash\": \"yJ+hgd0r\",\n", + " \"lastEditedAt\": \"Wed, 23 Nov 2022 10:05:22 GMT\",\n", + " \"lastEditedBy\": \"robot@encord.com\",\n", + " \"manualAnnotation\": True,\n", + " \"boundingBox\": {\n", + " \"h\": 0.2810061626666667,\n", + " \"w\": 0.0897509331723027,\n", + " \"x\": 0.4464461135265701,\n", + " \"y\": 0.443804288,\n", + " },\n", + " \"reviews\": [],\n", + " },\n", + " ],\n", + " \"classifications\": [\n", + " { # Example classification\n", + " \"name\": \"Classification Question\",\n", + " \"value\": \"classification-question\",\n", + " \"createdAt\": \"Fri, 11 Nov 2022 09:41:21 GMT\",\n", + " \"createdBy\": \"robot@cord.tech\",\n", + " \"confidence\": 1,\n", + " \"featureHash\": \"MTYzMTkx\",\n", + " \"classificationHash\": \"sHNoiYPw\",\n", + " \"manualAnnotation\": True,\n", + " \"reviews\": [],\n", + " }, \n", + " # ...\n", + " ],\n", + " },\n", + " \"data_link\": \"...\",\n", + " \"width\": 500,\n", + " \"height\": 361,\n", + "}\n", + "```\n", + "
\n", + "\n", + "> _💡 Hint:_ You can inpect the entire structure by looking in the file `/path/to/project/data//label_row.json`.\n", + "\n", + "> 📝 _Note:_ To find the actual answers to classification questions, you access `iterator.label_rows[iterator.label_hash][\"classification_answers\"][]`.\n", + "\n", + "> 📝 _Note:_ If you are computing metrics based on temporal aspects, the `iterator.frame` will tell you what frame of a sequence you are currently looking at and the `iterator.label_hash` will give you the unique id of the sequence." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### 👟 Executing a metric\n", + "\n", + "When you have implemented a metric function, you can run it using the following code snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from encord_active.lib.metrics.execute import execute_metrics\n", + "from encord_active.lib.model_predictions.iterator import PredictionIterator\n", + "\n", + "target = Path(\"/path/to/your/project\") # TODO UPDATE\n", + "\n", + "execute_metrics([ExampleMetric()], data_dir=target, use_cache_only=True) # for labels\n", + "execute_metrics([ExampleMetric()], data_dir=target, iterator_cls=PredictionIterator, use_cache_only=True) # for predictions (only makes sense to do if your metric applies to labels)\n", + "\n", + "# Wrap this entire code block in a \n", + "# `if __name__ == \"__main__\":`\n", + "# and put it in the bottom of your metric file if you want to be able to run\n", + "# python your_metric.py" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `target` variable points to the directory that containts the Encord Active project that you want to run the metric on.\n", + "This directory should, for example, contain a `project-meta.yaml`.\n", + "\n", + "> Info: The `use_cache_only` argument tells Encord Active to not try and download more data via the Encord SDK.\n", + "\n", + "Having covered the overall structure, let's dive into a concrete example." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ESEt8WweThsO", + "tags": [] + }, + "source": [ + "\n", + "# 🦮 Concrete walkthrough Example\n", + "\n", + "> 💡 Hint: We refer to line numbers. In most notebooks, you can enable line numbers in the \"View\" options.\n", + "\n", + "In this example, you'll continue the idea of testing the model performance as a function of the \"redness\" of individual objects. \n", + "Specifically, you will use the annotations/predictions to extract the image patchs that contain an object and compute the mean Hue value of that patch.\n", + "\n", + "To get started, let's have a look at the [HSV color space](https://en.wikipedia.org/wiki/HSL_and_HSV), which is great for color filtering.\n", + "The following code indicates how different Hue (the H from HSV) values correspond to different colors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import cv2\n", + "import numpy as np\n", + "\n", + "def get_img(H: int):\n", + " \"\"\"\n", + " Make image with specific Hue color and convert it to RGB for plotting.\n", + " \"\"\"\n", + " img = np.ones((20, 20, 3), dtype=np.uint8)\n", + " img[..., 0] = H\n", + " img[..., 1] = 255\n", + " img[..., 2] = 150 \n", + " return cv2.cvtColor(img, cv2.COLOR_HSV2RGB)\n", + "\n", + "# Hue ranges from 0 to 180 and \"wraps\" around.\n", + "hues = np.linspace(0, 179, 18, dtype=np.uint8)\n", + "imgs = [get_img(i) for i in hues]\n", + "\n", + "fig, ax = plt.subplots(2, 9, figsize=(10, 3))\n", + "ax = ax.reshape(-1)\n", + "\n", + "# Plot the colors\n", + "for img, a, h in zip(imgs, ax, hues):\n", + " a.set_title(f\"Hue: {h}\")\n", + " a.axis('off')\n", + " a.imshow(img)\n", + "\n", + "fig.tight_layout()\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "👉 Note how the first and the last images are very red but have very different hue values. \n", + "This is because of the \"circular\" / \"wrap-around\" nature of the color space. \n", + "Let's account for that by computing a value, which makes red colors close to zero and others closer to one." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def transform_hue(H: int, offset=0):\n", + " return (90 - np.abs(H - 90)) / 90\n", + "\n", + "# Plotting\n", + "fig, ax = plt.subplots(2, 9, figsize=(10, 3))\n", + "ax = ax.reshape(-1)\n", + "\n", + "for img, a, h in zip(imgs, ax, hues):\n", + " t = transform_hue(h)\n", + " a.set_title(f\"Transf.: {t:.2f}\")\n", + " a.imshow(img)\n", + " a.axis('off')\n", + "fig.tight_layout()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "Alright, this looks better.\n", + "The transformed value is a better candidate for our metric function.\n", + "\n", + "Next, let's use this to crop out the relevant parts of polygon annotations and compute their mean (transformed) hue values.\n", + "\n", + "We define a `Metric` subclass and compute the transformed hue value for each object to see how red it is." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "cItsCLacS2Gx", + "outputId": "39d039b2-03b9-4b2b-e9ce-d8b3dbf61745", + "tags": [] + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "from encord_active.lib.common import utils\n", + "from encord_active.lib.common.iterator import Iterator\n", + "from encord_active.lib.metrics.metric import Metric\n", + "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n", + "from encord_active.lib.metrics.writer import CSVMetricWriter\n", + "from loguru import logger\n", + "\n", + "\n", + "class ObjectRedness(Metric):\n", + " def __init__(self):\n", + " super().__init__(\n", + " title=\"Polygon Average Hue\",\n", + " short_description=\"Compute the average Hue value of the pixels contained within each polygon.\",\n", + " long_description=r\"\"\"Crops out the pixels associated to each object and computes the (transformed)\n", + "Hue value of each object.\n", + "\n", + "The transform \"breaks\" the wrap-around of the Hue color space, so Hue values in range [0, 180] becomes [0, 1] as follows:\n", + "\n", + "```\n", + "H: [0, 45, 90, 135, 179]\n", + "t(H): [0, 0.5, 1, 0.5, 0+e]\n", + "```\n", + "\"\"\",\n", + " metric_type=MetricType.SEMANTIC ,\n", + " data_type=DataType.IMAGE,\n", + " annotation_type=[AnnotationType.OBJECT.POLYGON],\n", + " )\n", + "\n", + " def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n", + " valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n", + "\n", + " # Separate objects' instances (same objectHash [aka track id] means same object instance)\n", + " for data_unit, image in iterator.iterate(desc=\"Custom progress description\"):\n", + " # Convert image to the HSV color space\n", + " full_image = np.array(image)\n", + " full_hsv_image = cv2.cvtColor(full_image, cv2.COLOR_RGB2HSV)[...,0] # Take only the hue channel\n", + " img_h, img_w = full_hsv_image.shape[:2]\n", + " \n", + " for obj in data_unit[\"labels\"].get(\"objects\", []):\n", + " if not obj[\"shape\"] in valid_annotation_types:\n", + " continue # Only use polygons\n", + " \n", + " # The `get_geometry_from_encord_object` function will get us a numpy array of xy coordinates.\n", + " poly: Optional[np.ndarray] = utils.get_geometry_from_encord_object(obj, w=img_w, h=img_h) # [n, d]\n", + " if poly is None:\n", + " continue\n", + " \n", + " # Check that the polygon takes up at least one pixel\n", + " ymi, xmi = poly.min(0)\n", + " yma, xma = poly.max(0)\n", + " \n", + " if ymi == yma or xmi == xma:\n", + " continue # Empty polygon\n", + " \n", + " # Draw mask from polygon\n", + " mask = np.zeros((img_h, img_w), dtype=np.uint8)\n", + " mask = cv2.fillPoly(mask, [poly], 1)\n", + " \n", + " polygon_pixels = full_hsv_image[mask==1] # Take only pixels within polygon\n", + " transformed_mean_hue = transform_hue(polygon_pixels.mean())\n", + " writer.write(transformed_mean_hue.item(), labels=obj)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Okay, so you have defined your metric which extracts the pixels of each polygon and computes the average (transformed) hue value of those pixels.\n", + "The next step will then be to apply the metric to your data.\n", + "\n", + "In the next code cell, you'll download one of the sandbox datasets, but you can also point the metric to your own dataset by setting the `target` path below to point to the 
root of your project directory."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### 📩 Download the \"quickstart\" sandbox dataset."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "!encord-active download --project-name quickstart"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You should now be able to see the quickstart directory in the `File Browser`. \n",
+ "Apply your metric to that project."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "from encord_active.lib.metrics.execute import execute_metrics\n",
+ "\n",
+ "target = Path(\"quickstart\")\n",
+ "\n",
+ "# Apply metric to labels\n",
+ "execute_metrics([ObjectRedness()], data_dir=target, use_cache_only=True)\n",
+ "\n",
+ "# For predictions (only makes sense to do if your metric applies to labels)\n",
+ "from encord_active.lib.model_predictions.iterator import PredictionIterator\n",
+ "from encord_active.lib.model_predictions.writer import MainPredictionType\n",
+ "execute_metrics([ObjectRedness()], data_dir=target, iterator_cls=PredictionIterator, use_cache_only=True, prediction_type=MainPredictionType.OBJECT)\n",
+ "\n",
+ "# Wrap this entire code block in a \n",
+ "# `if __name__ == \"__main__\":`\n",
+ "# and put it at the bottom of your metric file if you want to be able to run\n",
+ "# python your_metric.py"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To see the results, you can run the app with the project as the target:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!encord-active start -t \"quickstart\""
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For the quickstart dataset, the \"Polygon Average Hue\" metric that we just defined seems to have little or no influence on the model performance - based on the \"Metric Importance\" chart on the \"Model Quality -> Metrics\" page.\n",
+ "However, if you filter by the person class in the settings panel at the top, you will see that the redness of objects does seem to have an effect on the model performance.\n",
+ "\n",
+ ""
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# ✅ Wrap Up: Next Steps"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The next steps from here could be many.\n",
+ "You have seen that the redness of objects is generally not extremely important for the model performance - which is a good thing - one less thing to worry about.\n",
+ "\n",
+ "From here, one could go on to define a new custom metric function to test the next hypothesis.\n",
+ "Some of the things that would be simple to test now that you have your first custom metric in place are, e.g., the standard deviation of the colors within an object, the saturation, other colors, etc. These metrics would only require changing line 57 in the metric definition above.\n",
+ "\n",
+ "Of course, you should keep all the metrics that you define to make sure that the redness of objects doesn't turn into a problem at a later stage in the model development.\n",
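+ "\n",
+ "As a rough sketch of what such a variant could look like - the helper name, the choice of the saturation channel, and the normalisation by 255 below are illustrative assumptions, not something prescribed by Encord Active:\n",
+ "\n",
+ "```python\n",
+ "# Hypothetical variant: score each polygon by the spread of its colors instead of its redness.\n",
+ "# It reuses the `execute` loop of the `ObjectRedness` metric above; only the scoring line changes.\n",
+ "import numpy as np\n",
+ "\n",
+ "def saturation_std_score(hsv_pixels: np.ndarray) -> float:\n",
+ "    \"\"\"Standard deviation of the saturation channel of the pixels inside a polygon, scaled to roughly [0, 1].\"\"\"\n",
+ "    return float(hsv_pixels[..., 1].std() / 255.0)\n",
+ "\n",
+ "# Inside `execute`, keep all three HSV channels (drop the `[..., 0]` hue selection) and replace\n",
+ "# the transformed-hue line with:\n",
+ "#     writer.write(saturation_std_score(polygon_pixels), labels=obj)\n",
+ "```\n",
+ "\n",
+ "Remember to give such a variant its own `title` in `super().__init__` so the app can tell the metrics apart."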
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n", + "---\n", + "\n", + "Thanks for now!" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ⏭️ Next: Learn how to add custom embeddings to 🟣 Encord Active" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "What should you check out next? 👀 Learn how to add custom embeddings to Encord Active. The Colab notebook will cover:\n", + "\n", + "* Example code for **adding custom image and object embeddings** to your Encord Active project.\n", + "\n", + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ⬅️ [*Previous Notebook*](./Encord_Active_HuggingFace_Dataset_Exploration.ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./Encord_Active_Add_Custom_Embeddings.ipynb) *➡️*" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/colab-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb b/colab-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb new file mode 100644 index 0000000..7052351 --- /dev/null +++ b/colab-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb @@ -0,0 +1,740 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Ix90mmYg-S_f" + }, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J3xuMLJinnX5" + }, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iebgJ1JbFCwy" + }, + "source": [ + "# 🟣 Encord Active | 🤗 HuggingFace Dataset Exploration" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eq0L5XrAFd31" + }, + "source": [ + "## 🏁 Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z_mRHrJ_Jm7n" + }, + "source": [ + "👋 Hi there! In this notebook, you will use Encord Active (EA) to explore the quality of a dataset from the [Hugging Face Datasets](https://huggingface.co/datasets) library.\n", + "\n", + "\n", + "> ⚠️ **Prerequisites:** you should have `encord-active` [installed](https://docs.encord.com/docs/active-overview) in your environment.\n", + "\n", + "This 📒 notebook will cover:\n", + "* Using 🤗 Datasets to download and generate the dataset.\n", + "* Creating an Encord Active project.\n", + "* Inspecting problematic images in the dataset.\n", + "* Exploring more features with the EA UI.\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active:\n", + "* [GitHub](https://github.com/encord-team/encord-active)\n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yv_kz9VtCJSe" + }, + "source": [ + "## 🛠️ Install Encord Active" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pwoSxHf7BcxX" + }, + "source": [ + "📌 `python3.9`, `python3.10`, and `python3.11` are the version requirements to run 🟣Encord Active." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "owdkTcPbP88e" + }, + "outputs": [], + "source": [ + "# Assert that python is 3.9 or 3.10 instead\n", + "import sys\n", + "assert sys.version_info.minor in [9, 10, 11], \"Encord Active only supported for python 3.9, 3.10, and 3.11.\"\n", + "\n", + "!pip install encord-active &> /dev/null\n", + "!encord-active --version" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qD0LBtnK-REL" + }, + "source": [ + "## 📥 Install the 🤗 Hugging Face Datasets package" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jehfrSQq0Iag" + }, + "source": [ + "👟 Run the following installation script for [🤗 Datasets](https://huggingface.co/datasets).\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4pXOBnMILgGV" + }, + "outputs": [], + "source": [ + "# Install the Hugging Face Datasets library\n", + "%pip install datasets &> /dev/null" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zXDKGS6G-W2-" + }, + "source": [ + "# 📨 Download a Hugging Face Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ziYuDPDgQQG-" + }, + "source": [ + "You can explore the [Hugging Face dataset](https://huggingface.co/datasets) directory and loady any dataset prefer to explore.\n", + "\n", + "\n", + "Here, install [`sashs/dog-food`](https://huggingface.co/datasets/sasha/dog-food) dataset where there are 3000 images consists of dogs and foods." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tdK0ZJ3-Ag9j" + }, + "outputs": [], + "source": [ + "from datasets import load_dataset, concatenate_datasets\n", + "from pathlib import Path\n", + "import shutil\n", + "from tqdm import tqdm\n", + "\n", + "# Use load_dataset function to download any dataset on the Hugging Face\n", + "# You can browse through datasets here: https://huggingface.co/datasets\n", + "dataset_dict = load_dataset('sasha/dog-food')\n", + "dataset = concatenate_datasets([d for d in dataset_dict.values()])\n", + "\n", + "huggingface_dataset_path = Path.cwd() / \"huggingface_dataset\"\n", + "\n", + "if huggingface_dataset_path.exists():\n", + " shutil.rmtree(huggingface_dataset_path)\n", + "huggingface_dataset_path.mkdir()\n", + "\n", + "for counter, item in tqdm(enumerate(dataset)):\n", + " image = item['image']\n", + " image.save(f'./huggingface_dataset/{counter}.{image.format}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AXxhLhin_WUT" + }, + "source": [ + "# 🔧 Create an 🟣 Encord Active project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "2enWYFbMTZz2" + }, + "outputs": [], + "source": [ + "#@title 👇🏽 Run this utility code for Colab notebooks\n", + "import sys\n", + "sys.stdout.fileno = lambda: 1\n", + "sys.stderr.fileno = lambda: 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nneOC2M3TesF" + }, + "source": [ + "## 👉 Add the Dataset to an 🟣 Encord Active Project" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QqorHdUDSwIv" + }, + "source": [ + "The code below sets up a project using Encord Active, initializes it with image files, and runs metrics on the project's data.\n", + "\n", + "* It obtains a list of all the image files from `huggingface_dataset` directory with the `collect_all_images` that takes a root folder path as input and returns a list of Path objects representing image files within the root folder\n", + "\n", + "* Initializes a local project using Encord Active's `init_local_project` function\n", + "\n", + "* Creates a project in the specified `projects_dir` directory with the image files and project name\n", + "\n", + "* Calls the [`run_metrics_by_embedding_type`](https://docs.encord.com/active/docs/sdk/run-metrics/#running-data-or-label-metrics-only) function to run metrics for the image embeddings (`EmbeddingType.IMAGE`). 
The metrics will be executed on the data in `project_path`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PCBKNbXN5sZ0" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "from encord_active.lib.metrics.execute import run_metrics, run_metrics_by_embedding_type\n", + "from encord_active.lib.metrics.metric import EmbeddingType\n", + "from encord_active.lib.project.local import ProjectExistsError, init_local_project\n", + "from encord_active.lib.project.project import Project\n", + "\n", + "def collect_all_images(root_folder: Path) -> list[Path]:\n", + " image_extensions = {\".jpg\", \".jpeg\", \".png\", \".bmp\"}\n", + " image_paths = []\n", + "\n", + " for file_path in root_folder.glob(\"**/*\"):\n", + " if file_path.suffix.lower() in image_extensions:\n", + " image_paths.append(file_path)\n", + "\n", + " return image_paths\n", + "\n", + "# Enter path to the downloaded torchvision project\n", + "root_folder = Path(\"./huggingface_dataset\")\n", + "projects_dir = Path.cwd()\n", + "\n", + "if not projects_dir.exists():\n", + " projects_dir.mkdir()\n", + "\n", + "image_files = collect_all_images(root_folder)\n", + "\n", + "try:\n", + " project_path: Path = init_local_project(\n", + " files = image_files,\n", + " target = projects_dir,\n", + " project_name = \"sample_ea_project\",\n", + " symlinks = False,\n", + " )\n", + "except ProjectExistsError as e:\n", + " project_path = Path(\"./sample_ea_project\")\n", + " print(e) # A project already exist with that name at the given path.\n", + "\n", + "run_metrics_by_embedding_type(\n", + " EmbeddingType.IMAGE,\n", + " data_dir=project_path,\n", + " use_cache_only=True\n", + ")\n", + "\n", + "ea_project = Project(project_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VLlodjncU8f4" + }, + "source": [ + "# 📥 Import helper functions\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5mUW34kCVmCX" + }, + "source": [ + "Now import some helper functions from Encord Active and with visualization libraries to visualize the images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INU_TIhxU_bn" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import plotly.express as px\n", + "\n", + "from encord_active.lib.charts.data_quality_summary import create_image_size_distribution_chart, create_outlier_distribution_chart\n", + "from encord_active.lib.dataset.summary_utils import get_all_image_sizes, get_metric_summary, get_median_value_of_2d_array\n", + "from encord_active.lib.metrics.utils import load_available_metrics\n", + "from encord_active.lib.dataset.outliers import MetricsSeverity, get_all_metrics_outliers\n", + "from encord_active.lib.common.image_utils import load_or_fill_image\n", + "from encord_active.lib.charts.histogram import get_histogram\n", + "\n", + "def plot_top_k_images(metric_name: str, metrics_data_summary: MetricsSeverity, project: Project, k: int, show_description: bool = False, ascending: bool = True):\n", + " metric_df = metrics_data_summary.metrics[metric_name].df\n", + " metric_df.sort_values(by='score', ascending=ascending, inplace=True)\n", + "\n", + " for _, row in metric_df.head(k).iterrows():\n", + " image = load_or_fill_image(row, project.file_structure)\n", + " plt.imshow(image)\n", + " plt.show()\n", + " print(f\"{metric_name} score: {row['score']}\")\n", + " if show_description:\n", + " print(f\"{row['description']}\")\n", + "\n", + "def plot_metric_distribution(metric_name: str, metric_data_summary: MetricsSeverity):\n", + " fig = px.histogram(metrics_data_summary.metrics[metric_name].df, x=\"score\", nbins=50)\n", + "\n", + " fig.update_layout(title=f\"{metric_name} score distribution\", bargap=0.2)\n", + " fig.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HlcKNEnUUnKI" + }, + "source": [ + "# 🔔 Plot image size distributions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UDm0oVjzT8ZO" + }, + "outputs": [], + "source": [ + "image_sizes = get_all_image_sizes(ea_project.file_structure)\n", + "median_image_dimension = get_median_value_of_2d_array(image_sizes)\n", + "\n", + "fig = create_image_size_distribution_chart(image_sizes)\n", + "\n", + "print(f\"Total images in the dataset: {len(image_sizes)}\")\n", + "print(f\"Median image sizes: {median_image_dimension[0]}x{median_image_dimension[1]}\")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p8itAs0SUxMJ" + }, + "source": [ + "# 📈 Show total outliers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m0ULGHxRUWsH" + }, + "outputs": [], + "source": [ + "available_metrics = load_available_metrics(ea_project.file_structure.metrics)\n", + "metrics_data_summary = get_metric_summary(available_metrics)\n", + "all_metrics_outliers = get_all_metrics_outliers(metrics_data_summary)\n", + "fig = create_outlier_distribution_chart(all_metrics_outliers, \"tomato\", 'orange')\n", + "\n", + "print(f'Total severe outliers: {metrics_data_summary.total_unique_severe_outliers} \\n'\n", + " f'Total moderate outliers: {metrics_data_summary.total_unique_moderate_outliers}')\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wdAHQn-vbfeo" + }, + "source": [ + "# 🧐 Inspect problematic images" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IyQhWgxyWClS" + }, + "source": [ + "Now you will have to inspect the dataset for problematic images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FT5I5dGhVZNb" + }, + "outputs": [], + "source": [ + "# First, get the list of available metrics\n", + "[metric.name for metric in available_metrics]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OjtOB7Jzr7Dl" + }, + "source": [ + "# 👁️ Visualize score distributions based on metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OIfht5swsAHe" + }, + "outputs": [], + "source": [ + "for metric in available_metrics:\n", + " plot_metric_distribution(metric.name, metrics_data_summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lbuCMR3IiA8f" + }, + "source": [ + "# Get the smallest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LiFuNRCogHWd" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Area', metrics_data_summary, ea_project, k=5, ascending=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pBjOfkIviFta" + }, + "source": [ + "# Get the biggest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ziYYWe82hxzg" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Area', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mRznGY3OiMej" + }, + "source": [ + "# Get the blurriest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iOLwSfV5iRsw" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Blur', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f9uj-9YairJi" + }, + "source": [ + "# Get the brightest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZuezcOvwivGX" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Brightness', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xJTm7fGmmmpX" + }, + "source": [ + "# Get the darkest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5bqBxlZ0mqFt" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Brightness', metrics_data_summary, ea_project, k=5, ascending=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "957ribVtjVZo" + }, + "source": [ + "# Get the least unique images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ACyNR_S2iyT1" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Image Singularity', metrics_data_summary, ea_project, k=15, show_description=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fDwoY8JVwnnA" + }, + "source": [ + "# Get the images that have the smallest aspect ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BRlu5blZwVH0" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Aspect Ratio', metrics_data_summary, ea_project, k=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "254nYRNAwxbX" + }, + "source": [ + "# Get the images that have the biggest aspect ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SbJpit1EwyBV" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Aspect Ratio', metrics_data_summary, ea_project, k=10, ascending=False)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "id": "80zLaV2FXoLx" + }, + "source": [ + "# ✅ Wrap Up: Explore more features with 🟣 Encord Active UI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XJmKEaRUelq2" + }, + "source": [ + "\n", + "This was just a small part of Encord Active's capabilities. Use Encord Active app to explore more of your dataset, labels, and model performance via easy to use user interface. With Encord Active UI, you can:\n", + "\n", + "* Understand the data and label distribution\n", + "* Search through data in natural language\n", + "* Detect exact and near duplicate images\n", + "* Detect label errors and biases\n", + "* Gain insights into your model’s weak areas\n", + "* Generate model explainability reports\n", + "* Test, validate, and evaluate your models with advanced error analysis\n", + "\n", + "\n", + "
\n", + "\n", + "![Encord Active UI](https://images.prismic.io/encord/73635182-4f04-4299-a992-a4d383e19765_image2.gif?auto=compress,format)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yI9LwxR4X9ER" + }, + "source": [ + "🟣 Encord Active is an open source toolkit to prioritize the most valuable image data for labeling to supercharge model performance! **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n", + "---\n", + "\n", + "Thanks for now!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SUyAHyUKPVJn" + }, + "source": [ + "# ⏭️ Next: Learn how to build custom metrics functions in 🟣 Encord Active" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hSnIVJSkPVJn" + }, + "source": [ + "What should you check out next? 👀 Learn how to build custom metrics functions in Encord Active. The Colab notebook will cover code samples and example walkthroughs for:\n", + "* Defining metric sub-classes.\n", + "* Executing metric functions.\n", + "* Investigating custom metrics in the Encord Active UI.\n", + "\n", + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GouuGlveX97N" + }, + "source": [ + "### ⬅️ [*Previous Notebook*](./Encord_Active_Torchvision_Dataset_Exploration.ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./Encord_Active_Building_a_Custom_Metric_Function.ipynb) *➡️*\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "yv_kz9VtCJSe" + ], + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/colab-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb b/colab-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb new file mode 100644 index 0000000..31da200 --- /dev/null +++ b/colab-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb @@ -0,0 +1,703 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RaxG4fvPK5dn" + }, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3J2CLN4f0qt0" + }, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vXywCRMtLCch" + }, + "source": [ + "# 🟣 Encord Active | 🔦 Torchvision Dataset Exploration\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CmaPL0STLnez" + }, + "source": [ + "## 🚀 Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_5rxwf3cMcZr" + }, + "source": [ + "👋 Hi there! In this notebook, you will use Encord Active to explore the quality of a dataset from the built-in samples in the [`torchvision.datasets`](https://pytorch.org/vision/stable/datasets.html) module.\n", + "\n", + "> ⚠️ **Prerequisites:** you should have `encord-active` [installed](https://docs.encord.com/docs/active-overview) in your environment.\n", + "\n", + "This 📒 notebook will cover:\n", + "* Downloading a dataset through the built-in datasets in the `torchvision.datasets` module.\n", + "* Creating an Encord Active project.\n", + "* Inspecting problematic images in the dataset.\n", + "* Exploring more features with Encord Active UI.\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active:\n", + "* [GitHub](https://github.com/encord-team/encord-active)\n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qD0LBtnK-REL" + }, + "source": [ + "## 📥 Install 🟣 Encord-Active" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2Y79puNuhpZp" + }, + "source": [ + "📌 `python3.9`, `python3.10`, and `python3.11` are the version requirements to run 🟣Encord Active." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "APt_Z1BUU-xI" + }, + "outputs": [], + "source": [ + "# Assert that python is 3.9 or 3.10 instead\n", + "import sys\n", + "assert sys.version_info.minor in [9, 10, 11], \"Encord Active only supported for python 3.9, 3.10, and 3.11.\"\n", + "\n", + "!pip install encord-active &> /dev/null\n", + "!encord-active --version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zPEcbw2l5f1u" + }, + "outputs": [], + "source": [ + "#@title Optional: Install `numpy 1.23.5` as a utility library for this project.\n", + "\n", + "%pip install -U -q numpy==1.23.5 # If you encounter a numpy error later in the code, comment/uncomment this line, and run after this point" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zXDKGS6G-W2-" + }, + "source": [ + "# 📨 Download TorchVision Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SiKeDVCWYK0l" + }, + "source": [ + "You can install any torhcvision dataset. Here, we will install [Caltech101](https://pytorch.org/vision/stable/generated/torchvision.datasets.Caltech101.html#torchvision.datasets.Caltech101) dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RPkyi5wb_Vws" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from torchvision import datasets\n", + "\n", + "datasets.Caltech101(Path.cwd(), target_type=\"category\", download=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AXxhLhin_WUT" + }, + "source": [ + "# 🔧 Create an 🟣 Encord Active project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "29KZ6Q4boYXs" + }, + "outputs": [], + "source": [ + "#@title 👇🏽 Run this utility code\n", + "import sys\n", + "sys.stdout.fileno = lambda: 1\n", + "sys.stderr.fileno = lambda: 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4kxQ0AoEZI0x" + }, + "source": [ + "# 👉 Add the Dataset to Your 🟣 Encord Active Project" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dRR8bPc_t9UJ" + }, + "source": [ + "The code below essentially sets up a project using Encord Active, initializes it with image files, and then runs metrics on the project's data.\n", + "\n", + "- Obtain a list of all the Caltech 101 image files from the dataset directory with the `collect_all_images` that takes a root folder path as input and returns a list of Path objects representing image files within the root folder\n", + "\n", + "- Initialize a local project using Encord Active's `init_local_project` function\n", + "\n", + "- Creates a project in the specified `projects_dir` directory with the provided image files and project name\n", + "\n", + "- Call the [`run_metrics_by_embedding_type`](https://docs.encord.com/active/docs/sdk/run-metrics/#running-data-or-label-metrics-only) function to run metrics for the image embeddings (EmbeddingType.IMAGE). 
The metrics will be executed on the data in `project_path`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PCBKNbXN5sZ0" + }, + "outputs": [], + "source": [ + "from encord_active.lib.metrics.execute import run_metrics, run_metrics_by_embedding_type\n", + "from encord_active.lib.metrics.metric import EmbeddingType\n", + "from encord_active.lib.project.local import ProjectExistsError, init_local_project\n", + "from encord_active.lib.project.project import Project\n", + "\n", + "def collect_all_images(root_folder: Path) -> list[Path]:\n", + " image_extensions = {\".jpg\", \".jpeg\", \".png\", \".bmp\"}\n", + " image_paths = []\n", + "\n", + " for file_path in root_folder.glob(\"**/*\"):\n", + " if file_path.suffix.lower() in image_extensions:\n", + " image_paths.append(file_path)\n", + "\n", + " return image_paths\n", + "\n", + "# Enter path to the downloaded torchvision project\n", + "root_folder = Path(\"./caltech101\")\n", + "projects_dir = Path(\"./ea/\")\n", + "\n", + "if not projects_dir.exists():\n", + " projects_dir.mkdir()\n", + "\n", + "image_files = collect_all_images(root_folder)\n", + "\n", + "try:\n", + " project_path: Path = init_local_project(\n", + " files = image_files,\n", + " target = projects_dir,\n", + " project_name = \"sample_ea_project\",\n", + " symlinks = False,\n", + " )\n", + "except ProjectExistsError as e:\n", + " project_path = Path(\"./ea/sample_ea_project\")\n", + " print(e) # A project already exist with that name at the given path.\n", + "\n", + "run_metrics_by_embedding_type(\n", + " EmbeddingType.IMAGE,\n", + " data_dir=project_path,\n", + " use_cache_only=True\n", + ")\n", + "\n", + "ea_project = Project(project_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VLlodjncU8f4" + }, + "source": [ + "# 📥 Import helper functions\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dkz0oi6BwK8H" + }, + "source": [ + "Now import some helper functions from Encord Active and with visualization libraries to visualize the images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INU_TIhxU_bn" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import plotly.express as px\n", + "\n", + "from encord_active.lib.charts.data_quality_summary import create_image_size_distribution_chart, create_outlier_distribution_chart\n", + "from encord_active.lib.dataset.summary_utils import get_all_image_sizes, get_metric_summary, get_median_value_of_2d_array\n", + "from encord_active.lib.metrics.utils import load_available_metrics\n", + "from encord_active.lib.dataset.outliers import MetricsSeverity, get_all_metrics_outliers\n", + "from encord_active.lib.common.image_utils import load_or_fill_image\n", + "from encord_active.lib.charts.histogram import get_histogram\n", + "\n", + "def plot_top_k_images(metric_name: str, metrics_data_summary: MetricsSeverity, project: Project, k: int, show_description: bool = False, ascending: bool = True):\n", + " metric_df = metrics_data_summary.metrics[metric_name].df\n", + " metric_df.sort_values(by='score', ascending=ascending, inplace=True)\n", + "\n", + " for _, row in metric_df.head(k).iterrows():\n", + " image = load_or_fill_image(row, project.file_structure)\n", + " plt.imshow(image)\n", + " plt.show()\n", + " print(f\"{metric_name} score: {row['score']}\")\n", + " if show_description:\n", + " print(f\"{row['description']}\")\n", + "\n", + "def plot_metric_distribution(metric_name: str, metric_data_summary: MetricsSeverity):\n", + " fig = px.histogram(metrics_data_summary.metrics[metric_name].df, x=\"score\", nbins=50)\n", + "\n", + " fig.update_layout(title=f\"{metric_name} score distribution\", bargap=0.2)\n", + " fig.show()\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HlcKNEnUUnKI" + }, + "source": [ + "# 🔔 Plot image size distributions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UDm0oVjzT8ZO" + }, + "outputs": [], + "source": [ + "image_sizes = get_all_image_sizes(ea_project.file_structure)\n", + "median_image_dimension = get_median_value_of_2d_array(image_sizes)\n", + "\n", + "fig = create_image_size_distribution_chart(image_sizes)\n", + "\n", + "print(f\"Total images in the dataset: {len(image_sizes)}\")\n", + "print(f\"Median image sizes: {median_image_dimension[0]}x{median_image_dimension[1]}\")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p8itAs0SUxMJ" + }, + "source": [ + "# 📈 Show total outliers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m0ULGHxRUWsH" + }, + "outputs": [], + "source": [ + "available_metrics = load_available_metrics(ea_project.file_structure.metrics)\n", + "metrics_data_summary = get_metric_summary(available_metrics)\n", + "all_metrics_outliers = get_all_metrics_outliers(metrics_data_summary)\n", + "fig = create_outlier_distribution_chart(all_metrics_outliers, \"tomato\", 'orange')\n", + "\n", + "print(f'Total severe outliers: {metrics_data_summary.total_unique_severe_outliers} \\n'\n", + " f'Total moderate outliers: {metrics_data_summary.total_unique_moderate_outliers}')\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wdAHQn-vbfeo" + }, + "source": [ + "# 🧐 Inspect problematic images\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R9Kb0wQXV8TN" + }, + "source": [ + "Now you will have to inspect the dataset for problematic images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FT5I5dGhVZNb" + }, + "outputs": [], + "source": [ + "# First, get the list of available metrics\n", + "[metric.name for metric in available_metrics]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OjtOB7Jzr7Dl" + }, + "source": [ + "## 👁️ Visualize score distributions based on metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OIfht5swsAHe" + }, + "outputs": [], + "source": [ + "for metric in available_metrics:\n", + " plot_metric_distribution(metric.name, metrics_data_summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lbuCMR3IiA8f" + }, + "source": [ + "## ▪️ Get the smallest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LiFuNRCogHWd" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Area', metrics_data_summary, ea_project, k=5, ascending=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pBjOfkIviFta" + }, + "source": [ + "## ⬛️ Get the biggest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ziYYWe82hxzg" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Area', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mRznGY3OiMej" + }, + "source": [ + "## 🌫️ Get the blurriest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iOLwSfV5iRsw" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Blur', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f9uj-9YairJi" + }, + "source": [ + "## 🔆 Get the brightest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZuezcOvwivGX" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Brightness', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xJTm7fGmmmpX" + }, + "source": [ + "## ▓ Get the darkest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5bqBxlZ0mqFt" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Brightness', metrics_data_summary, ea_project, k=5, ascending=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "957ribVtjVZo" + }, + "source": [ + "## 🚀 Get the least unique images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ACyNR_S2iyT1" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Image Singularity', metrics_data_summary, ea_project, k=15, show_description=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fDwoY8JVwnnA" + }, + "source": [ + "## ▫️ Get the images that have the smallest aspect ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BRlu5blZwVH0" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Aspect Ratio', metrics_data_summary, ea_project, k=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "254nYRNAwxbX" + }, + "source": [ + "## 🔳 Get the images that have the biggest aspect ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SbJpit1EwyBV" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Aspect Ratio', metrics_data_summary, ea_project, k=10, 
ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Ap6XhG2yM45" + }, + "source": [ + "# ✅ Wrap Up: Explore more features with 🟣 Encord Active UI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f8QDqSA6qNxA" + }, + "source": [ + "This was just a small part of Encord Active's capabilities. Use Encord Active app to explore more of your dataset, labels, and model performance via easy to use user interface.\n", + "\n", + "\n", + "With the Encord Active UI, you can:\n", + "\n", + "* Understand the data and label distribution.\n", + "* Search through data in natural language.\n", + "* Detect exact and near duplicate images.\n", + "* Detect label errors and biases.\n", + "* Gain insights into your model’s weak areas.\n", + "* Generate model explainability reports.\n", + "* Test, validate, and evaluate your models with advanced error analysis.\n", + "\n", + "
\n", + "\n", + "![Encord Active UI](https://images.prismic.io/encord/73635182-4f04-4299-a992-a4d383e19765_image2.gif?auto=compress,format)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fqjUr2ZYy5PZ" + }, + "source": [ + "🟣 Encord Active is an open source toolkit to prioritize the most valuable image data for labeling to supercharge model performance! **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n", + "---\n", + "\n", + "Thanks for now!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RmzNTiz2q3ya" + }, + "source": [ + "# ⏭️ Next: Learn how to use 🟣 Encord Active to explore 🤗 Face Datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nUFw6ClerSQU" + }, + "source": [ + "What do you think you should check out next? 👀 Learn how to use Encord Active to explore Hugging Face Datasets. The Colab notebook will cover:\n", + "\n", + "* Using 🤗 Datasets to download and generate the dataset.\n", + "* Creating an Encord Active project.\n", + "* Inspecting problematic images in the dataset.\n", + "* Exploring more features with Encord Active UI.\n", + "\n", + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "56xRVycX0M1W" + }, + "source": [ + "### ⬅️ [*Previous Notebook*](./02_Encord_Active___Import_project_(self_hosting).ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./Encord_Active_HuggingFace_Dataset_Exploration.ipynb) *➡️*" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "qD0LBtnK-REL" + ], + "provenance": [], + "toc_visible": true + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/colab-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb b/colab-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb new file mode 100644 index 0000000..8f4e1be --- /dev/null +++ b/colab-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb @@ -0,0 +1,855 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ujdhO_AMlaxh" + }, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "lOXCsrzileKN" + }, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "tbJThT5Ol_lr" + }, + "source": [ + "# 🟣 Encord Notebooks | 🔧 How to fine-tune Segment Anything Model (SAM)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "n6eLcQDjmvmg" + }, + "source": [ + "## 🏁 Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "aiehQvBj5Crc" + }, + "source": [ + "👋 Hi there!\n", + "\n", + "\n", + "\n", + "This is the notebook gives you a walkthrough on fine-tuning [Segment Anything Model](https://encord.com/blog/segment-anything-model-explained/) (SAM) to a specific application.\n", + "\n", + "You will use the stamp verification dataset on [Kaggle]( https://www.kaggle.com/datasets/rtatman/stamp-verification-staver-dataset) since it has:\n", + "* data SAM is unlikely to have seen (scans of invoices with stamps),\n", + "* precise ground truth segmentation masks,\n", + "* and bounding boxes which we can use as prompts to SAM.\n", + "\n", + "This tutorial has been prepared by [Alex Bonnet](https://encord.com/author/alexandre-bonnet/), ML Solutions Engineer at Encord.\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + "> 💡 If you want to read more about Encord Active checkout our [GitHub](https://github.com/encord-team/encord-active) and [documentation](https://docs.encord.com/docs/active-overview).\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "YSsqv3dWoVQ6" + }, + "source": [ + " ## 📰 Complementary Blog Post" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-qRmUVKwDu-9" + }, + "source": [ + "![How To Fine-Tune Segment Anything - Encord Blog](https://images.prismic.io/encord/fc9dadaa-a011-4de1-b0eb-e7a55f854081_Group%2048096157.png?ixlib=gatsbyFP&auto=compress%2Cformat&fit=max)\n", + "\n", + "This notebook implements the steps discussed in the blog post: https://encord.com/blog/learn-how-to-fine-tune-the-segment-anything-model-sam/\n", + "\n", + "Check it 🔼 out for a comprehensive walkthrough." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "8xFZSDikKMsA" + }, + "source": [ + "## 📥 Installation and Set Up" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "n7rshBkIH3bl" + }, + "source": [ + "To ensure a smooth experience with this walkthrough notebook, you need to install the necessary libraries, dependencies, and model family. This step is essential for running the code and executing the examples effectively.\n", + "\n", + "By installing these libraries upfront, you'll have everything you need to follow along and explore the notebook without any interruptions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r0oru8hAn6q2" + }, + "outputs": [], + "source": [ + "! pip install kaggle &> /dev/null\n", + "! pip install torch torchvision &> /dev/null\n", + "! pip install opencv-python pycocotools matplotlib onnxruntime onnx &> /dev/null\n", + "! pip install git+https://github.com/facebookresearch/segment-anything.git &> /dev/null\n", + "! wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth &> /dev/null" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "YkwevXNd2Ofw" + }, + "source": [ + "**Action Required:** Place your kaggle.json file into the files in the notebook workspace. More info here https://github.com/Kaggle/kaggle-api#api-credentials" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0uTL0fDZEOnl" + }, + "outputs": [], + "source": [ + "! mkdir ~/.kaggle\n", + "! mv kaggle.json ~/.kaggle/\n", + "! chmod 600 ~/.kaggle/kaggle.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sD5Kt6lO_HIw" + }, + "outputs": [], + "source": [ + "! kaggle datasets download rtatman/stamp-verification-staver-dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zJP-eL2_EA52" + }, + "outputs": [], + "source": [ + "! unzip stamp-verification-staver-dataset.zip &> /dev/null" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "dXzO_ZRWIEmz" + }, + "source": [ + "## 📩 Importing Relevant Libraries" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "I52JuXm7IVWt" + }, + "source": [ + "In this section, you will import the key libraries that will be used for dataset manipulation and visualization. These libraries play a crucial role in executing the code examples and demonstrating the concepts covered in the walkthrough." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lwmQm0C3n_3D" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import cv2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Gv4ob2wRE9CS" + }, + "outputs": [], + "source": [ + "# Exclude scans with zero or multiple bboxes (of the first 100)\n", + "stamps_to_exclude = {\n", + " 'stampDS-00008',\n", + " 'stampDS-00010',\n", + " 'stampDS-00015',\n", + " 'stampDS-00021',\n", + " 'stampDS-00027',\n", + " 'stampDS-00031',\n", + " 'stampDS-00039',\n", + " 'stampDS-00041',\n", + " 'stampDS-00049',\n", + " 'stampDS-00053',\n", + " 'stampDS-00059',\n", + " 'stampDS-00069',\n", + " 'stampDS-00073',\n", + " 'stampDS-00080',\n", + " 'stampDS-00090',\n", + " 'stampDS-00098',\n", + " 'stampDS-00100'\n", + "}.union({\n", + " 'stampDS-00012',\n", + " 'stampDS-00013',\n", + " 'stampDS-00014',\n", + "}) # Exclude 3 scans that aren't the type of scan we want to be fine tuning for" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "WNJGn1BsKUMS" + }, + "source": [ + "## 🛠️ Preprocess the dataset" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "KhXGI8HXyIFi" + }, + "source": [ + "You'll need to preprocess the scans from numpy arrays to pytorch tensors. To do this, follow what happens inside [`SamPredictor.set_image`](https://github.com/facebookresearch/segment-anything/blob/c1910835a32a05cbb79bdacbec8f25914a7e3a20/segment_anything/predictor.py#L34-L60) and [`SamPredictor.set_torch_image`](https://github.com/facebookresearch/segment-anything/blob/c1910835a32a05cbb79bdacbec8f25914a7e3a20/segment_anything/predictor.py#L63) which preprocesses the image.\n", + "\n", + "\n", + "\n", + "First, extract the bounding box coordinates which will be used to feed into SAM as prompts." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PNrV8CN8F9G0" + }, + "outputs": [], + "source": [ + "bbox_coords = {}\n", + "for f in sorted(Path('ground-truth-maps/ground-truth-maps/').iterdir())[:100]:\n", + " k = f.stem[:-3]\n", + " if k not in stamps_to_exclude:\n", + " im = cv2.imread(f.as_posix())\n", + " gray=cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)\n", + " contours, hierarchy = cv2.findContours(gray,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)[-2:]\n", + " if len(contours) > 1:\n", + " x,y,w,h = cv2.boundingRect(contours[0])\n", + " height, width, _ = im.shape\n", + " bbox_coords[k] = np.array([x, y, x + w, y + h])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "zsv0IGBDyMkS" + }, + "source": [ + "Extract the ground truth segmentation masks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lz7B4NDoJRxJ" + }, + "outputs": [], + "source": [ + "ground_truth_masks = {}\n", + "for k in bbox_coords.keys():\n", + " gt_grayscale = cv2.imread(f'ground-truth-pixel/ground-truth-pixel/{k}-px.png', cv2.IMREAD_GRAYSCALE)\n", + " ground_truth_masks[k] = (gt_grayscale == 0)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "FsX7SxD8KYOP" + }, + "source": [ + "## 👀 Inspect the images, bounding box prompts, and the ground truth segmentation masks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bz8C8QaxoT6N" + }, + "outputs": [], + "source": [ + "# Helper functions provided in https://github.com/facebookresearch/segment-anything/blob/9e8f1309c94f1128a6e5c047a10fdcb02fc8d651/notebooks/predictor_example.ipynb\n", + "def show_mask(mask, ax, random_color=False):\n", + " if random_color:\n", + " color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)\n", + " 
else:\n", + " color = np.array([30/255, 144/255, 255/255, 0.6])\n", + " h, w = mask.shape[-2:]\n", + " mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)\n", + " ax.imshow(mask_image)\n", + "\n", + "def show_box(box, ax):\n", + " x0, y0 = box[0], box[1]\n", + " w, h = box[2] - box[0], box[3] - box[1]\n", + " ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ncVrlh5fyed9" + }, + "source": [ + "We can see here that the ground truth mask is extremely tight which will be good for calculating an accurate loss.\n", + "The bounding box overlaid will be a good prompt." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3csOAxFju_Pi" + }, + "outputs": [], + "source": [ + "name = 'stampDS-00004'\n", + "image = cv2.imread(f'scans/scans/{name}.png')\n", + "\n", + "plt.figure(figsize=(10,10))\n", + "plt.imshow(image)\n", + "show_box(bbox_coords[name], plt.gca())\n", + "show_mask(ground_truth_masks[name], plt.gca())\n", + "plt.axis('off')\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "IJIFDGaUKfQp" + }, + "source": [ + "## 🧑‍🍳 Prepare Fine-Tuning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OdTD9CTxKena" + }, + "outputs": [], + "source": [ + "model_type = 'vit_b'\n", + "checkpoint = 'sam_vit_b_01ec64.pth'\n", + "device = 'cuda:0'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HjTIJtLxP8ZG" + }, + "outputs": [], + "source": [ + "from segment_anything import SamPredictor, sam_model_registry\n", + "sam_model = sam_model_registry[model_type](checkpoint=checkpoint)\n", + "sam_model.to(device)\n", + "sam_model.train();" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "MKZFlHjdKlhr" + }, + "source": [ + "### 🔁 Convert the input images into a format SAM's internal functions expect." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Bu0MdejGylZY" + }, + "source": [ + "First, use [`utils.transform.ResizeLongestSide`](https://github.com/facebookresearch/segment-anything/blob/c1910835a32a05cbb79bdacbec8f25914a7e3a20/segment_anything/predictor.py#L31) to resize the image, as this is the transformer used inside the predictor.\n", + "\n", + "Then convert the image to a pytorch tensor and use the SAM's [preprocess method](https://github.com/facebookresearch/segment-anything/blob/c1910835a32a05cbb79bdacbec8f25914a7e3a20/segment_anything/modeling/sam.py#L164) to finish preprocessing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jtPYpirbK3Wi" + }, + "outputs": [], + "source": [ + "# Preprocess the images\n", + "from collections import defaultdict\n", + "\n", + "import torch\n", + "\n", + "from segment_anything.utils.transforms import ResizeLongestSide\n", + "\n", + "transformed_data = defaultdict(dict)\n", + "for k in bbox_coords.keys():\n", + " image = cv2.imread(f'scans/scans/{k}.png')\n", + " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + " transform = ResizeLongestSide(sam_model.image_encoder.img_size)\n", + " input_image = transform.apply_image(image)\n", + " input_image_torch = torch.as_tensor(input_image, device=device)\n", + " transformed_image = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]\n", + "\n", + " input_image = sam_model.preprocess(transformed_image)\n", + " original_image_size = image.shape[:2]\n", + " input_size = tuple(transformed_image.shape[-2:])\n", + "\n", + " transformed_data[k]['image'] = input_image\n", + " transformed_data[k]['input_size'] = input_size\n", + " transformed_data[k]['original_image_size'] = original_image_size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QxnY6TMGKjdc" + }, + "outputs": [], + "source": [ + "# Set up the optimizer, hyperparameter tuning will improve performance here\n", + "lr = 1e-4\n", + "wd = 0\n", + "optimizer = torch.optim.Adam(sam_model.mask_decoder.parameters(), lr=lr, weight_decay=wd)\n", + "\n", + "loss_fn = torch.nn.MSELoss()\n", + "# loss_fn = torch.nn.BCELoss()\n", + "keys = list(bbox_coords.keys())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "sRHCNdzZy3dt" + }, + "source": [ + "## 🚀 Run SAM Fine-Tuning" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "9DIYcFKu14nr" + }, + "source": [ + "This is the main training loop.\n", + "\n", + "Improvements to be made include batching and moving the computation of the image and prompt embeddings outside the loop since we are not tuning these parts of the model, this will speed up training as we should not recompute the embeddings during each epoch.\n", + "\n", + "> ⚠️ Sometimes the optimizer gets lost in the parameter space and the loss function blows up. Restarting from scratch (including running all cells below 'Prepare Fine Tuning' in order to start with default weights again) should solve it.\n", + "\n", + "📝 In a production implementation, a better choice of optimiser/loss function will certainly help." 
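Regarding the note above that a better loss choice would help: MSE on a thresholded, normalised mask is a fairly blunt signal. A minimal sketch of one common alternative, a soft Dice loss on the raw mask logits, is shown below; it is not part of the original notebook, and `upscaled_masks` / `gt_binary_mask` refer to the tensors produced inside the training loop that follows.

```python
import torch

def soft_dice_loss(mask_logits: torch.Tensor, gt_mask: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """Soft Dice loss on mask probabilities; 0 means perfect overlap, 1 means no overlap."""
    probs = torch.sigmoid(mask_logits)        # raw decoder logits -> probabilities
    intersection = (probs * gt_mask).sum()
    union = probs.sum() + gt_mask.sum()
    return 1.0 - (2.0 * intersection + eps) / (union + eps)

# Possible drop-in inside the loop below (instead of loss_fn(binary_mask, gt_binary_mask)):
#   loss = soft_dice_loss(upscaled_masks, gt_binary_mask)
# torch.nn.BCEWithLogitsLoss() applied to `upscaled_masks` is another numerically stable option.
```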
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WRQ6yd_PM_B9" + }, + "outputs": [], + "source": [ + "from statistics import mean\n", + "\n", + "from tqdm import tqdm\n", + "from torch.nn.functional import threshold, normalize\n", + "\n", + "num_epochs = 100\n", + "losses = []\n", + "\n", + "for epoch in range(num_epochs):\n", + " epoch_losses = []\n", + " # Just train on the first 20 examples\n", + " for k in keys[:20]:\n", + " input_image = transformed_data[k]['image'].to(device)\n", + " input_size = transformed_data[k]['input_size']\n", + " original_image_size = transformed_data[k]['original_image_size']\n", + "\n", + " # No grad here as we don't want to optimise the encoders\n", + " with torch.no_grad():\n", + " image_embedding = sam_model.image_encoder(input_image)\n", + "\n", + " prompt_box = bbox_coords[k]\n", + " box = transform.apply_boxes(prompt_box, original_image_size)\n", + " box_torch = torch.as_tensor(box, dtype=torch.float, device=device)\n", + " box_torch = box_torch[None, :]\n", + "\n", + " sparse_embeddings, dense_embeddings = sam_model.prompt_encoder(\n", + " points=None,\n", + " boxes=box_torch,\n", + " masks=None,\n", + " )\n", + " low_res_masks, iou_predictions = sam_model.mask_decoder(\n", + " image_embeddings=image_embedding,\n", + " image_pe=sam_model.prompt_encoder.get_dense_pe(),\n", + " sparse_prompt_embeddings=sparse_embeddings,\n", + " dense_prompt_embeddings=dense_embeddings,\n", + " multimask_output=False,\n", + " )\n", + "\n", + " upscaled_masks = sam_model.postprocess_masks(low_res_masks, input_size, original_image_size).to(device)\n", + " binary_mask = normalize(threshold(upscaled_masks, 0.0, 0))\n", + "\n", + " gt_mask_resized = torch.from_numpy(np.resize(ground_truth_masks[k], (1, 1, ground_truth_masks[k].shape[0], ground_truth_masks[k].shape[1]))).to(device)\n", + " gt_binary_mask = torch.as_tensor(gt_mask_resized > 0, dtype=torch.float32)\n", + "\n", + " loss = loss_fn(binary_mask, gt_binary_mask)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " epoch_losses.append(loss.item())\n", + " losses.append(epoch_losses)\n", + " print(f'EPOCH: {epoch}')\n", + " print(f'Mean loss: {mean(epoch_losses)}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UKqIxUgAOTzp" + }, + "outputs": [], + "source": [ + "mean_losses = [mean(x) for x in losses]\n", + "mean_losses\n", + "\n", + "plt.plot(list(range(len(mean_losses))), mean_losses)\n", + "plt.title('Mean epoch loss')\n", + "plt.xlabel('Epoch Number')\n", + "plt.ylabel('Loss')\n", + "\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TuDlIiRjmitT" + }, + "source": [ + "## 📏 Compare the fine-tuned model to the original model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J9fZiPoIKXYW" + }, + "outputs": [], + "source": [ + "# Load up the model with default weights\n", + "sam_model_orig = sam_model_registry[model_type](checkpoint=checkpoint)\n", + "sam_model_orig.to(device);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3dIKKKHOn_7R" + }, + "outputs": [], + "source": [ + "# Set up predictors for both tuned and original models\n", + "from segment_anything import sam_model_registry, SamPredictor\n", + "predictor_tuned = SamPredictor(sam_model)\n", + "predictor_original = SamPredictor(sam_model_orig)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "id": "nhNHx-6kpEWu" + }, + "outputs": [], + "source": [ + "# The model has not seen keys[21] (or keys[20]) since we only trained on keys[:20]\n", + "k = keys[21]\n", + "image = cv2.imread(f'scans/scans/{k}.png')\n", + "image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + "\n", + "predictor_tuned.set_image(image)\n", + "predictor_original.set_image(image)\n", + "\n", + "input_bbox = np.array(bbox_coords[k])\n", + "\n", + "masks_tuned, _, _ = predictor_tuned.predict(\n", + " point_coords=None,\n", + " box=input_bbox,\n", + " multimask_output=False,\n", + ")\n", + "\n", + "masks_orig, _, _ = predictor_original.predict(\n", + " point_coords=None,\n", + " box=input_bbox,\n", + " multimask_output=False,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Df2oxBaxxXrt" + }, + "source": [ + "See here that the tuned model is starting to ignore the whitespace between the words, which is what the ground truths show. With further training, more data and further hyperparameter tuning you will be able to improve this result.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sH6NorejpTii" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "_, axs = plt.subplots(1, 2, figsize=(25, 25))\n", + "\n", + "\n", + "axs[0].imshow(image)\n", + "show_mask(masks_tuned, axs[0])\n", + "show_box(input_bbox, axs[0])\n", + "axs[0].set_title('Mask with Tuned Model', fontsize=26)\n", + "axs[0].axis('off')\n", + "\n", + "\n", + "axs[1].imshow(image)\n", + "show_mask(masks_orig, axs[1])\n", + "show_box(input_bbox, axs[1])\n", + "axs[1].set_title('Mask with Untuned Model', fontsize=26)\n", + "axs[1].axis('off')\n", + "\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "H8eMyK1vNP4J" + }, + "source": [ + "# ✅ Wrap up" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ysWfdTinMjeI" + }, + "source": [ + "If the image does not render due to size limitations, you can view it here:\n", + "\n", + "![fine-tuned model vs sam model - encord notebooks](https://storage.googleapis.com/encord-notebooks/fine-tune%20SAM/tuned_model_comparison.png)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-w9Hph-7NLnm" + }, + "source": [ + "\n", + "📓This Colab notebook showed you how to fine-tune Segment Anything Model (SAM) on your own data. If you would like to learn more, check out the [complementary blog post](https://encord.com/blog/learn-how-to-fine-tune-the-segment-anything-model-sam/).\n", + "\n", + "---\n", + "\n", + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out our 📖[blog](https://encord.com/blog/) and 📺[YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Lpkoq9YIQGWI" + }, + "source": [ + "#### ✨ Want more walthroughs like this? Check out the 🟣 [Encord Notebooks repository](https://github.com/encord-team/encord-notebooks)." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/colab-notebooks/Encord_Notebooks__demo_ea_native_display.ipynb b/colab-notebooks/Encord_Notebooks__demo_ea_native_display.ipynb new file mode 100644 index 0000000..a6dc949 --- /dev/null +++ b/colab-notebooks/Encord_Notebooks__demo_ea_native_display.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "nZ6Bu6FLvxYn" + }, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lZmVMPzW6JQV" + }, + "source": [ + "# 🟣 Encord Notebooks | 📥 Explore Encord Active's `0.1.75` Native UI" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3LgvE_e362FL" + }, + "source": [ + "## 🏁 Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yej19g4c64t8" + }, + "source": [ + "👋 Hi there! This notebook gives you a quick way to test Encord Active with a sandbox project and without installing anything locally.\n", + "\n", + "This 📒 notebook will cover:\n", + "* Install the Encord Active `0.1.75` release.\n", + "* Launch the UI with a `quickstart` project.\n", + "* Explore the all-new Encord Active UI 🤩.\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active: \n", + "* [GitHub](https://github.com/encord-team/encord-active) \n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jFRLWcZg-EVz" + }, + "source": [ + "## 📥 Install Encord Active `0.1.75` Release\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iHDRDFkaeBNh" + }, + "outputs": [], + "source": [ + "!python -m pip install encord-active==0.1.75" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QIexBBIw15V_" + }, + "outputs": [], + "source": [ + "!encord-active --version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "19mNpYvVes1R" + }, + "outputs": [], + "source": [ + "!encord-active download --project-name quickstart" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qctxDtWReD7X" + }, + "outputs": [], + "source": [ + "from google.colab.output import eval_js\n", + "from IPython.display import Javascript\n", + "\n", + "# Trick to be able to show the FE in a cell output.\n", + "def show_url(url: str, height=400):\n", + " display(Javascript(\"\"\"\n", + " (async ()=>{{\n", + " fm = document.createElement('iframe')\n", + " fm.src = '%s'\n", + " fm.width = '95%%'\n", + " fm.height = '%d'\n", + " fm.frameBorder = 0\n", + " document.body.append(fm)\n", + " }})();\n", + " \"\"\" % (url, height) ))\n", + "\n", + "# Proxy for FE and BE\n", + "fe_url = eval_js(\"google.colab.kernel.proxyPort(8000)\")\n", + "be_url = eval_js(\"google.colab.kernel.proxyPort(8001)\")\n", + "\n", + "# Start encord active in the background\n", + "get_ipython().system_raw(f\"ENV=packaged API_URL='{be_url}' ALLOWED_ORIGIN='{fe_url}' encord-active start &\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y_ftsgzvf0WX" + }, + "outputs": [], + "source": [ + "# Show EA in a cell\n", + "show_url(fe_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "596xvqv6hWaz" + }, + "outputs": [], + "source": [ + "!echo \"Alternatively use this link: {fe_url} to open the fronted in a new tab\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-2QFNYviDB4Q" + }, + "source": [ + "# ✅ Wrap up" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pPE2ppItDE0M" + }, + "source": [ + "\n", + "📓This Colab notebook showed you how to download a quickstart project with Encord Active. If you would like to learn more, check out our [documentation](https://docs.encord.com/docs/active-overview) to find more concrete workflow and guides.\n", + "\n", + "---\n", + "\n", + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. 
We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n", + "---\n", + "\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/local-notebooks/01_Encord_Active_Notebooks___Download_sandbox_project.ipynb b/local-notebooks/01_Encord_Active_Notebooks___Download_sandbox_project.ipynb new file mode 100644 index 0000000..b9e5a0a --- /dev/null +++ b/local-notebooks/01_Encord_Active_Notebooks___Download_sandbox_project.ipynb @@ -0,0 +1,384 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "oWG683Ze9f5R" + }, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "4eF7Fn8lBiN8" + }, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TxZTJQyoeK6m" + }, + "source": [ + "# 🟣 Encord Active | 📥 Download Sandbox Project\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "OPYaECRBKYig" + }, + "source": [ + "## 🏁 Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "VrrgT0Ka9jk_" + }, + "source": [ + "👋 Hi there! This notebook gives you a quick way to test Encord Active with a sandbox project and without installing anything locally.\n", + "\n", + "This 📒 notebook will cover:\n", + "* Installing Encord Active\n", + "* Choosing a sandbox project\n", + "* Starting the Encord Active app with the sandbox project\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active: \n", + "* [GitHub](https://github.com/encord-team/encord-active) \n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "WO8IUmYu47a0" + }, + "source": [ + "## 📥 Install 🟣 Encord-Active\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ByT_wNp6CYEr" + }, + "source": [ + "👟 Run the following script to install [Encord Active](https://docs.encord.com/docs/active-installation).\n", + "\n", + "\n", + "📌 `python3.9`, `python3.10`, and `python3.11` are the version requirements to run Encord Active." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r6AfzNEESQAX" + }, + "outputs": [], + "source": [ + "# Assert that python is 3.9 or 3.10 instead\n", + "import sys\n", + "assert sys.version_info.minor in [9, 10, 11], \"Encord Active only supported for python 3.9, 3.10, and 3.11.\"\n", + "%pip install -qq encord-active" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "zLjGzp9fmBqE" + }, + "source": [ + "# 🗂️ Import libraries and other utilities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "from encord_active.lib.project.sandbox_projects import fetch_prebuilt_project" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "r37LQkresX2w" + }, + "source": [ + "# 🗃️ Choose a sandbox project\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "aS7qqy8s-2UB" + }, + "source": [ + "⏩ You can choose between four different sandbox projects with open datasets:\n", + "\n", + "* [COCO-2017 Validation](https://paperswithcode.com/dataset/coco) (~5000 samples, ~41k annotations)\n", + "* [BDD Dataset Validation](https://bdd-data.berkeley.edu/) (~1000 samples, ~13k annotations)\n", + "* [Covid-19 Segmentation](https://paperswithcode.com/task/covid-19-image-segmentation) (~100 samples, ~600 annotations)\n", + "* [Limuc Ulcerative Classification](https://paperswithcode.com/dataset/limuc) (~1686 samples, no annotations)\n", + "\n", + "
\n", + "\n", + "🙋 If you're here for the first time, we recommend you to start with the COCO-2017 Validation dataset." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 🗃️ Choose a sandbox project you want to get started with" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sPBamQLfEt2P", + "outputId": "c9a5570e-faa7-4207-9228-1b3fc9f515b5" + }, + "outputs": [], + "source": [ + "project = 'COCO-2017 Validation' \n", + "\n", + "project_names = {\n", + " 'COCO-2017 Validation': \"[open-source][validation]-coco-2017-dataset\",\n", + " 'Limuc Ulcerative Classification': \"[open-source][test]-limuc-ulcerative-colitis-classification\",\n", + " 'Covid-19 Segmentation': \"[open-source]-covid-19-segmentations\",\n", + " 'BDD Dataset Validation': \"[open-source][validation]-bdd-dataset\"\n", + "}\n", + "\n", + "if project in project_names:\n", + " project_name = project_names[project]\n", + " print(\"Great! You chose the\", project, \"dataset.\")\n", + "else:\n", + " print(\"Invalid project selection.\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "37f1HUrZ5k8B" + }, + "source": [ + "## 📩 Download sandbox project and start the app\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "💪 Lastly, download the sandbox project, including the dataset, and start the application.\n", + "\n", + "💡 Encord Active fetches the dataset and pre-built image, downloads it locally, and the `start` command starts the Streamlit app on the backend." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Choose where to store the data\n", + "project_path = Path.cwd() / project_name\n", + "\n", + "# Download the dataset\n", + "fetch_prebuilt_project(project_name, project_path)\n", + "\n", + "# Open the app\n", + "!encord-active start -t \"$project_path\"" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "👉 Click on the `Network URL: ` to access the application in your browser.\n", + "\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "YrDh8Re6EIT3" + }, + "source": [ + "# 👏 Success! Welcome to 🟣 Encord Active!" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "4P2v4lOEAShb" + }, + "source": [ + "🏆 Congratulations, you should be able to see your Encord Active dashboard running 🔽." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "FBD5CIWlwKo6" + }, + "source": [ + "![Encord Active welcome resized.png.jpeg](https://storage.googleapis.com/encord-notebooks/local-notebooks/encord_active_welcome_page.jpeg)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "VCCnbuD8EQvu" + }, + "source": [ + "# ✅ Wrap up" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ebB0Y1AroE6n" + }, + "source": [ + "\n", + "📓This Colab notebook showed you how to download a sandbox project with Encord Active. If you would like to learn more, check out our [documentation](https://docs.encord.com/docs/active-overview) to find more concrete workflow and guides.\n", + "\n", + "> ⚠️ Remember to stop the running cell above to close the app when you are done exploring Encord Active. 
You may also want to delete the project folder too.\n", + "\n", + "---\n", + "\n", + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "RDzrYNahKnX9" + }, + "source": [ + "# ⏭️ Next: Learn how to import your Encord Project" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "oD-Wv0xGKymr" + }, + "source": [ + "What should you check out next? 👀 Learn how to import an existing Encord project. The notebook will cover:\n", + "\n", + "- Generating SSH key pairs to connect to the Encord platform and fetch a project\n", + "- Installing Encord Active and import the selected project\n", + "- Launching the Encord Active app containing the project\n", + "\n", + "\n", + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "yWlE1NtaWZPx" + }, + "source": [ + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./02_Encord_Active___Import_project_(self_hosting).ipynb) *➡️*\n" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/local-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb b/local-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb new file mode 100644 index 0000000..a904873 --- /dev/null +++ b/local-notebooks/Encord_Active_Add_Custom_Embeddings.ipynb @@ -0,0 +1,338 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "5f49161b", + "metadata": {}, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4d5f8ff9", + "metadata": {}, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "99cd4d8b", + "metadata": {}, + "source": [ + "# 🟣 Encord Active | Add Custom Embeddings" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "616ca378", + "metadata": {}, + "source": [ + "## 🚀 Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "f735c771", + "metadata": {}, + "source": [ + "Hi there, 👋.\n", + "\n", + "Encord Active has three different types of embeddings.\n", + "\n", + "1. _Image embeddings:_ are general for each image / frame in the dataset\n", + "2. _Classification embeddings:_ are associated to specific frame level classifications\n", + "3. _Object embeddings:_ are associated to specific objects like polygons of bounding boxes\n", + "\n", + "If you like, you can \"swap out\" these embeddings with your own by following the steps in this notebook.\n", + "\n", + "There are two sections in the notebook. One for the image embeddings and one for the objects.\n", + "If you have classifications in your project, you should run:\n", + "\n", + "```\n", + "encord-active metric run \"Image-level Annotation Quality\"\n", + "```\n", + "\n", + "This will take the image level embeddings that you provided and also associate them to the classification labels.\n", + "\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active: \n", + "* [GitHub](https://github.com/encord-team/encord-active) \n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c74cdeb0", + "metadata": {}, + "outputs": [], + "source": [ + "import pickle\n", + "from pathlib import Path\n", + "from typing import List\n", + "\n", + "import torch\n", + "from encord_active.lib.common.iterator import DatasetIterator, Iterator\n", + "from encord_active.lib.embeddings.dimensionality_reduction import (\n", + " generate_2d_embedding_data,\n", + ")\n", + "from encord_active.lib.embeddings.types import LabelEmbedding\n", + "from encord_active.lib.metrics.types import EmbeddingType\n", + "from encord_active.lib.project.project_file_structure import ProjectFileStructure\n", + "from PIL import Image\n", + "from torchvision.transforms import ToTensor\n", + "\n", + "\n", + "def load_my_model() -> torch.nn.Module:\n", + " ... # <- HERE: Edit here to return your model\n", + "\n", + "\n", + "def get_transform():\n", + " return (\n", + " ToTensor()\n", + " ) # <- HERE: If you have any specific transforms to apply to PIL images." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "50e58bc8", + "metadata": {}, + "source": [ + "## 🖼️ Examle of Image Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b5d39aa", + "metadata": {}, + "outputs": [], + "source": [ + "@torch.inference_mode()\n", + "def generate_cnn_image_embeddings(iterator: Iterator) -> List[LabelEmbedding]:\n", + " model = load_my_model()\n", + " transform = get_transform()\n", + "\n", + " collections: List[LabelEmbedding] = []\n", + " for data_unit, image in iterator.iterate(desc=\"Embedding image data.\"):\n", + " if image is None:\n", + " continue\n", + "\n", + " image_pil = image.convert(\"RGB\")\n", + " image = transform(image_pil)\n", + "\n", + " # START Embedding\n", + " embedding = model(image) # <- HERE - your logic for embedding data.\n", + "\n", + " if embedding is None:\n", + " continue\n", + "\n", + " embedding = embedding.flatten().detach().numpy() # <- should be a [d,] array.\n", + " # End Embedding\n", + "\n", + " entry = LabelEmbedding(\n", + " url=data_unit[\"data_link\"],\n", + " label_row=iterator.label_hash,\n", + " data_unit=data_unit[\"data_hash\"],\n", + " frame=iterator.frame,\n", + " dataset_title=iterator.dataset_title,\n", + " embedding=embedding,\n", + " labelHash=None,\n", + " lastEditedBy=None,\n", + " featureHash=None,\n", + " name=None,\n", + " classification_answers=None,\n", + " )\n", + " collections.append(entry)\n", + "\n", + " return collections\n", + "\n", + "\n", + "project = Path(\"/path/to/your/project/root\") # <- HERE: Path to the Encord Project\n", + "pfs = ProjectFileStructure(project)\n", + "\n", + "iterator = DatasetIterator(project)\n", + "embeddings = generate_cnn_image_embeddings(iterator)\n", + "out_file = prfs.get_embeddings_file(EmbeddingType.IMAGE)\n", + "\n", + "with out_file.open(\"wb\") as f:\n", + " pickle.dump(embeddings, f)\n", + "\n", + "generate_2d_embedding_data(EmbeddingType.IMAGE, project)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "9d3fde49", + "metadata": {}, + "source": [ + "## 🏷️ Example of Object Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddddaa1e", + "metadata": {}, + "outputs": [], + "source": [ + "from encord_active.lib.common.utils import get_bbox_from_encord_label_object\n", + "\n", + 
"@torch.inference_mode()\n", + "def generate_cnn_object_embeddings(iterator: Iterator) -> List[LabelEmbedding]:\n", + " model = get_model()\n", + " transform = get_transform()\n", + "\n", + " embeddings: List[LabelEmbedding] = []\n", + " for data_unit, image in iterator.iterate(desc=\"Embedding object data.\"):\n", + " if image is None:\n", + " continue\n", + " \n", + " image_pil = image.convert(\"RGB\")\n", + " image = transform(image_pil)\n", + " \n", + " for obj in data_unit[\"labels\"].get(\"objects\", []):\n", + " if obj[\"shape\"] in [\n", + " ObjectShape.POLYGON.value,\n", + " ObjectShape.BOUNDING_BOX.value,\n", + " ObjectShape.ROTATABLE_BOUNDING_BOX.value,\n", + " ]:\n", + " # Crops images tightly around object\n", + " out = get_bbox_from_encord_label_object( \n", + " obj,\n", + " image.shape[2],\n", + " image.shape[1],\n", + " )\n", + "\n", + " if out is None:\n", + " continue\n", + " \n", + " x, y, w, h = out\n", + " img_patch = image[:, y : y + h, x : x + w]\n", + " \n", + " # Compute embeddings\n", + " embedding = model(img_patch)\n", + " embedding = embedding.flatten().detach().numpy() # <- should be a [d,] array.\n", + "\n", + " last_edited_by = obj[\"lastEditedBy\"] if \"lastEditedBy\" in obj.keys() else obj[\"createdBy\"]\n", + " entry = LabelEmbedding(\n", + " url=data_unit[\"data_link\"],\n", + " label_row=iterator.label_hash,\n", + " data_unit=data_unit[\"data_hash\"],\n", + " frame=iterator.frame,\n", + " labelHash=obj[\"objectHash\"],\n", + " lastEditedBy=last_edited_by,\n", + " featureHash=obj[\"featureHash\"],\n", + " name=obj[\"name\"],\n", + " dataset_title=iterator.dataset_title,\n", + " embedding=embedding,\n", + " classification_answers=None,\n", + " )\n", + "\n", + " embeddings.append(entry)\n", + "\n", + "\n", + " return embeddings\n", + "\n", + "embeddings = generate_cnn_object_embeddings(iterator)\n", + "out_file = pfs.get_embeddings_file(EmbeddingType.OBJECT)\n", + "\n", + "with out_file.open(\"wb\") as f:\n", + " pickle.dump(embeddings, f)\n", + "\n", + "generate_2d_embedding_data(EmbeddingType.OBJECT, project)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "4744c9dc", + "metadata": {}, + "source": [ + "# ✅ Wrap Up: Next Steps" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "54067a9c", + "metadata": {}, + "source": [ + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n", + "---\n", + "\n", + "Thanks for now!" 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "d7587b03", + "metadata": {}, + "source": [ + "### ⬅️ [*Previous Notebook*](./Encord_Active_Building_a_Custom_Metric_Function.ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](https://github.com/encord-team/encord-notebooks) *➡️*" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/local-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb b/local-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb new file mode 100644 index 0000000..5ecada1 --- /dev/null +++ b/local-notebooks/Encord_Active_Building_a_Custom_Metric_Function.ipynb @@ -0,0 +1,827 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🟣 Encord Active | 🏗️ Building a Custom Metric Function" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🚀 Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-n4-PmG0Kwxk" + }, + "source": [ + "Hi there, 👋.\n", + "\n", + "> ⚠️ **Prerequisites:** you should have `encord-active` [installed](https://docs.encord.com/active/docs/installation).\n", + "\n", + "Developing machine learning models are often (and should be) based on iterative hypothesis testing. Typically, you get some data and labels and train your first model. Then, you realise that the model is performing worse than you had hoped.\n", + "\n", + "Now, you starting hypothesizing about what might be wrong. Perhaps you suspect that red objects make your model perform worse. So you define a hypothesis like:\n", + "\n", + "> Red objects have a significant impact on my model performance\n", + "\n", + "Traditionally, the next thing you would do is to write a script for filtering, ordering, and visualising your validation data as a function of the object colors.\n", + "Something like the code below.\n", + "\n", + "\n", + "> ⚠️ DISCLAIMER: The code below is just to show how much code you need to write to test your hypothesis. It's not meant to work or to be copied in any way!\n", + "\n", + "
\n", + "Code block that you can safely hide\n", + "\n", + "\n", + "\n", + "```python\n", + "# DISCLAIMER: This is just to show how much code you need to write to test your hypothesis\n", + "# It's not meant to work or to be copied in any way!\n", + "\n", + "from functools import partial\n", + "\n", + "color_ordering = [] \n", + "acc = [] \n", + "\n", + "def compute_redness_of_objects(image, object):\n", + " # Some code to determine colors\n", + " # color_metric = ...\n", + " return color_metric\n", + "\n", + "for batch in validation_loader:\n", + " for image, labels in batch:\n", + " predictions = my_model(images)\n", + "\n", + " acc += ... # some hard to write code for match predictions with labels\n", + " color_ordering += list(map(partial(get_colors_for_object, image=image), predictions))\n", + " \n", + "color_ordering = np.array(color_ordering)\n", + "sorting = np.argsort(color_ordering)\n", + "color_ordering = color_ordering[ordering]\n", + "acc = np.array(color_ordering)[ordering]\n", + "\n", + "# LOONG plotting code section for displaying samples, plots, and what not.\n", + "# ...\n", + "# ...\n", + "# ...\n", + "```\n", + " \n", + "
\n", + "\n", + "When you're finally done writing code and plotting things, hopefully you can reach a conclusion regarding your hypothesis.\n", + "When you reach this point, you will most likely have many more hypothesis that you want to test and eventually also more models to evaluate.\n", + "Do we need to mention how painful it will be to extend the code above with new use cases, plots, etc.?\n", + "What if you, for example, wanted to know the same thing, not only for your predictions but also for the labels? What about false negatives? .. and so on.\n", + "\n", + "Encord Active solves this problem with a couple of points in focus:\n", + "\n", + "1. **Reusability:** You define your metric function once and then you can reuse again and again.\n", + "2. **Isolation of functionality:** Since the metric function is defined in isolation from other metrics, you won't accidentally introduce errors in other functions, plots, etc.\n", + "3. **Iteration speed:** We've made it easy to implement your own metric function such that you can iterate faster.\n", + "4. **It's built from experience:** We have felt this pain many times and we have seen many of the common hypothesis that come up. We're building Encord Active to deel with all these common scenarios while being extensible enough to be tailored to your custom use case.\n", + "\n", + "Other points that we want to highlight is that \n", + "\n", + "1. Encord Active ships with a bunch of [pre-defined metrics](https://docs.encord.com/active/docs/category/metrics) that will automatically be run on your data when you import it.\n", + "2. When you've [imported your model predictions](https://docs.encord.com/active/docs/workflows/import-predictions), Encord Active will _automatically_ identify those metrics that are more important for your model performance.\n", + "\n", + "This 📓 notebook will take you through how to write such metric functions and use them with Encord Active.\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active: \n", + "* [GitHub](https://github.com/encord-team/encord-active) \n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 📏 Defining a `Metric` sub-class" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ESEt8WweThsO" + }, + "source": [ + "\n", + "\n", + "Here, we'll give some detailed information on how a quality metric is defined.\n", + "\n", + "> **🌟 Info**: If you don't like abstract talk, you can skip directly to [the example below](#concrete-example) to see how to implement a specific metric.\n", + "\n", + "We have listed the entire stub below for defining a metric. Following right after is a breakdown of the different components." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from typing import List, Optional, Union\n", + "\n", + "from encord_active.lib.common.iterator import Iterator\n", + "from encord_active.lib.metrics.metric import Metric\n", + "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n", + "from encord_active.lib.metrics.writer import CSVMetricWriter\n", + "\n", + "class ExampleMetric(Metric):\n", + " # === SECTION 1 === #\n", + " def __init__(self):\n", + " from typing import List, Optional, Union\n", + "\n", + "from encord_active.lib.common.iterator import Iterator\n", + "from encord_active.lib.metrics.metric import Metric\n", + "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n", + "from encord_active.lib.metrics.writer import CSVMetricWriter\n", + "\n", + "class ExampleMetric(Metric):\n", + " # === SECTION 1 === #\n", + " def __init__(self):\n", + " \n", + " super().__init__(\n", + " title=\"[the-name-of-your-metric]\",\n", + " short_description=\"A short description of your metric.\",\n", + " long_description=\"A longer and more detailed description. 
\" \\\n", + " \"I can use Markdown to _format_ the text.\",\n", + " metric_type=MetricType.GEOMETRIC,\n", + " data_type=DataType.IMAGE,\n", + " annotation_type=[AnnotationType.OBJECT.BOUNDING_BOX, AnnotationType.OBJECT.POLYGON],\n", + " )\n", + "\n", + " def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n", + " valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n", + "\n", + " for data_unit, image in iterator.iterate(desc=\"Progress bar description\"):\n", + " # === SECTION 2 === #\n", + " # Write a score for the image itself (data quality)\n", + " writer.write(1337, description=\"Your description for the frame [can be omitted]\")\n", + " \n", + " for obj in data_unit[\"labels\"].get(\"objects\", []):\n", + " # === SECTION 3 === #\n", + " # Label (object/classification) level score (label / model prediction quality)\n", + " if not obj[\"shape\"] in valid_annotation_types:\n", + " continue\n", + "\n", + " # Do your thing (inference)\n", + " # ...\n", + " # Then\n", + " writer.write(42, labels=obj, description=\"Your description of the score [can be omitted]\")\n", + "\n", + " from typing import List, Optional, Union\n", + "\n", + "from encord_active.lib.common.iterator import Iterator\n", + "from encord_active.lib.metrics.metric import Metric\n", + "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n", + "from encord_active.lib.metrics.writer import CSVMetricWriter\n", + "\n", + "class ExampleMetric(Metric):\n", + " # === SECTION 1 === #\n", + " def __init__(self):\n", + " super().__init__(\n", + " title=\"[the-name-of-your-metric]\",\n", + " short_description=\"A short description of your metric.\",\n", + " long_description=\"A longer and more detailed description. \" \\\n", + " \"I can use Markdown to _format_ the text.\",\n", + " metric_type=MetricType.GEOMETRIC,\n", + " data_type=DataType.IMAGE,\n", + " annotation_type=[AnnotationType.OBJECT.BOUNDING_BOX, AnnotationType.OBJECT.POLYGON],\n", + " )\n", + "\n", + " def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n", + " valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n", + "\n", + " for data_unit, image in iterator.iterate(desc=\"Progress bar description\"):\n", + " # === SECTION 2 === #\n", + " # Write a score for the image itself (data quality)\n", + " writer.write(1337, description=\"Your description for the frame [can be omitted]\")\n", + " \n", + " for obj in data_unit[\"labels\"].get(\"objects\", []):\n", + " # === SECTION 3 === #\n", + " # Label (object/classification) level score (label / model prediction quality)\n", + " if not obj[\"shape\"] in valid_annotation_types:\n", + " continue\n", + "\n", + " # Do your thing (inference)\n", + " # ...\n", + " # Then\n", + " writer.write(42, labels=obj, description=\"Your description of the score [can be omitted]\")\n", + " super().__init__(\n", + " title=\"[the-name-of-your-metric]\",\n", + " short_description=\"A short description of your metric.\",\n", + " long_description=\"A longer and more detailed description. 
\" \\\n", + " \"I can use Markdown to _format_ the text.\",\n", + " metric_type=MetricType.GEOMETRIC,\n", + " data_type=DataType.IMAGE,\n", + " annotation_type=[AnnotationType.OBJECT.BOUNDING_BOX, AnnotationType.OBJECT.POLYGON],\n", + " )\n", + "\n", + " def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n", + " valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n", + "\n", + " for data_unit, image in iterator.iterate(desc=\"Progress bar description\"):\n", + " # === SECTION 2 === #\n", + " # Write a score for the image itself (data quality)\n", + " writer.write(1337, description=\"Your description for the frame [can be omitted]\")\n", + " \n", + " for obj in data_unit[\"labels\"].get(\"objects\", []):\n", + " # === SECTION 3 === #\n", + " # Label (object/classification) level score (label / model prediction quality)\n", + " if not obj[\"shape\"] in valid_annotation_types:\n", + " continue\n", + "\n", + " # Do your thing (inference)\n", + " # ...\n", + " # Then\n", + " writer.write(42, labels=obj, description=\"Your description of the score [can be omitted]\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are a couple of sections in the code above. \n", + "\n", + "`SECTION 1`: Is used for proper display of the values that the metric produces. The properties being set there are:\n", + "\n", + "1. `title`: Is the title of your metric. It will be used in data frames and the app to identify the metric.\n", + "2. `metric_type`: We have distinguished metric types into three categories - `HEURISTIC`\n", + " - `HEURISTIC`: operate on images or individual video frames and are heuristic in the sense that they mostly depend on the image content without labels.\n", + " - `GEOMETRIC`: operate on the geometries of objects like bounding boxes, polygons, and polylines.\n", + " - `SEMANTIC`: operate with the semantic information of images or individual video frames - for example, metrics based on NN embeddings would go here.\n", + "3. `data_type`: The type of data that the metric applies to.\n", + " - `IMAGE`: Individual images witout any temporal dependencies.\n", + " - `SEQUENCE`: Consecutive frames where order across frames matter.\n", + "4. `annotation_type`:\n", + " - `NONE`: Doesn't need annotations (Data Quality)\n", + " - `OBJECT`: A list of object types like polygon or bounding box that the metric works for.\n", + " - `CLASSIFICATION`: A list of classification types like radio buttons and checkboxes that the metric works for.\n", + " - `ALL`: All objects and classification types. Could, for example, be used for annotation time.\n", + "5. `short_description`: Used in the ui\n", + "6. `long_description`: Used in the ui\n", + "\n", + "\n", + "`SECTION 2`: Is used for metric functions that yield one score for each frame. Note how the `writer.write(...)` specifies no objects.\n", + "\n", + "`SECTION 3`: Is used for metric functions that yield a score for each object / classification. For these metrics, `writer.write(...)` should contain a list of objects or classifications that should be associated with a giveen score.\n", + "\n", + "> _Note:_ You should stick to either writing scores with or without the `writer.write(..., labels=obj)` argument. Mixing them up will confuse the app.\n", + "\n", + "### Using the iterator\n", + "When you call `iterator.iterate(...)`, you will get an iterator over all the data in a given dataset (see how to execute the metric [below](#execute)). 
Each item in the iterator is a tuple of a `data_unit` dictionary and a `pathlib.Path` to where the image can be loaded from. \n", + "\n", + "The `data_unit` dictionary has the following structure (there may be more or less `\"objects\"` and `\"labels\"`):\n", + "\n", + "
\n", + "data_unit example structure\n", + "\n", + "```python\n", + "{\n", + " \"data_hash\": \"595d9721-913b-45c9-8645-c3ebf8a6ae0b\",\n", + " \"data_title\": \"231822\",\n", + " \"data_type\": \"image/jpeg\",\n", + " \"data_sequence\": 0,\n", + " \"labels\": {\n", + " \"objects\": [\n", + " { # Example polygon\n", + " \"name\": \"Bottle\",\n", + " \"color\": \"#68BC00\",\n", + " \"shape\": \"polygon\",\n", + " \"value\": \"bottle\",\n", + " \"polygon\": {\n", + " \"0\": {\"x\": 0.9559, \"y\": 0.0038},\n", + " \"1\": {\"x\": 0.9356, \"y\": 0.1399},\n", + " \"2\": {\"x\": 0.9216, \"y\": 0.1982},\n", + " # ...\n", + " },\n", + " \"createdAt\": \"Thu, 25 Aug 2022 15:45:31 GMT\",\n", + " \"createdBy\": \"robot@cord.tech\",\n", + " \"confidence\": 1,\n", + " \"objectHash\": \"9728826c\",\n", + " \"featureHash\": \"671c61d7\",\n", + " \"lastEditedAt\": \"Thu, 25 Aug 2022 15:45:31 GMT\",\n", + " \"lastEditedBy\": \"robot@encord.com\",\n", + " \"manualAnnotation\": False,\n", + " },\n", + " { # Example bounding box\n", + " \"name\": \"Cyclist\",\n", + " \"color\": \"#DBDF00\",\n", + " \"shape\": \"bounding_box\",\n", + " \"value\": \"Cyclist\",\n", + " \"createdAt\": \"Wed, 23 Nov 2022 10:05:22 GMT\",\n", + " \"createdBy\": \"robot@encord.com\",\n", + " \"confidence\": 1.0,\n", + " \"objectHash\": \"t2KUSWgj\",\n", + " \"featureHash\": \"yJ+hgd0r\",\n", + " \"lastEditedAt\": \"Wed, 23 Nov 2022 10:05:22 GMT\",\n", + " \"lastEditedBy\": \"robot@encord.com\",\n", + " \"manualAnnotation\": True,\n", + " \"boundingBox\": {\n", + " \"h\": 0.2810061626666667,\n", + " \"w\": 0.0897509331723027,\n", + " \"x\": 0.4464461135265701,\n", + " \"y\": 0.443804288,\n", + " },\n", + " \"reviews\": [],\n", + " },\n", + " ],\n", + " \"classifications\": [\n", + " { # Example classification\n", + " \"name\": \"Classification Question\",\n", + " \"value\": \"classification-question\",\n", + " \"createdAt\": \"Fri, 11 Nov 2022 09:41:21 GMT\",\n", + " \"createdBy\": \"robot@cord.tech\",\n", + " \"confidence\": 1,\n", + " \"featureHash\": \"MTYzMTkx\",\n", + " \"classificationHash\": \"sHNoiYPw\",\n", + " \"manualAnnotation\": True,\n", + " \"reviews\": [],\n", + " }, \n", + " # ...\n", + " ],\n", + " },\n", + " \"data_link\": \"...\",\n", + " \"width\": 500,\n", + " \"height\": 361,\n", + "}\n", + "```\n", + "
\n", + "\n", + "> _💡 Hint:_ You can inpect the entire structure by looking in the file `/path/to/project/data//label_row.json`.\n", + "\n", + "> 📝 _Note:_ To find the actual answers to classification questions, you access `iterator.label_rows[iterator.label_hash][\"classification_answers\"][]`.\n", + "\n", + "> 📝 _Note:_ If you are computing metrics based on temporal aspects, the `iterator.frame` will tell you what frame of a sequence you are currently looking at and the `iterator.label_hash` will give you the unique id of the sequence." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "### 👟 Executing a metric\n", + "\n", + "When you have implemented a metric function, you can run it using the following code snippet:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from encord_active.lib.metrics.execute import execute_metrics\n", + "from encord_active.lib.model_predictions.iterator import PredictionIterator\n", + "\n", + "target = Path(\"/path/to/your/project\") # TODO UPDATE\n", + "\n", + "execute_metrics([ExampleMetric()], data_dir=target, use_cache_only=True) # for labels\n", + "execute_metrics([ExampleMetric()], data_dir=target, iterator_cls=PredictionIterator, use_cache_only=True) # for predictions (only makes sense to do if your metric applies to labels)\n", + "\n", + "# Wrap this entire code block in a \n", + "# `if __name__ == \"__main__\":`\n", + "# and put it in the bottom of your metric file if you want to be able to run\n", + "# python your_metric.py" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `target` variable points to the directory that containts the Encord Active project that you want to run the metric on.\n", + "This directory should, for example, contain a `project-meta.yaml`.\n", + "\n", + "> Info: The `use_cache_only` argument tells Encord Active to not try and download more data via the Encord SDK.\n", + "\n", + "Having covered the overall structure, let's dive into a concrete example." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ESEt8WweThsO", + "tags": [] + }, + "source": [ + "\n", + "# 🦮 Concrete walkthrough Example\n", + "\n", + "> 💡 Hint: We refer to line numbers. In most notebooks, you can enable line numbers in the \"View\" options.\n", + "\n", + "In this example, you'll continue the idea of testing the model performance as a function of the \"redness\" of individual objects. \n", + "Specifically, you will use the annotations/predictions to extract the image patchs that contain an object and compute the mean Hue value of that patch.\n", + "\n", + "To get started, let's have a look at the [HSV color space](https://en.wikipedia.org/wiki/HSL_and_HSV), which is great for color filtering.\n", + "The following code indicates how different Hue (the H from HSV) values correspond to different colors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import cv2\n", + "import numpy as np\n", + "\n", + "def get_img(H: int):\n", + " \"\"\"\n", + " Make image with specific Hue color and convert it to RGB for plotting.\n", + " \"\"\"\n", + " img = np.ones((20, 20, 3), dtype=np.uint8)\n", + " img[..., 0] = H\n", + " img[..., 1] = 255\n", + " img[..., 2] = 150 \n", + " return cv2.cvtColor(img, cv2.COLOR_HSV2RGB)\n", + "\n", + "# Hue ranges from 0 to 180 and \"wraps\" around.\n", + "hues = np.linspace(0, 179, 18, dtype=np.uint8)\n", + "imgs = [get_img(i) for i in hues]\n", + "\n", + "fig, ax = plt.subplots(2, 9, figsize=(10, 3))\n", + "ax = ax.reshape(-1)\n", + "\n", + "# Plot the colors\n", + "for img, a, h in zip(imgs, ax, hues):\n", + " a.set_title(f\"Hue: {h}\")\n", + " a.axis('off')\n", + " a.imshow(img)\n", + "\n", + "fig.tight_layout()\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "👉 Note how the first and the last images are very red but have very different hue values. \n", + "This is because of the \"circular\" / \"wrap-around\" nature of the color space. \n", + "Let's account for that by computing a value, which makes red colors close to zero and others closer to one." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def transform_hue(H: int, offset=0):\n", + " return (90 - np.abs(H - 90)) / 90\n", + "\n", + "# Plotting\n", + "fig, ax = plt.subplots(2, 9, figsize=(10, 3))\n", + "ax = ax.reshape(-1)\n", + "\n", + "for img, a, h in zip(imgs, ax, hues):\n", + " t = transform_hue(h)\n", + " a.set_title(f\"Transf.: {t:.2f}\")\n", + " a.imshow(img)\n", + " a.axis('off')\n", + "fig.tight_layout()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "Alright, this looks better.\n", + "The transformed value is a better candidate for our metric function.\n", + "\n", + "Next, let's use this to crop out the relevant parts of polygon annotations and compute their mean (transformed) hue values.\n", + "\n", + "We define a `Metric` subclass and compute the transformed hue value for each object to see how red it is." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "cItsCLacS2Gx",
+    "outputId": "39d039b2-03b9-4b2b-e9ce-d8b3dbf61745",
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from typing import Optional\n",
+    "\n",
+    "import cv2\n",
+    "import numpy as np\n",
+    "\n",
+    "from encord_active.lib.common import utils\n",
+    "from encord_active.lib.common.iterator import Iterator\n",
+    "from encord_active.lib.metrics.metric import Metric\n",
+    "from encord_active.lib.metrics.types import AnnotationType, DataType, MetricType\n",
+    "from encord_active.lib.metrics.writer import CSVMetricWriter\n",
+    "\n",
+    "\n",
+    "class ObjectRedness(Metric):\n",
+    "    def __init__(self):\n",
+    "        super().__init__(\n",
+    "            title=\"Polygon Average Hue\",\n",
+    "            short_description=\"Compute the average Hue value of the pixels contained within each polygon.\",\n",
+    "            long_description=r\"\"\"Crops out the pixels associated with each object and computes the (transformed)\n",
+    "Hue value of each object.\n",
+    "\n",
+    "The transform \"breaks\" the wrap-around of the Hue color space, so Hue values in the range [0, 180] become values in [0, 1] as follows:\n",
+    "\n",
+    "```\n",
+    "H: [0, 45, 90, 135, 179]\n",
+    "t(H): [0, 0.5, 1, 0.5, 0+e]\n",
+    "```\n",
+    "\"\"\",\n",
+    "            metric_type=MetricType.SEMANTIC,\n",
+    "            data_type=DataType.IMAGE,\n",
+    "            annotation_type=[AnnotationType.OBJECT.POLYGON],\n",
+    "        )\n",
+    "\n",
+    "    def execute(self, iterator: Iterator, writer: CSVMetricWriter):\n",
+    "        valid_annotation_types = {annotation_type.value for annotation_type in self.metadata.annotation_type}\n",
+    "\n",
+    "        # Iterate over every image (data unit) in the project\n",
+    "        for data_unit, image in iterator.iterate(desc=\"Custom progress description\"):\n",
+    "            # Convert the image to the HSV color space\n",
+    "            full_image = np.array(image)\n",
+    "            full_hsv_image = cv2.cvtColor(full_image, cv2.COLOR_RGB2HSV)[..., 0]  # Take only the hue channel\n",
+    "            img_h, img_w = full_hsv_image.shape[:2]\n",
+    "\n",
+    "            for obj in data_unit[\"labels\"].get(\"objects\", []):\n",
+    "                if obj[\"shape\"] not in valid_annotation_types:\n",
+    "                    continue  # Only use polygons\n",
+    "\n",
+    "                # The `get_geometry_from_encord_object` function will get us a numpy array of xy coordinates.\n",
+    "                poly: Optional[np.ndarray] = utils.get_geometry_from_encord_object(obj, w=img_w, h=img_h)  # [n, d]\n",
+    "                if poly is None:\n",
+    "                    continue\n",
+    "\n",
+    "                # Check that the polygon takes up at least one pixel\n",
+    "                xmi, ymi = poly.min(0)\n",
+    "                xma, yma = poly.max(0)\n",
+    "\n",
+    "                if xmi == xma or ymi == yma:\n",
+    "                    continue  # Empty polygon\n",
+    "\n",
+    "                # Draw a binary mask from the polygon\n",
+    "                mask = np.zeros((img_h, img_w), dtype=np.uint8)\n",
+    "                mask = cv2.fillPoly(mask, [poly], 1)\n",
+    "\n",
+    "                polygon_pixels = full_hsv_image[mask == 1]  # Take only pixels within the polygon\n",
+    "                transformed_mean_hue = transform_hue(polygon_pixels.mean())\n",
+    "                writer.write(transformed_mean_hue.item(), labels=obj)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Okay, so you have defined a metric that extracts the pixels of each polygon and computes the average (transformed) hue value of those pixels.\n",
+    "The next step is to apply the metric to your data.\n",
+    "\n",
+    "In the next code cell, you'll download one of the sandbox datasets, but you can also point the metric to your own dataset by setting the `target` path below to the 
root of your project directory."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 📩 Download the \"quickstart\" sandbox dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "!encord-active download --project-name quickstart"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You should now be able to see the quickstart directory in the `File Browser`. \n",
+    "Apply your metric to that project."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pathlib import Path\n",
+    "from encord_active.lib.metrics.execute import execute_metrics\n",
+    "\n",
+    "target = Path(\"quickstart\")\n",
+    "\n",
+    "# Apply the metric to labels\n",
+    "execute_metrics([ObjectRedness()], data_dir=target, use_cache_only=True)\n",
+    "\n",
+    "# For predictions (only makes sense to do if your metric applies to labels)\n",
+    "from encord_active.lib.model_predictions.iterator import PredictionIterator\n",
+    "from encord_active.lib.model_predictions.writer import MainPredictionType\n",
+    "execute_metrics([ObjectRedness()], data_dir=target, iterator_cls=PredictionIterator, use_cache_only=True, prediction_type=MainPredictionType.OBJECT)\n",
+    "\n",
+    "# Wrap this entire code block in an\n",
+    "# `if __name__ == \"__main__\":`\n",
+    "# guard and put it at the bottom of your metric file if you want to be able to run\n",
+    "# python your_metric.py"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To see the results, you can run the app with the project as the target:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!encord-active start -t \"quickstart\""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "For the quickstart dataset, the \"Polygon Average Hue\" metric that we just defined seems to have little or no influence on the overall model performance - based on the \"Metric Importance\" chart on the \"Model Quality -> Metrics\" page.\n",
+    "However, if you filter by the person class in the settings panel at the top, you will see that the redness of objects does seem to have an effect on the model performance.\n",
+    "\n",
+    ""
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ✅ Wrap Up: Next Steps"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next steps from here could be many.\n",
+    "You have seen that the redness of objects is generally not extremely important for the model performance - which is a good thing: one less thing to worry about.\n",
+    "\n",
+    "From here, one could go on to define a new custom metric function to test the next hypothesis.\n",
+    "Some of the things that would be simple to test now that you have your first custom metric in place are, e.g., the standard deviation of the colors within an object, the saturation, other colors, etc. These metrics would only require changing line 57 in the metric definition above.\n",
+    "\n",
+    "Of course, you should keep all the metrics that you define to make sure that the redness of objects doesn't turn into a problem at a later stage of the model development.\n",
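+    "\n",
+    "As a rough sketch of how such a variant could look (this helper is only an illustration, not part of the notebook's tested code), a saturation-based metric could reuse the exact same polygon-masking logic and only swap which channel is averaged:\n",
+    "\n",
+    "```python\n",
+    "import cv2\n",
+    "import numpy as np\n",
+    "\n",
+    "def mean_saturation(image_rgb: np.ndarray, mask: np.ndarray) -> float:\n",
+    "    \"\"\"Mean saturation in [0, 1] of the pixels selected by a binary mask.\"\"\"\n",
+    "    saturation = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2HSV)[..., 1]  # S channel, 0-255 for uint8 input\n",
+    "    return float(saturation[mask == 1].mean() / 255)\n",
+    "```\n",
+    "\n",
+    "Inside `execute`, you would call a helper like this on `full_image` and the polygon `mask` instead of the hue transform, and pass the result to `writer.write(..., labels=obj)`."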
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n", + "---\n", + "\n", + "Thanks for now!" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ⏭️ Next: Learn how to add custom embeddings to 🟣 Encord Active" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "What should you check out next? 👀 Learn how to add custom embeddings to Encord Active. The Colab notebook will cover:\n", + "\n", + "* Example code for **adding custom image and object embeddings** to your Encord Active project.\n", + "\n", + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ⬅️ [*Previous Notebook*](./Encord_Active_HuggingFace_Dataset_Exploration.ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./Encord_Active_Add_Custom_Embeddings.ipynb) *➡️*" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/local-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb b/local-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb new file mode 100644 index 0000000..fcaafda --- /dev/null +++ b/local-notebooks/Encord_Active_HuggingFace_Dataset_Exploration.ipynb @@ -0,0 +1,725 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Ix90mmYg-S_f" + }, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J3xuMLJinnX5" + }, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iebgJ1JbFCwy" + }, + "source": [ + "# 🟣 Encord Active | 🤗 HuggingFace Dataset Exploration" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eq0L5XrAFd31" + }, + "source": [ + "## 🏁 Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z_mRHrJ_Jm7n" + }, + "source": [ + "👋 Hi there! In this notebook, you will use Encord Active (EA) to explore the quality of a dataset from the [Hugging Face Datasets](https://huggingface.co/datasets) library.\n", + "\n", + "\n", + "> ⚠️ **Prerequisites:** you should have `encord-active` [installed](https://docs.encord.com/docs/active-overview) in your environment.\n", + "\n", + "This 📒 notebook will cover:\n", + "* Using 🤗 Datasets to download and generate the dataset.\n", + "* Creating an Encord Active project.\n", + "* Inspecting problematic images in the dataset.\n", + "* Exploring more features with the EA UI.\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active:\n", + "* [GitHub](https://github.com/encord-team/encord-active)\n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yv_kz9VtCJSe" + }, + "source": [ + "## 🛠️ Install Encord Active" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pwoSxHf7BcxX" + }, + "source": [ + "📌 `python3.9`, `python3.10`, and `python3.11` are the version requirements to run 🟣Encord Active." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "owdkTcPbP88e" + }, + "outputs": [], + "source": [ + "# Assert that python is 3.9 or 3.10 instead\n", + "import sys\n", + "assert sys.version_info.minor in [9, 10, 11], \"Encord Active only supported for python 3.9, 3.10, and 3.11.\"\n", + "\n", + "!pip install encord-active &> /dev/null\n", + "!encord-active --version" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qD0LBtnK-REL" + }, + "source": [ + "## 📥 Install the 🤗 Hugging Face Datasets package" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jehfrSQq0Iag" + }, + "source": [ + "👟 Run the following installation script for [🤗 Datasets](https://huggingface.co/datasets).\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4pXOBnMILgGV" + }, + "outputs": [], + "source": [ + "# Install the Hugging Face Datasets library\n", + "%pip install datasets &> /dev/null" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zXDKGS6G-W2-" + }, + "source": [ + "# 📨 Download a Hugging Face Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ziYuDPDgQQG-" + }, + "source": [ + "You can explore the [Hugging Face dataset](https://huggingface.co/datasets) directory and loady any dataset prefer to explore.\n", + "\n", + "\n", + "Here, install [`sashs/dog-food`](https://huggingface.co/datasets/sasha/dog-food) dataset where there are 3000 images consists of dogs and foods." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tdK0ZJ3-Ag9j" + }, + "outputs": [], + "source": [ + "from datasets import load_dataset, concatenate_datasets\n", + "from pathlib import Path\n", + "import shutil\n", + "from tqdm import tqdm\n", + "\n", + "# Use load_dataset function to download any dataset on the Hugging Face\n", + "# You can browse through datasets here: https://huggingface.co/datasets\n", + "dataset_dict = load_dataset('sasha/dog-food')\n", + "dataset = concatenate_datasets([d for d in dataset_dict.values()])\n", + "\n", + "huggingface_dataset_path = Path.cwd() / \"huggingface_dataset\"\n", + "\n", + "if huggingface_dataset_path.exists():\n", + " shutil.rmtree(huggingface_dataset_path)\n", + "huggingface_dataset_path.mkdir()\n", + "\n", + "for counter, item in tqdm(enumerate(dataset)):\n", + " image = item['image']\n", + " image.save(f'./huggingface_dataset/{counter}.{image.format}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AXxhLhin_WUT" + }, + "source": [ + "# 🔧 Create an 🟣 Encord Active project" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nneOC2M3TesF" + }, + "source": [ + "## 👉 Add the Dataset to an 🟣 Encord Active Project" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QqorHdUDSwIv" + }, + "source": [ + "The code below sets up a project using Encord Active, initializes it with image files, and runs metrics on the project's data.\n", + "\n", + "* It obtains a list of all the image files from `huggingface_dataset` directory with the `collect_all_images` that takes a root folder path as input and returns a list of Path objects representing image files within the root folder\n", + "\n", + "* Initializes a local project using Encord Active's `init_local_project` function\n", + "\n", + "* Creates a project in the specified `projects_dir` directory with the image files and project name\n", + "\n", + "* Calls the [`run_metrics_by_embedding_type`](https://docs.encord.com/active/docs/sdk/run-metrics/#running-data-or-label-metrics-only) function to run metrics for the image embeddings (`EmbeddingType.IMAGE`). 
The metrics will be executed on the data in `project_path`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PCBKNbXN5sZ0" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "from encord_active.lib.metrics.execute import run_metrics, run_metrics_by_embedding_type\n", + "from encord_active.lib.metrics.metric import EmbeddingType\n", + "from encord_active.lib.project.local import ProjectExistsError, init_local_project\n", + "from encord_active.lib.project.project import Project\n", + "\n", + "def collect_all_images(root_folder: Path) -> list[Path]:\n", + " image_extensions = {\".jpg\", \".jpeg\", \".png\", \".bmp\"}\n", + " image_paths = []\n", + "\n", + " for file_path in root_folder.glob(\"**/*\"):\n", + " if file_path.suffix.lower() in image_extensions:\n", + " image_paths.append(file_path)\n", + "\n", + " return image_paths\n", + "\n", + "# Enter path to the downloaded torchvision project\n", + "root_folder = Path(\"./huggingface_dataset\")\n", + "projects_dir = Path.cwd()\n", + "\n", + "if not projects_dir.exists():\n", + " projects_dir.mkdir()\n", + "\n", + "image_files = collect_all_images(root_folder)\n", + "\n", + "try:\n", + " project_path: Path = init_local_project(\n", + " files = image_files,\n", + " target = projects_dir,\n", + " project_name = \"sample_ea_project\",\n", + " symlinks = False,\n", + " )\n", + "except ProjectExistsError as e:\n", + " project_path = Path(\"./sample_ea_project\")\n", + " print(e) # A project already exist with that name at the given path.\n", + "\n", + "run_metrics_by_embedding_type(\n", + " EmbeddingType.IMAGE,\n", + " data_dir=project_path,\n", + " use_cache_only=True\n", + ")\n", + "\n", + "ea_project = Project(project_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VLlodjncU8f4" + }, + "source": [ + "# 📥 Import helper functions\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5mUW34kCVmCX" + }, + "source": [ + "Now import some helper functions from Encord Active and with visualization libraries to visualize the images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INU_TIhxU_bn" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import plotly.express as px\n", + "\n", + "from encord_active.lib.charts.data_quality_summary import create_image_size_distribution_chart, create_outlier_distribution_chart\n", + "from encord_active.lib.dataset.summary_utils import get_all_image_sizes, get_metric_summary, get_median_value_of_2d_array\n", + "from encord_active.lib.metrics.utils import load_available_metrics\n", + "from encord_active.lib.dataset.outliers import MetricsSeverity, get_all_metrics_outliers\n", + "from encord_active.lib.common.image_utils import load_or_fill_image\n", + "from encord_active.lib.charts.histogram import get_histogram\n", + "\n", + "def plot_top_k_images(metric_name: str, metrics_data_summary: MetricsSeverity, project: Project, k: int, show_description: bool = False, ascending: bool = True):\n", + " metric_df = metrics_data_summary.metrics[metric_name].df\n", + " metric_df.sort_values(by='score', ascending=ascending, inplace=True)\n", + "\n", + " for _, row in metric_df.head(k).iterrows():\n", + " image = load_or_fill_image(row, project.file_structure)\n", + " plt.imshow(image)\n", + " plt.show()\n", + " print(f\"{metric_name} score: {row['score']}\")\n", + " if show_description:\n", + " print(f\"{row['description']}\")\n", + "\n", + "def plot_metric_distribution(metric_name: str, metric_data_summary: MetricsSeverity):\n", + " fig = px.histogram(metrics_data_summary.metrics[metric_name].df, x=\"score\", nbins=50)\n", + "\n", + " fig.update_layout(title=f\"{metric_name} score distribution\", bargap=0.2)\n", + " fig.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HlcKNEnUUnKI" + }, + "source": [ + "# 🔔 Plot image size distributions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UDm0oVjzT8ZO" + }, + "outputs": [], + "source": [ + "image_sizes = get_all_image_sizes(ea_project.file_structure)\n", + "median_image_dimension = get_median_value_of_2d_array(image_sizes)\n", + "\n", + "fig = create_image_size_distribution_chart(image_sizes)\n", + "\n", + "print(f\"Total images in the dataset: {len(image_sizes)}\")\n", + "print(f\"Median image sizes: {median_image_dimension[0]}x{median_image_dimension[1]}\")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p8itAs0SUxMJ" + }, + "source": [ + "# 📈 Show total outliers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m0ULGHxRUWsH" + }, + "outputs": [], + "source": [ + "available_metrics = load_available_metrics(ea_project.file_structure.metrics)\n", + "metrics_data_summary = get_metric_summary(available_metrics)\n", + "all_metrics_outliers = get_all_metrics_outliers(metrics_data_summary)\n", + "fig = create_outlier_distribution_chart(all_metrics_outliers, \"tomato\", 'orange')\n", + "\n", + "print(f'Total severe outliers: {metrics_data_summary.total_unique_severe_outliers} \\n'\n", + " f'Total moderate outliers: {metrics_data_summary.total_unique_moderate_outliers}')\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wdAHQn-vbfeo" + }, + "source": [ + "# 🧐 Inspect problematic images" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IyQhWgxyWClS" + }, + "source": [ + "Now you will have to inspect the dataset for problematic images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FT5I5dGhVZNb" + }, + "outputs": [], + "source": [ + "# First, get the list of available metrics\n", + "[metric.name for metric in available_metrics]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OjtOB7Jzr7Dl" + }, + "source": [ + "# 👁️ Visualize score distributions based on metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OIfht5swsAHe" + }, + "outputs": [], + "source": [ + "for metric in available_metrics:\n", + " plot_metric_distribution(metric.name, metrics_data_summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lbuCMR3IiA8f" + }, + "source": [ + "# Get the smallest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LiFuNRCogHWd" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Area', metrics_data_summary, ea_project, k=5, ascending=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pBjOfkIviFta" + }, + "source": [ + "# Get the biggest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ziYYWe82hxzg" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Area', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mRznGY3OiMej" + }, + "source": [ + "# Get the blurriest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iOLwSfV5iRsw" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Blur', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f9uj-9YairJi" + }, + "source": [ + "# Get the brightest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZuezcOvwivGX" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Brightness', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xJTm7fGmmmpX" + }, + "source": [ + "# Get the darkest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5bqBxlZ0mqFt" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Brightness', metrics_data_summary, ea_project, k=5, ascending=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "957ribVtjVZo" + }, + "source": [ + "# Get the least unique images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ACyNR_S2iyT1" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Image Singularity', metrics_data_summary, ea_project, k=15, show_description=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fDwoY8JVwnnA" + }, + "source": [ + "# Get the images that have the smallest aspect ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BRlu5blZwVH0" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Aspect Ratio', metrics_data_summary, ea_project, k=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "254nYRNAwxbX" + }, + "source": [ + "# Get the images that have the biggest aspect ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SbJpit1EwyBV" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Aspect Ratio', metrics_data_summary, ea_project, k=10, ascending=False)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": { + "id": "80zLaV2FXoLx" + }, + "source": [ + "# ✅ Wrap Up: Explore more features with 🟣 Encord Active UI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XJmKEaRUelq2" + }, + "source": [ + "\n", + "This was just a small part of Encord Active's capabilities. Use Encord Active app to explore more of your dataset, labels, and model performance via easy to use user interface. With Encord Active UI, you can:\n", + "\n", + "* Understand the data and label distribution\n", + "* Search through data in natural language\n", + "* Detect exact and near duplicate images\n", + "* Detect label errors and biases\n", + "* Gain insights into your model’s weak areas\n", + "* Generate model explainability reports\n", + "* Test, validate, and evaluate your models with advanced error analysis\n", + "\n", + "\n", + "
\n", + "\n", + "![Encord Active UI](https://images.prismic.io/encord/73635182-4f04-4299-a992-a4d383e19765_image2.gif?auto=compress,format)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yI9LwxR4X9ER" + }, + "source": [ + "🟣 Encord Active is an open source toolkit to prioritize the most valuable image data for labeling to supercharge model performance! **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n", + "---\n", + "\n", + "Thanks for now!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SUyAHyUKPVJn" + }, + "source": [ + "# ⏭️ Next: Learn how to build custom metrics functions in 🟣 Encord Active" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hSnIVJSkPVJn" + }, + "source": [ + "What should you check out next? 👀 Learn how to build custom metrics functions in Encord Active. The Colab notebook will cover code samples and example walkthroughs for:\n", + "* Defining metric sub-classes.\n", + "* Executing metric functions.\n", + "* Investigating custom metrics in the Encord Active UI.\n", + "\n", + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GouuGlveX97N" + }, + "source": [ + "### ⬅️ [*Previous Notebook*](./Encord_Active_Torchvision_Dataset_Exploration.ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./Encord_Active_Building_a_Custom_Metric_Function.ipynb) *➡️*\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "yv_kz9VtCJSe" + ], + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/local-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb b/local-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb new file mode 100644 index 0000000..9d69324 --- /dev/null +++ b/local-notebooks/Encord_Active_Torchvision_Dataset_Exploration.ipynb @@ -0,0 +1,689 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RaxG4fvPK5dn" + }, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3J2CLN4f0qt0" + }, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vXywCRMtLCch" + }, + "source": [ + "# 🟣 Encord Active | 🔦 Torchvision Dataset Exploration\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CmaPL0STLnez" + }, + "source": [ + "## 🚀 Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_5rxwf3cMcZr" + }, + "source": [ + "👋 Hi there! In this notebook, you will use Encord Active to explore the quality of a dataset from the built-in samples in the [`torchvision.datasets`](https://pytorch.org/vision/stable/datasets.html) module.\n", + "\n", + "> ⚠️ **Prerequisites:** you should have `encord-active` [installed](https://docs.encord.com/docs/active-overview) in your environment.\n", + "\n", + "This 📒 notebook will cover:\n", + "* Downloading a dataset through the built-in datasets in the `torchvision.datasets` module.\n", + "* Creating an Encord Active project.\n", + "* Inspecting problematic images in the dataset.\n", + "* Exploring more features with Encord Active UI.\n", + "\n", + "
\n", + "\n", + "> 💡 Learn more about 🟣 Encord Active:\n", + "* [GitHub](https://github.com/encord-team/encord-active)\n", + "* [Docs](https://docs.encord.com/docs/active-overview)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qD0LBtnK-REL" + }, + "source": [ + "## 📥 Install 🟣 Encord-Active" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2Y79puNuhpZp" + }, + "source": [ + "📌 `python3.9`, `python3.10`, and `python3.11` are the version requirements to run 🟣Encord Active." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "APt_Z1BUU-xI" + }, + "outputs": [], + "source": [ + "# Assert that python is 3.9 or 3.10 instead\n", + "import sys\n", + "assert sys.version_info.minor in [9, 10, 11], \"Encord Active only supported for python 3.9, 3.10, and 3.11.\"\n", + "\n", + "!pip install encord-active &> /dev/null\n", + "!encord-active --version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zPEcbw2l5f1u" + }, + "outputs": [], + "source": [ + "#@title Optional: Install `numpy 1.23.5` as a utility library for this project.\n", + "\n", + "%pip install -U -q numpy==1.23.5 # If you encounter a numpy error later in the code, comment/uncomment this line, and run after this point" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zXDKGS6G-W2-" + }, + "source": [ + "# 📨 Download TorchVision Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SiKeDVCWYK0l" + }, + "source": [ + "You can install any torhcvision dataset. Here, we will install [Caltech101](https://pytorch.org/vision/stable/generated/torchvision.datasets.Caltech101.html#torchvision.datasets.Caltech101) dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RPkyi5wb_Vws" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from torchvision import datasets\n", + "\n", + "datasets.Caltech101(Path.cwd(), target_type=\"category\", download=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AXxhLhin_WUT" + }, + "source": [ + "# 🔧 Create an 🟣 Encord Active project" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4kxQ0AoEZI0x" + }, + "source": [ + "## 👉 Add the Dataset to Your 🟣 Encord Active Project" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dRR8bPc_t9UJ" + }, + "source": [ + "The code below essentially sets up a project using Encord Active, initializes it with image files, and then runs metrics on the project's data.\n", + "\n", + "- Obtain a list of all the Caltech 101 image files from the dataset directory with the `collect_all_images` that takes a root folder path as input and returns a list of Path objects representing image files within the root folder\n", + "\n", + "- Initialize a local project using Encord Active's `init_local_project` function\n", + "\n", + "- Creates a project in the specified `projects_dir` directory with the provided image files and project name\n", + "\n", + "- Call the [`run_metrics_by_embedding_type`](https://docs.encord.com/active/docs/sdk/run-metrics/#running-data-or-label-metrics-only) function to run metrics for the image embeddings (EmbeddingType.IMAGE). 
The metrics will be executed on the data in `project_path`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PCBKNbXN5sZ0" + }, + "outputs": [], + "source": [ + "from encord_active.lib.metrics.execute import run_metrics, run_metrics_by_embedding_type\n", + "from encord_active.lib.metrics.metric import EmbeddingType\n", + "from encord_active.lib.project.local import ProjectExistsError, init_local_project\n", + "from encord_active.lib.project.project import Project\n", + "\n", + "def collect_all_images(root_folder: Path) -> list[Path]:\n", + " image_extensions = {\".jpg\", \".jpeg\", \".png\", \".bmp\"}\n", + " image_paths = []\n", + "\n", + " for file_path in root_folder.glob(\"**/*\"):\n", + " if file_path.suffix.lower() in image_extensions:\n", + " image_paths.append(file_path)\n", + "\n", + " return image_paths\n", + "\n", + "# Enter path to the downloaded torchvision project\n", + "root_folder = Path(\"./caltech101\")\n", + "projects_dir = Path(\"./ea/\")\n", + "\n", + "if not projects_dir.exists():\n", + " projects_dir.mkdir()\n", + "\n", + "image_files = collect_all_images(root_folder)\n", + "\n", + "try:\n", + " project_path: Path = init_local_project(\n", + " files = image_files,\n", + " target = projects_dir,\n", + " project_name = \"sample_ea_project\",\n", + " symlinks = False,\n", + " )\n", + "except ProjectExistsError as e:\n", + " project_path = Path(\"./ea/sample_ea_project\")\n", + " print(e) # A project already exist with that name at the given path.\n", + "\n", + "run_metrics_by_embedding_type(\n", + " EmbeddingType.IMAGE,\n", + " data_dir=project_path,\n", + " use_cache_only=True\n", + ")\n", + "\n", + "ea_project = Project(project_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VLlodjncU8f4" + }, + "source": [ + "# 📥 Import helper functions\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dkz0oi6BwK8H" + }, + "source": [ + "Now import some helper functions from Encord Active and with visualization libraries to visualize the images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INU_TIhxU_bn" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import plotly.express as px\n", + "\n", + "from encord_active.lib.charts.data_quality_summary import create_image_size_distribution_chart, create_outlier_distribution_chart\n", + "from encord_active.lib.dataset.summary_utils import get_all_image_sizes, get_metric_summary, get_median_value_of_2d_array\n", + "from encord_active.lib.metrics.utils import load_available_metrics\n", + "from encord_active.lib.dataset.outliers import MetricsSeverity, get_all_metrics_outliers\n", + "from encord_active.lib.common.image_utils import load_or_fill_image\n", + "from encord_active.lib.charts.histogram import get_histogram\n", + "\n", + "def plot_top_k_images(metric_name: str, metrics_data_summary: MetricsSeverity, project: Project, k: int, show_description: bool = False, ascending: bool = True):\n", + " metric_df = metrics_data_summary.metrics[metric_name].df\n", + " metric_df.sort_values(by='score', ascending=ascending, inplace=True)\n", + "\n", + " for _, row in metric_df.head(k).iterrows():\n", + " image = load_or_fill_image(row, project.file_structure)\n", + " plt.imshow(image)\n", + " plt.show()\n", + " print(f\"{metric_name} score: {row['score']}\")\n", + " if show_description:\n", + " print(f\"{row['description']}\")\n", + "\n", + "def plot_metric_distribution(metric_name: str, metric_data_summary: MetricsSeverity):\n", + " fig = px.histogram(metrics_data_summary.metrics[metric_name].df, x=\"score\", nbins=50)\n", + "\n", + " fig.update_layout(title=f\"{metric_name} score distribution\", bargap=0.2)\n", + " fig.show()\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HlcKNEnUUnKI" + }, + "source": [ + "# 🔔 Plot image size distributions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UDm0oVjzT8ZO" + }, + "outputs": [], + "source": [ + "image_sizes = get_all_image_sizes(ea_project.file_structure)\n", + "median_image_dimension = get_median_value_of_2d_array(image_sizes)\n", + "\n", + "fig = create_image_size_distribution_chart(image_sizes)\n", + "\n", + "print(f\"Total images in the dataset: {len(image_sizes)}\")\n", + "print(f\"Median image sizes: {median_image_dimension[0]}x{median_image_dimension[1]}\")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p8itAs0SUxMJ" + }, + "source": [ + "# 📈 Show total outliers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m0ULGHxRUWsH" + }, + "outputs": [], + "source": [ + "available_metrics = load_available_metrics(ea_project.file_structure.metrics)\n", + "metrics_data_summary = get_metric_summary(available_metrics)\n", + "all_metrics_outliers = get_all_metrics_outliers(metrics_data_summary)\n", + "fig = create_outlier_distribution_chart(all_metrics_outliers, \"tomato\", 'orange')\n", + "\n", + "print(f'Total severe outliers: {metrics_data_summary.total_unique_severe_outliers} \\n'\n", + " f'Total moderate outliers: {metrics_data_summary.total_unique_moderate_outliers}')\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wdAHQn-vbfeo" + }, + "source": [ + "# 🧐 Inspect problematic images\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R9Kb0wQXV8TN" + }, + "source": [ + "Now you will have to inspect the dataset for problematic images." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FT5I5dGhVZNb" + }, + "outputs": [], + "source": [ + "# First, get the list of available metrics\n", + "[metric.name for metric in available_metrics]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OjtOB7Jzr7Dl" + }, + "source": [ + "## 👁️ Visualize score distributions based on metric" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OIfht5swsAHe" + }, + "outputs": [], + "source": [ + "for metric in available_metrics:\n", + " plot_metric_distribution(metric.name, metrics_data_summary)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lbuCMR3IiA8f" + }, + "source": [ + "## ▪️ Get the smallest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LiFuNRCogHWd" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Area', metrics_data_summary, ea_project, k=5, ascending=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pBjOfkIviFta" + }, + "source": [ + "## ⬛️ Get the biggest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ziYYWe82hxzg" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Area', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mRznGY3OiMej" + }, + "source": [ + "## 🌫️ Get the blurriest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iOLwSfV5iRsw" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Blur', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f9uj-9YairJi" + }, + "source": [ + "## 🔆 Get the brightest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZuezcOvwivGX" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Brightness', metrics_data_summary, ea_project, k=5, ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xJTm7fGmmmpX" + }, + "source": [ + "## ▓ Get the darkest images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5bqBxlZ0mqFt" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Brightness', metrics_data_summary, ea_project, k=5, ascending=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "957ribVtjVZo" + }, + "source": [ + "## 🚀 Get the least unique images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ACyNR_S2iyT1" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Image Singularity', metrics_data_summary, ea_project, k=15, show_description=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fDwoY8JVwnnA" + }, + "source": [ + "## ▫️ Get the images that have the smallest aspect ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BRlu5blZwVH0" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Aspect Ratio', metrics_data_summary, ea_project, k=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "254nYRNAwxbX" + }, + "source": [ + "## 🔳 Get the images that have the biggest aspect ratio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SbJpit1EwyBV" + }, + "outputs": [], + "source": [ + "plot_top_k_images('Aspect Ratio', metrics_data_summary, ea_project, k=10, 
ascending=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Ap6XhG2yM45" + }, + "source": [ + "# ✅ Wrap Up: Explore more features with 🟣 Encord Active UI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f8QDqSA6qNxA" + }, + "source": [ + "This was just a small part of Encord Active's capabilities. Use Encord Active app to explore more of your dataset, labels, and model performance via easy to use user interface.\n", + "\n", + "\n", + "With the Encord Active UI, you can:\n", + "\n", + "* Understand the data and label distribution.\n", + "* Search through data in natural language.\n", + "* Detect exact and near duplicate images.\n", + "* Detect label errors and biases.\n", + "* Gain insights into your model’s weak areas.\n", + "* Generate model explainability reports.\n", + "* Test, validate, and evaluate your models with advanced error analysis.\n", + "\n", + "
\n", + "\n", + "![Encord Active UI](https://images.prismic.io/encord/73635182-4f04-4299-a992-a4d383e19765_image2.gif?auto=compress,format)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fqjUr2ZYy5PZ" + }, + "source": [ + "🟣 Encord Active is an open source toolkit to prioritize the most valuable image data for labeling to supercharge model performance! **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out the 📖 [Encord Blog](https://encord.com/blog/) and 📺 [YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n", + "---\n", + "\n", + "Thanks for now!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RmzNTiz2q3ya" + }, + "source": [ + "# ⏭️ Next: Learn how to use 🟣 Encord Active to explore 🤗 Face Datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nUFw6ClerSQU" + }, + "source": [ + "What do you think you should check out next? 👀 Learn how to use Encord Active to explore Hugging Face Datasets. The Colab notebook will cover:\n", + "\n", + "* Using 🤗 Datasets to download and generate the dataset.\n", + "* Creating an Encord Active project.\n", + "* Inspecting problematic images in the dataset.\n", + "* Exploring more features with Encord Active UI.\n", + "\n", + "### $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ *👇*" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "56xRVycX0M1W" + }, + "source": [ + "### ⬅️ [*Previous Notebook*](./02_Encord_Active___Import_project_(self_hosting).ipynb) $~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~$ [*Next Notebook*](./Encord_Active_HuggingFace_Dataset_Exploration.ipynb) *➡️*" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "qD0LBtnK-REL" + ], + "provenance": [], + "toc_visible": true + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/local-notebooks/Encord_Notebooks_Building_Semantic_Search_for_Visual_Data.ipynb b/local-notebooks/Encord_Notebooks_Building_Semantic_Search_for_Visual_Data.ipynb new file mode 100644 index 0000000..03f468e --- /dev/null +++ b/local-notebooks/Encord_Notebooks_Building_Semantic_Search_for_Visual_Data.ipynb @@ -0,0 +1,1135 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "r-Pvokzj2Ae2" + }, + "source": [ + "
\n", + "

\"Open\n", + "\n", + "\"Licence\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\""Join\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "G1A8rqIvHmkZ" + }, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "_MKTDxun2UnL" + }, + "source": [ + "# 🟣 Encord Notebooks | 🔎 Building Semantic Search for Visual Data\n", + "\n", + "\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "MavN92f-JSxY" + }, + "source": [ + "## 🏁 Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TTEXBlLjJQoY" + }, + "source": [ + "👋 Hi there! In this notebook, we will build a semantic search engine using CLIP and ChatGPT.\n", + "\n", + "We will use an 🟣 Encord-Active sandbox project to run the search over.\n", + "The dataset is the COCO Validation dataset." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "aHpx0kNd299C" + }, + "source": [ + "## 📥 Install 🟣 Encord-Active\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "4B_e9Gt5JVIZ" + }, + "source": [ + "👟 Run the following script to install 🟣[Encord Active](https://docs.encord.com/active/docs/).\n", + "\n", + "
\n", + "\n", + "📌 🟣Encord Active requires `python3.9` or `python3.10`.\n", + "\n", + "
\n", + "\n", + "\n", + "👉 Depending on your internet speed this might take 1-3 minutes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tOVM55XJ3Dio" + }, + "outputs": [], + "source": [ + "# Assert that python is 3.9 or 3.10 instead\n", + "import sys\n", + "assert sys.version_info.minor in [9, 10], \"Encord Active only supported for python 3.9 and 3.10.\"\n", + "\n", + "# Install Encord Active\n", + "!python -m pip install -qq encord-active==0.1.60" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "V8yUUayLPv2O" + }, + "source": [ + "> # Please _RESTART_ your runtime before going any further.\n", + "We've noticed some complications with the latest version of Google Colab and Numpy, which is fixed by restarting the runtime." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "J4EjZixRQLo9" + }, + "source": [ + "Later, we'll also need the `openai` and `langchain` modules, so let's install them as well." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CLOXJMrxSeby" + }, + "outputs": [], + "source": [ + "!python -m pip install -qq langchain openai" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "fZS7ktx74Vz3" + }, + "source": [ + "## 📩 Download an 🟣 Encord Active sandbox project\n", + "\n", + "🌆 We will use the [COCO Validation set](https://paperswithcode.com/dataset/coco) project for this notebook 📙." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xy9PZ4IR4mkE" + }, + "outputs": [], + "source": [ + "project_name = \"[open-source][validation]-coco-2017-dataset\"\n", + "!encord-active download --project-name $project_name" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "rAqm2jbK5Cat" + }, + "source": [ + "# 📨 Import all the necessary libraries" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "H4c46b0gJdxh" + }, + "source": [ + "In this section, you will import the key libraries that will be used for building the semantic search engine. These libraries play a crucial role in executing the code examples and demonstrating the concepts covered in the walkthrough." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MdpLp1215H3b" + }, + "outputs": [], + "source": [ + "import os\n", + "import random\n", + "import sys\n", + "from functools import reduce\n", + "from getpass import getpass\n", + "from pathlib import Path\n", + "from pprint import pprint\n", + "from time import perf_counter\n", + "from typing import List\n", + "\n", + "import clip\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "import umap\n", + "from encord_active.lib.common.image_utils import show_image_and_draw_polygons\n", + "from encord_active.lib.common.iterator import DatasetIterator\n", + "from encord_active.lib.db.connection import DBConnection\n", + "from encord_active.lib.db.merged_metrics import (\n", + " MergedMetrics,\n", + " ensure_initialised_merged_metrics,\n", + ")\n", + "from encord_active.lib.project.project import Project\n", + "from faiss import IndexFlatIP\n", + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.llms import OpenAI\n", + "from langchain.output_parsers import PydanticOutputParser, RetryWithErrorOutputParser\n", + "from langchain.prompts import (\n", + " AIMessagePromptTemplate,\n", + " ChatPromptTemplate,\n", + " HumanMessagePromptTemplate,\n", + " PromptTemplate,\n", + " SystemMessagePromptTemplate,\n", + ")\n", + "from langchain.schema import AIMessage, HumanMessage, SystemMessage\n", + "from PIL import Image\n", + "from pydantic import BaseModel, Field, root_validator, validator\n", + "from sklearn.preprocessing import normalize\n", + "from tqdm.auto import tqdm\n", + "\n", + "# Another patch to make Colab work\n", + "sys.stdout.fileno = lambda: 0\n", + "sys.stderr.fileno = lambda: 1\n", + "# End patch\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "JWqfxzEiSRF_" + }, + "source": [ + "First, load the Encord Project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VPUbwF0tSRaB" + }, + "outputs": [], + "source": [ + "project = Project(Path(project_name)).load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6g-00M506FBu" + }, + "outputs": [], + "source": [ + "class DatasetImage(BaseModel):\n", + " image: Path\n", + " data_hash: str\n", + "\n", + "\n", + "iterator = DatasetIterator(project.file_structure.project_dir)\n", + "\n", + "# 🗒️ List all images in the project\n", + "project_images: list[DatasetImage] = [\n", + " DatasetImage(\n", + " image=data_unit[1],\n", + " data_hash=iterator.du_hash,\n", + " )\n", + " for data_unit in iterator.iterate()\n", + "]\n", + "project_img_df = pd.DataFrame(project_images)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "NwYzxSLpSe-E" + }, + "source": [ + "You've loaded the image paths and associated data hashes to be able to match them to other queries later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vt6n7JV1Sx8x" + }, + "outputs": [], + "source": [ + "project_img_df = pd.DataFrame([i.dict() for i in project_images])\n", + "project_img_df.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "KN89jj3CUW7Y" + }, + "source": [ + "# 📎Embedding Images with CLIP" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-z0b9z0p8hbK" + }, + "source": [ + "In the following cells, you will learn how to embed images with CLIP. 
You will load in a bunch of images from the COCO Validation project and compute the CLIP embeddings.\n", + "\n", + "Next, you will see how to search these embeddings based on both new Images and on Text.\n", + "\n", + "Encord have made OpenAI's [CLIP model](https://github.com/openai/CLIP) available via PIP for ease of use.\n", + "The dependency is already installed with `encord-active` so nothing needs to be done.\n", + "\n", + "However, if you want the dependency in isolation, you can install it with the following command:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Kp7Q5JqK9HVm" + }, + "outputs": [], + "source": [ + "#!python -m pip install clip-ea" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "D7uEVi649vJf" + }, + "source": [ + "With the installation, it's easy to instantiate a pretrained model to use for embedding images:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IVnGiMfK9HRf" + }, + "outputs": [], + "source": [ + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "clip_model, preprocess = clip.load(\"ViT-B/32\", device=device)\n", + "print(f\"Model loaded on the {'CPU' if device == 'cpu' else 'GPU'}\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "WoZafMd1-QXx" + }, + "source": [ + "Now embed some images. For starters, grab 1000 images and embed them in batches of 100 images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3U3uiQf29Gw7" + }, + "outputs": [], + "source": [ + "BATCH_SIZE = 100\n", + "DB_SIZE = 1000\n", + "image_list = project_img_df.image.to_list()\n", + "db_images, unindexed_images = image_list[:DB_SIZE], image_list[DB_SIZE:]\n", + "\n", + "@torch.inference_mode()\n", + "def embed_images(model, images: list[Path], device):\n", + " out: list[np.ndarray] = []\n", + " for batch_start in tqdm(range(0, len(images), BATCH_SIZE)):\n", + " batch = images[batch_start : batch_start + BATCH_SIZE]\n", + " if not batch:\n", + " continue\n", + "\n", + " batch_images = [preprocess(Image.open(i).convert(\"RGB\")) for i in batch]\n", + " if len(batch_images) == 1:\n", + " tensors = batch_images[0].to(device)[None]\n", + " else:\n", + " tensors = torch.stack(batch_images).to(device)\n", + " out.append(clip_model.encode_image(tensors).detach().cpu().numpy())\n", + "\n", + " # create one np array with all images\n", + " if len(out) == 1:\n", + " return out[0]\n", + " return np.concatenate(out, axis=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3M9RU9dYBJNc" + }, + "outputs": [], + "source": [ + "t0 = perf_counter()\n", + "embeddings = embed_images(clip_model, db_images, device=device)\n", + "t1 = perf_counter()\n", + "print(f\"Embedding {embeddings.shape[0]} images took {t1 - t0:.3f} seconds ({embeddings.shape[0] / (t1-t0):.3f} img/sec)\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C9lnbzGMuA60" + }, + "outputs": [], + "source": [ + "!nvidia-smi" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "DhRiGpt4VSnZ" + }, + "source": [ + "# 📊 See how it looks with Umap" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "LLW8v35JhFcB" + }, + "source": [ + "Umap is one of multiple ways of embedding high dimensional data into 2D, so we can plot it.\n", + "Similar high-dimensional vectors should end up close to 
each other in the low-dimensional space." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n48aatIGFSXJ" + }, + "outputs": [], + "source": [ + "reducer = umap.UMAP(random_state=0)\n", + "embeddings_2d = reducer.fit_transform(embeddings)\n", + "\n", + "fig, ax = plt.subplots()\n", + "ax.scatter(*embeddings_2d.T)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "S_BSqpWCVXOm" + }, + "source": [ + "## ✂️ Indexing and searching CLIP Embeddings" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "9sUo7WwcC8MC" + }, + "source": [ + "To be able to search embeddings efficiently, it makes sense to build an index over the embeddings for efficient searching.\n", + "\n", + "In this example, you'll keep it simple and build the index using `faiss`, as it's already available on Colab." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1DKMrTCvC4ym" + }, + "outputs": [], + "source": [ + "index = IndexFlatIP(embeddings.shape[1])\n", + "index.add(normalize(embeddings))\n", + "# ☝️ That's it really. Normalizing the vectors to unit norm makes the search equivalent to cosine similarity." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "3uRqVDRZDwz9" + }, + "source": [ + "With the index, you can now query the embeddings 🔍" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PR2UACA-FYoE" + }, + "outputs": [], + "source": [ + "random.seed(0)\n", + "\n", + "num_neighbors = 3\n", + "num_tries = 5\n", + "\n", + "# Sample random image outside the ones in the index\n", + "query_indices = random.sample(list(range(len(unindexed_images))), k=num_tries)\n", + "query_images = [unindexed_images[i] for i in query_indices]\n", + "\n", + "# Do search and embedding\n", + "query_embeddings = embed_images(clip_model, query_images, device=device)\n", + "similarities, indices = index.search(normalize(query_embeddings), k=num_neighbors)\n", + "query_2d = reducer.transform(query_embeddings)\n", + "\n", + "# Plotting\n", + "fig, axs = plt.subplots(num_tries, num_neighbors+2, figsize=(15, 15))\n", + "for try_, (img, emb_2d, nn_similarities, nn_indices) in enumerate(zip(query_images, query_2d, similarities, indices)):\n", + " # Plot 2D embeddings\n", + " axs[try_, 0].scatter(*embeddings_2d.T)\n", + " axs[try_, 0].axis(\"off\")\n", + " axs[try_, 0].scatter(*emb_2d.T, c=\"red\")\n", + " axs[try_, 0].scatter(*embeddings_2d[nn_indices].T, c=\"orange\")\n", + "\n", + " # Plot images\n", + " axs[try_, 1].set_title(\"Query Image\")\n", + " axs[try_, 1].imshow(Image.open(img))\n", + " axs[try_, 1].axis(\"off\")\n", + " for sim, neighbor, ax in zip(nn_similarities, nn_indices, axs[try_, 2:]):\n", + " ax.set_title(f\"Similarity: {sim:.3f}\")\n", + " ax.imshow(Image.open(db_images[neighbor]))\n", + " ax.axis(\"off\")\n", + "fig.tight_layout()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "4FEQMgDDRX6M" + }, + "source": [ + "\n", + "✨ It gets even more powerful when you search via text embeddings!" 
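The cell above relies on the fact that, once the embeddings are unit-normalized, an inner-product index behaves exactly like cosine-similarity search. A minimal sketch of that equivalence, assuming only `numpy` and the `normalize` helper already imported in the notebook's import cell (the stand-in 512-d vectors are illustrative, not taken from the project):

```python
# Sanity check: inner product of L2-normalized vectors == cosine similarity,
# which is why IndexFlatIP + normalize(...) above amounts to cosine search.
import numpy as np
from sklearn.preprocessing import normalize

rng = np.random.default_rng(0)
a, b = rng.normal(size=(2, 512))  # two stand-in 512-d vectors (same size as ViT-B/32 embeddings)

cosine = a @ b / (np.linalg.norm(a) * np.linalg.norm(b))
inner_on_unit = (normalize(a[None]) @ normalize(b[None]).T).item()

assert np.isclose(cosine, inner_on_unit)
```

The same reasoning is why the query embeddings are also passed through `normalize` before `index.search` in the cells that follow.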
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x1VUJUNLD5iF" + }, + "outputs": [], + "source": [ + "text_queries = [\n", + " \"surfing\",\n", + " \"motorbikes\",\n", + " \"transportation\",\n", + " \"red flowers in a vase\"\n", + "]\n", + "num_neighbors = 3\n", + "num_tries = len(text_queries)\n", + "\n", + "# Do search and embeddings\n", + "text_tensors = torch.concatenate([clip.tokenize(t) for t in text_queries], dim=0).to(device)\n", + "query_embeddings = clip_model.encode_text(\n", + " text_tensors\n", + ").detach().cpu().numpy()\n", + "similarities, indices = index.search(normalize(query_embeddings), k=num_neighbors)\n", + "query_2d = reducer.transform(query_embeddings)\n", + "\n", + "# Plot\n", + "fig, axs = plt.subplots(num_tries, num_neighbors+1, figsize=(15, 12))\n", + "for try_, (query, emb_2d, nn_similarities, nn_indices) in enumerate(zip(text_queries, query_2d, similarities, indices)):\n", + " # Plot 2D embeddings\n", + " axs[try_, 0].scatter(*embeddings_2d.T)\n", + " axs[try_, 0].axis(\"off\")\n", + " axs[try_, 0].scatter(*emb_2d.T, c=\"red\")\n", + " axs[try_, 0].scatter(*embeddings_2d[nn_indices].T, c=\"orange\")\n", + " axs[try_, 0].set_title(f'Query: \"{query}\"')\n", + "\n", + " # Plot images\n", + " for sim, neighbor, ax in zip(nn_similarities, nn_indices, axs[try_, 1:]):\n", + " ax.set_title(f\"Similarity: {sim:.3f}\")\n", + " ax.imshow(Image.open(db_images[neighbor]))\n", + " ax.axis(\"off\")\n", + "\n", + "fig.tight_layout()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TvbJbXjkVz_V" + }, + "source": [ + "#🔎 Indirect Search with ChatGPT" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "BqFWTikCSe5w" + }, + "source": [ + "\n", + "\n", + "For this you'll use `langchain` to get started. So let's do that.\n", + "\n", + "Steps:\n", + "1. Load Quality Metrics from the Encord Project\n", + "2. Setup prompt\n", + "3. Ask ChatGPT for help" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "I7pSyo6vXBmb" + }, + "source": [ + "Get the complete data frame from the project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VWWECTADW8dx" + }, + "outputs": [], + "source": [ + "ensure_initialised_merged_metrics(project.file_structure)\n", + "with DBConnection(project.file_structure) as conn:\n", + " df = MergedMetrics(conn).all()\n", + "\n", + "df[\"data_hash\"] = df.index.str.split(\"_\", expand=False).str[1]" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "PqWlT6wlYtOX" + }, + "source": [ + "A few insights from the table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yNYmJaWIYtop" + }, + "outputs": [], + "source": [ + "pd.set_option(\"display.precision\", 3)\n", + "print(df.describe().to_string())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "7oPPTgHMTCy_" + }, + "outputs": [], + "source": [ + "#@title 🗝️ Set api key and instantiate model\n", + "OPENAI_API_KEY = getpass(\"What's your OpenAI API key? 
\")\n", + "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY\n", + "\n", + "model_name = 'text-davinci-003'\n", + "temperature = 0.0\n", + "model = OpenAI(model_name=model_name, temperature=temperature)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "1Y--DAmiZzZi" + }, + "source": [ + "Prepare the prompts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TQM3IVyaTBZl" + }, + "outputs": [], + "source": [ + "# Define the prompt that we'll be giving ChatGPT\n", + "def form_prompt(dataframe, parser):\n", + " system_message_prompt = SystemMessagePromptTemplate.from_template(\n", + " \"You are a helpful assistant that translates human queries to filters that apply to a data frame.\"\n", + " )\n", + " columns_str = \"\\n\".join(dataframe.columns)\n", + " instructions_prompt = HumanMessagePromptTemplate.from_template(\n", + " f\"Columns in the dataframe are: \\n{columns_str}\\n\\n\"\n", + " f\"Data frame description: \\n{dataframe.describe()}\\n\\n\"\n", + " \"Here are some rules:\\n\"\n", + " \"1. Top, highest, or largest means the highest quartile.\\n\"\n", + " \"2. Bottom, least, and lowest means the lowest quartile.\\n\"\n", + " \"3. `min_value` and `max_value` should be floats or ints related to the data frame description above.\\n\"\n", + " \"4. `min_value` cannot be larger than the `max_value`\\n\"\n", + " 'If you are not able to answer, please respond with [{{filters: [{{\"column\": \"unknown\", \"min_value\": -1, \"max_value\": -1}}]}}\\n\\n'\n", + " )\n", + " query_prompt = HumanMessagePromptTemplate(\n", + " prompt=PromptTemplate(\n", + " template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n", + " input_variables=[\"query\"],\n", + " partial_variables={\"format_instructions\": parser.get_format_instructions()}\n", + " )\n", + " )\n", + " return ChatPromptTemplate.from_messages([system_message_prompt, instructions_prompt, query_prompt])\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X3pQHPOUS1am" + }, + "outputs": [], + "source": [ + "# Define pydantic model for filter outputs\n", + "class Filter(BaseModel):\n", + " column: str = Field(description=\"The column of the provided dataframe to filter\")\n", + " min_value: float = Field(description=\"The minimum value to include\")\n", + " max_value: float = Field(description=\"The maximum value to include\")\n", + "\n", + " @validator(\"column\")\n", + " def column_exists(cls, field):\n", + " if field == \"unknown\":\n", + " return field\n", + "\n", + " if field not in df.columns:\n", + " raise ValueError(\"The specified column does not exist in the provided dataframe\")\n", + " return field\n", + "\n", + " @root_validator()\n", + " def check_min_smaller_than_max(cls, values):\n", + " min_value = values.get(\"min_value\")\n", + " max_value = values.get(\"max_value\")\n", + "\n", + " if not isinstance(min_value, (float, int)):\n", + " raise ValueError(f\"`min_value` should be a number\")\n", + "\n", + " if not isinstance(max_value, (float, int)):\n", + " raise ValueError(f\"`max_value` should be a number\")\n", + "\n", + " if min_value > max_value:\n", + " raise ValueError(f\"`min_value` ({min_value}) cannot be larger than `max_value` ({max_value})\")\n", + " return values\n", + "\n", + "class Filters(BaseModel):\n", + " filters: list[Filter] = Field(description=\"A list of filters needed to be applied in given order\")\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + 
"id": "2tWeDwlxTni4" + }, + "source": [ + "See an example of what you would pass to ChatGPT:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qDcqbJoQTt-4" + }, + "outputs": [], + "source": [ + "parser = PydanticOutputParser(pydantic_object=Filters)\n", + "example_query = \"What are all the images with both high contrast and many objects?\"\n", + "input_prompt = form_prompt(df, parser).format_prompt(query=example_query, format_instructions=parser.get_format_instructions())\n", + "pprint(input_prompt.to_string())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "IsaaXt3KTxph" + }, + "source": [ + "And now the final bit, which is stitching it all together." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zAeOhb_FZyjh" + }, + "outputs": [], + "source": [ + "def do_indirect_query(model, query:str, dataframe: pd.DataFrame):\n", + " # Generate the prompt\n", + " parser = PydanticOutputParser(pydantic_object=Filters)\n", + " input_prompt = form_prompt(dataframe, parser).format_prompt(query=query, format_instructions=parser.get_format_instructions())\n", + "\n", + " # Ask ChatGPT for help given the prompt\n", + " response = model(input_prompt.to_string())\n", + "\n", + " # Parse the output with a retry\n", + " output: Filters | None = None\n", + " try:\n", + " output = parser.parse(response)\n", + " except:\n", + " print(f\"Trying to fix error after receiving {response}\")\n", + " retry_parser = RetryWithErrorOutputParser.from_llm(parser=parser, llm=OpenAI(temperature=0))\n", + " try:\n", + " output = retry_parser.parse_with_prompt(response, input_prompt)\n", + " except:\n", + " pass\n", + "\n", + " if not output or not output.filters or output.filters[0].column == \"unknown\":\n", + " print(f\"This query couldn't be processed properly. The response gotten from ChatGPT was: {response}\")\n", + " return None\n", + "\n", + " # Do the actual filtering\n", + " subset_df = df.copy()\n", + " for filter in output.filters:\n", + " subset_df = subset_df[subset_df[filter.column].between(filter.min_value, filter.max_value, inclusive=\"both\")]\n", + " return subset_df.sort_values([f.column for f in output.filters], ascending=False).reset_index(), output\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "kR9GYoVQUvnJ" + }, + "source": [ + "Try it out for an example query on the entire dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "36q2RwIKUv7O" + }, + "outputs": [], + "source": [ + "example_query = \"What are all the images with both high contrast and many objects?\"\n", + "subset_df, filters = do_indirect_query(model, example_query, df)\n", + "print(f\"Number of results: {subset_df.shape[0]}\")\n", + "print(\"Filters\")\n", + "pprint(filters)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "V060nwWHVBli" + }, + "source": [ + "Plot the results to see the actual images found based on the query." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YBFiNBSinb-s" + }, + "outputs": [], + "source": [ + "def plot_top_k_data_units(df, filters: Filters, k=12, cols=3):\n", + " rows = k // cols if k % cols == 0 else k // cols + 1\n", + " fig, axs = plt.subplots(rows, cols, figsize=(cols*3, rows*3))\n", + " axs = axs.reshape(-1)\n", + " fig.suptitle(\"; \".join(map(lambda f: f.column, filters.filters)))\n", + "\n", + " for (idx, row), ax in zip(df.iterrows(), axs):\n", + " img = show_image_and_draw_polygons(row, project.file_structure)\n", + " ax.imshow(img)\n", + " ax.set_title(\"; \".join([f\"{row[f.column]:.3f}\" for f in filters.filters]))\n", + " ax.axis(\"off\")\n", + " fig.tight_layout()\n", + " return fig\n", + "\n", + "_ = plot_top_k_data_units(subset_df, filters)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "8Q3ZB2omWVZO" + }, + "source": [ + "# 🪢 Putting it all together" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-zc9_Mwaj2xh" + }, + "source": [ + "\n", + "\n", + "Now that you know how to do direct semantic queries with CLIP and indirect semantic queries with ChatGPT, combine them.\n", + "\n", + "The steps are:\n", + "\n", + "1. Compute embeddings for the entire dataset\n", + "2. Define some direct and indirect query pairs\n", + "3. Use an index to find the nearest neighbors based on CLIP Embeddings\n", + "4. Use ChatGPT to refine the search by indirect queries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q9M2B991WqPU" + }, + "outputs": [], + "source": [ + "# Set some thresholds for the CLIP search\n", + "num_neighbors = 1000\n", + "similarity_threshold = 0.265 # 👈 The minimum similarity required to be considered relevant" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w0YSFqh81utW" + }, + "outputs": [], + "source": [ + "# Embed the entire dataset\n", + "t0 = perf_counter()\n", + "project_embeddings = embed_images(clip_model, image_list, device=device)\n", + "t1 = perf_counter()\n", + "print(f\"Embedding {project_embeddings.shape[0]} images took {t1 - t0:.3f} seconds ({project_embeddings.shape[0] / (t1-t0):.3f} img/sec)\")\n", + "\n", + "# Create an index\n", + "project_index = IndexFlatIP(project_embeddings.shape[1])\n", + "project_index.add(normalize(project_embeddings))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5OFvWXNcWAZT" + }, + "outputs": [], + "source": [ + "# Define queries\n", + "direct_queries = [\n", + " \"outdoor sports\",\n", + " \"transportation\"\n", + "]\n", + "indirect_queries = [\n", + " \"All the images with high brightness and many objects\",\n", + " \"All the objects with high annotation quality\"\n", + "]\n", + "\n", + "# Embed direct queries\n", + "text_tensors = torch.concatenate([clip.tokenize(t) for t in direct_queries], dim=0).to(device)\n", + "query_embeddings = clip_model.encode_text(\n", + " text_tensors\n", + ").detach().cpu().numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lE-bf50CWFMG" + }, + "outputs": [], + "source": [ + "# Do the direct semantic querying\n", + "similarities, indices = index.search(normalize(query_embeddings), k=num_neighbors)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aQRO6M_2j2C1" + }, + "outputs": [], + "source": [ + "# Filter dataframe based on search result.\n", + "for (in_query, 
di_query, sim, idx) in zip(indirect_queries, direct_queries, similarities, indices):\n", + " idx = idx[sim>similarity_threshold]\n", + " data_hashes = set(project_img_df.iloc[idx].data_hash.to_list())\n", + "\n", + " filtered_df = df.copy()\n", + "\n", + " # Filter project data\n", + " clip_filtered_df = filtered_df[filtered_df.data_hash.isin(data_hashes)]\n", + " gpt_result = do_indirect_query(model, in_query, clip_filtered_df)\n", + "\n", + " if gpt_result is None:\n", + " print(f\"Chat GPT failed to produce valid filters for the indirect query {in_query}\")\n", + " continue\n", + "\n", + " gpt_filtered_df, filters = gpt_result\n", + "\n", + " print(f\"Results for direct query: '{di_query}' and indirect query: '{in_query}'\")\n", + " print(f\"Found {gpt_filtered_df.shape[0]} results matching the query based of {clip_filtered_df.shape[0]} semantically similar images.\")\n", + " print(f\"Based on filters: {filters}\")\n", + " print(\"- \" * 10)\n", + " fig = plot_top_k_data_units(gpt_filtered_df, filters)\n", + " fig.suptitle(f\"IQ: '{in_query}', DQ: '{di_query}'\", fontsize=16)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "MQITOFpyWg9n" + }, + "source": [ + "# ✅ Wrap up" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Z4CPsdC2YBfj" + }, + "source": [ + "\n", + "📓This Colab notebook showed you how to build a semantic search engine for visual search data using CLIP and ChatGPT.\n", + "\n", + "---\n", + "\n", + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟 if you like it, and leave an issue if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out our 📖[blog](https://encord.com/blog/webinar-semantic-visual-search-chatgpt-clip/) and 📺[YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "IOF2pqteYRF4" + }, + "source": [ + "### ✨ Want more walthroughs like this? Check out the 🟣 [Encord Notebooks repository](https://github.com/encord-team/encord-notebooks/)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/local-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb b/local-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb new file mode 100644 index 0000000..7fe7631 --- /dev/null +++ b/local-notebooks/Encord_Notebooks_How_To_Fine_Tuning_SAM.ipynb @@ -0,0 +1,795 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ujdhO_AMlaxh" + }, + "source": [ + "
\n", + "

\"Open\n", + "\"License\"\n", + "\"PyPi\n", + "\"PyPi\n", + "\n", + "\"docs\"\n", + "\n", + "\"Join\n", + "\n", + "\""Encord\n", + "\n", + "\n", + "

\n", + "

\n", + "\"Twitter

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "lOXCsrzileKN" + }, + "source": [ + "
\n", + "

\n", + " \n", + " \n", + " \n", + "

\n", + "
" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "tbJThT5Ol_lr" + }, + "source": [ + "# 🟣 Encord Notebooks | 🔧 How to fine-tune Segment Anything Model (SAM)\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "n6eLcQDjmvmg" + }, + "source": [ + "## 🏁 Overview" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "aiehQvBj5Crc" + }, + "source": [ + "👋 Hi there!\n", + "\n", + "\n", + "\n", + "This notebook gives you a walkthrough of fine-tuning [Segment Anything Model](https://encord.com/blog/segment-anything-model-explained/) (SAM) to a specific application.\n", + "\n", + "You will use the stamp verification dataset on [Kaggle]( https://www.kaggle.com/datasets/rtatman/stamp-verification-staver-dataset) since it has:\n", + "* data SAM is unlikely to have seen (scans of invoices with stamps),\n", + "* precise ground truth segmentation masks,\n", + "* and bounding boxes which we can use as prompts to SAM.\n", + "\n", + "This tutorial has been prepared by [Alex Bonnet](https://encord.com/author/alexandre-bonnet/), ML Solutions Engineer at Encord.\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + "> 💡 If you want to read more about Encord Active checkout our [GitHub](https://github.com/encord-team/encord-active) and [documentation](https://docs.encord.com/docs/active-overview).\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "YSsqv3dWoVQ6" + }, + "source": [ + " ## 📰 Complementary Blog Post" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-qRmUVKwDu-9" + }, + "source": [ + "![How To Fine-Tune Segment Anything - Encord Blog](https://images.prismic.io/encord/fc9dadaa-a011-4de1-b0eb-e7a55f854081_Group%2048096157.png?ixlib=gatsbyFP&auto=compress%2Cformat&fit=max)\n", + "\n", + "This notebook implements the steps discussed in the blog post: https://encord.com/blog/learn-how-to-fine-tune-the-segment-anything-model-sam/\n", + "\n", + "Check it 🔼 out for a comprehensive walkthrough." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "8xFZSDikKMsA" + }, + "source": [ + "## 📥 Installation and Set Up" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "n7rshBkIH3bl" + }, + "source": [ + "To ensure a smooth experience with this walkthrough notebook, you need to install the necessary libraries, dependencies, and model family. This step is essential for running the code and executing the examples effectively.\n", + "\n", + "By installing these libraries upfront, you'll have everything you need to follow along and explore the notebook without any interruptions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r0oru8hAn6q2" + }, + "outputs": [], + "source": [ + "! pip install kaggle &> /dev/null\n", + "! pip install torch torchvision &> /dev/null\n", + "! pip install opencv-python pycocotools matplotlib onnxruntime onnx &> /dev/null\n", + "! pip install git+https://github.com/facebookresearch/segment-anything.git &> /dev/null\n", + "! wget https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth &> /dev/null" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "YkwevXNd2Ofw" + }, + "source": [ + "**Action Required:** Place your kaggle.json file into the files in the notebook workspace. More info here https://github.com/Kaggle/kaggle-api#api-credentials" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0uTL0fDZEOnl" + }, + "outputs": [], + "source": [ + "! mkdir ~/.kaggle\n", + "! mv kaggle.json ~/.kaggle/\n", + "! chmod 600 ~/.kaggle/kaggle.json" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sD5Kt6lO_HIw" + }, + "outputs": [], + "source": [ + "! kaggle datasets download rtatman/stamp-verification-staver-dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zJP-eL2_EA52" + }, + "outputs": [], + "source": [ + "! unzip stamp-verification-staver-dataset.zip &> /dev/null" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "dXzO_ZRWIEmz" + }, + "source": [ + "## 📩 Importing Relevant Libraries" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "I52JuXm7IVWt" + }, + "source": [ + "In this section, you will import the key libraries that will be used for dataset manipulation and visualization. These libraries play a crucial role in executing the code examples and demonstrating the concepts covered in the walkthrough." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lwmQm0C3n_3D" + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import cv2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Gv4ob2wRE9CS" + }, + "outputs": [], + "source": [ + "# Exclude scans with zero or multiple bboxes (of the first 100)\n", + "stamps_to_exclude = {\n", + " 'stampDS-00008',\n", + " 'stampDS-00010',\n", + " 'stampDS-00015',\n", + " 'stampDS-00021',\n", + " 'stampDS-00027',\n", + " 'stampDS-00031',\n", + " 'stampDS-00039',\n", + " 'stampDS-00041',\n", + " 'stampDS-00049',\n", + " 'stampDS-00053',\n", + " 'stampDS-00059',\n", + " 'stampDS-00069',\n", + " 'stampDS-00073',\n", + " 'stampDS-00080',\n", + " 'stampDS-00090',\n", + " 'stampDS-00098',\n", + " 'stampDS-00100'\n", + "}.union({\n", + " 'stampDS-00012',\n", + " 'stampDS-00013',\n", + " 'stampDS-00014',\n", + "}) # Exclude 3 scans that aren't the type of scan we want to be fine tuning for" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "WNJGn1BsKUMS" + }, + "source": [ + "## 🛠️ Preprocess the dataset" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "KhXGI8HXyIFi" + }, + "source": [ + "You'll need to preprocess the scans from numpy arrays to pytorch tensors. To do this, follow what happens inside [`SamPredictor.set_image`](https://github.com/facebookresearch/segment-anything/blob/c1910835a32a05cbb79bdacbec8f25914a7e3a20/segment_anything/predictor.py#L34-L60) and [`SamPredictor.set_torch_image`](https://github.com/facebookresearch/segment-anything/blob/c1910835a32a05cbb79bdacbec8f25914a7e3a20/segment_anything/predictor.py#L63) which preprocesses the image.\n", + "\n", + "\n", + "\n", + "First, extract the bounding box coordinates which will be used to feed into SAM as prompts." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PNrV8CN8F9G0" + }, + "outputs": [], + "source": [ + "bbox_coords = {}\n", + "for f in sorted(Path('ground-truth-maps/ground-truth-maps/').iterdir())[:100]:\n", + " k = f.stem[:-3]\n", + " if k not in stamps_to_exclude:\n", + " im = cv2.imread(f.as_posix())\n", + " gray=cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)\n", + " contours, hierarchy = cv2.findContours(gray,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)[-2:]\n", + " if len(contours) > 1:\n", + " x,y,w,h = cv2.boundingRect(contours[0])\n", + " height, width, _ = im.shape\n", + " bbox_coords[k] = np.array([x, y, x + w, y + h])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "zsv0IGBDyMkS" + }, + "source": [ + "Extract the ground truth segmentation masks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lz7B4NDoJRxJ" + }, + "outputs": [], + "source": [ + "ground_truth_masks = {}\n", + "for k in bbox_coords.keys():\n", + " gt_grayscale = cv2.imread(f'ground-truth-pixel/ground-truth-pixel/{k}-px.png', cv2.IMREAD_GRAYSCALE)\n", + " ground_truth_masks[k] = (gt_grayscale == 0)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "FsX7SxD8KYOP" + }, + "source": [ + "## 👀 Inspect the images, bounding box prompts, and the ground truth segmentation masks" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bz8C8QaxoT6N" + }, + "outputs": [], + "source": [ + "# Helper functions provided in https://github.com/facebookresearch/segment-anything/blob/9e8f1309c94f1128a6e5c047a10fdcb02fc8d651/notebooks/predictor_example.ipynb\n", + "def show_mask(mask, ax, random_color=False):\n", + " if random_color:\n", + " color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)\n", + " else:\n", + " color = np.array([30/255, 144/255, 255/255, 0.6])\n", + " h, w = mask.shape[-2:]\n", + " mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)\n", + " ax.imshow(mask_image)\n", + "\n", + "def show_box(box, ax):\n", + " x0, y0 = box[0], box[1]\n", + " w, h = box[2] - box[0], box[3] - box[1]\n", + " ax.add_patch(plt.Rectangle((x0, y0), w, h, edgecolor='green', facecolor=(0,0,0,0), lw=2))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ncVrlh5fyed9" + }, + "source": [ + "We can see here that the ground truth mask is extremely tight which will be good for calculating an accurate loss.\n", + "The bounding box overlaid will be a good prompt." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3csOAxFju_Pi" + }, + "outputs": [], + "source": [ + "name = 'stampDS-00004'\n", + "image = cv2.imread(f'scans/scans/{name}.png')\n", + "\n", + "plt.figure(figsize=(10,10))\n", + "plt.imshow(image)\n", + "show_box(bbox_coords[name], plt.gca())\n", + "show_mask(ground_truth_masks[name], plt.gca())\n", + "plt.axis('off')\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "IJIFDGaUKfQp" + }, + "source": [ + "## 🧑‍🍳 Prepare Fine-Tuning" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OdTD9CTxKena" + }, + "outputs": [], + "source": [ + "model_type = 'vit_b'\n", + "checkpoint = 'sam_vit_b_01ec64.pth'\n", + "device = 'cuda:0'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HjTIJtLxP8ZG" + }, + "outputs": [], + "source": [ + "from segment_anything import SamPredictor, sam_model_registry\n", + "sam_model = sam_model_registry[model_type](checkpoint=checkpoint)\n", + "sam_model.to(device)\n", + "sam_model.train();" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "MKZFlHjdKlhr" + }, + "source": [ + "### 🔁 Convert the input images into a format SAM's internal functions expect." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Bu0MdejGylZY" + }, + "source": [ + "First, use [`utils.transform.ResizeLongestSide`](https://github.com/facebookresearch/segment-anything/blob/c1910835a32a05cbb79bdacbec8f25914a7e3a20/segment_anything/predictor.py#L31) to resize the image, as this is the transformer used inside the predictor.\n", + "\n", + "Then convert the image to a pytorch tensor and use the SAM's [preprocess method](https://github.com/facebookresearch/segment-anything/blob/c1910835a32a05cbb79bdacbec8f25914a7e3a20/segment_anything/modeling/sam.py#L164) to finish preprocessing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jtPYpirbK3Wi" + }, + "outputs": [], + "source": [ + "# Preprocess the images\n", + "from collections import defaultdict\n", + "\n", + "import torch\n", + "\n", + "from segment_anything.utils.transforms import ResizeLongestSide\n", + "\n", + "transformed_data = defaultdict(dict)\n", + "for k in bbox_coords.keys():\n", + " image = cv2.imread(f'scans/scans/{k}.png')\n", + " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + " transform = ResizeLongestSide(sam_model.image_encoder.img_size)\n", + " input_image = transform.apply_image(image)\n", + " input_image_torch = torch.as_tensor(input_image, device=device)\n", + " transformed_image = input_image_torch.permute(2, 0, 1).contiguous()[None, :, :, :]\n", + "\n", + " input_image = sam_model.preprocess(transformed_image)\n", + " original_image_size = image.shape[:2]\n", + " input_size = tuple(transformed_image.shape[-2:])\n", + "\n", + " transformed_data[k]['image'] = input_image\n", + " transformed_data[k]['input_size'] = input_size\n", + " transformed_data[k]['original_image_size'] = original_image_size" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QxnY6TMGKjdc" + }, + "outputs": [], + "source": [ + "# Set up the optimizer, hyperparameter tuning will improve performance here\n", + "lr = 1e-4\n", + "wd = 0\n", + "optimizer = torch.optim.Adam(sam_model.mask_decoder.parameters(), lr=lr, weight_decay=wd)\n", + "\n", + "loss_fn = torch.nn.MSELoss()\n", + "# loss_fn = torch.nn.BCELoss()\n", + "keys = list(bbox_coords.keys())" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "sRHCNdzZy3dt" + }, + "source": [ + "## 🚀 Run SAM Fine-Tuning" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "9DIYcFKu14nr" + }, + "source": [ + "This is the main training loop.\n", + "\n", + "Improvements to be made include batching and moving the computation of the image and prompt embeddings outside the loop since we are not tuning these parts of the model, this will speed up training as we should not recompute the embeddings during each epoch.\n", + "\n", + "> ⚠️ Sometimes the optimizer gets lost in the parameter space and the loss function blows up. Restarting from scratch (including running all cells below 'Prepare Fine Tuning' in order to start with default weights again) should solve it.\n", + "\n", + "📝 In a production implementation, a better choice of optimiser/loss function will certainly help." 
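The batching and embedding-caching improvements mentioned above are left out of the training loop that follows. A rough sketch of the caching idea only, assuming the `sam_model`, `transformed_data`, `keys`, and `device` objects defined in the cells above:

```python
import torch

# The image encoder is frozen (only the mask decoder is optimized), so its
# outputs never change between epochs and can be computed once up front.
cached_image_embeddings = {}
with torch.no_grad():
    for k in keys[:20]:
        input_image = transformed_data[k]['image'].to(device)
        cached_image_embeddings[k] = sam_model.image_encoder(input_image)

# Each epoch would then read cached_image_embeddings[k] instead of calling
# sam_model.image_encoder again; the fixed box prompts could be cached the same way.
```

This is only safe because the optimizer above is built over `sam_model.mask_decoder.parameters()`, so the encoder weights never change during fine-tuning.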
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WRQ6yd_PM_B9" + }, + "outputs": [], + "source": [ + "from statistics import mean\n", + "\n", + "from tqdm import tqdm\n", + "from torch.nn.functional import threshold, normalize\n", + "\n", + "num_epochs = 100\n", + "losses = []\n", + "\n", + "for epoch in range(num_epochs):\n", + " epoch_losses = []\n", + " # Just train on the first 20 examples\n", + " for k in keys[:20]:\n", + " input_image = transformed_data[k]['image'].to(device)\n", + " input_size = transformed_data[k]['input_size']\n", + " original_image_size = transformed_data[k]['original_image_size']\n", + "\n", + " # No grad here as we don't want to optimise the encoders\n", + " with torch.no_grad():\n", + " image_embedding = sam_model.image_encoder(input_image)\n", + "\n", + " prompt_box = bbox_coords[k]\n", + " box = transform.apply_boxes(prompt_box, original_image_size)\n", + " box_torch = torch.as_tensor(box, dtype=torch.float, device=device)\n", + " box_torch = box_torch[None, :]\n", + "\n", + " sparse_embeddings, dense_embeddings = sam_model.prompt_encoder(\n", + " points=None,\n", + " boxes=box_torch,\n", + " masks=None,\n", + " )\n", + " low_res_masks, iou_predictions = sam_model.mask_decoder(\n", + " image_embeddings=image_embedding,\n", + " image_pe=sam_model.prompt_encoder.get_dense_pe(),\n", + " sparse_prompt_embeddings=sparse_embeddings,\n", + " dense_prompt_embeddings=dense_embeddings,\n", + " multimask_output=False,\n", + " )\n", + "\n", + " upscaled_masks = sam_model.postprocess_masks(low_res_masks, input_size, original_image_size).to(device)\n", + " binary_mask = normalize(threshold(upscaled_masks, 0.0, 0))\n", + "\n", + " gt_mask_resized = torch.from_numpy(np.resize(ground_truth_masks[k], (1, 1, ground_truth_masks[k].shape[0], ground_truth_masks[k].shape[1]))).to(device)\n", + " gt_binary_mask = torch.as_tensor(gt_mask_resized > 0, dtype=torch.float32)\n", + "\n", + " loss = loss_fn(binary_mask, gt_binary_mask)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " epoch_losses.append(loss.item())\n", + " losses.append(epoch_losses)\n", + " print(f'EPOCH: {epoch}')\n", + " print(f'Mean loss: {mean(epoch_losses)}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UKqIxUgAOTzp" + }, + "outputs": [], + "source": [ + "mean_losses = [mean(x) for x in losses]\n", + "mean_losses\n", + "\n", + "plt.plot(list(range(len(mean_losses))), mean_losses)\n", + "plt.title('Mean epoch loss')\n", + "plt.xlabel('Epoch Number')\n", + "plt.ylabel('Loss')\n", + "\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "TuDlIiRjmitT" + }, + "source": [ + "## 📏 Compare the fine-tuned model to the original model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J9fZiPoIKXYW" + }, + "outputs": [], + "source": [ + "# Load up the model with default weights\n", + "sam_model_orig = sam_model_registry[model_type](checkpoint=checkpoint)\n", + "sam_model_orig.to(device);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3dIKKKHOn_7R" + }, + "outputs": [], + "source": [ + "# Set up predictors for both tuned and original models\n", + "from segment_anything import sam_model_registry, SamPredictor\n", + "predictor_tuned = SamPredictor(sam_model)\n", + "predictor_original = SamPredictor(sam_model_orig)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": { + "id": "nhNHx-6kpEWu" + }, + "outputs": [], + "source": [ + "# The model has not seen keys[21] (or keys[20]) since we only trained on keys[:20]\n", + "k = keys[21]\n", + "image = cv2.imread(f'scans/scans/{k}.png')\n", + "image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + "\n", + "predictor_tuned.set_image(image)\n", + "predictor_original.set_image(image)\n", + "\n", + "input_bbox = np.array(bbox_coords[k])\n", + "\n", + "masks_tuned, _, _ = predictor_tuned.predict(\n", + " point_coords=None,\n", + " box=input_bbox,\n", + " multimask_output=False,\n", + ")\n", + "\n", + "masks_orig, _, _ = predictor_original.predict(\n", + " point_coords=None,\n", + " box=input_bbox,\n", + " multimask_output=False,\n", + ")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Df2oxBaxxXrt" + }, + "source": [ + "See here that the tuned model is starting to ignore the whitespace between the words, which is what the ground truths show. With further training, more data and further hyperparameter tuning you will be able to improve this result.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sH6NorejpTii" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "_, axs = plt.subplots(1, 2, figsize=(25, 25))\n", + "\n", + "\n", + "axs[0].imshow(image)\n", + "show_mask(masks_tuned, axs[0])\n", + "show_box(input_bbox, axs[0])\n", + "axs[0].set_title('Mask with Tuned Model', fontsize=26)\n", + "axs[0].axis('off')\n", + "\n", + "\n", + "axs[1].imshow(image)\n", + "show_mask(masks_orig, axs[1])\n", + "show_box(input_bbox, axs[1])\n", + "axs[1].set_title('Mask with Untuned Model', fontsize=26)\n", + "axs[1].axis('off')\n", + "\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "H8eMyK1vNP4J" + }, + "source": [ + "# ✅ Wrap up" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ysWfdTinMjeI" + }, + "source": [ + "If the image does not render due to size limitations, you can view it here:\n", + "\n", + "![fine-tuned model vs sam model - encord notebooks](https://storage.googleapis.com/encord-notebooks/fine-tune%20SAM/tuned_model_comparison.png)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "-w9Hph-7NLnm" + }, + "source": [ + "\n", + "📓This Colab notebook showed you how to fine-tune Segment Anything Model (SAM) on your own data. If you would like to learn more, check out the [complementary blog post](https://encord.com/blog/learn-how-to-fine-tune-the-segment-anything-model-sam/).\n", + "\n", + "---\n", + "\n", + "🟣 Encord Active is an open-source framework for computer vision model testing, evaluation, and validation. **Check out the project on [GitHub](https://github.com/encord-team/encord-active), leave a star 🌟** if you like it. We welcome you to [contribute](https://docs.encord.com/docs/active-contributing) if you find something is missing.\n", + "\n", + "---\n", + "\n", + "👉 Check out our 📖[blog](https://encord.com/blog/) and 📺[YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "Lpkoq9YIQGWI" + }, + "source": [ + "### ✨ Want more walthroughs like this? 
Check out the 🟣 [Encord Notebooks repository](https://github.com/encord-team/encord-notebooks/tree/9617d8bc6cea52563ecb18bf173c2043195403e8)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/Encord_Notebooks__demo_ea_native_display.ipynb b/notebooks/Encord_Notebooks__demo_ea_native_display.ipynb index 0f0c0da..d61d78e 100644 --- a/notebooks/Encord_Notebooks__demo_ea_native_display.ipynb +++ b/notebooks/Encord_Notebooks__demo_ea_native_display.ipynb @@ -1,30 +1,18 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, "cells": [ { "cell_type": "markdown", + "metadata": { + "id": "nZ6Bu6FLvxYn" + }, "source": [ "
\n", "

\"Open\n", - "\n", "\"Licence\"\n", "\"PyPi\n", "\"PyPi\n", "\n", - "\"docs\"\n", + "\"docs\"\n", "\n", " \"Join\n", "\""Encord\n", @@ -34,36 +22,36 @@ "

\n", "\"Twitter

\n", "
" - ], - "metadata": { - "id": "nZ6Bu6FLvxYn" - } + ] }, { "cell_type": "markdown", - "source": [ - "# 🟣 Encord Notebooks | 📥 Explore Encord Active's `0.1.70` Native UI" - ], "metadata": { "id": "lZmVMPzW6JQV" - } + }, + "source": [ + "# 🟣 Encord Notebooks | 📥 Explore Encord Active's `0.1.73` Native UI" + ] }, { "cell_type": "markdown", - "source": [ - "## 🏁 Overview" - ], "metadata": { "id": "3LgvE_e362FL" - } + }, + "source": [ + "## 🏁 Overview" + ] }, { "cell_type": "markdown", + "metadata": { + "id": "Yej19g4c64t8" + }, "source": [ "👋 Hi there! This notebook gives you a quick way to test Encord Active with a sandbox project and without installing anything locally.\n", "\n", "This 📒 notebook will cover:\n", - "* Install the Encord Active `0.1.70` release.\n", + "* Install the Encord Active `0.1.73` release.\n", "* Launch the UI with a `quickstart` project.\n", "* Explore the all-new Encord Active UI 🤩.\n", "\n", @@ -71,55 +59,57 @@ "\n", "\n", "💡If you want to learn more about 🟣 Encord Active checkout our [GitHub](https://github.com/encord-team/encord-active) and [documentation](https://encord-active-docs.web.app/)." - ], - "metadata": { - "id": "Yej19g4c64t8" - } + ] }, { "cell_type": "markdown", - "source": [ - "## 📥 Install Encord Active `0.1.70` Release\n" - ], "metadata": { "id": "jFRLWcZg-EVz" - } + }, + "source": [ + "## 📥 Install Encord Active `0.1.73` Release\n" + ] }, { "cell_type": "code", - "source": [ - "!python -m pip install encord-active==0.1.70" - ], + "execution_count": null, "metadata": { "id": "iHDRDFkaeBNh" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!python -m pip install encord-active==0.1.73" + ] }, { "cell_type": "code", - "source": [ - "!encord-active --version" - ], + "execution_count": null, "metadata": { "id": "QIexBBIw15V_" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!encord-active --version" + ] }, { "cell_type": "code", - "source": [ - "!encord-active download --project-name quickstart" - ], + "execution_count": null, "metadata": { "id": "19mNpYvVes1R" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!encord-active download --project-name quickstart" + ] }, { "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qctxDtWReD7X" + }, + "outputs": [], "source": [ "from google.colab.output import eval_js\n", "from IPython.display import Javascript\n", @@ -143,47 +133,45 @@ "\n", "# Start encord active in the background\n", "get_ipython().system_raw(f\"ENV=packaged API_URL='{be_url}' ALLOWED_ORIGIN='{fe_url}' encord-active start &\")" - ], - "metadata": { - "id": "qctxDtWReD7X" - }, - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", - "source": [ - "# Show EA in a cell\n", - "show_url(fe_url)" - ], + "execution_count": null, "metadata": { "id": "Y_ftsgzvf0WX" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "# Show EA in a cell\n", + "show_url(fe_url)" + ] }, { "cell_type": "code", - "source": [ - "!echo \"Alternatively use this link: {fe_url} to open the fronted in a new tab\"" - ], + "execution_count": null, "metadata": { "id": "596xvqv6hWaz" }, - "execution_count": null, - "outputs": [] + "outputs": [], + "source": [ + "!echo \"Alternatively use this link: {fe_url} to open the fronted in a new tab\"" + ] }, { "cell_type": "markdown", - "source": [ - "# ✅ Wrap up" - ], "metadata": { "id": "-2QFNYviDB4Q" - } + }, + "source": [ + "# ✅ Wrap up" + ] }, { "cell_type": "markdown", + "metadata": 
{ + "id": "pPE2ppItDE0M" + }, "source": [ "\n", "📓This Colab notebook showed you how to download a quickstart project with Encord Active. If you would like to learn more, check out our [documentation](https://docs.encord.com/docs/active-overview) to find more concrete workflow and guides.\n", @@ -196,10 +184,5 @@ "\n", "👉 Check out our 📖[blog](https://encord.com/blog/) and 📺[YouTube](https://www.youtube.com/@encord) channel to stay up-to-date with the latest in computer vision, foundation models, active learning, and data-centric AI.\n", "\n" - ], - "metadata": { - "id": "pPE2ppItDE0M" - } + ] } - ] -}