Skip to content

Commit

Permalink
misc: More types and tidying
Browse files Browse the repository at this point in the history
  • Loading branch information
Jim-Encord committed Jan 8, 2025
1 parent 2e6bb30 commit 68ee994
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 55 deletions.
83 changes: 34 additions & 49 deletions encord/utilities/coco/exporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from collections import defaultdict
from dataclasses import dataclass
from itertools import chain
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Set, Tuple, Union

import numpy as np
from pycocotools import mask as cocomask
Expand Down Expand Up @@ -43,16 +43,16 @@ class CocoAnnotation(BaseModel):
id_: int
image_id: int
is_crowd: int
segmentation: Union[List[List[float]], Dict[str,Any]]
segmentation: Union[List[List[float]], Dict[str, Any]]
keypoints: Optional[List[float]] = None
num_keypoints: Optional[int] = None
track_id: Optional[int] = None
encord_track_uuid: Optional[str] = None
rotation: Optional[float] = None
classifications: Optional[Dict[str,Any]] = None
classifications: Optional[Dict[str, Any]] = None
manual_annotation: Optional[bool] = None

def to_dict(self) -> Dict[str,Any]:
def to_dict(self) -> Dict[str, Any]:
return {
"area": self.area,
"bbox": list(self.bbox),
Expand Down Expand Up @@ -108,13 +108,13 @@ class CocoExporter:

def __init__(
self,
labels_list: List[Dict[str,Any]],
labels_list: List[Dict[str, Any]],
ontology: OntologyStructure,
include_videos: bool = True,
) -> None:
self._labels_list = labels_list
self._ontology = ontology
self._coco_json: Dict[str,Any] = {}
self._coco_json: Dict[str, Any] = {}
self._current_annotation_id = 0
self._object_hash_to_track_id_map: Dict[str, int] = {}
self._coco_categories_id_to_ontology_object_map: Dict = {} # TODO: do we need this?
Expand All @@ -126,15 +126,15 @@ def __init__(
self._id_and_object_hash_to_answers_map: Optional[Dict[Tuple[int, str], Dict]] = None
self._include_videos = include_videos

def export(self) -> Dict[str,Any]:
def export(self) -> Dict[str, Any]:
    """Build the COCO document and return it.

    Fills ``self._coco_json`` with the ``info``, ``categories``, ``images``
    and ``annotations`` sections, in that order, and returns that same dict.

    Returns:
        The ``self._coco_json`` dict, mutated in place.
    """
    # NOTE(review): the section order is kept exactly as before; the image
    # and category getters appear to fill lookup maps that annotation
    # building reads later - confirm before reordering.
    info = self.get_info()
    self._coco_json["info"] = info

    categories = self.get_categories()
    self._coco_json["categories"] = categories

    images = self.get_images()
    self._coco_json["images"] = images

    # Each annotation object is serialised through its own ``to_dict``.
    serialised = []
    for annotation in self.get_all_annotations():
        serialised.append(annotation.to_dict())
    self._coco_json["annotations"] = serialised

    return self._coco_json

def get_info(self) -> Dict[str,Optional[str]]:
def get_info(self) -> Dict[str, Optional[str]]:
return {
"description": self.get_description(),
"contributor": None, # TODO: these fields also need a response
Expand All @@ -153,7 +153,7 @@ def get_description(self) -> Optional[str]:

return res

def get_categories(self) -> List[Dict]:
def get_categories(self) -> List[Dict[str, Any]]:
"""This does not translate classifications as they are not part of the Coco spec."""
categories = []
for object_ in self._ontology.objects:
Expand Down Expand Up @@ -192,7 +192,7 @@ def add_to_object_map_and_get_next_id(self, object_: Object) -> int:
def get_category_name(self, object_: Object) -> str:
    """Return the category name for an ontology object.

    The name is taken directly from the object's ``name`` attribute.
    """
    category_name = object_.name
    return category_name

def get_images(self) -> List:
def get_images(self) -> List[Dict[str, Any]]:
"""All the data is in the specific label_row"""
images = []

Expand All @@ -216,17 +216,8 @@ def get_dicom(self, data_unit: Dict) -> List:
for key, label in data_unit["labels"].items()
]

def get_image(self, data_unit: Dict) -> Dict:
def get_image(self, data_unit: Dict[str, Any]) -> Dict[str, Any]:
# TODO: we probably want a map of this image id to image hash in our DB, including the image_group hash.

"""
TODO: next up: here we need to branch off and create the videos
* coco_url, height, width will be the same
* id will be continuous
* file_name will be also continuous according to all the images that are being extracted from the video.
Do all the frames, and the ones without annotations will just have no corresponding annotations. We can
still later have an option to exclude them and delete the produced images.
"""
image_id = len(self._data_hash_to_image_id_map)
data_hash = data_unit["data_hash"]
self._data_hash_to_image_id_map[(data_hash, 0)] = image_id
Expand All @@ -240,7 +231,7 @@ def get_image(self, data_unit: Dict) -> Dict:
"width": data_unit["width"],
}

def get_video_images(self, data_unit: Dict) -> List[Dict]:
def get_video_images(self, data_unit: Dict[str, Any]) -> List[Dict[str, Any]]:
if not self._include_videos:
return []

Expand Down Expand Up @@ -268,7 +259,7 @@ def get_video_images(self, data_unit: Dict) -> List[Dict]:

def _dicom_label_to_coco_image(
self, frame: int, data_hash: str, series_width: int, series_height: int, dicom_label: Dict
) -> Dict:
) -> Dict[str, Any]:
image_id = len(self._data_hash_to_image_id_map)
# ideally this should be verify_arg, but currently we can't be sure that the metadata is on every frame
metadata = dicom_label.get("metadata")
Expand Down Expand Up @@ -302,7 +293,7 @@ def get_video_image(
height: int,
width: int,
frame_num: int,
) -> Dict:
) -> Dict[str, Any]:
image_id = len(self._data_hash_to_image_id_map)
self._data_hash_to_image_id_map[(data_hash, frame_num)] = image_id

Expand Down Expand Up @@ -480,9 +471,7 @@ def get_bounding_box(
category_id = self.get_category_id(object_)
id_, is_crowd, track_id, encord_track_uuid, manual_annotation = self.get_coco_annotation_default_fields(object_)

classifications = self.get_flat_classifications(
object_, image_id, object_answers, object_actions
)
classifications = self.get_flat_classifications(object_, image_id, object_answers, object_actions)

return CocoAnnotation(
area=area,
Expand Down Expand Up @@ -526,9 +515,7 @@ def get_rotatable_bounding_box(

rotation = object_["rotatableBoundingBox"]["theta"]

classifications = self.get_flat_classifications(
object_, image_id, object_answers, object_actions
)
classifications = self.get_flat_classifications(object_, image_id, object_answers, object_actions)

return CocoAnnotation(
area=area,
Expand Down Expand Up @@ -607,9 +594,7 @@ def get_polyline(
category_id = self.get_category_id(object_)
id_, is_crowd, track_id, encord_track_uuid, manual_annotation = self.get_coco_annotation_default_fields(object_)

classifications = self.get_flat_classifications(
object_, image_id, object_answers, object_actions
)
classifications = self.get_flat_classifications(object_, image_id, object_answers, object_actions)

return CocoAnnotation(
area=area,
Expand Down Expand Up @@ -815,7 +800,7 @@ def get_skeleton(

def get_flat_classifications(
self, object_: Dict, image_id: int, object_answers: Dict, object_actions: Dict
) -> Dict[str,Any]:
) -> Dict[str, Any]:
object_hash = object_["objectHash"]
feature_hash = object_["featureHash"]

Expand Down Expand Up @@ -853,9 +838,9 @@ def get_flat_static_classifications(
self,
object_hash: str,
object_feature_hash: str,
object_answers: Dict[str,Any],
object_answers: Dict[str, Any],
feature_hash_to_attribute_map: Dict[str, Attribute],
) -> Dict[str,Any]:
) -> Dict[str, Any]:
ret = {}
classifications = object_answers[object_hash]["classifications"]
for classification in classifications:
Expand All @@ -880,12 +865,12 @@ def get_flat_static_classifications(

def get_id_and_object_hash_to_answers_map(
self,
object_actions: Dict[str,Any],
object_actions: Dict[str, Any],
) -> Dict[Tuple[int, str], Dict]:
if self._id_and_object_hash_to_answers_map is not None:
return self._id_and_object_hash_to_answers_map

ret: Dict[Tuple[int, str], Dict[str,Any]] = defaultdict(Dict)
ret: Dict[Tuple[int, str], Dict[str, Any]] = defaultdict(Dict)
feature_hash_to_attribute_map = self.get_feature_hash_to_flat_object_attribute_map()
for object_hash, payload in object_actions.items():
for action in payload["actions"]:
Expand All @@ -896,7 +881,7 @@ def get_id_and_object_hash_to_answers_map(

attribute = feature_hash_to_attribute_map[feature_hash]
answers = action["answers"]
answers_dict: Dict[str,Any] = {}
answers_dict: Dict[str, Any] = {}

if attribute.get_property_type() == PropertyType.TEXT:
answers_dict.update(self.get_text_answer(attribute, answers))
Expand All @@ -918,8 +903,8 @@ def get_flat_dynamic_classifications(
object_hash: str,
feature_hash: str,
image_id: int,
id_and_object_hash_to_answers_map: Dict[Tuple[int, str], Dict[str,Any]],
) -> Dict[str,Any]:
id_and_object_hash_to_answers_map: Dict[Tuple[int, str], Dict[str, Any]],
) -> Dict[str, Any]:
ret = {}
id_and_object_hash = (image_id, object_hash)

Expand All @@ -931,7 +916,7 @@ def get_flat_dynamic_classifications(
return ret

def add_unselected_attributes(
self, feature_hash: str, attributes_dict: Dict[str,Optional[bool]], match_dynamic_attributes: bool
self, feature_hash: str, attributes_dict: Dict[str, Optional[bool]], match_dynamic_attributes: bool
) -> None:
"""
Attributes which have never been selected will not show up in the actions map. They will need to be
Expand Down Expand Up @@ -963,13 +948,13 @@ def get_attributes_for_feature_hash(self, feature_hash: str) -> List[Attribute]:

return ret

def get_radio_answer(self, attribute: Attribute, answers: List[Dict[str,str]]) -> Dict[str,str]:
def get_radio_answer(self, attribute: Attribute, answers: List[Dict[str, str]]) -> Dict[str, str]:
    """Map a radio attribute's name to the name of its selected answer.

    Only the first entry of ``answers`` is read, since a radio attribute
    carries a single answer.

    Raises:
        IndexError: if ``answers`` is empty.
    """
    selected = answers[0]
    key = attribute.name
    value = selected["name"]
    return {key: value}

def get_checklist_answer(self, attribute: Attribute, answers: Dict[str,Any]) -> Dict[str,bool]:
ret: Dict[str,bool] = {}
found_checklist_answers: set[str] = set()
def get_checklist_answer(self, attribute: Attribute, answers: List[Dict[str, Any]]) -> Dict[str, bool]:
ret: Dict[str, bool] = {}
found_checklist_answers: Set[str] = set()

for answer in answers:
found_checklist_answers.add(answer["name"])
Expand All @@ -980,10 +965,10 @@ def get_checklist_answer(self, attribute: Attribute, answers: Dict[str,Any]) ->

return ret

def get_text_answer(self, attribute: Attribute, answers: str) -> Dict[str,Any]:
def get_text_answer(self, attribute: Attribute, answers: str) -> Dict[str, Any]:
    """Map a text attribute's name to its answer.

    The ``answers`` value is stored as given, without any transformation.
    """
    result: Dict[str, Any] = {}
    result[attribute.name] = answers
    return result

def get_category_id(self, object_: Dict[str,Any]) -> int:
def get_category_id(self, object_: Dict[str, Any]) -> int:
feature_hash = object_["featureHash"]
try:
return self._feature_hash_to_coco_category_id_map[feature_hash]
Expand All @@ -994,7 +979,7 @@ def get_category_id(self, object_: Dict[str,Any]) -> int:
) from None

def get_coco_annotation_default_fields(
self, object_: Dict[str,Any]
self, object_: Dict[str, Any]
) -> Tuple[
int,
int,
Expand Down Expand Up @@ -1026,4 +1011,4 @@ def get_and_set_track_id(self, object_hash: str) -> int:
return next_track_id

def get_image_id(self, data_hash: str, frame_num: int = 0) -> int:
    """Look up the COCO image id registered for a data unit frame.

    Args:
        data_hash: Hash identifying the data unit.
        frame_num: Frame index within the data unit; defaults to 0.

    Raises:
        KeyError: if no id is stored for ``(data_hash, frame_num)``.
    """
    lookup_key = (data_hash, frame_num)
    return self._data_hash_to_image_id_map[lookup_key]
return self._data_hash_to_image_id_map[(data_hash, frame_num)]
6 changes: 4 additions & 2 deletions tests/objects/data/data_1.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
labels = {
from typing import Any, Dict

labels: Dict[str, Any] = {
"label_hash": "1b4398aa-1d4d-4873-8470-2ac9fa373ac0",
"branch_name": "main",
"created_at": "2023-02-09 14:12:03",
Expand Down Expand Up @@ -167,7 +169,7 @@
"label_status": "LABELLED",
}

ontology = {
ontology: Dict[str, Any] = {
"objects": [
{"id": "1", "name": "Epiglottis ", "color": "#D33115", "shape": "bounding_box", "featureNodeHash": "249c9370"},
{"id": "2", "name": "Larynx", "color": "#E27300", "shape": "bounding_box", "featureNodeHash": "e3c87d43"},
Expand Down
8 changes: 4 additions & 4 deletions tests/test_coco_export.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
from typing import Any, Dict
from unittest.mock import patch

import pytest

from encord.objects.ontology_structure import OntologyStructure
from tests.objects.data.data_1 import ontology as BASE_ONTOLOGY_DICT
from tests.objects.data.data_1 import labels as BASE_LABEL_DICT
from tests.objects.data.data_1 import ontology as BASE_ONTOLOGY_DICT

ontology_structure = OntologyStructure.from_dict(BASE_ONTOLOGY_DICT)
EXPECTED_COCO_RESULT = {
EXPECTED_COCO_RESULT: Dict[str, Any] = {
"info": {
"description": "failing_video_new.mp4",
"contributor": None,
Expand Down Expand Up @@ -251,6 +252,5 @@ def test_coco_exporter_with_coco_extra():
from encord.utilities.coco.exporter import CocoExporter
except ImportError:
return

output = CocoExporter([BASE_LABEL_DICT], ontology_structure).export()
assert output == EXPECTED_COCO_RESULT
assert output == EXPECTED_COCO_RESULT

0 comments on commit 68ee994

Please sign in to comment.