misc: More types and tidying

encord-team · Jan 8, 2025 · 68ee994
1 parent 2e6bb30
commit 68ee994
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 55 deletions.
diff --git a/encord/utilities/coco/exporter.py b/encord/utilities/coco/exporter.py
@@ -11,7 +11,7 @@
 from collections import defaultdict
 from dataclasses import dataclass
 from itertools import chain
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
 
 import numpy as np
 from pycocotools import mask as cocomask
@@ -43,16 +43,16 @@ class CocoAnnotation(BaseModel):
     id_: int
     image_id: int
     is_crowd: int
-    segmentation: Union[List[List[float]], Dict[str,Any]]
+    segmentation: Union[List[List[float]], Dict[str, Any]]
     keypoints: Optional[List[float]] = None
     num_keypoints: Optional[int] = None
     track_id: Optional[int] = None
     encord_track_uuid: Optional[str] = None
     rotation: Optional[float] = None
-    classifications: Optional[Dict[str,Any]] = None
+    classifications: Optional[Dict[str, Any]] = None
     manual_annotation: Optional[bool] = None
 
-    def to_dict(self) -> Dict[str,Any]:
+    def to_dict(self) -> Dict[str, Any]:
         return {
             "area": self.area,
             "bbox": list(self.bbox),
@@ -108,13 +108,13 @@ class CocoExporter:
 
     def __init__(
         self,
-        labels_list: List[Dict[str,Any]],
+        labels_list: List[Dict[str, Any]],
         ontology: OntologyStructure,
         include_videos: bool = True,
     ) -> None:
         self._labels_list = labels_list
         self._ontology = ontology
-        self._coco_json: Dict[str,Any] = {}
+        self._coco_json: Dict[str, Any] = {}
         self._current_annotation_id = 0
         self._object_hash_to_track_id_map: Dict[str, int] = {}
         self._coco_categories_id_to_ontology_object_map: Dict = {}  # TODO: do we need this?
@@ -126,15 +126,15 @@ def __init__(
         self._id_and_object_hash_to_answers_map: Optional[Dict[Tuple[int, str], Dict]] = None
         self._include_videos = include_videos
 
-    def export(self) -> Dict[str,Any]:
+    def export(self) -> Dict[str, Any]:
         self._coco_json["info"] = self.get_info()
         self._coco_json["categories"] = self.get_categories()
         self._coco_json["images"] = self.get_images()
         self._coco_json["annotations"] = [x.to_dict() for x in self.get_all_annotations()]
 
         return self._coco_json
 
-    def get_info(self) -> Dict[str,Optional[str]]:
+    def get_info(self) -> Dict[str, Optional[str]]:
         return {
             "description": self.get_description(),
             "contributor": None,  # TODO: these fields also need a response
@@ -153,7 +153,7 @@ def get_description(self) -> Optional[str]:
 
         return res
 
-    def get_categories(self) -> List[Dict]:
+    def get_categories(self) -> List[Dict[str, Any]]:
         """This does not translate classifications as they are not part of the Coco spec."""
         categories = []
         for object_ in self._ontology.objects:
@@ -192,7 +192,7 @@ def add_to_object_map_and_get_next_id(self, object_: Object) -> int:
     def get_category_name(self, object_: Object) -> str:
         return object_.name
 
-    def get_images(self) -> List:
+    def get_images(self) -> List[Dict[str, Any]]:
         """All the data is in the specific label_row"""
         images = []
 
@@ -216,17 +216,8 @@ def get_dicom(self, data_unit: Dict) -> List:
             for key, label in data_unit["labels"].items()
         ]
 
-    def get_image(self, data_unit: Dict) -> Dict:
+    def get_image(self, data_unit: Dict[str, Any]) -> Dict[str, Any]:
         # TODO: we probably want a map of this image id to image hash in our DB, including the image_group hash.
-
-        """
-        TODO: next up: here we need to branch off and create the videos
-        * coco_url, height, width will be the same
-        * id will be continuous
-        * file_name will be also continuous according to all the images that are being extracted from the video.
-        Do all the frames, and the ones without annotations will just have no corresponding annotations. We can
-        still later have an option to exclude them and delete the produced images.
-        """
         image_id = len(self._data_hash_to_image_id_map)
         data_hash = data_unit["data_hash"]
         self._data_hash_to_image_id_map[(data_hash, 0)] = image_id
@@ -240,7 +231,7 @@ def get_image(self, data_unit: Dict) -> Dict:
             "width": data_unit["width"],
         }
 
-    def get_video_images(self, data_unit: Dict) -> List[Dict]:
+    def get_video_images(self, data_unit: Dict[str, Any]) -> List[Dict[str, Any]]:
         if not self._include_videos:
             return []
 
@@ -268,7 +259,7 @@ def get_video_images(self, data_unit: Dict) -> List[Dict]:
 
     def _dicom_label_to_coco_image(
         self, frame: int, data_hash: str, series_width: int, series_height: int, dicom_label: Dict
-    ) -> Dict:
+    ) -> Dict[str, Any]:
         image_id = len(self._data_hash_to_image_id_map)
         # ideally this should be verify_arg, but currently we can't be sure that the metadata is on every frame
         metadata = dicom_label.get("metadata")
@@ -302,7 +293,7 @@ def get_video_image(
         height: int,
         width: int,
         frame_num: int,
-    ) -> Dict:
+    ) -> Dict[str, Any]:
         image_id = len(self._data_hash_to_image_id_map)
         self._data_hash_to_image_id_map[(data_hash, frame_num)] = image_id
 
@@ -480,9 +471,7 @@ def get_bounding_box(
         category_id = self.get_category_id(object_)
         id_, is_crowd, track_id, encord_track_uuid, manual_annotation = self.get_coco_annotation_default_fields(object_)
 
-        classifications = self.get_flat_classifications(
-            object_, image_id, object_answers, object_actions
-        )
+        classifications = self.get_flat_classifications(object_, image_id, object_answers, object_actions)
 
         return CocoAnnotation(
             area=area,
@@ -526,9 +515,7 @@ def get_rotatable_bounding_box(
 
         rotation = object_["rotatableBoundingBox"]["theta"]
 
-        classifications = self.get_flat_classifications(
-            object_, image_id, object_answers, object_actions
-        )
+        classifications = self.get_flat_classifications(object_, image_id, object_answers, object_actions)
 
         return CocoAnnotation(
             area=area,
@@ -607,9 +594,7 @@ def get_polyline(
         category_id = self.get_category_id(object_)
         id_, is_crowd, track_id, encord_track_uuid, manual_annotation = self.get_coco_annotation_default_fields(object_)
 
-        classifications = self.get_flat_classifications(
-            object_, image_id, object_answers, object_actions
-        )
+        classifications = self.get_flat_classifications(object_, image_id, object_answers, object_actions)
 
         return CocoAnnotation(
             area=area,
@@ -815,7 +800,7 @@ def get_skeleton(
 
     def get_flat_classifications(
         self, object_: Dict, image_id: int, object_answers: Dict, object_actions: Dict
-    ) -> Dict[str,Any]:
+    ) -> Dict[str, Any]:
         object_hash = object_["objectHash"]
         feature_hash = object_["featureHash"]
 
@@ -853,9 +838,9 @@ def get_flat_static_classifications(
         self,
         object_hash: str,
         object_feature_hash: str,
-        object_answers: Dict[str,Any],
+        object_answers: Dict[str, Any],
         feature_hash_to_attribute_map: Dict[str, Attribute],
-    ) -> Dict[str,Any]:
+    ) -> Dict[str, Any]:
         ret = {}
         classifications = object_answers[object_hash]["classifications"]
         for classification in classifications:
@@ -880,12 +865,12 @@ def get_flat_static_classifications(
 
     def get_id_and_object_hash_to_answers_map(
         self,
-        object_actions: Dict[str,Any],
+        object_actions: Dict[str, Any],
     ) -> Dict[Tuple[int, str], Dict]:
         if self._id_and_object_hash_to_answers_map is not None:
             return self._id_and_object_hash_to_answers_map
 
-        ret: Dict[Tuple[int, str], Dict[str,Any]] = defaultdict(Dict)
+        ret: Dict[Tuple[int, str], Dict[str, Any]] = defaultdict(Dict)
         feature_hash_to_attribute_map = self.get_feature_hash_to_flat_object_attribute_map()
         for object_hash, payload in object_actions.items():
             for action in payload["actions"]:
@@ -896,7 +881,7 @@ def get_id_and_object_hash_to_answers_map(
 
                 attribute = feature_hash_to_attribute_map[feature_hash]
                 answers = action["answers"]
-                answers_dict: Dict[str,Any] = {}
+                answers_dict: Dict[str, Any] = {}
 
                 if attribute.get_property_type() == PropertyType.TEXT:
                     answers_dict.update(self.get_text_answer(attribute, answers))
@@ -918,8 +903,8 @@ def get_flat_dynamic_classifications(
         object_hash: str,
         feature_hash: str,
         image_id: int,
-        id_and_object_hash_to_answers_map: Dict[Tuple[int, str], Dict[str,Any]],
-    ) -> Dict[str,Any]:
+        id_and_object_hash_to_answers_map: Dict[Tuple[int, str], Dict[str, Any]],
+    ) -> Dict[str, Any]:
         ret = {}
         id_and_object_hash = (image_id, object_hash)
 
@@ -931,7 +916,7 @@ def get_flat_dynamic_classifications(
         return ret
 
     def add_unselected_attributes(
-        self, feature_hash: str, attributes_dict: Dict[str,Optional[bool]], match_dynamic_attributes: bool
+        self, feature_hash: str, attributes_dict: Dict[str, Optional[bool]], match_dynamic_attributes: bool
     ) -> None:
         """
         Attributes which have never been selected will not show up in the actions map. They will need to be
@@ -963,13 +948,13 @@ def get_attributes_for_feature_hash(self, feature_hash: str) -> List[Attribute]:
 
         return ret
 
-    def get_radio_answer(self, attribute: Attribute, answers: List[Dict[str,str]]) -> Dict[str,str]:
+    def get_radio_answer(self, attribute: Attribute, answers: List[Dict[str, str]]) -> Dict[str, str]:
         answer = answers[0]  # radios only have one answer by definition
         return {attribute.name: answer["name"]}
 
-    def get_checklist_answer(self, attribute: Attribute, answers: Dict[str,Any]) -> Dict[str,bool]:
-        ret: Dict[str,bool] = {}
-        found_checklist_answers: set[str] = set()
+    def get_checklist_answer(self, attribute: Attribute, answers: List[Dict[str, Any]]) -> Dict[str, bool]:
+        ret: Dict[str, bool] = {}
+        found_checklist_answers: Set[str] = set()
 
         for answer in answers:
             found_checklist_answers.add(answer["name"])
@@ -980,10 +965,10 @@ def get_checklist_answer(self, attribute: Attribute, answers: Dict[str,Any]) ->
 
         return ret
 
-    def get_text_answer(self, attribute: Attribute, answers: str) -> Dict[str,Any]:
+    def get_text_answer(self, attribute: Attribute, answers: str) -> Dict[str, Any]:
         return {attribute.name: answers}
 
-    def get_category_id(self, object_: Dict[str,Any]) -> int:
+    def get_category_id(self, object_: Dict[str, Any]) -> int:
         feature_hash = object_["featureHash"]
         try:
             return self._feature_hash_to_coco_category_id_map[feature_hash]
@@ -994,7 +979,7 @@ def get_category_id(self, object_: Dict[str,Any]) -> int:
             ) from None
 
     def get_coco_annotation_default_fields(
-        self, object_: Dict[str,Any]
+        self, object_: Dict[str, Any]
     ) -> Tuple[
         int,
         int,
@@ -1026,4 +1011,4 @@ def get_and_set_track_id(self, object_hash: str) -> int:
             return next_track_id
 
     def get_image_id(self, data_hash: str, frame_num: int = 0) -> int:
-        return self._data_hash_to_image_id_map[(data_hash, frame_num)]
+        return self._data_hash_to_image_id_map[(data_hash, frame_num)]
diff --git a/tests/objects/data/data_1.py b/tests/objects/data/data_1.py
@@ -1,4 +1,6 @@
-labels = {
+from typing import Any, Dict
+
+labels: Dict[str, Any] = {
     "label_hash": "1b4398aa-1d4d-4873-8470-2ac9fa373ac0",
     "branch_name": "main",
     "created_at": "2023-02-09 14:12:03",
@@ -167,7 +169,7 @@
     "label_status": "LABELLED",
 }
 
-ontology = {
+ontology: Dict[str, Any] = {
     "objects": [
         {"id": "1", "name": "Epiglottis ", "color": "#D33115", "shape": "bounding_box", "featureNodeHash": "249c9370"},
         {"id": "2", "name": "Larynx", "color": "#E27300", "shape": "bounding_box", "featureNodeHash": "e3c87d43"},

diff --git a/tests/test_coco_export.py b/tests/test_coco_export.py
@@ -1,13 +1,14 @@
+from typing import Any, Dict
 from unittest.mock import patch
 
 import pytest
 
 from encord.objects.ontology_structure import OntologyStructure
-from tests.objects.data.data_1 import ontology as BASE_ONTOLOGY_DICT
 from tests.objects.data.data_1 import labels as BASE_LABEL_DICT
+from tests.objects.data.data_1 import ontology as BASE_ONTOLOGY_DICT
 
 ontology_structure = OntologyStructure.from_dict(BASE_ONTOLOGY_DICT)
-EXPECTED_COCO_RESULT = {
+EXPECTED_COCO_RESULT: Dict[str, Any] = {
     "info": {
         "description": "failing_video_new.mp4",
         "contributor": None,
@@ -251,6 +252,5 @@ def test_coco_exporter_with_coco_extra():
         from encord.utilities.coco.exporter import CocoExporter
     except ImportError:
         return
-
     output = CocoExporter([BASE_LABEL_DICT], ontology_structure).export()
-    assert output == EXPECTED_COCO_RESULT
+    assert output == EXPECTED_COCO_RESULT