feat: load an audio sdk object class label row [merge waiting on PR t…

…o prod] (#812)
encord-team · Dec 11, 2024 · 39b4a9a · 39b4a9a
1 parent 3d3b419
commit 39b4a9a
Show file tree

Hide file tree

Showing 9 changed files with 443 additions and 49 deletions.
diff --git a/encord/objects/common.py b/encord/objects/common.py
@@ -35,6 +35,7 @@ class Shape(StringEnum):
     POLYLINE = "polyline"
     ROTATABLE_BOUNDING_BOX = "rotatable_bounding_box"
     BITMASK = "bitmask"
+    AUDIO = "audio"
 
 
 class DeidentifyRedactTextMode(Enum):

diff --git a/encord/objects/coordinates.py b/encord/objects/coordinates.py
@@ -337,7 +337,12 @@ def to_dict(self, by_alias=True, exclude_none=True) -> Dict[str, Any]:
         return {str(i): x.to_dict() for i, x in enumerate(self.values)}
 
 
+class AudioCoordinates(BaseDTO):
+    pass
+
+
 Coordinates = Union[
+    AudioCoordinates,
     BoundingBoxCoordinates,
     RotatableBoundingBoxCoordinates,
     PointCoordinate,
@@ -354,4 +359,5 @@ def to_dict(self, by_alias=True, exclude_none=True) -> Dict[str, Any]:
     Shape.POLYLINE: PolylineCoordinates,
     Shape.SKELETON: SkeletonCoordinates,
     Shape.BITMASK: BitmaskCoordinates,
+    Shape.AUDIO: AudioCoordinates,
 }
diff --git a/encord/objects/ontology_labels_impl.py b/encord/objects/ontology_labels_impl.py
@@ -29,6 +29,7 @@
     LABEL_ROW_BUNDLE_CREATE_LIMIT,
     LABEL_ROW_BUNDLE_GET_LIMIT,
 )
+from encord.objects import Shape
 from encord.objects.attributes import Attribute
 from encord.objects.bundled_operations import (
     BundledCreateRowsPayload,
@@ -46,6 +47,7 @@
     DEFAULT_MANUAL_ANNOTATION,
 )
 from encord.objects.coordinates import (
+    AudioCoordinates,
     BitmaskCoordinates,
     BoundingBoxCoordinates,
     Coordinates,
@@ -820,7 +822,7 @@ def add_object_instance(self, object_instance: ObjectInstance, force: bool = Tru
             LabelRowError: If the object instance is already part of another LabelRowV2.
         """
 
-        self._method_not_supported_for_audio()
+        self._method_not_supported_for_audio(range_only=object_instance.is_range_only())
 
         self._check_labelling_is_initalised()
 
@@ -844,8 +846,9 @@ def add_object_instance(self, object_instance: ObjectInstance, force: bool = Tru
         self._objects_map[object_hash] = object_instance
         object_instance._parent = self
 
-        frames = set(_frame_views_to_frame_numbers(object_instance.get_annotations()))
-        self._add_to_frame_to_hashes_map(object_instance, frames)
+        if not object_instance.is_range_only():
+            frames = set(_frame_views_to_frame_numbers(object_instance.get_annotations()))
+            self._add_to_frame_to_hashes_map(object_instance, frames)
 
     def add_classification_instance(self, classification_instance: ClassificationInstance, force: bool = False) -> None:
         """
@@ -1034,14 +1037,13 @@ def remove_object(self, object_instance: ObjectInstance):
         Args:
             object_instance: The object instance to remove.
         """
-        self._method_not_supported_for_audio()
-
         self._check_labelling_is_initalised()
 
         self._objects_map.pop(object_instance.object_hash)
-        self._remove_from_frame_to_hashes_map(
-            _frame_views_to_frame_numbers(object_instance.get_annotations()), object_instance.object_hash
-        )
+        if not object_instance.is_range_only():
+            self._remove_from_frame_to_hashes_map(
+                _frame_views_to_frame_numbers(object_instance.get_annotations()), object_instance.object_hash
+            )
         object_instance._parent = None
 
     def to_encord_dict(self) -> Dict[str, Any]:
@@ -1626,6 +1628,22 @@ def _to_object_answers(self) -> Dict[str, Any]:
                 "classifications": list(reversed(all_static_answers)),
                 "objectHash": obj.object_hash,
             }
+
+            # At some point, we also want to add these to the other modalities
+            if self.data_type == DataType.AUDIO:
+                annotation = obj.get_annotations()[0]
+                ret[obj.object_hash]["range"] = [[range.start, range.end] for range in obj.range_list]
+                ret[obj.object_hash]["createdBy"] = annotation.created_by
+                ret[obj.object_hash]["createdAt"] = annotation.created_at.strftime(DATETIME_LONG_STRING_FORMAT)
+                ret[obj.object_hash]["lastEditedBy"] = annotation.last_edited_by
+                ret[obj.object_hash]["lastEditedAt"] = annotation.last_edited_at.strftime(DATETIME_LONG_STRING_FORMAT)
+                ret[obj.object_hash]["manualAnnotation"] = annotation.manual_annotation
+                ret[obj.object_hash]["featureHash"] = obj.feature_hash
+                ret[obj.object_hash]["name"] = obj.ontology_item.name
+                ret[obj.object_hash]["color"] = obj.ontology_item.color
+                ret[obj.object_hash]["shape"] = obj.ontology_item.shape.value
+                ret[obj.object_hash]["value"] = _lower_snake_case(obj.ontology_item.name)
+
         return ret
 
     def _to_object_actions(self) -> Dict[str, Any]:
@@ -2060,6 +2078,7 @@ def _parse_label_row_dict(self, label_row_dict: dict) -> LabelRowReadOnlyData:
 
     def _parse_labels_from_dict(self, label_row_dict: dict):
         classification_answers = label_row_dict["classification_answers"]
+        object_answers = label_row_dict["object_answers"]
 
         for data_unit in label_row_dict["data_units"].values():
             data_type = DataType(label_row_dict["data_type"])
@@ -2072,6 +2091,7 @@ def _parse_labels_from_dict(self, label_row_dict: dict):
                     classification_answers,
                     frame,
                 )
+                self._add_objects_answers(object_answers)
 
             elif data_type == DataType.VIDEO or data_type == DataType.DICOM or data_type == DataType.NIFTI:
                 for frame, frame_data in data_unit["labels"].items():
@@ -2081,22 +2101,23 @@ def _parse_labels_from_dict(self, label_row_dict: dict):
                         frame_data["classifications"], classification_answers, frame_num
                     )
                     self._add_frame_metadata(frame_num, frame_data.get("metadata"))
+                self._add_objects_answers(object_answers)
 
             elif data_type == DataType.DICOM_STUDY:
-                pass
+                pass  # TODO: _add_object_answers a NO-OP here as well?
 
             elif data_type == DataType.MISSING_DATA_TYPE:
                 raise NotImplementedError(f"Got an unexpected data type `{data_type}`")
 
             elif data_type == DataType.AUDIO or data_type == DataType.PDF or data_type == DataType.PLAIN_TEXT:
+                self._add_objects_instances_from_objects_without_frames(object_answers)
                 self._add_classification_instances_from_classifications_without_frames(classification_answers)
 
             else:
                 exhaustive_guard(data_type)
 
             self._add_data_unit_metadata(data_type, data_unit.get("metadata"))
 
-        self._add_objects_answers(label_row_dict)
         self._add_action_answers(label_row_dict)
 
     def _add_frame_metadata(self, frame: int, metadata: Optional[Dict[str, str]]):
@@ -2117,6 +2138,18 @@ def _add_data_unit_metadata(self, data_type: DataType, metadata: Optional[Dict[s
                 f"Unexpected metadata for the data type: {data_type}. Please update the Encord SDK to the latest version."
             )
 
+    def _add_objects_instances_from_objects_without_frames(
+        self,
+        object_answers: dict,
+    ):
+        for object_answer in object_answers.values():
+            ranges: Ranges = []
+            for range_elem in object_answer["range"]:
+                ranges.append(Range(range_elem[0], range_elem[1]))
+
+            object_instance = self._create_new_object_instance_with_ranges(object_answer, ranges)
+            self.add_object_instance(object_instance)
+
     def _add_object_instances_from_objects(
         self,
         objects_list: List[dict],
@@ -2130,8 +2163,8 @@ def _add_object_instances_from_objects(
             else:
                 self._add_coordinates_to_object_instance(frame_object_label, frame)
 
-    def _add_objects_answers(self, label_row_dict: dict):
-        for answer in label_row_dict["object_answers"].values():
+    def _add_objects_answers(self, object_answers: dict):
+        for answer in object_answers.values():
             object_hash = answer["objectHash"]
             # In cases when we had an object, added some attributes for this object, and then removed the object,
             # in some label rows we still have such "orphaned" answers.
@@ -2173,6 +2206,48 @@ def _create_new_object_instance(self, frame_object_label: dict, frame: int) -> O
         )
         return object_instance
 
+    # This is only to be used by non-frame modalities (e.g. Audio)
+    def _create_new_object_instance_with_ranges(
+        self,
+        object_answer: dict,
+        ranges: Ranges,
+    ) -> ObjectInstance:
+        feature_hash = object_answer["featureHash"]
+        object_hash = object_answer["objectHash"]
+
+        label_class = self._ontology.structure.get_child_by_hash(feature_hash, type_=Object)
+
+        frame_info_dict = {k: v for k, v in object_answer.items() if v is not None}
+        frame_info_dict.setdefault("confidence", 1.0)  # confidence sometimes not present.
+        object_frame_instance_info = ObjectInstance.FrameInfo.from_dict(frame_info_dict)
+
+        expected_shape: Shape
+        if self._label_row_read_only_data.data_type == DataType.AUDIO:
+            expected_shape = Shape.AUDIO
+        else:
+            unknown_data_type = self._label_row_read_only_data.data_type
+            raise RuntimeError(f"Unexpected data type[{unknown_data_type}] for range based objects")
+        if label_class.shape != expected_shape:
+            raise LabelRowError("Unsupported object shape for data type")
+        object_instance = ObjectInstance(label_class, object_hash=object_hash)
+        object_instance.set_for_frames(
+            AudioCoordinates(),
+            ranges,
+            created_at=object_frame_instance_info.created_at,
+            created_by=object_frame_instance_info.created_by,
+            confidence=object_frame_instance_info.confidence,
+            manual_annotation=object_frame_instance_info.manual_annotation,
+            last_edited_at=object_frame_instance_info.last_edited_at,
+            last_edited_by=object_frame_instance_info.last_edited_by,
+            reviews=object_frame_instance_info.reviews,
+            overwrite=True,
+            # Always overwrite during label row dict parsing, as older dicts known to have duplicates
+        )
+        answer_list = object_answer["classifications"]
+        object_instance.set_answer_from_list(answer_list)
+
+        return object_instance
+
     def _add_coordinates_to_object_instance(
         self,
         frame_object_label: dict,
@@ -2237,8 +2312,8 @@ def _add_classification_instances_from_classifications_without_frames(
     ):
         for classification_answer in classification_answers.values():
             ranges: Ranges = []
-            for range in classification_answer["range"]:
-                ranges.append(Range(range[0], range[1]))
+            for range_elem in classification_answer["range"]:
+                ranges.append(Range(range_elem[0], range_elem[1]))
 
             classification_instance = self._create_new_classification_instance_with_ranges(
                 classification_answer, ranges
@@ -2346,8 +2421,8 @@ def _check_labelling_is_initalised(self):
                 "to do so first."
             )
 
-    def _method_not_supported_for_audio(self):
-        if self.data_type == DataType.AUDIO:
+    def _method_not_supported_for_audio(self, range_only: bool = False):
+        if self.data_type == DataType.AUDIO and not range_only:
             raise LabelRowError("This method is not supported for audio.")
 
     def __repr__(self) -> str: