Skip to content

Commit

Permalink
feat: load an audio sdk object class label row [merge waiting on PR t…
Browse files Browse the repository at this point in the history
…o prod] (#812)
  • Loading branch information
james-encord authored Dec 11, 2024
1 parent 3d3b419 commit 39b4a9a
Show file tree
Hide file tree
Showing 9 changed files with 443 additions and 49 deletions.
1 change: 1 addition & 0 deletions encord/objects/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class Shape(StringEnum):
POLYLINE = "polyline"
ROTATABLE_BOUNDING_BOX = "rotatable_bounding_box"
BITMASK = "bitmask"
AUDIO = "audio"


class DeidentifyRedactTextMode(Enum):
Expand Down
6 changes: 6 additions & 0 deletions encord/objects/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,12 @@ def to_dict(self, by_alias=True, exclude_none=True) -> Dict[str, Any]:
return {str(i): x.to_dict() for i, x in enumerate(self.values)}


class AudioCoordinates(BaseDTO):
pass


Coordinates = Union[
AudioCoordinates,
BoundingBoxCoordinates,
RotatableBoundingBoxCoordinates,
PointCoordinate,
Expand All @@ -354,4 +359,5 @@ def to_dict(self, by_alias=True, exclude_none=True) -> Dict[str, Any]:
Shape.POLYLINE: PolylineCoordinates,
Shape.SKELETON: SkeletonCoordinates,
Shape.BITMASK: BitmaskCoordinates,
Shape.AUDIO: AudioCoordinates,
}
107 changes: 91 additions & 16 deletions encord/objects/ontology_labels_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
LABEL_ROW_BUNDLE_CREATE_LIMIT,
LABEL_ROW_BUNDLE_GET_LIMIT,
)
from encord.objects import Shape
from encord.objects.attributes import Attribute
from encord.objects.bundled_operations import (
BundledCreateRowsPayload,
Expand All @@ -46,6 +47,7 @@
DEFAULT_MANUAL_ANNOTATION,
)
from encord.objects.coordinates import (
AudioCoordinates,
BitmaskCoordinates,
BoundingBoxCoordinates,
Coordinates,
Expand Down Expand Up @@ -820,7 +822,7 @@ def add_object_instance(self, object_instance: ObjectInstance, force: bool = Tru
LabelRowError: If the object instance is already part of another LabelRowV2.
"""

self._method_not_supported_for_audio()
self._method_not_supported_for_audio(range_only=object_instance.is_range_only())

self._check_labelling_is_initalised()

Expand All @@ -844,8 +846,9 @@ def add_object_instance(self, object_instance: ObjectInstance, force: bool = Tru
self._objects_map[object_hash] = object_instance
object_instance._parent = self

frames = set(_frame_views_to_frame_numbers(object_instance.get_annotations()))
self._add_to_frame_to_hashes_map(object_instance, frames)
if not object_instance.is_range_only():
frames = set(_frame_views_to_frame_numbers(object_instance.get_annotations()))
self._add_to_frame_to_hashes_map(object_instance, frames)

def add_classification_instance(self, classification_instance: ClassificationInstance, force: bool = False) -> None:
"""
Expand Down Expand Up @@ -1034,14 +1037,13 @@ def remove_object(self, object_instance: ObjectInstance):
Args:
object_instance: The object instance to remove.
"""
self._method_not_supported_for_audio()

self._check_labelling_is_initalised()

self._objects_map.pop(object_instance.object_hash)
self._remove_from_frame_to_hashes_map(
_frame_views_to_frame_numbers(object_instance.get_annotations()), object_instance.object_hash
)
if not object_instance.is_range_only():
self._remove_from_frame_to_hashes_map(
_frame_views_to_frame_numbers(object_instance.get_annotations()), object_instance.object_hash
)
object_instance._parent = None

def to_encord_dict(self) -> Dict[str, Any]:
Expand Down Expand Up @@ -1626,6 +1628,22 @@ def _to_object_answers(self) -> Dict[str, Any]:
"classifications": list(reversed(all_static_answers)),
"objectHash": obj.object_hash,
}

# At some point, we also want to add these to the other modalities
if self.data_type == DataType.AUDIO:
annotation = obj.get_annotations()[0]
ret[obj.object_hash]["range"] = [[range.start, range.end] for range in obj.range_list]
ret[obj.object_hash]["createdBy"] = annotation.created_by
ret[obj.object_hash]["createdAt"] = annotation.created_at.strftime(DATETIME_LONG_STRING_FORMAT)
ret[obj.object_hash]["lastEditedBy"] = annotation.last_edited_by
ret[obj.object_hash]["lastEditedAt"] = annotation.last_edited_at.strftime(DATETIME_LONG_STRING_FORMAT)
ret[obj.object_hash]["manualAnnotation"] = annotation.manual_annotation
ret[obj.object_hash]["featureHash"] = obj.feature_hash
ret[obj.object_hash]["name"] = obj.ontology_item.name
ret[obj.object_hash]["color"] = obj.ontology_item.color
ret[obj.object_hash]["shape"] = obj.ontology_item.shape.value
ret[obj.object_hash]["value"] = _lower_snake_case(obj.ontology_item.name)

return ret

def _to_object_actions(self) -> Dict[str, Any]:
Expand Down Expand Up @@ -2060,6 +2078,7 @@ def _parse_label_row_dict(self, label_row_dict: dict) -> LabelRowReadOnlyData:

def _parse_labels_from_dict(self, label_row_dict: dict):
classification_answers = label_row_dict["classification_answers"]
object_answers = label_row_dict["object_answers"]

for data_unit in label_row_dict["data_units"].values():
data_type = DataType(label_row_dict["data_type"])
Expand All @@ -2072,6 +2091,7 @@ def _parse_labels_from_dict(self, label_row_dict: dict):
classification_answers,
frame,
)
self._add_objects_answers(object_answers)

elif data_type == DataType.VIDEO or data_type == DataType.DICOM or data_type == DataType.NIFTI:
for frame, frame_data in data_unit["labels"].items():
Expand All @@ -2081,22 +2101,23 @@ def _parse_labels_from_dict(self, label_row_dict: dict):
frame_data["classifications"], classification_answers, frame_num
)
self._add_frame_metadata(frame_num, frame_data.get("metadata"))
self._add_objects_answers(object_answers)

elif data_type == DataType.DICOM_STUDY:
pass
pass # TODO: _add_object_answers a NO-OP here as well?

elif data_type == DataType.MISSING_DATA_TYPE:
raise NotImplementedError(f"Got an unexpected data type `{data_type}`")

elif data_type == DataType.AUDIO or data_type == DataType.PDF or data_type == DataType.PLAIN_TEXT:
self._add_objects_instances_from_objects_without_frames(object_answers)
self._add_classification_instances_from_classifications_without_frames(classification_answers)

else:
exhaustive_guard(data_type)

self._add_data_unit_metadata(data_type, data_unit.get("metadata"))

self._add_objects_answers(label_row_dict)
self._add_action_answers(label_row_dict)

def _add_frame_metadata(self, frame: int, metadata: Optional[Dict[str, str]]):
Expand All @@ -2117,6 +2138,18 @@ def _add_data_unit_metadata(self, data_type: DataType, metadata: Optional[Dict[s
f"Unexpected metadata for the data type: {data_type}. Please update the Encord SDK to the latest version."
)

def _add_objects_instances_from_objects_without_frames(
self,
object_answers: dict,
):
for object_answer in object_answers.values():
ranges: Ranges = []
for range_elem in object_answer["range"]:
ranges.append(Range(range_elem[0], range_elem[1]))

object_instance = self._create_new_object_instance_with_ranges(object_answer, ranges)
self.add_object_instance(object_instance)

def _add_object_instances_from_objects(
self,
objects_list: List[dict],
Expand All @@ -2130,8 +2163,8 @@ def _add_object_instances_from_objects(
else:
self._add_coordinates_to_object_instance(frame_object_label, frame)

def _add_objects_answers(self, label_row_dict: dict):
for answer in label_row_dict["object_answers"].values():
def _add_objects_answers(self, object_answers: dict):
for answer in object_answers.values():
object_hash = answer["objectHash"]
# In cases when we had an object, added some attributes for this object, and then removed the object,
# in some label rows we still have such "orphaned" answers.
Expand Down Expand Up @@ -2173,6 +2206,48 @@ def _create_new_object_instance(self, frame_object_label: dict, frame: int) -> O
)
return object_instance

# This is only to be used by non-frame modalities (e.g. Audio)
def _create_new_object_instance_with_ranges(
self,
object_answer: dict,
ranges: Ranges,
) -> ObjectInstance:
feature_hash = object_answer["featureHash"]
object_hash = object_answer["objectHash"]

label_class = self._ontology.structure.get_child_by_hash(feature_hash, type_=Object)

frame_info_dict = {k: v for k, v in object_answer.items() if v is not None}
frame_info_dict.setdefault("confidence", 1.0) # confidence sometimes not present.
object_frame_instance_info = ObjectInstance.FrameInfo.from_dict(frame_info_dict)

expected_shape: Shape
if self._label_row_read_only_data.data_type == DataType.AUDIO:
expected_shape = Shape.AUDIO
else:
unknown_data_type = self._label_row_read_only_data.data_type
raise RuntimeError(f"Unexpected data type[{unknown_data_type}] for range based objects")
if label_class.shape != expected_shape:
raise LabelRowError("Unsupported object shape for data type")
object_instance = ObjectInstance(label_class, object_hash=object_hash)
object_instance.set_for_frames(
AudioCoordinates(),
ranges,
created_at=object_frame_instance_info.created_at,
created_by=object_frame_instance_info.created_by,
confidence=object_frame_instance_info.confidence,
manual_annotation=object_frame_instance_info.manual_annotation,
last_edited_at=object_frame_instance_info.last_edited_at,
last_edited_by=object_frame_instance_info.last_edited_by,
reviews=object_frame_instance_info.reviews,
overwrite=True,
# Always overwrite during label row dict parsing, as older dicts known to have duplicates
)
answer_list = object_answer["classifications"]
object_instance.set_answer_from_list(answer_list)

return object_instance

def _add_coordinates_to_object_instance(
self,
frame_object_label: dict,
Expand Down Expand Up @@ -2237,8 +2312,8 @@ def _add_classification_instances_from_classifications_without_frames(
):
for classification_answer in classification_answers.values():
ranges: Ranges = []
for range in classification_answer["range"]:
ranges.append(Range(range[0], range[1]))
for range_elem in classification_answer["range"]:
ranges.append(Range(range_elem[0], range_elem[1]))

classification_instance = self._create_new_classification_instance_with_ranges(
classification_answer, ranges
Expand Down Expand Up @@ -2346,8 +2421,8 @@ def _check_labelling_is_initalised(self):
"to do so first."
)

def _method_not_supported_for_audio(self):
if self.data_type == DataType.AUDIO:
def _method_not_supported_for_audio(self, range_only: bool = False):
if self.data_type == DataType.AUDIO and not range_only:
raise LabelRowError("This method is not supported for audio.")

def __repr__(self) -> str:
Expand Down
Loading

0 comments on commit 39b4a9a

Please sign in to comment.