From 3ee74fca8728e5e51aeead86b798b60567a247b7 Mon Sep 17 00:00:00 2001 From: Clinton Wee Date: Mon, 30 Dec 2024 17:34:43 +0800 Subject: [PATCH] - Use HtmlCoordinates and TextCoordinates to hold location of labels on text files - Only allow classifications on frame=0 for non-geometric files --- encord/objects/classification_instance.py | 34 ++- encord/objects/coordinates.py | 45 ++-- encord/objects/html_node.py | 2 +- encord/objects/ontology_labels_impl.py | 27 ++- encord/objects/ontology_object_instance.py | 52 ++--- tests/objects/data/audio_labels.py | 6 +- tests/objects/test_label_structure.py | 240 +++++++++------------ 7 files changed, 201 insertions(+), 205 deletions(-) diff --git a/encord/objects/classification_instance.py b/encord/objects/classification_instance.py index b24411c6b..32a2ab99c 100644 --- a/encord/objects/classification_instance.py +++ b/encord/objects/classification_instance.py @@ -31,7 +31,7 @@ from encord.common.range_manager import RangeManager from encord.common.time_parser import parse_datetime -from encord.constants.enums import DataType +from encord.constants.enums import DataType, is_geometric from encord.exceptions import LabelRowError from encord.objects.answers import Answer, ValueType, _get_static_answer_map from encord.objects.attributes import ( @@ -55,6 +55,17 @@ from encord.objects import LabelRowV2 +# For Audio and Text files, classifications can only be applied to Range(start=0, end=0) +# Because we treat the entire file as being on one frame (for classifications, its different for objects) +def _verify_non_geometric_classifications_range(ranges_to_add: Ranges, label_row: Optional[LabelRowV2]) -> None: + is_range_only_on_frame_0 = len(ranges_to_add) == 1 and ranges_to_add[0].start == 0 and ranges_to_add[0].end == 0 + if label_row is not None and not is_geometric(label_row.data_type) and not is_range_only_on_frame_0: + raise LabelRowError( + "For audio files and text files, classifications can only be attached to frame=0 " + "You may use `ClassificationInstance.set_for_frames(frames=Range(start=0, end=0))`." + ) + + class ClassificationInstance: def __init__( self, @@ -104,6 +115,10 @@ def feature_hash(self) -> str: def _last_frame(self) -> Union[int, float]: if self._parent is None or self._parent.data_type is DataType.DICOM: return float("inf") + elif self._parent is not None and self._parent.data_type == "text/html": + # For HTML files, the entire file is treated as one frame + # Note: for Audio and Plain Text, classifications must be applied to ALL the "frames" + return 1 else: return self._parent.number_of_frames @@ -139,7 +154,11 @@ def _set_for_ranges( reviews: Optional[List[dict]], ): new_range_manager = RangeManager(frame_class=frames) - conflicting_ranges = self._is_classification_already_present_on_range(new_range_manager.get_ranges()) + ranges_to_add = new_range_manager.get_ranges() + + _verify_non_geometric_classifications_range(ranges_to_add, self._parent) + + conflicting_ranges = self._is_classification_already_present_on_range(ranges_to_add) if conflicting_ranges and not overwrite: raise LabelRowError( f"The classification '{self.classification_hash}' already exists " @@ -147,14 +166,13 @@ def _set_for_ranges( f"Set 'overwrite' parameter to True to override." ) - ranges_to_add = new_range_manager.get_ranges() for range_to_add in ranges_to_add: self._check_within_range(range_to_add.end) """ At this point, this classification instance operates on ranges, NOT on frames. - We therefore leave only FRAME 0 in the map.The frame_data for FRAME 0 will be - treated as the data for all "frames" in this classification instance. + We therefore leave only FRAME 0 in the map. The frame_data for FRAME 0 will be + treated as the data for the entire classification instance. """ self._set_frame_and_frame_data( frame=0, @@ -221,8 +239,6 @@ def set_for_frames( last_edited_at = datetime.now() if self._range_only: - # Audio range should cover entire audio file - # Text range should always be [0, 0] self._set_for_ranges( frames=frames, overwrite=overwrite, @@ -687,7 +703,9 @@ def _is_selectable_child_attribute(self, attribute: Attribute) -> bool: def _check_within_range(self, frame: int) -> None: if frame < 0 or frame >= self._last_frame: raise LabelRowError( - f"The supplied frame of `{frame}` is not within the acceptable bounds of `0` to `{self._last_frame}`." + f"The supplied frame of `{frame}` is not within the acceptable bounds of `0` to `{self._last_frame}`. " + f"Note: for non-geometric data (e.g. {DataType.AUDIO} and {DataType.PLAIN_TEXT}), " + f"the entire file has only 1 frame." ) def _is_classification_already_present(self, frames: Iterable[int]) -> Set[int]: diff --git a/encord/objects/coordinates.py b/encord/objects/coordinates.py index abd602404..c23cd1b10 100644 --- a/encord/objects/coordinates.py +++ b/encord/objects/coordinates.py @@ -359,30 +359,31 @@ class TextCoordinates(BaseDTO): Represents coordinates for a text file Attributes: - range_html (List[HtmlRange]): A list of HtmlRange objects range (Ranges): Ranges of chars for simple text files """ - range_html: Optional[List[HtmlRange]] = None - range: Optional[Ranges] = None + range: Ranges - def __post_init__(self): - if self.range_html is None and self.range is None: - raise ValueError("At least one of either `range` or `range_html` must be set.") - if self.range_html is not None and self.range is not None: - raise ValueError("Only one of either `range` or `range_html` must be set.") +class HtmlCoordinates(BaseDTO): + """ + Represents coordinates for a html file + + Attributes: + range_html (List[HtmlRange]): A list of HtmlRange objects + """ - if self.range_html is not None and len(self.range_html) == 0: - raise ValueError("Range HTML list must contain at least one html range.") + range: List[HtmlRange] - if self.range is not None and len(self.range) == 0: - raise ValueError("Range list must contain at least one range.") + +NON_GEOMETRIC_COORDINATES = {AudioCoordinates, TextCoordinates, HtmlCoordinates} Coordinates = Union[ AudioCoordinates, TextCoordinates, + Union[HtmlCoordinates, TextCoordinates], + HtmlCoordinates, BoundingBoxCoordinates, RotatableBoundingBoxCoordinates, PointCoordinate, @@ -392,14 +393,14 @@ def __post_init__(self): BitmaskCoordinates, ] -ACCEPTABLE_COORDINATES_FOR_ONTOLOGY_ITEMS: Dict[Shape, Type[Coordinates]] = { - Shape.BOUNDING_BOX: BoundingBoxCoordinates, - Shape.ROTATABLE_BOUNDING_BOX: RotatableBoundingBoxCoordinates, - Shape.POINT: PointCoordinate, - Shape.POLYGON: PolygonCoordinates, - Shape.POLYLINE: PolylineCoordinates, - Shape.SKELETON: SkeletonCoordinates, - Shape.BITMASK: BitmaskCoordinates, - Shape.AUDIO: AudioCoordinates, - Shape.TEXT: TextCoordinates, +ACCEPTABLE_COORDINATES_FOR_ONTOLOGY_ITEMS: Dict[Shape, List[Type[Coordinates]]] = { + Shape.BOUNDING_BOX: [BoundingBoxCoordinates], + Shape.ROTATABLE_BOUNDING_BOX: [RotatableBoundingBoxCoordinates], + Shape.POINT: [PointCoordinate], + Shape.POLYGON: [PolygonCoordinates], + Shape.POLYLINE: [PolylineCoordinates], + Shape.SKELETON: [SkeletonCoordinates], + Shape.BITMASK: [BitmaskCoordinates], + Shape.AUDIO: [AudioCoordinates], + Shape.TEXT: [TextCoordinates, HtmlCoordinates], } diff --git a/encord/objects/html_node.py b/encord/objects/html_node.py index 3b7d2a828..0bfa43ce9 100644 --- a/encord/objects/html_node.py +++ b/encord/objects/html_node.py @@ -56,7 +56,7 @@ def to_dict(self): } def __hash__(self): - return hash(self.__repr__()) + return f"{self.start.node}-{self.start.offset}-{self.end.node}-{self.end.offset}" @classmethod def from_dict(cls, d: dict): diff --git a/encord/objects/ontology_labels_impl.py b/encord/objects/ontology_labels_impl.py index d76215577..c0b25528c 100644 --- a/encord/objects/ontology_labels_impl.py +++ b/encord/objects/ontology_labels_impl.py @@ -40,7 +40,7 @@ BundledWorkflowReopenPayload, ) from encord.objects.classification import Classification -from encord.objects.classification_instance import ClassificationInstance +from encord.objects.classification_instance import ClassificationInstance, _verify_non_geometric_classifications_range from encord.objects.constants import ( # pylint: disable=unused-import # for backward compatibility DATETIME_LONG_STRING_FORMAT, DEFAULT_CONFIDENCE, @@ -51,6 +51,7 @@ BitmaskCoordinates, BoundingBoxCoordinates, Coordinates, + HtmlCoordinates, PointCoordinate, PolygonCoordinates, PolylineCoordinates, @@ -833,9 +834,15 @@ def add_object_instance(self, object_instance: ObjectInstance, force: bool = Tru # We want to ensure that we are only adding the object_instance to a label_row # IF AND ONLY IF the file type is text/html and the object_instance has range_html set if self.file_type == "text/html" and object_instance.range_html is None: - raise LabelRowError("Unable to assign object instance without a html range to a html file") + raise LabelRowError( + "Unable to assign object instance without a html range to a html file. " + f"Please ensure the object instance exists on frame=0, and has coordinates of type {HtmlCoordinates}." + ) elif self.file_type != "text/html" and object_instance.range_html is not None: - raise LabelRowError("Unable to assign object instance with a html range to a non-html file") + raise LabelRowError( + "Unable to assign object instance with a html range to a non-html file. " + f"Please ensure the object instance does not have coordinates of type {HtmlCoordinates}." + ) if object_instance.is_assigned_to_label_row(): raise LabelRowError( @@ -898,10 +905,10 @@ def add_classification_instance(self, classification_instance: ClassificationIns ) """ - Implementation here diverges because audio data will operate on ranges, whereas + Implementation here diverges because non-geometric data will operate on ranges, whereas everything else will operate on frames. """ - if self.data_type == DataType.AUDIO: + if not is_geometric(self.data_type): self._add_classification_instance_for_range( classification_instance=classification_instance, force=force, @@ -930,7 +937,7 @@ def add_classification_instance(self, classification_instance: ClassificationIns self._classifications_to_frames[classification_instance.ontology_item].update(frames) self._add_to_frame_to_hashes_map(classification_instance, frames) - # This should only be used for Audio classification instances + # This should only be used for Non-geometric data def _add_classification_instance_for_range( self, classification_instance: ClassificationInstance, @@ -938,6 +945,10 @@ def _add_classification_instance_for_range( ): classification_hash = classification_instance.classification_hash ranges_to_add = classification_instance.range_list + + if classification_instance.is_range_only(): + _verify_non_geometric_classifications_range(ranges_to_add, self) + already_present_ranges = self._is_classification_already_present_on_ranges( classification_instance.ontology_item, ranges_to_add ) @@ -970,7 +981,7 @@ def remove_classification(self, classification_instance: ClassificationInstance) classification_hash = classification_instance.classification_hash self._classifications_map.pop(classification_hash) - if self.data_type == DataType.AUDIO: + if not is_geometric(self.data_type): range_manager = self._classifications_to_ranges[classification_instance.ontology_item] ranges_to_remove = classification_instance.range_list range_manager.remove_ranges(ranges_to_remove) @@ -2326,7 +2337,7 @@ def _create_new_html_object_instance( object_instance = ObjectInstance(label_class, object_hash=object_hash) object_instance.set_for_frames( - TextCoordinates(range_html=[HtmlRange.from_dict(x) for x in range_html]), + HtmlCoordinates(range=[HtmlRange.from_dict(x) for x in range_html]), frames=0, created_at=object_frame_instance_info.created_at, created_by=object_frame_instance_info.created_by, diff --git a/encord/objects/ontology_object_instance.py b/encord/objects/ontology_object_instance.py index c895e02df..ad8e3ad02 100644 --- a/encord/objects/ontology_object_instance.py +++ b/encord/objects/ontology_object_instance.py @@ -43,8 +43,10 @@ from encord.objects.constants import DEFAULT_CONFIDENCE, DEFAULT_MANUAL_ANNOTATION from encord.objects.coordinates import ( ACCEPTABLE_COORDINATES_FOR_ONTOLOGY_ITEMS, + NON_GEOMETRIC_COORDINATES, AudioCoordinates, Coordinates, + HtmlCoordinates, TextCoordinates, ) from encord.objects.frames import ( @@ -177,8 +179,8 @@ def range_html(self) -> Optional[HtmlRanges]: coordinates = non_geometric_annotation.coordinates - if isinstance(coordinates, TextCoordinates): - return coordinates.range_html + if isinstance(coordinates, HtmlCoordinates): + return coordinates.range else: return None @@ -483,7 +485,7 @@ def set_for_frames( """ if self._non_geometric: - if not isinstance(coordinates, AudioCoordinates) and not isinstance(coordinates, TextCoordinates): + if not isinstance(coordinates, tuple(NON_GEOMETRIC_COORDINATES)): raise LabelRowError("Expecting non-geometric coordinate type") elif frames != 0: @@ -502,24 +504,11 @@ def set_for_frames( "Cannot overwrite existing data for a frame. Set `overwrite` to `True` to overwrite." ) - check_coordinate_type(coordinates, self._ontology_object) + check_coordinate_type(coordinates, self._ontology_object, self._parent) - if isinstance(coordinates, (AudioCoordinates, TextCoordinates)) and coordinates.range is not None: - if self._parent is not None and self._parent == "text/html": - raise LabelRowError( - "For html labels, ensure the `range_html` property " - "is set when instantiating the TextCoordinates." - ) - - self.check_within_range(coordinates.range[0].end) - - elif isinstance(coordinates, TextCoordinates) and coordinates.range_html is not None: - if self._parent is not None and self._parent.file_type != "text/html": - raise LabelRowError( - "For non-html labels, ensure the `range` property " - "is set when instantiating the TextCoordinates." - ) - # Unable to validate xPaths here. Do validation in BE instead + if isinstance(coordinates, (TextCoordinates, AudioCoordinates)): + for non_geometric_range in coordinates.range: + self.check_within_range(non_geometric_range.end) else: self.check_within_range(frame) @@ -960,23 +949,38 @@ def __lt__(self, other: ObjectInstance) -> bool: return self._object_hash < other._object_hash -def check_coordinate_type(coordinates: Coordinates, ontology_object: Object) -> None: +def check_coordinate_type(coordinates: Coordinates, ontology_object: Object, parent: Optional[LabelRowV2]) -> None: """ Check if the coordinate type matches the expected type for the ontology object. Args: coordinates (Coordinates): The coordinates to check. ontology_object (Object): The ontology object to check against. + parent (LabelRowV2): The parent label row (if any) of the ontology object. Raises: LabelRowError: If the coordinate type does not match the expected type. """ - expected_coordinate_type = ACCEPTABLE_COORDINATES_FOR_ONTOLOGY_ITEMS[ontology_object.shape] - if not isinstance(coordinates, expected_coordinate_type): + expected_coordinate_types = ACCEPTABLE_COORDINATES_FOR_ONTOLOGY_ITEMS[ontology_object.shape] + if all( + not isinstance(coordinates, expected_coordinate_type) for expected_coordinate_type in expected_coordinate_types + ): raise LabelRowError( - f"Expected a coordinate of type `{expected_coordinate_type}`, but got type `{type(coordinates)}`." + f"Expected coordinates of one of the following types: `{expected_coordinate_types}`, but got type `{type(coordinates)}`." ) + # An ontology object with `Text` shape can have both coordinates `HtmlCoordinates` and `TextCoordinates` + # Therefore, we need to further check the file type, to ensure that `HtmlCoordinates` are only used for + # HTML files, and `TextCoordinates` are only used for plain text files. + if isinstance(coordinates, TextCoordinates): + if parent is not None and parent == "text/html": + raise LabelRowError(f"Expected coordinates of type {HtmlCoordinates}`, but got type `{type(coordinates)}`.") + elif isinstance(coordinates, HtmlCoordinates): + if parent is not None and parent.file_type != "text/html": + raise LabelRowError( + "For non-html labels, ensure the `range` property " "is set when instantiating the TextCoordinates." + ) + class DynamicAnswerManager: """ diff --git a/tests/objects/data/audio_labels.py b/tests/objects/data/audio_labels.py index 5a5d398fb..547054f83 100644 --- a/tests/objects/data/audio_labels.py +++ b/tests/objects/data/audio_labels.py @@ -39,7 +39,7 @@ "manualAnnotation": True, }, ], - "range": [[0, 1]], + "range": [[0, 0]], "createdBy": "user1Hash", "createdAt": "Tue, 05 Nov 2024 09:41:37 ", "lastEditedBy": "user1Hash", @@ -71,7 +71,7 @@ "manualAnnotation": True, }, ], - "range": [[0, 1]], + "range": [[0, 0]], "createdBy": "user1Hash", "createdAt": "Tue, 05 Nov 2024 09:41:37 ", "lastEditedBy": "user1Hash", @@ -101,7 +101,7 @@ "manualAnnotation": True, }, ], - "range": [[0, 1]], + "range": [[0, 0]], "createdBy": "user1Hash", "createdAt": "Tue, 05 Nov 2024 09:41:37 ", "lastEditedBy": "user1Hash", diff --git a/tests/objects/test_label_structure.py b/tests/objects/test_label_structure.py index 910c468ef..8ded1de78 100644 --- a/tests/objects/test_label_structure.py +++ b/tests/objects/test_label_structure.py @@ -1,5 +1,4 @@ import datetime -import json import math from dataclasses import asdict from unittest.mock import Mock, PropertyMock @@ -23,6 +22,7 @@ from encord.objects.coordinates import ( AudioCoordinates, BoundingBoxCoordinates, + HtmlCoordinates, PointCoordinate, PolygonCoordinates, TextCoordinates, @@ -664,50 +664,42 @@ def test_add_and_get_classification_instances_to_audio_label_row(ontology): label_row.from_labels_dict(EMPTY_AUDIO_LABELS) classification_instance_1 = ClassificationInstance(text_classification, range_only=True) - classification_instance_2 = ClassificationInstance(text_classification, range_only=True) - classification_instance_3 = ClassificationInstance(checklist_classification, range_only=True) + classification_instance_2 = ClassificationInstance(checklist_classification, range_only=True) - classification_instance_1.set_for_frames(Range(1, 2)) - classification_instance_2.set_for_frames(Range(3, 4)) - classification_instance_3.set_for_frames(Range(1, 4)) + classification_instance_1.set_for_frames(Range(0, 0)) + classification_instance_2.set_for_frames(Range(0, 0)) label_row.add_classification_instance(classification_instance_1) label_row.add_classification_instance(classification_instance_2) - label_row.add_classification_instance(classification_instance_3) classification_instances = label_row.get_classification_instances() + assert set(classification_instances) == { classification_instance_1, classification_instance_2, - classification_instance_3, } filtered_classification_instances = label_row.get_classification_instances(text_classification) - assert set(filtered_classification_instances) == {classification_instance_1, classification_instance_2} + assert set(filtered_classification_instances) == {classification_instance_1} overlapping_classification_instance = ClassificationInstance(text_classification, range_only=True) - overlapping_classification_instance.set_for_frames(1) + overlapping_classification_instance.set_for_frames(0) - with pytest.raises(LabelRowError): + with pytest.raises(LabelRowError) as e: label_row.add_classification_instance(overlapping_classification_instance) - overlapping_classification_instance.remove_from_frames(1) - overlapping_classification_instance.set_for_frames(5) - label_row.add_classification_instance(overlapping_classification_instance) - with pytest.raises(LabelRowError): - overlapping_classification_instance.set_for_frames(1) + assert e.value.message == ( + f"A ClassificationInstance '{overlapping_classification_instance.classification_hash}' was already added " + "and has overlapping frames. Overlapping frames that were " + "found are `[(0:0)]`. Make sure that you only add classifications " + "which are on frames where the same type of classification does not yet exist." + ) # Do not raise if overwrite flag is passed - overlapping_classification_instance.set_for_frames(1, overwrite=True) + overlapping_classification_instance.set_for_frames(0, overwrite=True) label_row.remove_classification(classification_instance_1) - overlapping_classification_instance.set_for_frames(1) - - with pytest.raises(LabelRowError): - overlapping_classification_instance.set_for_frames(3) - - classification_instance_2.remove_from_frames(3) - overlapping_classification_instance.set_for_frames(3) + overlapping_classification_instance.set_for_frames(0) def test_object_instance_answer_for_static_attributes(): @@ -1136,37 +1128,25 @@ def test_non_range_classification_cannot_be_added_to_audio_label_row(ontology): label_row.add_classification_instance(classification_instance) -def test_audio_classification_overwrite(ontology, empty_audio_label_row: LabelRowV2): - classification_instance = ClassificationInstance(checklist_classification, range_only=True) - classification_instance.set_for_frames(Range(start=0, end=100)) - empty_audio_label_row.add_classification_instance(classification_instance) - - with pytest.raises(LabelRowError): - classification_instance.set_for_frames(Range(start=5, end=20)) - - with pytest.raises(LabelRowError): - classification_instance.set_for_frames(Range(start=100, end=101)) - - # No error when set overwrite to True - classification_instance.set_for_frames(Range(start=100, end=101), overwrite=True) - range_list = classification_instance.range_list - assert len(range_list) == 1 - assert range_list[0].start == 0 - assert range_list[0].end == 101 - - -def test_audio_classification_exceed_max_frames(ontology, empty_audio_label_row: LabelRowV2): - classification_instance = ClassificationInstance(checklist_classification, range_only=True) - classification_instance.set_for_frames(Range(start=0, end=100)) - empty_audio_label_row.add_classification_instance(classification_instance) +def test_non_geometric_label_rows_can_only_have_classifications_on_frame_0( + ontology, + empty_audio_label_row: LabelRowV2, + empty_plain_text_label_row: LabelRowV2, + empty_html_text_label_row: LabelRowV2, +): + for label_row in [empty_audio_label_row, empty_html_text_label_row, empty_plain_text_label_row]: + classification_instance = ClassificationInstance(checklist_classification, range_only=True) + classification_instance.set_for_frames(Range(start=0, end=0)) + label_row.add_classification_instance(classification_instance) - with pytest.raises(LabelRowError): - classification_instance.set_for_frames(Range(start=200, end=5000)) + with pytest.raises(LabelRowError) as e: + classification_instance.set_for_frames(Range(start=0, end=1)) - range_list = classification_instance.range_list - assert len(range_list) == 1 - assert range_list[0].start == 0 - assert range_list[0].end == 100 + assert e.value.message == ( + "For audio files and text files, classifications can only be " + "attached to frame=0 You may use " + "`ClassificationInstance.set_for_frames(frames=Range(start=0, end=0))`." + ) def test_audio_object_exceed_max_frames(ontology, empty_audio_label_row: LabelRowV2): @@ -1236,24 +1216,17 @@ def test_get_annotations_from_audio_object(ontology) -> None: assert annotation.reviews is None -def test_audio_classification_can_be_added_edited_and_removed(ontology, empty_audio_label_row: LabelRowV2): +def test_audio_classification_can_be_added_and_removed(ontology, empty_audio_label_row: LabelRowV2): label_row = empty_audio_label_row classification_instance = ClassificationInstance(checklist_classification, range_only=True) - classification_instance.set_for_frames(Range(start=0, end=1500)) + classification_instance.set_for_frames(Range(start=0, end=0)) range_list = classification_instance.range_list assert len(range_list) == 1 assert range_list[0].start == 0 - assert range_list[0].end == 1500 + assert range_list[0].end == 0 label_row.add_classification_instance(classification_instance) assert len(label_row.get_classification_instances()) == 1 - classification_instance.set_for_frames(Range(start=2000, end=2499)) - range_list = classification_instance.range_list - assert len(range_list) == 2 - assert range_list[0].start == 0 - assert range_list[0].end == 1500 - assert range_list[1].start == 2000 - assert range_list[1].end == 2499 label_row.remove_classification(classification_instance) assert len(label_row.get_classification_instances()) == 0 @@ -1290,20 +1263,14 @@ def test_audio_object_can_be_added_edited_and_removed(ontology, empty_audio_labe def test_get_annotations_from_html_text_object(ontology) -> None: now = datetime.datetime.now() - range_html=HtmlRange( - start=HtmlNode( - node="/html[1]/body[1]/div[1]/text()[1]", - offset=50 - ), - end=HtmlNode( - node="/html[1]/body[1]/div[1]/text()[1]", - offset=60 - ) + range = HtmlRange( + start=HtmlNode(node="/html[1]/body[1]/div[1]/text()[1]", offset=50), + end=HtmlNode(node="/html[1]/body[1]/div[1]/text()[1]", offset=60), ) object_instance = ObjectInstance(text_obj_ontology_item) object_instance.set_for_frames( - TextCoordinates(range_html=[range_html]), + HtmlCoordinates(range=[range]), created_at=now, created_by="user1", last_edited_at=now, @@ -1324,24 +1291,17 @@ def test_get_annotations_from_html_text_object(ontology) -> None: assert annotation.reviews is None -def test_html_text_classification_can_be_added_edited_and_removed(ontology, empty_html_text_label_row: LabelRowV2): +def test_html_text_classification_can_be_added_removed(ontology, empty_html_text_label_row: LabelRowV2): label_row = empty_html_text_label_row classification_instance = ClassificationInstance(checklist_classification, range_only=True) - classification_instance.set_for_frames(Range(start=0, end=1500)) + classification_instance.set_for_frames(Range(start=0, end=0)) range_list = classification_instance.range_list assert len(range_list) == 1 assert range_list[0].start == 0 - assert range_list[0].end == 1500 + assert range_list[0].end == 0 label_row.add_classification_instance(classification_instance) assert len(label_row.get_classification_instances()) == 1 - classification_instance.set_for_frames(Range(start=2000, end=2499)) - range_list = classification_instance.range_list - assert len(range_list) == 2 - assert range_list[0].start == 0 - assert range_list[0].end == 1500 - assert range_list[1].start == 2000 - assert range_list[1].end == 2499 label_row.remove_classification(classification_instance) assert len(label_row.get_classification_instances()) == 0 @@ -1351,28 +1311,28 @@ def test_html_text_object_can_be_added_edited_and_removed(ontology, empty_html_t label_row = empty_html_text_label_row obj_instance = ObjectInstance(text_obj_ontology_item) - initial_range_html = [ + initial_range = [ HtmlRange( start=HtmlNode(node="start_node", offset=50), end=HtmlNode(node="end_node", offset=100), ) ] - obj_instance.set_for_frames(TextCoordinates(range_html=initial_range_html)) - range_html = obj_instance.range_html + obj_instance.set_for_frames(HtmlCoordinates(range=initial_range)) + range = obj_instance.range_html - assert range_html is not None - assert len(range_html) == 1 - assert range_html[0].start.node == "start_node" - assert range_html[0].start.offset == 50 - assert range_html[0].end.node == "end_node" - assert range_html[0].end.offset == 100 + assert range is not None + assert len(range) == 1 + assert range[0].start.node == "start_node" + assert range[0].start.offset == 50 + assert range[0].end.node == "end_node" + assert range[0].end.offset == 100 label_row.add_object_instance(obj_instance) assert len(label_row.get_classification_instances()) == 0 assert len(label_row.get_object_instances()) == 1 - edited_range_html = [ + edited_range = [ HtmlRange( start=HtmlNode(node="start_node_edited", offset=70), end=HtmlNode(node="end_node_edited", offset=90), @@ -1383,23 +1343,23 @@ def test_html_text_object_can_be_added_edited_and_removed(ontology, empty_html_t ), ] - obj_instance.set_for_frames(TextCoordinates(range_html=edited_range_html), overwrite=True) - range_html = obj_instance.range_html - assert range_html is not None - assert len(range_html) == 2 - assert range_html[0].start.node == "start_node_edited" - assert range_html[0].start.offset == 70 - assert range_html[0].end.node == "end_node_edited" - assert range_html[0].end.offset == 90 - - assert range_html[1].start.node == "start_node_new" - assert range_html[1].start.offset == 5 - assert range_html[1].end.node == "end_node_new" - assert range_html[1].end.offset == 7 + obj_instance.set_for_frames(HtmlCoordinates(range=edited_range), overwrite=True) + range = obj_instance.range_html + assert range is not None + assert len(range) == 2 + assert range[0].start.node == "start_node_edited" + assert range[0].start.offset == 70 + assert range[0].end.node == "end_node_edited" + assert range[0].end.offset == 90 + + assert range[1].start.node == "start_node_new" + assert range[1].start.offset == 5 + assert range[1].end.node == "end_node_new" + assert range[1].end.offset == 7 obj_instance.remove_from_frames(frames=0) - range_html = obj_instance.range_html - assert range_html is None + range = obj_instance.range_html + assert range is None def test_html_text_object_cannot_be_added_to_non_html_label_row( @@ -1407,32 +1367,38 @@ def test_html_text_object_cannot_be_added_to_non_html_label_row( ) -> None: obj_instance = ObjectInstance(text_obj_ontology_item) - initial_range_html = [ + initial_range = [ HtmlRange( start=HtmlNode(node="start_node", offset=50), end=HtmlNode(node="end_node", offset=100), ) ] - obj_instance.set_for_frames(TextCoordinates(range_html=initial_range_html)) - range_html = obj_instance.range_html + obj_instance.set_for_frames(HtmlCoordinates(range=initial_range)) + range = obj_instance.range_html - assert range_html is not None - assert len(range_html) == 1 - assert range_html[0].start.node == "start_node" - assert range_html[0].start.offset == 50 - assert range_html[0].end.node == "end_node" - assert range_html[0].end.offset == 100 + assert range is not None + assert len(range) == 1 + assert range[0].start.node == "start_node" + assert range[0].start.offset == 50 + assert range[0].end.node == "end_node" + assert range[0].end.offset == 100 with pytest.raises(LabelRowError) as e: empty_audio_label_row.add_object_instance(obj_instance) - assert str(e.value.message) == "Unable to assign object instance with a html range to a non-html file" + assert str(e.value.message) == ( + "Unable to assign object instance with a html range to a non-html file. " + f"Please ensure the object instance does not have coordinates of type {HtmlCoordinates}." + ) with pytest.raises(LabelRowError) as e: empty_plain_text_label_row.add_object_instance(obj_instance) - assert str(e.value.message) == "Unable to assign object instance with a html range to a non-html file" + assert str(e.value.message) == ( + "Unable to assign object instance with a html range to a non-html file. " + f"Please ensure the object instance does not have coordinates of type {HtmlCoordinates}." + ) def test_set_for_frames_with_range_html_throws_error_if_used_incorrectly( @@ -1445,27 +1411,27 @@ def test_set_for_frames_with_range_html_throws_error_if_used_incorrectly( ) ] - # Adding range_html to an object instance where the object's shape is NOT text + # Adding HtmlCoordinates to an object instance where the object's shape is NOT text audio_obj_instance = ObjectInstance(audio_obj_ontology_item) with pytest.raises(LabelRowError) as e: - audio_obj_instance.set_for_frames(coordinates=TextCoordinates(range_html=range_html)) + audio_obj_instance.set_for_frames(coordinates=HtmlCoordinates(range=range_html)) assert ( - str(e.value.message) == f"Expected a coordinate of type `{AudioCoordinates}`, but got type `{TextCoordinates}`." + str(e.value.message) + == f"Expected coordinates of one of the following types: `[{AudioCoordinates}]`, but got type `{HtmlCoordinates}`." ) - # Adding range_html to an object instance which is attached to a label row where the + # Adding HtmlCoordinates to an object instance which is attached to a label row where the # file type is NOT 'text/html' html_text_obj_instance = ObjectInstance(text_obj_ontology_item) - html_text_obj_instance.set_for_frames(coordinates=TextCoordinates(), frames=0) - empty_plain_text_label_row.add_object_instance(html_text_obj_instance) + html_text_obj_instance.set_for_frames(coordinates=HtmlCoordinates(range=range_html)) with pytest.raises(LabelRowError) as e: - html_text_obj_instance.set_for_frames(coordinates=TextCoordinates(range_html=range_html), overwrite=True) + empty_plain_text_label_row.add_object_instance(html_text_obj_instance) assert ( - str(e.value.message) - == "For non-html labels, ensure the `range` property is set when instantiating the TextCoordinates." + str(e.value.message) == "Unable to assign object instance with a html range to a non-html file. " + f"Please ensure the object instance does not have coordinates of type {HtmlCoordinates}." ) @@ -1495,24 +1461,17 @@ def test_get_annotations_from_plain_text_object(ontology) -> None: assert annotation.reviews is None -def test_plain_text_classification_can_be_added_edited_and_removed(ontology, empty_plain_text_label_row: LabelRowV2): +def test_plain_text_classification_can_be_added_and_removed(ontology, empty_plain_text_label_row: LabelRowV2): label_row = empty_plain_text_label_row classification_instance = ClassificationInstance(checklist_classification, range_only=True) - classification_instance.set_for_frames(Range(start=0, end=1500)) + classification_instance.set_for_frames(Range(start=0, end=0)) range_list = classification_instance.range_list assert len(range_list) == 1 assert range_list[0].start == 0 - assert range_list[0].end == 1500 + assert range_list[0].end == 0 label_row.add_classification_instance(classification_instance) assert len(label_row.get_classification_instances()) == 1 - classification_instance.set_for_frames(Range(start=2000, end=2499)) - range_list = classification_instance.range_list - assert len(range_list) == 2 - assert range_list[0].start == 0 - assert range_list[0].end == 1500 - assert range_list[1].start == 2000 - assert range_list[1].end == 2499 label_row.remove_classification(classification_instance) assert len(label_row.get_classification_instances()) == 0 @@ -1558,4 +1517,7 @@ def test_plain_text_object_cannot_be_added_to_html_label_row(ontology, empty_htm with pytest.raises(LabelRowError) as e: label_row.add_object_instance(obj_instance) - assert str(e.value.message) == "Unable to assign object instance without a html range to a html file" + assert str(e.value.message) == ( + "Unable to assign object instance without a html range to a html file. " + f"Please ensure the object instance exists on frame=0, and has coordinates of type {HtmlCoordinates}." + )