encord-team · oscar-encord · Dec 14, 2023 · Dec 15, 2023 · Dec 15, 2023 · Jan 10, 2024
diff --git a/encord/objects/classification_instance.py b/encord/objects/classification_instance.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 from collections import defaultdict
 from copy import deepcopy
 from dataclasses import dataclass, field
@@ -133,21 +134,26 @@ def set_for_frames(
 
         frames_list = frames_class_to_frames_list(frames)
 
-        self._check_classification_already_present(frames_list)
+        if self._check_classification_already_present(frames_list):
+            logging.warning(f'Skipping {frames_list} as already present for {self.ontology_item}')
+            return
 
         for frame in frames_list:
-            self._check_within_range(frame)
-            self._set_frame_and_frame_data(
-                frame,
-                overwrite=overwrite,
-                created_at=created_at,
-                created_by=created_by,
-                confidence=confidence,
-                manual_annotation=manual_annotation,
-                last_edited_at=last_edited_at,
-                last_edited_by=last_edited_by,
-                reviews=reviews,
-            )
+            if self._check_within_range(frame):
+                self._set_frame_and_frame_data(
+                    frame,
+                    overwrite=overwrite,
+                    created_at=created_at,
+                    created_by=created_by,
+                    confidence=confidence,
+                    manual_annotation=manual_annotation,
+                    last_edited_at=last_edited_at,
+                    last_edited_by=last_edited_by,
+                    reviews=reviews,
+                )
+            else:
+                logging.warning(f'Cutting {frame} in {frames_list}')
+                return
 
         if self.is_assigned_to_label_row():
             assert self._parent is not None
@@ -528,22 +534,18 @@ def _is_selectable_child_attribute(self, attribute: Attribute) -> bool:
         top_attribute = ontology_classification.attributes[0]
         return _search_child_attributes(attribute, top_attribute, self._static_answer_map)
 
-    def _check_within_range(self, frame: int) -> None:
+    def _check_within_range(self, frame: int) -> bool:
         if frame < 0 or frame >= self._last_frame:
-            raise LabelRowError(
-                f"The supplied frame of `{frame}` is not within the acceptable bounds of `0` to `{self._last_frame}`."
-            )
+            return False
+        return True
 
-    def _check_classification_already_present(self, frames: Iterable[int]) -> None:
+    def _check_classification_already_present(self, frames: Iterable[int]) -> bool:
         if self._parent is None:
-            return
+            return False
         already_present_frame = self._parent._is_classification_already_present(self.ontology_item, frames)
         if already_present_frame is not None:
-            raise LabelRowError(
-                f"The LabelRowV2, that this classification is part of, already has a classification of the same type "
-                f"on frame `{already_present_frame}`. The same type of classification can only be present once per "
-                f"frame per LabelRowV2."
-            )
+            return True
+        return False
 
     def __repr__(self):
         return (

diff --git a/encord/objects/ontology_labels_impl.py b/encord/objects/ontology_labels_impl.py
@@ -10,7 +10,7 @@
 from encord.client import EncordClientProject
 from encord.client import LabelRow as OrmLabelRow
 from encord.constants.enums import DataType
-from encord.exceptions import LabelRowError, WrongProjectTypeError
+from encord.exceptions import LabelRowError, WrongProjectTypeError, OntologyError
 from encord.http.bundle import Bundle, BundleResultHandler, BundleResultMapper, bundled_operation
 from encord.http.limits import (
     LABEL_ROW_BUNDLE_CREATE_LIMIT,
@@ -1326,17 +1326,22 @@ def _add_object_instances_from_objects(
             object_hash = frame_object_label["objectHash"]
             if object_hash not in self._objects_map:
                 object_instance = self._create_new_object_instance(frame_object_label, frame)
-                self.add_object_instance(object_instance)
+                if object_instance:
+                    self.add_object_instance(object_instance)
+                else:
+                    logging.warning(f'Skipping object {object_hash} since it is not in the ontology.')
             else:
                 self._add_coordinates_to_object_instance(frame_object_label, frame)
 
     def _add_objects_answers(self, label_row_dict: dict):
         for answer in label_row_dict["object_answers"].values():
             object_hash = answer["objectHash"]
-            object_instance = self._objects_map[object_hash]
-
-            answer_list = answer["classifications"]
-            object_instance.set_answer_from_list(answer_list)
+            object_instance = self._objects_map.get(object_hash)
+            if object_instance:
+                answer_list = answer["classifications"]
+                object_instance.set_answer_from_list(answer_list)
+            else:
+                logging.warning(f'Skipping answers for object {object_hash} as it has no corresponding object.')
 
     def _add_action_answers(self, label_row_dict: dict):
         for answer in label_row_dict["object_actions"].values():
@@ -1346,12 +1351,14 @@ def _add_action_answers(self, label_row_dict: dict):
             answer_list = answer["actions"]
             object_instance.set_answer_from_list(answer_list)
 
-    def _create_new_object_instance(self, frame_object_label: dict, frame: int) -> ObjectInstance:
+    def _create_new_object_instance(self, frame_object_label: dict, frame: int) -> ObjectInstance | None:
         ontology = self._ontology.structure
         feature_hash = frame_object_label["featureHash"]
         object_hash = frame_object_label["objectHash"]
 
         label_class = ontology.get_child_by_hash(feature_hash, type_=Object)
+        if not label_class:
+            return None
         object_instance = ObjectInstance(label_class, object_hash=object_hash)
 
         coordinates = self._get_coordinates(frame_object_label)
@@ -1422,7 +1429,11 @@ def _add_classification_instances_from_classifications(
                 classification_instance = self._create_new_classification_instance(
                     frame_classification_label, frame, classification_answers
                 )
-                self.add_classification_instance(classification_instance)
+                if classification_instance:
+                    try:
+                        self.add_classification_instance(classification_instance)
+                    except LabelRowError:
+                        logging.warning(f'Skipping {frame}')
             else:
                 self._add_frames_to_classification_instance(frame_classification_label, frame)
 
@@ -1444,11 +1455,14 @@ def _parse_image_group_frame_level_data(self, label_row_data_units: dict) -> Dic
 
     def _create_new_classification_instance(
         self, frame_classification_label: dict, frame: int, classification_answers: dict
-    ) -> ClassificationInstance:
+    ) -> ClassificationInstance | None:
         feature_hash = frame_classification_label["featureHash"]
         classification_hash = frame_classification_label["classificationHash"]
 
         label_class = self._ontology.structure.get_child_by_hash(feature_hash, type_=Classification)
+        if not label_class:
+            logging.warning(f'Skipping classification hash:{classification_hash} as no ontology object was found.')
+            return None
         classification_instance = ClassificationInstance(label_class, classification_hash=classification_hash)
 
         frame_view = ClassificationInstance.FrameData.from_dict(frame_classification_label)
@@ -1463,7 +1477,10 @@ def _create_new_classification_instance(
             reviews=frame_view.reviews,
         )
 
-        answers_dict = classification_answers[classification_hash]["classifications"]
+        answers_dict = classification_answers.get(classification_hash, {}).get("classifications")
+        if not answers_dict:
+            logging.warning(f'Skipping classification hash:{classification_hash} as no corresponding answer was found.')
+            return None
         self._add_static_answers_from_dict(classification_instance, answers_dict)
 
         return classification_instance

diff --git a/encord/objects/ontology_object_instance.py b/encord/objects/ontology_object_instance.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 from collections import defaultdict
 from copy import deepcopy
 from dataclasses import dataclass, field
@@ -396,9 +397,8 @@ def set_for_frames(
             existing_frame_data = self._frames_to_instance_data.get(frame)
 
             if overwrite is False and existing_frame_data is not None:
-                raise LabelRowError(
-                    "Cannot overwrite existing data for a frame. Set `overwrite` to `True` to overwrite."
-                )
+                logging.warning(f'Skipping overwrite for {frame} in {frames_list}')
+                return
 
             check_coordinate_type(coordinates, self._ontology_object)
             self.check_within_range(frame)

diff --git a/encord/objects/ontology_structure.py b/encord/objects/ontology_structure.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional, Type, cast
 from uuid import uuid4
@@ -28,7 +29,7 @@ def get_child_by_hash(
         self,
         feature_node_hash: str,
         type_: Optional[Type[OntologyElementT]] = None,
-    ) -> OntologyElementT:
+    ) -> OntologyElementT | None:
         """
         Returns the first child node of this ontology tree node with the matching feature node hash. If there is
         more than one child with the same feature node hash in the ontology tree node, then the ontology would be in
@@ -52,8 +53,7 @@ def get_child_by_hash(
             found_item = _get_element_by_hash(feature_node_hash, classification.attributes)
             if found_item is not None:
                 return checked_cast(found_item, type_)
-
-        raise OntologyError(f"Item not found: can't find an item with a hash {feature_node_hash} in the ontology.")
+        logging.warning(f"Item not found: can't find an item with a hash {feature_node_hash} in the ontology.")
 
     def get_child_by_title(
         self,

diff --git a/encord/oe_label_cleaning/UGhent-orphaned-label-cleaner.py b/encord/oe_label_cleaning/UGhent-orphaned-label-cleaner.py
@@ -0,0 +1,61 @@
+import json
+from pprint import pprint
+
+from encord import EncordUserClient
+
+# set the ranges for the classifications you want to keep
+TRUE_LABEL_RANGES = [[19280, 19580], [22248, 22548]]
+
+# Connect to encord
+keyfile = "/Users/encord/oe-public-key-private-key.txt"
+user_client = EncordUserClient.create_with_ssh_private_key(ssh_private_key_path=keyfile)
+
+# get the project
+proj_hash = "6508ede1-cfd4-4eb7-bdc2-83508e805879"
+project = user_client.get_project(proj_hash)
+# get the label row for the specific data unit
+data_hash = "561e8ed5-b65b-4dfb-9556-8fd73d968b43"
+label_rows = project.list_label_rows_v2(data_hashes=[data_hash])
+if len(label_rows) == 1:
+    lr = label_rows.pop()
+else:
+    raise NotImplementedError("Program not built for multiple label rows")
+
+# initialise labels and export the label row as a dictionary
+lr.initialise_labels()
+lr_dict = lr.to_encord_dict()
+# save a backup of the label row
+with open(f"{lr.label_hash}_bkp.json", "w") as f:
+    json.dump(lr_dict, f)
+
+# get the labels-by-frame dictionary
+lab_row_data_unit = list(lr_dict["data_units"].keys())[0]
+labels_by_frame = lr_dict["data_units"][lab_row_data_unit]["labels"]
+
+# iterate through frame numbers
+for frame_num in labels_by_frame.keys():
+    # flag ends up True only when the frame number is outside ALL desired frame ranges (despite the variable's name)
+    in_true_label_range = True
+    for tlr in TRUE_LABEL_RANGES:
+        in_true_label_range = in_true_label_range and (not (tlr[0] <= int(frame_num) <= tlr[1]))
+    # look for frames outside the desired ranges that still contain a classification
+    if in_true_label_range and labels_by_frame[frame_num]["classifications"] != []:
+        print("REMOVING CLASSIFICATION FROM FRAME:", frame_num, labels_by_frame[frame_num]["classifications"])
+        # get ALL classification instances for that frame
+        bad_class_instance_list = lr.get_classification_instances(filter_frames=int(frame_num))
+        # when there is one classification per frame, extract from list.
+        if len(bad_class_instance_list) == 1:
+            bad_class_instance = bad_class_instance_list.pop()
+            bad_class_instance.remove_from_frames(int(frame_num))
+        else:
+            # TODO: if you have multiple classifications in a frame then you will need to filter on classification hash.
+            raise NotImplementedError("Only one classification per frame is supported")
+
+# save a copy of the edited label row so it can be checked before uploading
+with open(f"{lr.label_hash}_edited.json", "w") as f:
+    json.dump(lr.to_encord_dict(), f)
+
+print(f"FINISHED LABEL FILE: {lr.label_hash}_edited.json")
+
+# CHECK JSONs BEFORE SAVING ! ! ! ! ! ! !
+# lr.save()
diff --git a/encord/oe_label_cleaning/__init__.py b/encord/oe_label_cleaning/__init__.py