Skip to content

Commit

Permalink
SDK and HTML PR
Browse files Browse the repository at this point in the history
  • Loading branch information
clinton-encord committed Dec 30, 2024
1 parent 93e3754 commit 92ec53a
Show file tree
Hide file tree
Showing 14 changed files with 945 additions and 180 deletions.
4 changes: 4 additions & 0 deletions encord/common/range_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def remove_ranges(self, ranges_to_remove: Ranges) -> None:
for r in ranges_to_remove:
self.remove_range(r)

def clear_ranges(self) -> None:
    """Discard every stored range.

    The ``ranges`` attribute is rebound to a brand-new empty list; the
    previous list object is left untouched.
    """
    emptied: list = []
    self.ranges = emptied

def get_ranges(self) -> Ranges:
"""Return the sorted list of merged ranges."""
copied_ranges = [range.copy() for range in self.ranges]
Expand Down
14 changes: 14 additions & 0 deletions encord/constants/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,17 @@ def from_upper_case_string(string: str) -> DataType:

def to_upper_case_string(self) -> str:
    """Return this member's ``value`` converted to upper case."""
    upper_cased = self.value.upper()
    return upper_cased


# Data types that `is_geometric` reports as geometric.
# NOTE(review): presumably these are the modalities labelled with spatial
# shapes rather than ranges (audio/text) — confirm with the label-row code.
GEOMETRIC_TYPES = {
DataType.VIDEO,
DataType.IMAGE,
DataType.IMG_GROUP,
DataType.DICOM,
DataType.DICOM_STUDY,
DataType.NIFTI,
}


def is_geometric(data_type: DataType) -> bool:
    """Tell whether ``data_type`` is a member of ``GEOMETRIC_TYPES``.

    Args:
        data_type: The data type to look up.

    Returns:
        ``True`` when ``data_type`` is contained in ``GEOMETRIC_TYPES``,
        ``False`` otherwise.
    """
    found = data_type in GEOMETRIC_TYPES
    return found
2 changes: 2 additions & 0 deletions encord/objects/classification_instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ def set_for_frames(
last_edited_at = datetime.now()

if self._range_only:
# Audio range should cover entire audio file
# Text range should always be [0, 0]
self._set_for_ranges(
frames=frames,
overwrite=overwrite,
Expand Down
1 change: 1 addition & 0 deletions encord/objects/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class Shape(StringEnum):
ROTATABLE_BOUNDING_BOX = "rotatable_bounding_box"
BITMASK = "bitmask"
AUDIO = "audio"
TEXT = "text"


class DeidentifyRedactTextMode(Enum):
Expand Down
44 changes: 43 additions & 1 deletion encord/objects/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
from encord.exceptions import LabelRowError
from encord.objects.bitmask import BitmaskCoordinates
from encord.objects.common import Shape
from encord.objects.frames import Ranges
from encord.objects.html_node import HtmlRange
from encord.orm.base_dto import BaseDTO


Expand Down Expand Up @@ -338,11 +340,49 @@ def to_dict(self, by_alias=True, exclude_none=True) -> Dict[str, Any]:


class AudioCoordinates(BaseDTO):
    """
    Represents coordinates for an audio file.

    Attributes:
        range (Ranges): Ranges in milliseconds for audio files.

    Raises:
        ValueError: From ``__post_init__`` when ``range`` is empty.
    """

    range: Ranges

    def __post_init__(self):
        """Reject an empty ``range`` list."""
        # NOTE(review): `__post_init__` is the dataclass hook name. Confirm that
        # BaseDTO actually invokes it; if it does not, this check never runs.
        if len(self.range) == 0:
            raise ValueError("Range list must contain at least one range.")


class TextCoordinates(BaseDTO):
    """
    Coordinates locating a label inside a text file.

    Exactly one of the two attributes is expected to be supplied, and the
    supplied one must be non-empty; ``__post_init__`` raises ``ValueError``
    otherwise.

    Attributes:
        range_html (List[HtmlRange]): A list of HtmlRange objects
        range (Ranges): Ranges of chars for simple text files
    """

    range_html: Optional[List[HtmlRange]] = None
    range: Optional[Ranges] = None

    def __post_init__(self):
        """Check that exactly one non-empty range attribute is present."""
        html_given = self.range_html is not None
        plain_given = self.range is not None

        # Presence checks come first so the emptiness checks below only ever
        # look at the single attribute that was provided.
        if not (html_given or plain_given):
            raise ValueError("At least one of either `range` or `range_html` must be set.")
        if html_given and plain_given:
            raise ValueError("Only one of either `range` or `range_html` must be set.")

        if html_given and len(self.range_html) == 0:
            raise ValueError("Range HTML list must contain at least one html range.")
        if plain_given and len(self.range) == 0:
            raise ValueError("Range list must contain at least one range.")


Coordinates = Union[
AudioCoordinates,
TextCoordinates,
BoundingBoxCoordinates,
RotatableBoundingBoxCoordinates,
PointCoordinate,
Expand All @@ -351,6 +391,7 @@ class AudioCoordinates(BaseDTO):
SkeletonCoordinates,
BitmaskCoordinates,
]

ACCEPTABLE_COORDINATES_FOR_ONTOLOGY_ITEMS: Dict[Shape, Type[Coordinates]] = {
Shape.BOUNDING_BOX: BoundingBoxCoordinates,
Shape.ROTATABLE_BOUNDING_BOX: RotatableBoundingBoxCoordinates,
Expand All @@ -360,4 +401,5 @@ class AudioCoordinates(BaseDTO):
Shape.SKELETON: SkeletonCoordinates,
Shape.BITMASK: BitmaskCoordinates,
Shape.AUDIO: AudioCoordinates,
Shape.TEXT: TextCoordinates,
}
69 changes: 69 additions & 0 deletions encord/objects/html_node.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
---
title: "Objects - HTML Node"
slug: "sdk-ref-objects-html-node"
hidden: false
metadata:
title: "Objects - HTML Node"
description: "Encord SDK Objects - HTML Node."
category: "64e481b57b6027003f20aaa0"
---
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Collection, List, Union, cast

from encord.orm.base_dto import BaseDTO


class HtmlNode(BaseDTO):
    """
    One position inside an HTML document, given as a node plus an offset.

    Attributes:
        node (str): The xpath of the node
        offset (int): The offset of the content from the xpath
    """

    node: str
    offset: int

    def __repr__(self):
        """Render the node as ``(Node: <node> Offset: <offset>)``."""
        return f"(Node: {self.node} Offset: {self.offset})"


class HtmlRange(BaseDTO):
    """
    A class representing a section of HTML with a start and end node.

    Attributes:
        start (HtmlNode): The starting node of the range.
        end (HtmlNode): The ending node of the range.
    """

    start: HtmlNode
    end: HtmlNode

    def __repr__(self) -> str:
        """Render the range as ``(<start> - <end>)`` using each node's repr."""
        return f"({self.start} - {self.end})"

    def to_dict(self):
        """Return the range as a nested dict.

        Returns:
            A dict with ``"start"`` and ``"end"`` keys, each mapping to a dict
            holding that node's ``"node"`` and ``"offset"``.
        """
        return {
            "start": {"node": self.start.node, "offset": self.start.offset},
            "end": {"node": self.end.node, "offset": self.end.offset},
        }

    def __hash__(self) -> int:
        """Hash the range through its textual representation."""
        return hash(self.__repr__())

    @classmethod
    def from_dict(cls, d: dict) -> HtmlRange:
        """Build a range from the nested dict layout produced by ``to_dict``.

        Args:
            d: Dict with ``"start"`` and ``"end"`` entries, each containing
                ``"node"`` and ``"offset"``.

        Returns:
            A new instance of ``cls``.

        Raises:
            KeyError: If any of the expected keys is missing from ``d``.
        """
        # Construct through `cls` rather than the hard-coded class name so that
        # subclasses get instances of their own type back.
        return cls(
            start=HtmlNode(node=d["start"]["node"], offset=d["start"]["offset"]),
            end=HtmlNode(node=d["end"]["node"], offset=d["end"]["offset"]),
        )


# Type alias: a list of HtmlRange objects.
HtmlRanges = List[HtmlRange]
Loading

0 comments on commit 92ec53a

Please sign in to comment.