Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 107 additions & 29 deletions doctr/utils/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ def _detach(boxes: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
return list(loc_preds), list(obj_scores)


def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBox | np.ndarray:
def resolve_enclosing_bbox(
bboxes: list[BoundingBox] | np.ndarray,
) -> BoundingBox | np.ndarray:
"""Compute enclosing bbox either from:

Args:
Expand All @@ -96,7 +98,9 @@ def resolve_enclosing_bbox(bboxes: list[BoundingBox] | np.ndarray) -> BoundingBo
return (min(x), min(y)), (max(x), max(y))


def resolve_enclosing_rbbox(rbboxes: list[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
def resolve_enclosing_rbbox(
rbboxes: list[np.ndarray], intermed_size: int = 1024
) -> np.ndarray:
"""Compute enclosing rotated bbox either from:

Args:
Expand Down Expand Up @@ -130,7 +134,11 @@ def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
"""
angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions
rotation_mat = np.array(
[[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=points.dtype
[
[np.cos(angle_rad), -np.sin(angle_rad)],
[np.sin(angle_rad), np.cos(angle_rad)],
],
dtype=points.dtype,
)
return np.matmul(points, rotation_mat.T)

Expand All @@ -145,10 +153,12 @@ def compute_expanded_shape(img_shape: tuple[int, int], angle: float) -> tuple[in
Returns:
the height and width of the rotated image
"""
points: np.ndarray = np.array([
[img_shape[1] / 2, img_shape[0] / 2],
[-img_shape[1] / 2, img_shape[0] / 2],
])
points: np.ndarray = np.array(
[
[img_shape[1] / 2, img_shape[0] / 2],
[-img_shape[1] / 2, img_shape[0] / 2],
]
)

rotated_points = rotate_abs_points(points, angle)

Expand Down Expand Up @@ -176,7 +186,10 @@ def rotate_abs_geoms(
"""
# Switch to polygons
polys = (
np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1)
np.stack(
[geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]],
axis=1,
)
if geoms.ndim == 2
else geoms
)
Expand All @@ -191,13 +204,19 @@ def rotate_abs_geoms(
# Switch back to a coordinate system whose origin is the top-left corner
target_shape = compute_expanded_shape(img_shape, angle) if expand else img_shape
# Clip coords to fit since there is no expansion
rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(0, target_shape[1])
rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(0, target_shape[0])
rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(
0, target_shape[1]
)
rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(
0, target_shape[0]
)

return rotated_polys


def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int]) -> np.ndarray:
def remap_boxes(
loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape: tuple[int, int]
) -> np.ndarray:
"""Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape.
This does not change the absolute shape of the boxes, but allows computing the new relative RotatedBbox
coordinates after the image has been resized.
Expand All @@ -213,12 +232,18 @@ def remap_boxes(loc_preds: np.ndarray, orig_shape: tuple[int, int], dest_shape:
if len(dest_shape) != 2:
raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
if len(orig_shape) != 2:
raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}")
raise ValueError(
f"Image_shape length should be 2, was found at: {len(orig_shape)}"
)
orig_height, orig_width = orig_shape
dest_height, dest_width = dest_shape
mboxes = loc_preds.copy()
mboxes[:, :, 0] = ((loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width
mboxes[:, :, 1] = ((loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height
mboxes[:, :, 0] = (
(loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2
) / dest_width
mboxes[:, :, 1] = (
(loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2
) / dest_height

return mboxes

Expand Down Expand Up @@ -263,19 +288,31 @@ def rotate_boxes(
# Compute rotation matrix
angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions
rotation_mat = np.array(
[[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=_boxes.dtype
[
[np.cos(angle_rad), -np.sin(angle_rad)],
[np.sin(angle_rad), np.cos(angle_rad)],
],
dtype=_boxes.dtype,
)
# Rotate absolute points
points: np.ndarray = np.stack((_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1)
points: np.ndarray = np.stack(
(_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1
)
image_center = (orig_shape[1] / 2, orig_shape[0] / 2)
rotated_points = image_center + np.matmul(points - image_center, rotation_mat)
rotated_boxes: np.ndarray = np.stack(
(rotated_points[:, :, 0] / orig_shape[1], rotated_points[:, :, 1] / orig_shape[0]), axis=-1
(
rotated_points[:, :, 0] / orig_shape[1],
rotated_points[:, :, 1] / orig_shape[0],
),
axis=-1,
)

# Apply a mask if requested
if target_shape is not None:
rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape)
rotated_boxes = remap_boxes(
rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape
)

return rotated_boxes

Expand Down Expand Up @@ -305,7 +342,14 @@ def rotate_image(
int(max(0, ceil(exp_shape[0] - image.shape[0]))),
int(max(0, ceil(exp_shape[1] - image.shape[1]))),
)
exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
exp_img = np.pad(
image,
(
(h_pad // 2, h_pad - h_pad // 2),
(w_pad // 2, w_pad - w_pad // 2),
(0, 0),
),
)
else:
exp_img = image

Expand All @@ -316,15 +360,38 @@ def rotate_image(
# Pad to get the same aspect ratio
if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]):
# Pad width
if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]):
h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1])
if (rot_img.shape[0] / rot_img.shape[1]) > (
image.shape[0] / image.shape[1]
):
h_pad, w_pad = (
0,
int(
rot_img.shape[0] * image.shape[1] / image.shape[0]
- rot_img.shape[1]
),
)
# Pad height
else:
h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0
rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0)))
h_pad, w_pad = (
int(
rot_img.shape[1] * image.shape[0] / image.shape[1]
- rot_img.shape[0]
),
0,
)
rot_img = np.pad(
rot_img,
(
(h_pad // 2, h_pad - h_pad // 2),
(w_pad // 2, w_pad - w_pad // 2),
(0, 0),
),
)
if preserve_origin_shape:
# rescale
rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)
rot_img = cv2.resize(
rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR
)

return rot_img

Expand Down Expand Up @@ -359,13 +426,17 @@ def estimate_page_angle(polys: np.ndarray) -> float:
with np.errstate(divide="raise", invalid="raise"):
try:
return float(
np.median(np.arctan((yleft - yright) / (xright - xleft)) * 180 / np.pi) # Y axis from top to bottom!
np.median(
np.degrees(np.arctan((yleft - yright) / (xright - xleft)))
) # Y axis from top to bottom!
)
except FloatingPointError:
return 0.0


def convert_to_relative_coords(geoms: np.ndarray, img_shape: tuple[int, int]) -> np.ndarray:
def convert_to_relative_coords(
geoms: np.ndarray, img_shape: tuple[int, int]
) -> np.ndarray:
"""Convert a geometry to relative coordinates

Args:
Expand Down Expand Up @@ -404,7 +475,9 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray) -> list[np.ndarray]:
if boxes.shape[0] == 0:
return []
if boxes.shape[1] != 4:
raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)")
raise AssertionError(
"boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)"
)

# Project relative coordinates
_boxes = boxes.copy()
Expand All @@ -420,7 +493,10 @@ def extract_crops(img: np.ndarray, boxes: np.ndarray) -> list[np.ndarray]:


def extract_rcrops(
img: np.ndarray, polys: np.ndarray, dtype=np.float32, assume_horizontal: bool = False
img: np.ndarray,
polys: np.ndarray,
dtype=np.float32,
assume_horizontal: bool = False,
) -> list[np.ndarray]:
"""Created cropped images from list of rotated bounding boxes

Expand All @@ -436,7 +512,9 @@ def extract_rcrops(
if polys.shape[0] == 0:
return []
if polys.shape[1:] != (4, 2):
raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)")
raise AssertionError(
"polys are expected to be quadrilateral, of shape (N, 4, 2)"
)

# Project relative coordinates
_boxes = polys.copy()
Expand Down