Skip to content

Commit a71b68b

Browse files
Travvy88 Nikita Shevtsov
and
Nikita Shevtsov
authored
TLDR-473 add bbox class from dedoc (#13)
Co-authored-by: Nikita Shevtsov <[email protected]>
1 parent dd93431 commit a71b68b

File tree

14 files changed

+93
-26
lines changed

14 files changed

+93
-26
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
Changelog
22
=========
33

4+
v0.3.3 (2023-09-28)
5+
-------------------
6+
* Update `BBox` class
7+
48
v0.3.2 (2023-09-25)
59
-------------------
610
* Add intervals to dependencies versions

VERSION

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.3.2
1+
0.3.3

dedocutils/data_structures/bbox.py

+58-5
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1+
import math
12
from collections import OrderedDict
23
from dataclasses import dataclass
34
from typing import Dict, Tuple
45

6+
import numpy as np
7+
58

69
@dataclass
710
class BBox:
811
"""
9-
Box around some page object, the coordinate system starts from top left corner
12+
Bounding box around some page object, the coordinate system starts from top left corner.
13+
"""
14+
"""
1015
1116
0------------------------------------------------------------------------------------------------> x
1217
| BBox
@@ -21,10 +26,19 @@ class BBox:
2126
|
2227
V y
2328
"""
24-
x_top_left: int
25-
y_top_left: int
26-
width: int
27-
height: int
29+
def __init__(self, x_top_left: int, y_top_left: int, width: int, height: int) -> None:
30+
"""
31+
The following parameters should be specified in pixels.
32+
33+
:param x_top_left: x coordinate of the bbox top left corner
34+
:param y_top_left: y coordinate of the bbox top left corner
35+
:param width: bounding box width
36+
:param height: bounding box height
37+
"""
38+
self.x_top_left = x_top_left
39+
self.y_top_left = y_top_left
40+
self.width = width
41+
self.height = height
2842

2943
@property
3044
def x_bottom_right(self) -> int:
@@ -34,17 +48,56 @@ def x_bottom_right(self) -> int:
3448
def y_bottom_right(self) -> int:
3549
return self.y_top_left + self.height
3650

51+
@staticmethod
52+
def crop_image_by_box(image: np.ndarray, bbox: "BBox") -> np.ndarray:
53+
return image[bbox.y_top_left:bbox.y_bottom_right, bbox.x_top_left:bbox.x_bottom_right]
54+
55+
def rotate_coordinates(self, angle_rotate: float, image_shape: Tuple[int]) -> None:
56+
xb, yb = self.x_top_left, self.y_top_left
57+
xe, ye = self.x_bottom_right, self.y_bottom_right
58+
rad = angle_rotate * math.pi / 180
59+
60+
xc = image_shape[1] / 2
61+
yc = image_shape[0] / 2
62+
63+
bbox_xb = min((int(float(xb - xc) * math.cos(rad) - float(yb - yc) * math.sin(rad) + xc)), image_shape[1])
64+
bbox_yb = min((int(float(yb - yc) * math.cos(rad) + float(xb - xc) * math.sin(rad) + yc)), image_shape[0])
65+
bbox_xe = min((int(float(xe - xc) * math.cos(rad) - float(ye - yc) * math.sin(rad) + xc)), image_shape[1])
66+
bbox_ye = min((int(float(ye - yc) * math.cos(rad) + float(xe - xc) * math.sin(rad) + yc)), image_shape[0])
67+
self.__init__(bbox_xb, bbox_yb, bbox_xe - bbox_xb, bbox_ye - bbox_yb)
68+
69+
def __str__(self) -> str:
70+
return f"BBox(x = {self.x_top_left} y = {self.y_top_left}, w = {self.width}, h = {self.height})"
71+
72+
def __repr__(self) -> str:
73+
return self.__str__()
74+
3775
@property
3876
def square(self) -> int:
77+
"""
78+
Area of the bbox (width * height).
79+
"""
3980
return self.height * self.width
4081

4182
@staticmethod
4283
def from_two_points(top_left: Tuple[int, int], bottom_right: Tuple[int, int]) -> "BBox":
84+
"""
85+
Make the bounding box from two points.
86+
87+
:param top_left: (x, y) point of the bbox top left corner
88+
:param bottom_right: (x, y) point of the bbox bottom right corner
89+
"""
4390
x_top_left, y_top_left = top_left
4491
x_bottom_right, y_bottom_right = bottom_right
4592
return BBox(x_top_left=x_top_left, y_top_left=y_top_left, width=x_bottom_right - x_top_left, height=y_bottom_right - y_top_left)
4693

4794
def have_intersection_with_box(self, box: "BBox", threshold: float = 0.3) -> bool:
95+
"""
96+
Check if the current bounding box has an intersection with another one.
97+
98+
:param box: another bounding box to check intersection with
99+
:param threshold: the lowest value of the intersection over union used to get the boolean result
100+
"""
48101
# determine the (x, y)-coordinates of the intersection rectangle
49102
x_min = max(self.x_top_left, box.x_top_left)
50103
y_min = max(self.y_top_left, box.y_top_left)

dedocutils/text_detection/doctr_text_detector/doctr/models/classification/resnet/pytorch.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
# noqa
22
from typing import Any, Callable, Dict, List, Optional, Tuple
3+
34
from torch import nn
45
from torchvision.models.resnet import BasicBlock
56
from torchvision.models.resnet import ResNet as TVResNet
67
from torchvision.models.resnet import resnet18 as tv_resnet18
78
from torchvision.models.resnet import resnet34 as tv_resnet34
89
from torchvision.models.resnet import resnet50 as tv_resnet50
10+
911
from dedocutils.text_detection.doctr_text_detector.doctr.datasets.vocabs import VOCABS
1012
from ...utils import conv_sequence_pt, load_pretrained_params
1113

dedocutils/text_detection/doctr_text_detector/doctr/models/classification/zoo.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from typing import Any
22

3+
from .predictor import CropOrientationPredictor
34
from .. import classification
45
from ..preprocessor import PreProcessor
5-
from .predictor import CropOrientationPredictor
66

77
__all__ = ["crop_orientation_predictor"]
88

dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/base.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1+
from typing import List, Tuple
2+
13
import cv2
24
import numpy as np
35
import pyclipper
4-
5-
from typing import List, Tuple
66
from shapely.geometry import Polygon
7+
78
from ..core import DetectionPostProcessor
89

910
__all__ = ['DBPostProcessor']

dedocutils/text_detection/doctr_text_detector/doctr/models/detection/differentiable_binarization/pytorch.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
1+
from typing import Any, Callable, Dict, List, Optional
2+
13
import numpy as np
24
import torch
3-
45
from torch import nn
5-
from typing import Any, Callable, Dict, List, Optional
66
from torch.nn import functional as F
77
from torchvision.models import resnet34, resnet50
88
from torchvision.models._utils import IntermediateLayerGetter
99
from torchvision.ops.deform_conv import DeformConv2d
10-
from ...utils import load_pretrained_params
10+
1111
from .base import DBPostProcessor, _DBNet
12+
from ...utils import load_pretrained_params
1213

1314
__all__ = ['DBNet', 'db_resnet50', 'db_resnet34', 'db_resnet50_rotation']
1415

dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/base.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1+
from typing import List, Tuple
2+
13
import cv2
24
import numpy as np
35
import pyclipper
4-
56
from shapely.geometry import Polygon
6-
from typing import List, Tuple
7+
78
from dedocutils.text_detection.doctr_text_detector.doctr.models.core import BaseModel
89
from ..core import DetectionPostProcessor
910

dedocutils/text_detection/doctr_text_detector/doctr/models/detection/linknet/pytorch.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
1+
from typing import Any, Callable, Dict, List, Optional, Tuple
2+
13
import numpy as np
24
import torch
3-
45
from torch import nn
5-
from typing import Any, Callable, Dict, List, Optional, Tuple
66
from torch.nn import functional as F
77
from torchvision.models._utils import IntermediateLayerGetter
8+
89
from dedocutils.text_detection.doctr_text_detector.doctr.models.classification import resnet18, resnet34, resnet50
9-
from ...utils import load_pretrained_params
1010
from .base import LinkNetPostProcessor, _LinkNet
11+
from ...utils import load_pretrained_params
1112

1213
__all__ = ['LinkNet', 'linknet_resnet18', 'linknet_resnet34', 'linknet_resnet50']
1314

dedocutils/text_detection/doctr_text_detector/doctr/models/detection/zoo.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from typing import Any
22

3+
from .predictor import DetectionPredictor
34
from .. import detection
45
from ..preprocessor import PreProcessor
5-
from .predictor import DetectionPredictor
66

77
__all__ = ["detection_predictor"]
88

dedocutils/text_detection/doctr_text_detector/doctr/transforms/functional/pytorch.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
# noqa
2-
import numpy as np
3-
import torch
4-
52
from copy import deepcopy
63
from typing import Tuple
4+
5+
import numpy as np
6+
import torch
77
from torchvision.transforms import functional as F
8+
89
from dedocutils.text_detection.doctr_text_detector.doctr.utils.geometry import rotate_abs_geoms
910
from .base import create_shadow_mask, crop_boxes
1011

dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import math
22
import random
3+
from typing import Any, Callable, Dict, List, Tuple
4+
35
import numpy as np
46

5-
from typing import Any, Callable, Dict, List, Tuple
67
from dedocutils.text_detection.doctr_text_detector.doctr.utils.repr import NestedObject
78
from .. import functional as F
89

dedocutils/text_detection/doctr_text_detector/doctr/transforms/modules/pytorch.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
# noqa
22
import math
3+
from typing import Any, Dict, Optional, Tuple, Union
4+
35
import numpy as np
46
import torch
5-
6-
from typing import Any, Dict, Optional, Tuple, Union
77
from PIL.Image import Image
88
from torch.nn.functional import pad
99
from torchvision.transforms import functional as F
1010
from torchvision.transforms import transforms as T
11+
1112
from ..functional.pytorch import random_shadow
1213

1314
__all__ = ['Resize', 'GaussianNoise', 'ChannelShuffle', 'RandomHorizontalFlip', 'RandomShadow']

dedocutils/text_detection/doctr_text_detector/doctr/utils/geometry.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1+
from math import ceil
2+
from typing import List, Optional, Tuple, Union
3+
14
import cv2
25
import numpy as np
36

4-
from math import ceil
5-
from typing import List, Optional, Tuple, Union
67
from .common_types import BoundingBox, Polygon4P
78

89
__all__ = ['bbox_to_polygon', 'polygon_to_bbox', 'resolve_enclosing_bbox', 'resolve_enclosing_rbbox',

0 commit comments

Comments
 (0)