diff --git a/config/EfficientNet-Lite/nanodet-EfficientNet-Lite0_320.yml b/config/EfficientNet-Lite/nanodet-EfficientNet-Lite0_320.yml index 1e43f1034..c621021ab 100644 --- a/config/EfficientNet-Lite/nanodet-EfficientNet-Lite0_320.yml +++ b/config/EfficientNet-Lite/nanodet-EfficientNet-Lite0_320.yml @@ -64,6 +64,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]] val: name: CocoDataset diff --git a/config/EfficientNet-Lite/nanodet-EfficientNet-Lite1_416.yml b/config/EfficientNet-Lite/nanodet-EfficientNet-Lite1_416.yml index 2e83ab378..2ec4c28db 100644 --- a/config/EfficientNet-Lite/nanodet-EfficientNet-Lite1_416.yml +++ b/config/EfficientNet-Lite/nanodet-EfficientNet-Lite1_416.yml @@ -65,6 +65,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]] val: name: CocoDataset diff --git a/config/EfficientNet-Lite/nanodet-EfficientNet-Lite2_512.yml b/config/EfficientNet-Lite/nanodet-EfficientNet-Lite2_512.yml index 62278a6c6..cce13ed1c 100644 --- a/config/EfficientNet-Lite/nanodet-EfficientNet-Lite2_512.yml +++ b/config/EfficientNet-Lite/nanodet-EfficientNet-Lite2_512.yml @@ -65,6 +65,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[127.0, 127.0, 127.0], [128.0, 128.0, 128.0]] val: name: CocoDataset diff --git a/config/RepVGG/nanodet-RepVGG-A0_416.yml b/config/RepVGG/nanodet-RepVGG-A0_416.yml index 669451275..34a0cfb05 100644 --- a/config/RepVGG/nanodet-RepVGG-A0_416.yml +++ b/config/RepVGG/nanodet-RepVGG-A0_416.yml @@ -61,6 +61,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: name: CocoDataset diff --git a/config/nanodet-g.yml b/config/nanodet-g.yml index 93cb9820b..a20e248ee 100644 --- a/config/nanodet-g.yml +++ 
b/config/nanodet-g.yml @@ -68,6 +68,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: name: CocoDataset diff --git a/config/nanodet-m-0.5x.yml b/config/nanodet-m-0.5x.yml index f5e6e85c9..5cce3be90 100644 --- a/config/nanodet-m-0.5x.yml +++ b/config/nanodet-m-0.5x.yml @@ -63,6 +63,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: name: CocoDataset diff --git a/config/nanodet-m-1.5x-416.yml b/config/nanodet-m-1.5x-416.yml index f4ff310bb..c297b0bb1 100644 --- a/config/nanodet-m-1.5x-416.yml +++ b/config/nanodet-m-1.5x-416.yml @@ -63,6 +63,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: name: CocoDataset diff --git a/config/nanodet-m-1.5x.yml b/config/nanodet-m-1.5x.yml index c622c2f73..75742b06f 100644 --- a/config/nanodet-m-1.5x.yml +++ b/config/nanodet-m-1.5x.yml @@ -63,6 +63,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: name: CocoDataset diff --git a/config/nanodet-m-416.yml b/config/nanodet-m-416.yml index 58c84ad8d..b055894dc 100644 --- a/config/nanodet-m-416.yml +++ b/config/nanodet-m-416.yml @@ -63,6 +63,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: name: CocoDataset diff --git a/config/nanodet-m.yml b/config/nanodet-m.yml index 1c719fd37..ae3246b36 100644 --- a/config/nanodet-m.yml +++ b/config/nanodet-m.yml @@ -57,6 +57,7 @@ data: brightness: 0.2 contrast: [0.6, 1.4] saturation: [0.5, 1.2] + jitter_box: 0.0 normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: name: CocoDataset diff --git 
def get_jitter_boxes(boxes, ratio=0.0):
    """
    Randomly jitter every boundary of each box around the box center.

    Each of the four boundaries is moved independently: its distance to the
    box center is scaled by a factor drawn uniformly from
    ``[1 - ratio, 1 + ratio)``. The box center itself is left unchanged.

    :param boxes: array of shape (N, 4+) whose first four columns are
        ``x_min, y_min, x_max, y_max``.
    :param ratio: maximum relative change of each half-extent. ``0`` disables
        jittering. Values above 1 can produce a negative scale, which moves a
        boundary past the box center.
    :return: new (N, 4) array of jittered boxes. Floating point input dtypes
        are preserved; any other dtype is returned as float64. The input
        array is never modified.
    """
    boxes = np.asarray(boxes)
    # Keep float32 annotations float32; integer boxes must become float so the
    # jittered coordinates are not truncated.
    out_dtype = boxes.dtype if np.issubdtype(boxes.dtype, np.floating) else np.float64

    # A disabled jitter (or nothing to jitter) must be a strict no-op: no
    # random numbers drawn from the global RNG, no rounding drift from
    # rebuilding the corners out of center and size.
    if ratio == 0 or boxes.shape[0] == 0:
        return boxes[:, :4].astype(out_dtype) if boxes.ndim == 2 else boxes.astype(out_dtype)

    x_min, y_min, x_max, y_max = (boxes[:, i] for i in range(4))
    half_width = (x_max - x_min) / 2.0
    half_height = (y_max - y_min) / 2.0
    x_center = x_min + half_width
    y_center = y_min + half_height

    # One independent scale factor per boundary. Column order of the random
    # draw is (y_min, x_min, y_max, x_max).
    distortion = 1.0 + np.random.uniform(-ratio, ratio, boxes.shape)

    jitter_boxes = np.vstack(
        (
            x_center - half_width * distortion[:, 1],
            y_center - half_height * distortion[:, 0],
            x_center + half_width * distortion[:, 3],
            y_center + half_height * distortion[:, 2],
        )
    ).T
    return jitter_boxes.astype(out_dtype)
""" def __init__( @@ -287,6 +312,7 @@ def __init__( shear: float = 0.0, translate: float = 0.0, flip: float = 0.0, + jitter_box: float = 0.0, **kwargs ): self.keep_ratio = keep_ratio @@ -298,6 +324,7 @@ def __init__( self.shear_degree = shear self.flip_prob = flip self.translate_ratio = translate + self.jitter_box_ratio = jitter_box def __call__(self, meta_data, dst_shape): raw_img = meta_data["img"] @@ -342,6 +369,7 @@ def __call__(self, meta_data, dst_shape): meta_data["warp_matrix"] = M if "gt_bboxes" in meta_data: boxes = meta_data["gt_bboxes"] + boxes = get_jitter_boxes(boxes, self.jitter_box_ratio) meta_data["gt_bboxes"] = warp_boxes(boxes, M, dst_shape[0], dst_shape[1]) if "gt_masks" in meta_data: for i, mask in enumerate(meta_data["gt_masks"]):