uci-uav-forge · EricPedley · May 16, 2024 · May 16, 2024 · May 16, 2024 · May 16, 2024
diff --git a/yolo_to_yolo/data_types.py b/yolo_to_yolo/data_types.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 
-from yolo_io_types import Task
+from .yolo_io_types import Task
 
 
 class Point(NamedTuple):
@@ -39,3 +39,4 @@ class YoloImageData(NamedTuple):
     task: Task
     image: np.ndarray
     labels: list[YoloLabel]
+
diff --git a/yolo_to_yolo/generic_reader.py b/yolo_to_yolo/generic_reader.py
@@ -0,0 +1,46 @@
+from pathlib import Path
+from typing import Generator
+
+from yolo_to_yolo.data_types import YoloImageData
+from yolo_to_yolo.yolo_io_types import DatasetDescriptor, PredictionTask, Task, YoloSubsetDirs
+
+
+class GenericYoloReader:
+    """
+    Base reader for a YOLO dataset described by a YAML file.
+
+    The descriptor parsed from the YAML is stored on the instance, and its directory
+    and class information is re-exposed through read-only properties. `read` is not
+    implemented in this class.
+    """
+
+    def __init__(
+        self,
+        yaml_path: Path,
+        prediction_task: PredictionTask,
+    ) -> None:
+        """
+        Parse the dataset descriptor found at `yaml_path`.
+
+        `check_dirs_exist()` is invoked on the descriptor immediately after parsing;
+        presumably it fails when a referenced directory is missing (TODO confirm
+        against DatasetDescriptor).
+        """
+        self.yaml_path = yaml_path
+        self.prediction_task = prediction_task
+
+        self.descriptor = DatasetDescriptor.from_yaml(self.yaml_path)
+        self.descriptor.check_dirs_exist()
+
+    @property
+    def parent_dir(self) -> Path:
+        """The descriptor's `parent_dir`."""
+        return self.descriptor.parent_dir
+
+    @property
+    def train_dirs(self) -> YoloSubsetDirs:
+        """The descriptor's `train_dirs`."""
+        return self.descriptor.train_dirs
+
+    @property
+    def val_dirs(self) -> YoloSubsetDirs:
+        """The descriptor's `val_dirs`."""
+        return self.descriptor.val_dirs
+
+    @property
+    def test_dirs(self) -> YoloSubsetDirs:
+        """The descriptor's `test_dirs`."""
+        return self.descriptor.test_dirs
+
+    @property
+    def classes(self) -> tuple[str, ...]:
+        """The descriptor's `classes`."""
+        return self.descriptor.classes
+
+    def read(
+        self,
+        tasks: tuple[Task, ...] = (Task.TRAIN, Task.VAL, Task.TEST),
+        img_file_pattern: str = "*.png",
+    ) -> Generator[YoloImageData, None, None]:
+        """
+        Not implemented here: calling this always raises NotImplementedError.
+
+        Neither `tasks` nor `img_file_pattern` is used by this class.
+        """
+        raise NotImplementedError()
diff --git a/yolo_to_yolo/godot_reader.py b/yolo_to_yolo/godot_reader.py
@@ -0,0 +1,103 @@
+from pathlib import Path
+from typing import Iterable, Generator
+
+import numpy as np
+from PIL import Image
+
+from yolo_to_yolo.generic_reader import GenericYoloReader
+
+from .data_types import YoloImageData, YoloLabel, YoloBbox
+from .yolo_io_types import DatasetDescriptor, Task, PredictionTask
+from .godot_utils import get_polygon, give_normalized_bounding_box
+import os
+
+
+class GodotReader:
+    """
+    Reader for raw Godot-generated training data.
+
+    Reads `images/image{i}.png` and the mask images inside `masks/{i}/` below
+    `dataset_folder_path`.
+
+    Outputs a box for each different label (so 4 boxes per target). Needs to be pipelined
+    into a data transformer that will group the boxes and filter labels to be used in training.
+
+    Example:
+        reader = GodotReader(Path("/datasets/godot_raw/godot_data_0"))
+        writer = YoloWriter(Path("/datasets/godot_processed/0"), PredictionTask.DETECTION, classnames)
+        writer.write(reader.read())
+    """
+    def __init__(
+        self,
+        dataset_folder_path: Path,
+        split_proportions: tuple[float,float,float] = (0.7, 0.2, 0.1)
+    ) -> None:
+        """
+        Args:
+            dataset_folder_path: folder holding the `images` and `masks` subfolders.
+            split_proportions: (train, val, test) fractions. Only the first two are
+                read by `read`; everything past train + val becomes test.
+        """
+        self.dataset_folder_path = dataset_folder_path
+        self.split_proportions = split_proportions
+
+    def read(
+        self,
+    ) -> Generator[YoloImageData, None, None]:
+        """
+        Yield one YoloImageData per image, in index order.
+
+        The image count is the number of entries in `images/`, and the files are
+        expected to be named `image0.png` .. `image{n-1}.png`. Image `i` goes to
+        TRAIN while `i / n` is below the train fraction, to VAL while it is below
+        train + val, and to TEST otherwise.
+        """
+        train_frac, val_frac = self.split_proportions[0], self.split_proportions[1]
+        val_cutoff = train_frac + val_frac
+        num_imgs = len(os.listdir(self.dataset_folder_path / "images"))
+        for i in range(num_imgs):
+            progress = i / num_imgs
+            img_path = self.dataset_folder_path / "images" / f"image{i}.png"
+            masks_path = self.dataset_folder_path / "masks" / f"{i}"
+            if progress < train_frac:
+                task = Task.TRAIN
+            elif progress < val_cutoff:
+                task = Task.VAL
+            else:
+                task = Task.TEST
+            yield self._process_img_path(img_path, masks_path, task, i)
+
+    def _process_img_path(self, img_path: Path, masks_path: Path, task: Task, id: int) -> YoloImageData:
+        """
+        Load one image plus every mask in `masks_path` and build its labels.
+
+        Each mask with a non-empty polygon contributes either a single "person"
+        label or four labels (shape, letter, "shape:<color>", "char:<color>") that
+        all share the mask's bounding box. Masks with no polygon are skipped.
+        """
+        # Context managers close the underlying files as soon as the pixels are copied.
+        with Image.open(img_path) as img_file:
+            image = np.array(img_file)
+        data_labels = []
+        for mask_fname in os.listdir(masks_path):
+            # file names will be like shape_name,letter_name,shape_col,letter_col_index.png
+            with Image.open(masks_path / mask_fname) as mask_file:
+                mask = np.array(mask_file)
+            polygon = get_polygon(mask)
+            if len(polygon) == 0:
+                continue
+            # Scale pixel coordinates by (width, height) to get the 0-1 range.
+            normalized_polygon = polygon / np.array([mask.shape[1], mask.shape[0]])
+            bbox = give_normalized_bounding_box(normalized_polygon)
+            # Split on the LAST underscore only, so a label part that itself contains
+            # an underscore no longer breaks the unpacking.
+            labels, _index = mask_fname.rsplit("_", 1)
+            if labels == 'person':
+                data_labels.append(YoloLabel(location=bbox, classname=labels))
+                continue
+            shape_name, letter_name, shape_col, letter_col = labels.split(",")
+            data_labels.extend(
+                YoloLabel(location=bbox, classname=classname)
+                for classname in (
+                    shape_name,
+                    letter_name,
+                    f"shape:{shape_col}",
+                    f"char:{letter_col}",
+                )
+            )
+        return YoloImageData(
+            img_id=str(id),
+            task=task,
+            image=image,
+            labels=data_labels
+        )
+
+    @staticmethod
+    def _get_id_from_filename(filename: Path) -> str:
+        """Return the file name without its final suffix."""
+        return filename.stem
diff --git a/yolo_to_yolo/godot_utils.py b/yolo_to_yolo/godot_utils.py
@@ -0,0 +1,90 @@
+import cv2
+import numpy as np
+import os
+from dataclasses import dataclass
+from .data_types import YoloBbox
+
+def get_polygon(shape_img: np.ndarray) -> np.ndarray:
+    '''
+    Returns the enclosing polygon of the shape in the image as an (N, 2) array of points.
+
+    Pixels whose gray value is above 253 are treated as the shape. A single external
+    contour is returned as-is; several contours are merged and replaced by their
+    convex hull. When nothing is found an empty (0, 2) array is returned, so
+    `len(result) == 0` still detects "no shape" and the result keeps the same
+    two-column layout as every other return value.
+
+    `shape_img` may be a multi-channel image or an already single-channel 2-D mask.
+    '''
+    # cvtColor rejects 2-D input, so single-channel masks skip the conversion.
+    gray = shape_img if shape_img.ndim == 2 else cv2.cvtColor(shape_img, cv2.COLOR_BGR2GRAY)
+    binary = cv2.threshold(gray, 253, 255, cv2.THRESH_BINARY)[1]
+    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if len(contours) == 0:
+        return np.empty((0, 2), dtype=np.int32)
+    if len(contours) > 1:
+        if os.getenv("VERBOSE") is not None:
+            print("Warning: multiple contours found")
+        # combine contours and return convex hull
+        all_points = np.concatenate([c.reshape(-1, 2) for c in contours])
+        return cv2.convexHull(all_points).reshape(-1, 2)
+    return np.array(contours[0]).reshape(-1, 2)
+
+@dataclass
+class LetterBoxInfo:
+    """Bounding box plus letter label, as built by get_letter_box."""
+    # x / y: smallest polygon x / y coordinate scaled by the image shape and truncated to int.
+    x: int
+    y: int
+    # width / height: polygon extent along x / y scaled by the image shape and truncated to int.
+    width: int
+    height: int
+    # NOTE(review): annotated as int, but get_letter_box forwards its `letter_label: str`
+    # argument here unchanged -- confirm which type is intended.
+    letter_label: int
+
+def get_letter_box(polygon_points: np.ndarray, img_shape: tuple[int, int], letter_label: str) -> LetterBoxInfo:
+    '''
+    Returns the bounding box for the shape containing the letter.
+
+    Args:
+        polygon_points: points as [[x, y], [x, y], ...]; only the first two
+            coordinates of each point are used.
+        img_shape: x coordinates are multiplied by img_shape[0] and y coordinates by
+            img_shape[1], so for normalized points this must be ordered
+            (x extent, y extent). NOTE(review): a numpy `.shape` is ordered
+            (rows, cols) -- confirm callers pass the order this function expects.
+        letter_label: stored unchanged on the result.
+
+    Returns:
+        LetterBoxInfo with the scaled minimum corner and extent, truncated to int.
+
+    Raises:
+        ValueError: if polygon_points is empty (previously this surfaced as an
+            opaque TypeError from multiplying None).
+    '''
+    points = np.asarray(polygon_points)
+    if points.size == 0:
+        raise ValueError("polygon_points must contain at least one point")
+
+    xs, ys = points[:, 0], points[:, 1]
+    x_min, x_max = xs.min(), xs.max()
+    y_min, y_max = ys.min(), ys.max()
+
+    return LetterBoxInfo(
+        int(x_min * img_shape[0]),
+        int(y_min * img_shape[1]),
+        int((x_max - x_min) * img_shape[0]),
+        int((y_max - y_min) * img_shape[1]),
+        letter_label,
+    )
+
+
+def give_normalized_bounding_box(norm_polygon_array: np.ndarray) -> "YoloBbox | None":
+    '''
+    Returns bounding box as proportion of image dimensions, x_center,y_center,w,h.
+
+    `norm_polygon_array` holds one point per row with x in column 0 and y in
+    column 1. Returns None when the array contains no points at all, whatever its
+    shape -- including the 1-D empty array that results from `np.array([])`.
+    '''
+    points = np.asarray(norm_polygon_array)
+    # Check for emptiness BEFORE column indexing: a 1-D empty array has no columns,
+    # so indexing it first would raise IndexError instead of reaching this guard.
+    if points.size == 0:
+        return None
+
+    x_coord = points[:, 0]
+    y_coord = points[:, 1]
+
+    min_x, min_y = np.min(x_coord), np.min(y_coord)
+    max_x, max_y = np.max(x_coord), np.max(y_coord)
+    w = max_x - min_x
+    h = max_y - min_y
+    x = (max_x + min_x) / 2
+    y = (max_y + min_y) / 2
+
+    return YoloBbox(x, y, w, h)
+
+
+def preprocess_img(img):
+    '''
+    Randomly blurs and adds noise to roughly half of the images passed in.
+
+    With probability 1/2 the image is returned untouched. Otherwise it is blurred
+    (Gaussian or box filter, odd kernel size from 3 to 9), Gaussian noise is added,
+    and the result is clamped to 0-255. The returned array always has the dtype of
+    the input: previously the augmented branch silently became float64 while the
+    untouched branch kept the original dtype.
+
+    Expects a numpy image array. NOTE(review): the clamp assumes a 0-255 value
+    range -- confirm no caller passes 0-1 float images.
+    '''
+    # only augment half of them (why? I made this up on a whim.)
+    if np.random.randint(0, 2) != 0:
+        return img
+
+    # blur image with random kernel size
+    kernel_size = 3 + 2 * np.random.randint(0, 4)
+    if np.random.randint(0, 2) == 0:
+        blurred = cv2.GaussianBlur(img, (kernel_size, kernel_size), 0)
+    else:
+        blurred = cv2.boxFilter(img, -1, (kernel_size, kernel_size))
+
+    # add random noise; this value is the standard deviation given to np.random.normal
+    noise_std = np.random.randint(2, 20)
+    noisy = blurred + np.random.normal(0, noise_std, blurred.shape)
+
+    # clamp values to 0-255
+    noisy = np.clip(noisy, 0, 255)
+    if np.issubdtype(img.dtype, np.integer):
+        # Round before casting so integer images are not biased down by truncation.
+        noisy = np.rint(noisy)
+    return noisy.astype(img.dtype)
diff --git a/yolo_to_yolo/run_godot_reader.py b/yolo_to_yolo/run_godot_reader.py
@@ -0,0 +1,54 @@
+from yolo_to_yolo.godot_reader import GodotReader
+from yolo_to_yolo.yolo_io import YoloWriter
+from yolo_to_yolo.yolo_io_types import PredictionTask
+from yolo_to_yolo.data_types import YoloImageData
+from pathlib import Path
+from tqdm import tqdm
+
+# run me with py -m yolo_to_yolo.run_godot_reader
+if __name__ == "__main__":
+    dataset_id = '4000'
+    in_path = f'/datasets/godot_raw/godot_data_{dataset_id}'
+    out_path = f'/datasets/godot_processed/{dataset_id}_all_labels'
+
+    reader = GodotReader(Path(in_path))
+
+    # Class names handed to the writer, in this order: shapes (plus "person"),
+    # alphanumeric characters, shape colors, then character colors.
+    shapes = [
+        "circle",
+        "semicircle",
+        "quartercircle",
+        "triangle",
+        "rectangle",
+        "pentagon",
+        "star",
+        "cross",
+        "person",
+    ]
+    characters = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")
+    colors = ["white", "black", "red", "blue", "green", "purple", "brown", "orange"]
+    shape_classnames = (
+        shapes
+        + characters
+        + [f"shape:{color}" for color in colors]
+        + [f"char:{color}" for color in colors]
+    )
+
+    writer = YoloWriter(Path(out_path), PredictionTask.DETECTION, shape_classnames)
+
+    # tqdm wraps the generator so progress is shown while the writer consumes it.
+    writer.write(tqdm(reader.read()))
+