uci-uav-forge · MinhxNguyen7 · Apr 19, 2024 · May 1, 2024 · May 1, 2024 · May 1, 2024
diff --git a/.gitignore b/.gitignore
@@ -16,7 +16,12 @@ recorded_images
 .vscode
 test/*.png
 trained_model.pth
-venv/
+/*venv*/
+
+# PyTorch Lightning Logs
+lightning_logs
+
+# Flight data
 logs_esc
 gpx/
 flight_logs
diff --git a/imaging_training_2024 b/imaging_training_2024
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,3 @@
 ultralytics==8.0.190
+line_profiler
 pyserial
diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
     name='uavf_2024',
     version='0.0.1',
     packages=find_packages(include=[
-        'uavf_2024', 'uavf_2024.*'
+        'uavf_2024', 'uavf_2024.*', 'imaging_training_2024', 'imaging_training_2024.*'
     ]),
     install_requires=[
         'numpy',

diff --git a/tests/imaging/general_classifier_tests.py b/tests/imaging/general_classifier_tests.py
@@ -0,0 +1,37 @@
+from typing import Sequence, get_type_hints
+from unittest import TestCase
+
+import torch
+
+from uavf_2024.imaging.general_classifier import GeneralClassifier, ResNet, resnet18, resnet34, resnet50
+
+
+class ResNetTests(TestCase):
+    """
+    Output-shape sanity checks for the multi-head ResNet factories.
+
+    Each factory is built with the head sizes in NUM_CLASSES and fed a random
+    batch of shape INPUT_SHAPE. The model must return one tensor per head, each
+    of shape (INPUT_SHAPE[0], number of classes for that head).
+    """
+    NUM_CLASSES = [2, 3, 4]
+    INPUT_SHAPE = (3, 3, 224, 224)
+
+    def _resnet_sanity_check(self, model: ResNet):
+        """Run one forward pass on random data and check the count and shape of the outputs."""
+        head_sizes = ResNetTests.NUM_CLASSES
+        batch_size = ResNetTests.INPUT_SHAPE[0]
+
+        output: list[torch.Tensor] = model(torch.randn(ResNetTests.INPUT_SHAPE))
+
+        # One output tensor is expected per requested head.
+        self.assertEqual(len(output), len(head_sizes))
+
+        for head_output, num_classes in zip(output, head_sizes):
+            self.assertEqual(head_output.shape, (batch_size, num_classes))
+
+    def test_resnet18(self):
+        self._resnet_sanity_check(resnet18(ResNetTests.NUM_CLASSES))
+
+    def test_resnet34(self):
+        self._resnet_sanity_check(resnet34(ResNetTests.NUM_CLASSES))
+
+    def test_resnet50(self):
+        self._resnet_sanity_check(resnet50(ResNetTests.NUM_CLASSES))
+
diff --git a/tests/imaging/image_processor_tests.py b/tests/imaging/image_processor_tests.py
@@ -1,20 +1,22 @@
 from __future__ import annotations
+
+import os
 import shutil
-import torch
-from torchvision.ops import box_iou
 import unittest
-from uavf_2024.imaging.image_processor import ImageProcessor
-from uavf_2024.imaging.imaging_types import HWC, FullBBoxPrediction, FullBBoxGroundTruth, Image, CertainTargetDescriptor, LETTERS, SHAPES, COLORS
-from uavf_2024.imaging import profiler
-import numpy as np
-import os
 from time import time
-from tqdm import tqdm
-import line_profiler
-from memory_profiler import profile as mem_profile
+
+import cv2  # for debugging purposes
+import numpy as np
 import pandas as pd
-import sys
-import cv2 #for debugging purposes
+import torch
+from memory_profiler import profile as mem_profile
+from torchvision.ops import box_iou
+from tqdm import tqdm
+
+from uavf_2024.imaging import profiler
+from uavf_2024.imaging.image_processor import ImageProcessor
+from uavf_2024.imaging.imaging_types import FullBBoxPrediction, FullBBoxGroundTruth, Image, CertainTargetDescriptor, \
+    LEGACY_LETTERS, SHAPES, COLORS
 
 CURRENT_FILE_PATH = os.path.dirname(os.path.realpath(__file__))
 
@@ -181,7 +183,7 @@ def parse_str_dataset(imgs_path, labels_path) -> tuple[list[Image], list[list[Fu
 
 def generate_confusion_matrices(true_values: list[list[FullBBoxGroundTruth]], pred_values: list[list[FullBBoxPrediction]], out_folder: str) -> None:
     shape_confusion = np.zeros((len(SHAPES), len(SHAPES)))
-    letter_confusion = np.zeros((len(LETTERS), len(LETTERS)))
+    letter_confusion = np.zeros((len(LEGACY_LETTERS), len(LEGACY_LETTERS)))
     shape_col_confusion = np.zeros((len(COLORS), len(COLORS)))
     letter_col_confusion = np.zeros((len(COLORS), len(COLORS)))
 
@@ -216,7 +218,7 @@ def generate_confusion_matrices(true_values: list[list[FullBBoxGroundTruth]], pr
     for name, confusion_matrix, index in zip(
         ["shape", "letter", "shape_col", "letter_col"],
         [shape_confusion, letter_confusion, shape_col_confusion, letter_col_confusion],
-        [SHAPES, LETTERS, COLORS, COLORS]
+        [SHAPES, LEGACY_LETTERS, COLORS, COLORS]
     ):
         for i in range(len(index)):
             if confusion_matrix[i,i] < max(confusion_matrix[i]):
@@ -237,8 +239,8 @@ def test_runs_without_crashing(self):
     @profiler
     def test_benchmark_fullsize_images(self):
         image_processor = ImageProcessor(
-            shape_batch_size=20,
-            letter_batch_size=30
+            detector_batch_size=20,
+            classifier_batch_size=30
         )
         sample_input = Image.from_file(f"{CURRENT_FILE_PATH}/2024_test_data/fullsize_dataset/images/1080p.png")
         times = []
@@ -277,7 +279,7 @@ def test_metrics(self, gen_confusion_matrices = True):
             prediction_list = []
 
         for img, ground_truth in zip(imgs, labels):
-            predictions = image_processor.process_image(img)
+            predictions = list(image_processor.process_image(img))
 
 
             if gen_confusion_matrices:
@@ -335,7 +337,7 @@ def test_irl_dataset(self, gen_confusion_matrices = True, verbose=True, **kwargs
             prediction_list = []
 
         for img, ground_truth in zip(imgs, labels):
-            predictions = image_processor.process_image(img)
+            predictions = list(image_processor.process_image(img))
 
 
             if gen_confusion_matrices:
@@ -386,9 +388,8 @@ def test_lightweight_process_many(self):
         # assert the result is a list[fullbboxpred] and has numbers in prob_descriptors
         image_processor = ImageProcessor()
         sample_input = Image.from_file(f"{CURRENT_FILE_PATH}/2024_test_data/fullsize_dataset/images/1080p.png")
-        res = image_processor.process_image_lightweight(sample_input)
+        res = list(image_processor.process_image_lightweight(sample_input))
 
-        assert type(res) is list
         assert type(res[0]) is FullBBoxPrediction
         if len(res) > 1:
             assert np.any(res[0].descriptor.letter_probs) and np.any(res[0].descriptor.shape_col_probs)

diff --git a/tests/imaging/integ_tests.py b/tests/imaging/integ_tests.py
@@ -3,7 +3,7 @@
 from uavf_2024.imaging.area_coverage import AreaCoverageTracker
 from uavf_2024.imaging.image_processor import ImageProcessor
 from uavf_2024.imaging.tracker import TargetTracker
-from uavf_2024.imaging.imaging_types import FullBBoxPrediction, Image, ProbabilisticTargetDescriptor, Target3D, COLORS, SHAPES, LETTERS, CertainTargetDescriptor
+from uavf_2024.imaging.imaging_types import FullBBoxPrediction, Image, ProbabilisticTargetDescriptor, Target3D, COLORS, SHAPES, LEGACY_LETTERS, CertainTargetDescriptor
 from uavf_2024.imaging.utils import calc_match_score
 import os
 import numpy as np

diff --git a/uavf_2024/imaging/detection/__init__.py b/uavf_2024/imaging/detection/__init__.py
@@ -0,0 +1,2 @@
+from .shape_detector import ShapeDetector
+from .one_detector import OneDetector
diff --git a/uavf_2024/imaging/detection/one_detector.py b/uavf_2024/imaging/detection/one_detector.py
@@ -0,0 +1,38 @@
+import os
+from pathlib import Path
+from .yolo_detector import YOLODetector
+
+
+# Directory containing this module; OneDetector joins its model_path onto this directory.
+_CURRENT_FILE_DIR = Path(os.path.realpath(__file__)).parent
+
+
+class OneDetector(YOLODetector):
+    """
+    Single-class target detector; the first shot of the two-shot pipeline.
+
+    The weights path is joined onto the directory that contains this module.
+    """
+
+    # Placeholder
+    # TODO: Update with actual confusion matrix. These are copied values.
+    # Keyed by shape name, with nine values per entry.
+    CONFUSION_MATRIX: dict[str, list[float]] = {
+        'circle': [0.83, 0, 0, 0, 0, 0.01, 0, 0, 0],
+        'semicircle': [0.01, 0.67, 0.28, 0.02, 0.05, 0.03, 0, 0, 0.01],
+        'quartercircle': [0, 0.18, 0.43, 0, 0.41, 0.17, 0, 0, 0],
+        'triangle': [0, 0.03, 0, 0.91, 0.01, 0, 0, 0, 0],
+        'rectangle': [0.01, 0, 0.19, 0, 0.46, 0.08, 0, 0, 0],
+        'pentagon': [0.10, 0.03, 0.08, 0, 0.01, 0.68, 0, 0, 0],
+        'star': [0, 0.01, 0, 0.04, 0, 0, 0.97, 0.02, 0],
+        'cross': [0, 0.04, 0, 0.01, 0, 0, 0, 0.96, 0.03],
+        'person': [0, 0.01, 0, 0.01, 0.01, 0, 0, 0, 0.91]
+    }
+
+    def __init__(
+        self,
+        img_size: int = 640,
+        model_path: str = "weights/v8n-isaac-target-only.pt",
+        confusion_matrix: dict[str, list[float]] | None = None,
+    ):
+        """
+        Args:
+            img_size: Passed through to YOLODetector.
+            model_path: Weights file, joined onto this module's directory.
+            confusion_matrix: Replacement for the class-level CONFUSION_MATRIX;
+                the class-level placeholder is used when this is None.
+        """
+        if confusion_matrix is None:
+            confusion_matrix = OneDetector.CONFUSION_MATRIX
+
+        weights_path = _CURRENT_FILE_DIR / model_path
+        super().__init__(img_size, weights_path, confusion_matrix)
diff --git a/uavf_2024/imaging/detection/shape_detector.py b/uavf_2024/imaging/detection/shape_detector.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+import os
+
+from .yolo_detector import YOLODetector
+
+# Directory containing this module (despite the name, not the file itself);
+# used to build the default weights path for ShapeDetector.
+CURRENT_FILE_PATH = os.path.dirname(os.path.realpath(__file__))
+
+
+class ShapeDetector(YOLODetector):
+    """
+    YOLO-based detector configured with the shape confusion matrix below.
+
+    By default the weights are loaded from the `weights` folder next to this module.
+    """
+
+    # Keyed by shape name, with nine values per entry.
+    CONFUSION_MATRIX: dict[str, list[float]] = {
+        'circle': [0.83, 0, 0, 0, 0, 0.01, 0, 0, 0],
+        'semicircle': [0.01, 0.67, 0.28, 0.02, 0.05, 0.03, 0, 0, 0.01],
+        'quartercircle': [0, 0.18, 0.43, 0, 0.41, 0.17, 0, 0, 0],
+        'triangle': [0, 0.03, 0, 0.91, 0.01, 0, 0, 0, 0],
+        'rectangle': [0.01, 0, 0.19, 0, 0.46, 0.08, 0, 0, 0],
+        'pentagon': [0.10, 0.03, 0.08, 0, 0.01, 0.68, 0, 0, 0],
+        'star': [0, 0.01, 0, 0.04, 0, 0, 0.97, 0.02, 0],
+        'cross': [0, 0.04, 0, 0.01, 0, 0, 0, 0.96, 0.03],
+        'person': [0, 0.01, 0, 0.01, 0.01, 0, 0, 0, 0.91]
+    }
+
+    def __init__(
+        self,
+        img_size: int = 640,
+        model_path: str = f"{CURRENT_FILE_PATH}/weights/v8n-best.pt",
+        confusion_matrix: dict[str, list[float]] | None = None,
+    ):
+        """
+        Args:
+            img_size: Passed through to YOLODetector.
+            model_path: Path to the YOLO weights file, passed through unchanged.
+            confusion_matrix: Replacement for the class-level CONFUSION_MATRIX;
+                the class-level matrix is used when this is None.
+        """
+        if confusion_matrix is None:
+            confusion_matrix = ShapeDetector.CONFUSION_MATRIX
+
+        super().__init__(img_size, model_path, confusion_matrix)
diff --git a/uavf_2024/imaging/detection/weights/seg-v8n-best.pt b/uavf_2024/imaging/detection/weights/seg-v8n-best.pt
diff --git a/uavf_2024/imaging/detection/weights/v8n-best.pt b/uavf_2024/imaging/detection/weights/v8n-best.pt
diff --git a/uavf_2024/imaging/detection/weights/v8n-isaac-target-only.pt b/uavf_2024/imaging/detection/weights/v8n-isaac-target-only.pt
diff --git a/uavf_2024/imaging/detection/yolo_detector.py b/uavf_2024/imaging/detection/yolo_detector.py
@@ -0,0 +1,54 @@
+from __future__ import annotations
+from pathlib import Path
+import warnings
+from ultralytics import YOLO
+from ultralytics.engine.results import Results, Boxes
+import numpy as np
+from ..imaging_types import Tile, DetectionResult, img_coord_t, SHAPES
+import os
+from .. import profiler
+
+
+class YOLODetector:
+    """
+    Wrapper class for YOLO-based detection.
+
+    Holds a loaded YOLO model, a running count of the detections produced so far
+    (used as detection ids), and a confusion matrix keyed by shape name.
+    """
+    def __init__(self, img_size: int, model_path: str | Path, confusion_matrix: dict[str, list[float]]):
+        """
+        Args:
+            img_size: Side length of the random square image used for the initial prediction.
+            model_path: Path to the YOLO weights file.
+            confusion_matrix: Maps each shape name to the list used as the
+                `confidences` of a detection predicted as that shape.
+        """
+        self.yolo = YOLO(Path(model_path))
+        # Run one throwaway prediction on random data at construction time
+        # (presumably to warm the model up before real tiles arrive).
+        rand_input = np.random.rand(1, img_size, img_size, 3).astype(np.float32)
+        self.yolo.predict(list(rand_input), verbose=False)
+        self.num_processed = 0
+        self.confusion_matrix = confusion_matrix
+
+    @profiler
+    def predict(self, tiles: tuple[Tile | None, ...], confidence_threshold=0.25) -> list[DetectionResult]:
+        """
+        Run YOLO on every non-None tile and return all detections.
+
+        Args:
+            tiles: Tiles to run detection on; None entries are ignored.
+            confidence_threshold: Passed to YOLO as `conf`.
+
+        Returns:
+            One DetectionResult per detected box. `x`/`y` are the box's top-left
+            corner offset by the tile's own `x`/`y`, `confidences` is the
+            confusion-matrix entry for the predicted class, `img` is the crop of
+            the box taken from the tile image, and `id` is a running counter
+            across all calls on this detector.
+        """
+        # Filter once and index into the filtered list below. Indexing the
+        # original `tiles` with the YOLO output index would pick the wrong tile
+        # as soon as a None entry precedes it.
+        valid_tiles = [tile for tile in tiles if tile is not None]
+        if not valid_tiles:
+            return []
+
+        imgs_list = [tile.img.get_array() for tile in valid_tiles]
+        predictions: list[Results] = self.yolo(imgs_list, verbose=False, conf=confidence_threshold)
+
+        full_results = []
+        for img_index, single_pred in enumerate(predictions):
+            if not isinstance(single_pred.boxes, Boxes):
+                warnings.warn(f"{self.__class__.__name__}.predict() could not extract Boxes from YOLO output")
+                continue
+
+            tile = valid_tiles[img_index]
+            boxes: Boxes = single_pred.boxes
+            for box, prob, cls in zip(boxes.xywh, boxes.conf, boxes.cls):
+                x, y, w, h = box.int()
+                x -= int(w / 2)  # adjust to make x,y the top left
+                y -= int(h / 2)
+                full_results.append(
+                    DetectionResult(
+                        x=img_coord_t(x.item()) + tile.x,
+                        y=img_coord_t(y.item()) + tile.y,
+                        width=img_coord_t(w.item()),
+                        height=img_coord_t(h.item()),
+                        confidences=np.array(self.confusion_matrix[SHAPES[cls.int()]]),
+                        img=tile.img.make_sub_image(x, y, w, h),
+                        id=self.num_processed
+                    )
+                )
+                self.num_processed += 1
+
+        return full_results
diff --git a/uavf_2024/imaging/general_classifier/__init__.py b/uavf_2024/imaging/general_classifier/__init__.py
@@ -0,0 +1,2 @@
+from .resnet import * 
+from .general_classifier import *
diff --git a/uavf_2024/imaging/general_classifier/general_classifier.py b/uavf_2024/imaging/general_classifier/general_classifier.py
@@ -0,0 +1,78 @@
+import os
+from pathlib import Path
+from typing import Callable, Iterable, Sequence
+
+import numpy as np
+import torch
+
+from uavf_2024.imaging.imaging_types import SHAPES, COLORS, CHARACTERS, Image, ProbabilisticTargetDescriptor, CHW
+from . import ResNet, resnet18
+
+
+# Directory containing this module (despite the name, not the file itself);
+# GeneralClassifier resolves its model file relative to this directory.
+_CURRENT_FILE_PATH = Path(os.path.realpath(__file__)).parent
+
+
+class GeneralClassifier:
+    """
+    Custom model to classify color, shape, and character from cropped bbox.
+
+    The wrapped network is built with four heads, in this order: shape,
+    shape color, character, character color.
+    """
+    def __init__(
+        self,
+        model_relative_path: str = "general_classifier_best.pt",
+        model_factory: Callable[[Sequence[int]], ResNet] = resnet18,
+        device=torch.device("cuda:0")
+    ):
+        """
+        Load the weights, move the model to `device`, and put it in eval mode.
+
+        Args:
+            model_relative_path: Weights file, relative to this module's directory.
+            model_factory: Builds the network from the list of per-head class counts.
+            device: Device the model and the input batches are moved to.
+
+        Raises:
+            FileNotFoundError: If the weights file does not exist.
+            ValueError: If the weights file suffix is neither ".pt" nor ".pth".
+        """
+        super().__init__()
+        self.device = device
+
+        self.model_path = _CURRENT_FILE_PATH / model_relative_path
+
+        if not self.model_path.is_file():
+            raise FileNotFoundError(f"Model file not found: {self.model_path}")
+
+        if self.model_path.suffix not in (".pth", ".pt"):
+            raise ValueError(f"Model file must be a PyTorch model file: {self.model_path}")
+
+        # Load the model
+        self.model = model_factory([len(SHAPES), len(COLORS), len(CHARACTERS), len(COLORS)])
+        # map_location remaps the stored tensors onto the requested device, so a
+        # checkpoint saved on a GPU can still be loaded on a CPU-only machine.
+        self.model.load_state_dict(torch.load(self.model_path, map_location=self.device))
+        self.model.to(device=self.device)
+        self.model.eval()
+
+    @staticmethod
+    def _format_image(image: Image) -> Image:
+        """
+        Formats the image to be passed to the model.
+
+        Pad and resize the image to 224x224, change the dimension order to CHW, and normalize to [0, 1] float32.
+        """
+        arr = image.make_square(224).get_array().astype(np.float32) / 255.0
+        square = Image(arr, image.dim_order)
+        square.change_dim_order(CHW)
+        return square
+
+    def predict(self, images_batch: Iterable[Image]) -> Iterable[ProbabilisticTargetDescriptor]:
+        """
+        Passes the input through the model and transforms the output into a ProbabilisticTargetDescriptor.
+
+        This is a generator: nothing runs until it is iterated. It yields one
+        descriptor per input image, in input order; an empty batch yields nothing.
+        The head outputs are passed on as returned by the model.
+        NOTE(review): whether they are already normalized probabilities depends
+        on the model - confirm.
+        """
+        square_crops_chw = [GeneralClassifier._format_image(image) for image in images_batch]
+        if not square_crops_chw:
+            # torch.stack raises on an empty list, so there is nothing to classify.
+            return
+
+        gpu_batch = self.create_gpu_tensor_batch(square_crops_chw)
+
+        # Only the forward pass runs under no_grad. Yielding inside the context
+        # would leave gradients disabled in the caller while this generator is
+        # suspended.
+        with torch.no_grad():
+            # List of batches, one for each of the heads
+            # I.e., the shape is (category, batch_size, num_classes)
+            raw: list[torch.Tensor] = self.model(gpu_batch)
+
+        # zip(*raw) regroups the per-head batches into one tuple of head outputs per image.
+        for shape_dist, shape_color_dist, character_dist, character_color_dist in zip(*raw):
+            yield ProbabilisticTargetDescriptor(
+                shape_dist.cpu().numpy(),
+                character_dist.cpu().numpy(),
+                shape_color_dist.cpu().numpy(),
+                character_color_dist.cpu().numpy()
+            )
+
+    def create_gpu_tensor_batch(self, images_batch: Iterable[Image]) -> torch.Tensor:
+        """Stack the images' arrays into a single tensor and move it to this classifier's device."""
+        return torch.stack(
+            [torch.tensor(img.get_array()) for img in images_batch]
+        ).to(device=self.device)
diff --git a/uavf_2024/imaging/general_classifier/general_classifier_best.pt b/uavf_2024/imaging/general_classifier/general_classifier_best.pt