diff --git a/.gitignore b/.gitignore index 267315c..f57b3a8 100644 --- a/.gitignore +++ b/.gitignore @@ -131,4 +131,6 @@ dmypy.json node_modules .serverless .env.* -**/test_images/*.txt \ No newline at end of file +**/test_images/*.txt +**/local_prediction/*.cfg +**/local_prediction/*.weights diff --git a/lambda_backend/predict_microservice/base_inference.py b/lambda_backend/predict_microservice/base_inference.py new file mode 100644 index 0000000..b3c44af --- /dev/null +++ b/lambda_backend/predict_microservice/base_inference.py @@ -0,0 +1,148 @@ +import cv2 +import numpy as np +from configparser import RawConfigParser + +CONFIDENCE_THRESHOLD = 0.3 +NMS_IOU_THRESHOLD = 0.4 +SCALE_FACTOR = 0.00392 + +class BaseInference: + """ + Base Inference class for predictions + + ... + + Attributes + ---------- + weight_path : str + path to the .weights file + config_path : str + path to the .cfg file + classes : list + names of classes detected + score_threshold : float + threshold to classify object as detected + nms_thresh : float + threshold for non-max suppression + + Methods + ------- + run() + Obtains predicted boxes + """ + + def __init__(self, weight_path, config_path, classes, score_threshold=None, nms_thresh=None): + self.weight_path = weight_path + self.config_path = config_path + self.classes = classes + self.net = None + self.score_threshold = score_threshold if score_threshold is not None else CONFIDENCE_THRESHOLD + self.nms_thresh = nms_thresh if nms_thresh is not None else NMS_IOU_THRESHOLD + + self._initialize_model() + self._read_config() + + def _initialize_model(self): + # Load Yolo + self.net = cv2.dnn.readNet( + self.weight_path, + self.config_path + ) + layer_names = self.net.getLayerNames() + # Gets the indexes of layers with unconnected outputs, + # then stores the associated names into output_layers + self.output_layers = [layer_names[i[0] - 1] for i in self.net.getUnconnectedOutLayers()] + + def _read_config(self): + cfg = RawConfigParser(strict=False) + cfg.read(self.config_path) + + assert 'net' in cfg, 'No net section in config' + + net_dict = dict(cfg.items('net')) + + assert 'height' in net_dict and 'width' in net_dict, 'No height and/or width in config' + self.train_height_width = (int(net_dict['height']), int(net_dict['width'])) + + def run(self, img, height=None, width=None): + """ + Parameters + ---------- + img : cv2.Mat + Image as a matrix + height : int, optional + Height of img (default is None) + width : int, optional + Width of img (default is None) + + Returns + ------- + class_ids : list(int) + Class IDs of boxes + box_dims : list(list(int)) + Dimensions of boxes + box_confidences : list(float) + Confidence scores of boxes + box_dims_norm : list(list(float)) + Normalised dimensions of boxes + indexes : list(int) + Indexes of boxes that passed NMS + """ + + # If run is called without height or width given + if height is None or width is None: + height, width, channels = img.shape + + # Detecting objects + blob = cv2.dnn.blobFromImage( + image = img, + scalefactor = SCALE_FACTOR, + size = self.train_height_width, + mean = (0,0,0), + swapRB = True, + crop = False + ) + + self.net.setInput(blob) + outs = self.net.forward(self.output_layers) + + class_ids, box_dims, box_confidences, box_dims_norm, indexes = self._get_filtered_boxes(outs, height, width) + + return class_ids, box_dims, box_confidences, box_dims_norm, indexes + + def _get_filtered_boxes(self, output, height, width): + # Collect detections above the score threshold + class_ids = [] + box_confidences = []
+ box_dims = [] + # Saving to txt + box_dims_norm = [] + + for out in output: + for detection in out: + + scores = detection[5:] + class_id = np.argmax(scores) + confidence = scores[class_id] + if confidence > self.score_threshold: + # Object detected + center_x = int(detection[0] * width) + center_y = int(detection[1] * height) + w = int(detection[2] * width) + h = int(detection[3] * height) + + # Rectangle coordinates + x = int(center_x - w / 2) + y = int(center_y - h / 2) + + box_dims.append([x, y, w, h]) + box_confidences.append(float(confidence)) + class_ids.append(class_id) + + # Save normalised format + box_dims_norm.append(detection[:4]) + + indexes = cv2.dnn.NMSBoxes(box_dims, box_confidences, self.score_threshold, self.nms_thresh) + indexes = [int(i) for i in indexes] + + return class_ids, box_dims, box_confidences, box_dims_norm, indexes diff --git a/lambda_backend/predict_microservice/handler.py b/lambda_backend/predict_microservice/handler.py index cf45ccd..a44e749 100644 --- a/lambda_backend/predict_microservice/handler.py +++ b/lambda_backend/predict_microservice/handler.py @@ -1,29 +1,17 @@ -import numpy as np -import cv2 - -from os.path import join import os -import glob import json import base64 from utils import exception_handler, retrieve_numpy_image, parse_multipart_data, get_response_headers +from service_inference import ServiceInference -ALLOWED_TYPES = ["image/jpeg"] - -# Load Yolo -net = cv2.dnn.readNet( - join("weights", "yolov4-tiny-obj.weights"), - join("weights", "yolov4-tiny-obj.cfg") -) - -# Names of custom objects -# Refer to colab notebook for index to class mappings -classes = ["hold"] +DEFAULT_WEIGHTS = os.path.join("weights", "yolov4-tiny-obj.weights") +DEFAULT_CONFIG = os.path.join("weights", "yolov4-tiny-obj.cfg") +DEFAULT_CLASSES = ["hold"] -layer_names = net.getLayerNames() -output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()] +ALLOWED_TYPES = ["image/jpeg"] +inference = ServiceInference(DEFAULT_WEIGHTS, DEFAULT_CONFIG, DEFAULT_CLASSES) @exception_handler def predict(event, context): @@ -57,68 +45,11 @@ def predict(event, context): assert image_dict["type"] in ALLOWED_TYPES, "Unallowed file type" img = retrieve_numpy_image(image_dict["content"]) - height, width, channels = img.shape scaled_width = int(width_dict["content"].decode("utf-8")) - # If given width is 0, do not scale - scaled_width = scaled_width if scaled_width != 0 else width - scaled_height = int((scaled_width / width) * height) - - # Image Blob - blob = cv2.dnn.blobFromImage( - img, - 0.00392, - (416, 416), - (0, 0, 0), - True, - crop=False - ) - - net.setInput(blob) - outs = net.forward(output_layers) - - box_dimensions = [] - box_confidences = [] - class_ids = [] - - for out in outs: - for detection in out: - scores = detection[5:] - class_id = np.argmax(scores) - confidence = scores[class_id] - if confidence > 0.3: - # Object detected - center_x = int(detection[0] * scaled_width) - center_y = int(detection[1] * scaled_height) - w = int(detection[2] * scaled_width) - h = int(detection[3] * scaled_height) - - # Rectangle coordinates - x = int(center_x - w / 2) - y = int(center_y - h / 2) - - box_dimensions.append([x, y, w, h]) - box_confidences.append(float(confidence)) - class_ids.append(class_id) - - boxes = [] - # Non Maximum Suppression - indexes = cv2.dnn.NMSBoxes(box_dimensions, box_confidences, 0.5, 0.4) - for i in indexes: - i = int(i) - x, y, w, h = box_dimensions[i] - boxes.append({ - "x": x, - "y": y, - "w": w, - "h": h, - "confidence": 
float(box_confidences[i]), - "class": str(classes[class_ids[i]]) - }) - - # Sort boxes in descending sizes - boxes = sorted(boxes, key=lambda box: box["w"] * box["h"], reverse=True) + # Run inference on image + scaled_height, scaled_width, boxes = inference.run(img, scaled_width) return { "statusCode": "200", diff --git a/lambda_backend/predict_microservice/service_inference.py b/lambda_backend/predict_microservice/service_inference.py new file mode 100644 index 0000000..058286d --- /dev/null +++ b/lambda_backend/predict_microservice/service_inference.py @@ -0,0 +1,78 @@ +from base_inference import BaseInference + +class ServiceInference(BaseInference): + """ + Inference class for predictions on predict_microservice + + ... + + Methods + ------- + run(img, scaled_width) + Obtains predicted boxes for predict_microservice + """ + + def run(self, img, scaled_width): + """ + Parameters + ---------- + img : cv2.Mat + Image as a matrix + scaled_width : int + Scaled width of images + + Returns + ------- + scaled_height : int + Scaled height of img + scaled_width : int + Scaled width of img + boxes : list + List of predicted boxes in JSON format + """ + + height, width = img.shape[:2] + # If given width is 0, do not scale + scaled_width = scaled_width if scaled_width != 0 else width + scaled_height = int((scaled_width / width) * height) + + class_ids, box_dims, box_confidences, _, indexes = super().run(img, scaled_height, scaled_width) + + boxes = self._get_boxes_dict(box_dims, box_confidences, class_ids, indexes) + return scaled_height, scaled_width, boxes + + def _get_boxes_dict(self, box_dims, box_confidences, class_ids, indexes): + """ + Parameters + ---------- + box_dims : list + Dimensions of predicted boxes + box_confidences : list + Confidence scores of predicted boxes + class_ids : list + Class IDs of predicted boxes + indexes : list + Indexes of predicted boxes after NMS + + Returns + ------- + boxes : list + List of predicted boxes in JSON format + """ + + boxes = [] + for i in indexes: + x, y, w, h = box_dims[i] + boxes.append({ + "x": x, + "y": y, + "w": w, + "h": h, + "confidence": float(box_confidences[i]), + "class": str(self.classes[class_ids[i]]) + }) + + # Sort boxes in descending sizes + boxes = sorted(boxes, key=lambda box: box["w"] * box["h"], reverse=True) + + return boxes diff --git a/lambda_backend/predict_microservice/weights/yolov4-tiny-obj.cfg b/lambda_backend/predict_microservice/weights/yolov4-tiny-obj.cfg index 9f6a8e5..ce72478 100644 --- a/lambda_backend/predict_microservice/weights/yolov4-tiny-obj.cfg +++ b/lambda_backend/predict_microservice/weights/yolov4-tiny-obj.cfg @@ -1,10 +1,10 @@ [net] # Testing -batch=8 -subdivisions=16 +#batch=64 +#subdivisions=16 # Training -# batch=64 -# subdivisions=16 +batch=64 +subdivisions=16 width=640 height=640 channels=3 @@ -226,7 +226,7 @@ iou_normalizer=0.07 iou_loss=ciou ignore_thresh = .7 truth_thresh = 1 -random=0 +random=1 resize=1.5 nms_kind=greedynms beta_nms=0.6 @@ -275,7 +275,7 @@ iou_normalizer=0.07 iou_loss=ciou ignore_thresh = .7 truth_thresh = 1 -random=0 +random=1 resize=1.5 nms_kind=greedynms beta_nms=0.6 diff --git a/lambda_backend/predict_microservice/weights/yolov4-tiny-obj.weights b/lambda_backend/predict_microservice/weights/yolov4-tiny-obj.weights index 6822033..f6c755c 100644 Binary files a/lambda_backend/predict_microservice/weights/yolov4-tiny-obj.weights and b/lambda_backend/predict_microservice/weights/yolov4-tiny-obj.weights differ diff --git a/model_training/Train_yolov4_tiny.ipynb
b/model_training/Train_yolov4_tiny.ipynb index 31ba8c6..b54c971 100644 --- a/model_training/Train_yolov4_tiny.ipynb +++ b/model_training/Train_yolov4_tiny.ipynb @@ -1,6 +1,6 @@ { "nbformat": 4, - "nbformat_minor": 0, + "nbformat_minor": 2, "metadata": { "accelerator": "GPU", "colab": { @@ -29,9 +29,6 @@ "cells": [ { "cell_type": "markdown", - "metadata": { - "id": "Kcap186UUOyC" - }, "source": [ "#### Steps\n", "\n", @@ -40,26 +37,22 @@ "3. Run this notebook on Google Colab with GPU\n", "4. Rename `$BACKUP_DIR` accordingly. For example, if the folder is named yolov4_tiny, assign it `/mydrive/yolov4_tiny`\n", "5. Edit `CLASSES` accordingly" - ] + ], + "metadata": { + "id": "Kcap186UUOyC" + } }, { "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "bwp6NIrsZZFP", - "scrolled": true, - "outputId": "da7b4ad0-ab36-4814-f4cf-38917b97ddd9" - }, + "execution_count": 1, "source": [ - "# Check if NVIDIA GPU is enabled\n", + "# Check if NVIDIA GPU is enabled\r\n", "!nvidia-smi" ], - "execution_count": 1, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Mon Jun 7 14:14:49 2021 \n", "+-----------------------------------------------------------------------------+\n", @@ -81,209 +74,211 @@ "|=============================================================================|\n", "| No running processes found |\n", "+-----------------------------------------------------------------------------+\n" - ], - "name": "stdout" + ] } - ] + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bwp6NIrsZZFP", + "scrolled": true, + "outputId": "da7b4ad0-ab36-4814-f4cf-38917b97ddd9" + } }, { "cell_type": "code", - "metadata": { - "id": "SWewdvMGv8Fd" - }, + "execution_count": 2, "source": [ - "import glob\n", - "import os\n", - "import re\n", - "import random\n", - "\n", + "import glob\r\n", + "import os\r\n", + "import re\r\n", + "import random\r\n", + "\r\n", "random.seed(42)" ], - "execution_count": 2, - "outputs": [] + "outputs": [], + "metadata": { + "id": "SWewdvMGv8Fd" + } }, { "cell_type": "code", - "metadata": { - "id": "q2A2gPchx7vL" - }, + "execution_count": 3, "source": [ - "CLASSES = [\"hold\"]\n", - "\n", - "os.environ[\"NUM_CLASSES\"] = f\"{len(CLASSES)}\"\n", - "os.environ[\"NUM_FILTERS\"] = f\"{(len(CLASSES) + 5)*3}\"\n", - "os.environ[\"CLASS_NAMES\"] = \"\\r\\n\".join(CLASSES)\n", + "CLASSES = [\"hold\"]\r\n", + "\r\n", + "os.environ[\"NUM_CLASSES\"] = f\"{len(CLASSES)}\"\r\n", + "os.environ[\"NUM_FILTERS\"] = f\"{(len(CLASSES) + 5)*3}\"\r\n", + "os.environ[\"CLASS_NAMES\"] = \"\\r\\n\".join(CLASSES)\r\n", "os.environ[\"BACKUP_DIR\"] = \"/mydrive/yolov4_tiny\"" ], - "execution_count": 3, - "outputs": [] + "outputs": [], + "metadata": { + "id": "q2A2gPchx7vL" + } }, { "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "oHQ6gLMeC7tK", - "outputId": "43bfc136-fc07-4a48-945f-1b463c4f2f65" - }, + "execution_count": 4, "source": [ - "!echo $NUM_CLASSES\n", - "!echo $NUM_FILTERS\n", - "!echo $CLASS_NAMES\n", + "!echo $NUM_CLASSES\r\n", + "!echo $NUM_FILTERS\r\n", + "!echo $CLASS_NAMES\r\n", "!echo $BACKUP_DIR" ], - "execution_count": 4, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "1\n", "18\n", "hold\n", "/mydrive/yolov4_tiny\n" - ], - "name": "stdout" + ] } - ] - }, - { - "cell_type": "code", + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, - "id": "9e9ZW3sqMEPO", - "outputId": 
"c1764d4a-aa22-4832-f7d7-9cf5ad796102" - }, + "id": "oHQ6gLMeC7tK", + "outputId": "43bfc136-fc07-4a48-945f-1b463c4f2f65" + } + }, + { + "cell_type": "code", + "execution_count": 5, "source": [ - "from google.colab import drive\n", - "drive.mount('/content/gdrive')\n", + "from google.colab import drive\r\n", + "drive.mount('/content/gdrive')\r\n", "!ln -s /content/gdrive/MyDrive/ /mydrive" ], - "execution_count": 5, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Mounted at /content/gdrive\n" - ], - "name": "stdout" + ] } - ] + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9e9ZW3sqMEPO", + "outputId": "c1764d4a-aa22-4832-f7d7-9cf5ad796102" + } }, { "cell_type": "markdown", - "metadata": { - "id": "t7utW4in4azV" - }, "source": [ "**1) Clone the Darknet**\n", "\n" - ] + ], + "metadata": { + "id": "t7utW4in4azV" + } }, { "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "e03U7Zi-qMr2", - "outputId": "d1f1c69e-9c2b-4d81-8fd2-5c50bff275dd" - }, + "execution_count": 6, "source": [ "!git clone https://github.com/AlexeyAB/darknet" ], - "execution_count": 6, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "Cloning into 'darknet'...\n", "remote: Enumerating objects: 15069, done.\u001b[K\n", "remote: Total 15069 (delta 0), reused 0 (delta 0), pack-reused 15069\u001b[K\n", "Receiving objects: 100% (15069/15069), 13.44 MiB | 9.50 MiB/s, done.\n", "Resolving deltas: 100% (10244/10244), done.\n" - ], - "name": "stdout" + ] } - ] + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e03U7Zi-qMr2", + "outputId": "d1f1c69e-9c2b-4d81-8fd2-5c50bff275dd" + } }, { "cell_type": "markdown", - "metadata": { - "id": "7gDOF3mU-TAn" - }, "source": [ "**2) Compile Darknet using Nvidia GPU**\n" - ] + ], + "metadata": { + "id": "7gDOF3mU-TAn" + } }, { "cell_type": "code", - "metadata": { - "id": "5H-Taj06Tw1c", - "scrolled": true - }, + "execution_count": null, "source": [ - "# change makefile to have GPU and OPENCV enabled\n", - "%cd darknet\n", - "!sed -i 's/OPENCV=0/OPENCV=1/' Makefile\n", - "!sed -i 's/GPU=0/GPU=1/' Makefile\n", - "!sed -i 's/CUDNN=0/CUDNN=1/' Makefile\n", + "# change makefile to have GPU and OPENCV enabled\r\n", + "%cd darknet\r\n", + "!sed -i 's/OPENCV=0/OPENCV=1/' Makefile\r\n", + "!sed -i 's/GPU=0/GPU=1/' Makefile\r\n", + "!sed -i 's/CUDNN=0/CUDNN=1/' Makefile\r\n", "!make" ], - "execution_count": null, - "outputs": [] + "outputs": [], + "metadata": { + "id": "5H-Taj06Tw1c", + "scrolled": true + } }, { "cell_type": "markdown", - "metadata": { - "id": "gAOLtA_qI9vF" - }, "source": [ "**3) Configure Darknet network for training YOLO V4 Tiny**" - ] + ], + "metadata": { + "id": "gAOLtA_qI9vF" + } }, { "cell_type": "code", - "metadata": { - "id": "s-RpscgU853t" - }, + "execution_count": 8, "source": [ "!cp cfg/yolov4-tiny-custom.cfg cfg/yolov4-tiny-obj.cfg" ], - "execution_count": 8, - "outputs": [] + "outputs": [], + "metadata": { + "id": "s-RpscgU853t" + } }, { "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5ZgVQRop_vwR", - "outputId": "83e9c920-e212-47ec-d071-390b58865cd7" - }, + "execution_count": 9, "source": [ - "# filters=(classes + 5)x3 \n", - "\n", - "!sed -i 's/batch=1/batch=64/' cfg/yolov4-tiny-obj.cfg\n", - "!sed -i 's/subdivisions=1/subdivisions=16/' cfg/yolov4-tiny-obj.cfg\n", - "!sed -i 's/max_batches = 500200/max_batches = 8000/' cfg/yolov4-tiny-obj.cfg\n", - 
"!sed -i 's/steps=400000,450000/steps=6400,7200/' cfg/yolov4-tiny-obj.cfg\n", - "!sed -i 's/width=416/width=640/' cfg/yolov4-tiny-obj.cfg\n", - "!sed -i 's/height=416/height=640/' cfg/yolov4-tiny-obj.cfg\n", - "!sed -i \"220 s@classes=80@classes=$NUM_CLASSES@\" cfg/yolov4-tiny-obj.cfg\n", - "!sed -i \"269 s@classes=80@classes=$NUM_CLASSES@\" cfg/yolov4-tiny-obj.cfg\n", - "!sed -i \"212 s@filters=255@filters=$NUM_FILTERS@\" cfg/yolov4-tiny-obj.cfg\n", - "!sed -i \"263 s@filters=255@filters=$NUM_FILTERS@\" cfg/yolov4-tiny-obj.cfg\n", - "\n", + "# filters=(classes + 5)x3 \r\n", + "\r\n", + "!sed -i 's/batch=1/batch=64/' cfg/yolov4-tiny-obj.cfg\r\n", + "!sed -i 's/subdivisions=1/subdivisions=16/' cfg/yolov4-tiny-obj.cfg\r\n", + "!sed -i 's/max_batches = 500200/max_batches = 8000/' cfg/yolov4-tiny-obj.cfg\r\n", + "!sed -i 's/steps=400000,450000/steps=6400,7200/' cfg/yolov4-tiny-obj.cfg\r\n", + "!sed -i 's/width=416/width=640/' cfg/yolov4-tiny-obj.cfg\r\n", + "!sed -i 's/height=416/height=640/' cfg/yolov4-tiny-obj.cfg\r\n", + "!sed -i \"s/random=0/random=1/\"\r\n", + "!sed -i \"220 s@classes=80@classes=$NUM_CLASSES@\" cfg/yolov4-tiny-obj.cfg\r\n", + "!sed -i \"269 s@classes=80@classes=$NUM_CLASSES@\" cfg/yolov4-tiny-obj.cfg\r\n", + "!sed -i \"212 s@filters=255@filters=$NUM_FILTERS@\" cfg/yolov4-tiny-obj.cfg\r\n", + "!sed -i \"263 s@filters=255@filters=$NUM_FILTERS@\" cfg/yolov4-tiny-obj.cfg\r\n", + "\r\n", "!tail -n 20 cfg/yolov4-tiny-obj.cfg" ], - "execution_count": 9, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "pad=1\n", "filters=18\n", @@ -305,135 +300,138 @@ "resize=1.5\n", "nms_kind=greedynms\n", "beta_nms=0.6\n" - ], - "name": "stdout" + ] } - ] - }, - { - "cell_type": "code", + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, - "id": "7AIBw_psIclz", - "outputId": "8a8d0a96-c910-4ca6-b663-192be3f09f57" - }, + "id": "5ZgVQRop_vwR", + "outputId": "83e9c920-e212-47ec-d071-390b58865cd7" + } + }, + { + "cell_type": "code", + "execution_count": 10, "source": [ - "!echo \"$CLASS_NAMES\" > data/obj.names\n", - "!echo -e \"classes = $NUM_CLASSES\\r\\ntrain = data/train.txt\\r\\nvalid = data/test.txt\\r\\nnames = data/obj.names\\r\\nbackup = $BACKUP_DIR\" > data/obj.data\n", - "!mkdir data/obj\n", + "!echo \"$CLASS_NAMES\" > data/obj.names\r\n", + "!echo -e \"classes = $NUM_CLASSES\\r\\ntrain = data/train.txt\\r\\nvalid = data/test.txt\\r\\nnames = data/obj.names\\r\\nbackup = $BACKUP_DIR\" > data/obj.data\r\n", + "!mkdir data/obj\r\n", "!cat data/obj.data" ], - "execution_count": 10, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ "classes = 1\r\n", "train = data/train.txt\r\n", "valid = data/test.txt\r\n", "names = data/obj.names\r\n", "backup = /mydrive/yolov4_tiny\n" - ], - "name": "stdout" + ] } - ] + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7AIBw_psIclz", + "outputId": "8a8d0a96-c910-4ca6-b663-192be3f09f57" + } }, { "cell_type": "code", - "metadata": { - "id": "eZlkzFMW7I_N" - }, + "execution_count": null, "source": [ - "# Download weights yolov4 tiny\n", - "\n", + "# Download weights yolov4 tiny\r\n", + "\r\n", "!wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.conv.29 " ], - "execution_count": null, - "outputs": [] + "outputs": [], + "metadata": { + "id": "eZlkzFMW7I_N" + } }, { "cell_type": "markdown", - "metadata": { - "id": "9RbVKJjoncW2" - }, "source": [ "**4) Extract Images**\n", "\n", "The images need to be inside a zip archive 
called \"images.zip\" and they need to be inside the folder \"yolov3\" on Google Drive" - ] + ], + "metadata": { + "id": "9RbVKJjoncW2" + } }, { "cell_type": "code", - "metadata": { - "id": "_fHrzCQq4nsn" - }, + "execution_count": null, "source": [ - "!rm -r data/obj\n", + "!rm -r data/obj\r\n", "!ls data" ], - "execution_count": null, - "outputs": [] + "outputs": [], + "metadata": { + "id": "_fHrzCQq4nsn" + } }, { "cell_type": "code", - "metadata": { - "id": "jhb5nZvsQ_96", - "scrolled": true - }, + "execution_count": null, "source": [ "!unzip -o $BACKUP_DIR/images.zip -d data/obj" ], - "execution_count": null, - "outputs": [] + "outputs": [], + "metadata": { + "id": "jhb5nZvsQ_96", + "scrolled": true + } }, { "cell_type": "code", - "metadata": { - "id": "DGUyXxeYX0IP", - "scrolled": false - }, + "execution_count": null, "source": [ - "# We're going to convert the class index on the .txt files. \n", - "# For example, if the indices labelled are 14 and 15, they will be converted to 0 and 1\n", - "# Can be doubled checked in `print(index_list)`\n", - "\n", - "index_list = []\n", - "\n", - "txt_file_paths = glob.glob(r\"data/obj/*.txt\")\n", - "for i, file_path in enumerate(txt_file_paths):\n", - " with open(file_path, \"r\") as f_o:\n", - " lines = f_o.readlines()\n", - " text_converted = []\n", - " print(i, file_path)\n", - " for line in lines:\n", - " numbers = re.findall(\"[0-9.]+\", line)\n", - " if numbers:\n", - " index = int(numbers[0])\n", - " if index not in index_list:\n", - " index_list.append(index)\n", - " converted_index = index_list.index(index)\n", - " \n", - " text = \"{} {} {} {} {}\".format(converted_index, numbers[1], numbers[2], numbers[3], numbers[4])\n", - " text_converted.append(text)\n", - "\n", - " # Write file\n", - " with open(file_path, 'w') as fp:\n", - " for item in text_converted:\n", - " fp.writelines(\"%s\\n\" % item)\n", - "\n", + "# We're going to convert the class index on the .txt files. 
\r\n", + "# For example, if the indices labelled are 14 and 15, they will be converted to 0 and 1\r\n", + "# Can be doubled checked in `print(index_list)`\r\n", + "\r\n", + "index_list = []\r\n", + "\r\n", + "txt_file_paths = glob.glob(r\"data/obj/*.txt\")\r\n", + "for i, file_path in enumerate(txt_file_paths):\r\n", + " with open(file_path, \"r\") as f_o:\r\n", + " lines = f_o.readlines()\r\n", + " text_converted = []\r\n", + " print(i, file_path)\r\n", + " for line in lines:\r\n", + " numbers = re.findall(\"[0-9.]+\", line)\r\n", + " if numbers:\r\n", + " index = int(numbers[0])\r\n", + " if index not in index_list:\r\n", + " index_list.append(index)\r\n", + " converted_index = index_list.index(index)\r\n", + " \r\n", + " text = \"{} {} {} {} {}\".format(converted_index, numbers[1], numbers[2], numbers[3], numbers[4])\r\n", + " text_converted.append(text)\r\n", + "\r\n", + " # Write file\r\n", + " with open(file_path, 'w') as fp:\r\n", + " for item in text_converted:\r\n", + " fp.writelines(\"%s\\n\" % item)\r\n", + "\r\n", "print(index_list)" ], - "execution_count": null, - "outputs": [] + "outputs": [], + "metadata": { + "id": "DGUyXxeYX0IP", + "scrolled": false + } }, { "cell_type": "code", - "metadata": { - "id": "5I_0ht7ITMUu", - "scrolled": false - }, + "execution_count": null, "source": [ "import glob\n", "images_list = glob.glob(\"data/obj/*.jpg\")\n", @@ -443,14 +441,15 @@ "test_images_list = images_list[4*len(images_list)//5:]\n", "print(train_images_list)" ], - "execution_count": null, - "outputs": [] + "outputs": [], + "metadata": { + "id": "5I_0ht7ITMUu", + "scrolled": false + } }, { "cell_type": "code", - "metadata": { - "id": "EzygoAtMSnon" - }, + "execution_count": 70, "source": [ "#Create training.txt and test.txt file\n", "\n", @@ -460,50 +459,45 @@ "with open(\"data/test.txt\", \"w\") as f:\n", " f.write(\"\\n\".join(test_images_list)) " ], - "execution_count": 70, - "outputs": [] + "outputs": [], + "metadata": { + "id": "EzygoAtMSnon" + } }, { "cell_type": "markdown", - "metadata": { - "id": "CeSiuLrXoAjc" - }, "source": [ "**5) Start the training**" - ] + ], + "metadata": { + "id": "CeSiuLrXoAjc" + } }, { "cell_type": "code", - "metadata": { - "id": "S3_g3jclUzMm", - "scrolled": true - }, + "execution_count": 71, "source": [ "# Start the training\n", "!./darknet detector train data/obj.data cfg/yolov4-tiny-obj.cfg yolov4-tiny.conv.29 -dont_show" ], - "execution_count": 71, - "outputs": [] + "outputs": [], + "metadata": { + "id": "S3_g3jclUzMm", + "scrolled": true + } }, { "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "X_Yf9cwm1BmM", - "scrolled": true, - "outputId": "d8dae19e-e34f-4de0-84d8-53f1ff6426ec" - }, + "execution_count": 77, "source": [ "# Validate the weights files to see which performs the best\n", "\n", "!./darknet detector map data/obj.data cfg/yolov4-tiny-obj.cfg /mydrive/yolov4_tiny/yolov4-tiny-obj_8000.weights" ], - "execution_count": 77, "outputs": [ { "output_type": "stream", + "name": "stdout", "text": [ " CUDA-version: 11000 (11020), cuDNN: 7.6.5, GPU count: 1 \n", " OpenCV version: 3.2.0\n", @@ -580,22 +574,29 @@ " `-points 101` for MS COCO \n", " `-points 11` for PascalVOC 2007 (uncomment `difficult` in voc.data) \n", " `-points 0` (AUC) for ImageNet, PascalVOC 2010-2012, your custom dataset\n" - ], - "name": "stdout" + ] } - ] + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "X_Yf9cwm1BmM", + "scrolled": true, + "outputId": 
"d8dae19e-e34f-4de0-84d8-53f1ff6426ec" + } }, { "cell_type": "code", - "metadata": { - "id": "pJOEUDbp4Fsc" - }, + "execution_count": 75, "source": [ "# Save the config\n", "!cp cfg/yolov4-tiny-obj.cfg /mydrive/yolov4_tiny/yolov4-tiny-obj.cfg" ], - "execution_count": 75, - "outputs": [] + "outputs": [], + "metadata": { + "id": "pJOEUDbp4Fsc" + } } ] } \ No newline at end of file diff --git a/model_training/local_prediction/Pipfile b/model_training/local_prediction/Pipfile index 308a1f7..3138f64 100644 --- a/model_training/local_prediction/Pipfile +++ b/model_training/local_prediction/Pipfile @@ -8,6 +8,7 @@ numpy = "*" opencv-python = "*" autopep8 = "*" requests-toolbelt = "*" +configparser = "*" [dev-packages] diff --git a/model_training/local_prediction/Pipfile.lock b/model_training/local_prediction/Pipfile.lock index 82bf5e8..e2162b0 100644 --- a/model_training/local_prediction/Pipfile.lock +++ b/model_training/local_prediction/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "623ae6539b7b58beaa30387a62f50d3db03ed977fa0628dc6246a8fd6e15551c" + "sha256": "65b08d89cb51c6c0445a4dc1f74cbd96b02570255659782e4b2be75677904ba7" }, "pipfile-spec": 6, "requires": { @@ -31,77 +31,93 @@ ], "version": "==2021.5.30" }, - "chardet": { + "charset-normalizer": { "hashes": [ - "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", - "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" + "sha256:88fce3fa5b1a84fdcb3f603d889f723d1dd89b26059d0123ca435570e848d5e1", + "sha256:c46c3ace2d744cfbdebceaa3c19ae691f53ae621b39fd7570f59d14fb7f2fd12" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", - "version": "==4.0.0" + "markers": "python_version >= '3'", + "version": "==2.0.3" + }, + "configparser": { + "hashes": [ + "sha256:85d5de102cfe6d14a5172676f09d19c465ce63d6019cf0a4ef13385fc535e828", + "sha256:af59f2cdd7efbdd5d111c1976ecd0b82db9066653362f0962d7bf1d3ab89a1fa" + ], + "index": "pypi", + "version": "==5.0.2" }, "idna": { "hashes": [ - "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", - "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" + "sha256:14475042e284991034cb48e06f6851428fb14c4dc953acd9be9a5e95c7b6dd7a", + "sha256:467fbad99067910785144ce333826c71fb0e63a425657295239737f7ecd125f3" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", - "version": "==2.10" + "markers": "python_version >= '3'", + "version": "==3.2" }, "numpy": { "hashes": [ - "sha256:1676b0a292dd3c99e49305a16d7a9f42a4ab60ec522eac0d3dd20cdf362ac010", - "sha256:16f221035e8bd19b9dc9a57159e38d2dd060b48e93e1d843c49cb370b0f415fd", - "sha256:43909c8bb289c382170e0282158a38cf306a8ad2ff6dfadc447e90f9961bef43", - "sha256:4e465afc3b96dbc80cf4a5273e5e2b1e3451286361b4af70ce1adb2984d392f9", - "sha256:55b745fca0a5ab738647d0e4db099bd0a23279c32b31a783ad2ccea729e632df", - "sha256:5d050e1e4bc9ddb8656d7b4f414557720ddcca23a5b88dd7cff65e847864c400", - "sha256:637d827248f447e63585ca3f4a7d2dfaa882e094df6cfa177cc9cf9cd6cdf6d2", - "sha256:6690080810f77485667bfbff4f69d717c3be25e5b11bb2073e76bb3f578d99b4", - "sha256:66fbc6fed94a13b9801fb70b96ff30605ab0a123e775a5e7a26938b717c5d71a", - "sha256:67d44acb72c31a97a3d5d33d103ab06d8ac20770e1c5ad81bdb3f0c086a56cf6", - "sha256:6ca2b85a5997dabc38301a22ee43c82adcb53ff660b89ee88dded6b33687e1d8", - "sha256:6e51534e78d14b4a009a062641f465cfaba4fdcb046c3ac0b1f61dd97c861b1b", - "sha256:70eb5808127284c4e5c9e836208e09d685a7978b6a216db85960b1a112eeace8", - 
"sha256:830b044f4e64a76ba71448fce6e604c0fc47a0e54d8f6467be23749ac2cbd2fb", - "sha256:8b7bb4b9280da3b2856cb1fc425932f46fba609819ee1c62256f61799e6a51d2", - "sha256:a9c65473ebc342715cb2d7926ff1e202c26376c0dcaaee85a1fd4b8d8c1d3b2f", - "sha256:c1c09247ccea742525bdb5f4b5ceeacb34f95731647fe55774aa36557dbb5fa4", - "sha256:c5bf0e132acf7557fc9bb8ded8b53bbbbea8892f3c9a1738205878ca9434206a", - "sha256:db250fd3e90117e0312b611574cd1b3f78bec046783195075cbd7ba9c3d73f16", - "sha256:e515c9a93aebe27166ec9593411c58494fa98e5fcc219e47260d9ab8a1cc7f9f", - "sha256:e55185e51b18d788e49fe8305fd73ef4470596b33fc2c1ceb304566b99c71a69", - "sha256:ea9cff01e75a956dbee133fa8e5b68f2f92175233de2f88de3a682dd94deda65", - "sha256:f1452578d0516283c87608a5a5548b0cdde15b99650efdfd85182102ef7a7c17", - "sha256:f39a995e47cb8649673cfa0579fbdd1cdd33ea497d1728a6cb194d6252268e48" + "sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33", + "sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5", + "sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1", + "sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1", + "sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac", + "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4", + "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50", + "sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6", + "sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267", + "sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172", + "sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af", + "sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8", + "sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2", + "sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63", + "sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1", + "sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8", + "sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16", + "sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214", + "sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd", + "sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68", + "sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062", + "sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e", + "sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f", + "sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b", + "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd", + "sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671", + "sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a", + "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a" ], "index": "pypi", - "version": "==1.20.3" + "version": "==1.21.1" }, "opencv-python": { "hashes": [ - "sha256:0118a086fad8d77acdf46ac68df49d4167fbb85420f8bcf2615d7b74fc03aae0", - "sha256:050227e5728ea8316ec114aca8f43d56253cbb1c50983e3b136a988254a83118", - "sha256:08327a38564786bf73e387736f080e8ad4c110b394ca4af2ecec8277b305bf44", - "sha256:0a3aef70b7c53bbd22ade86a4318b8a2ad98d3c3ed3d0c315f18bf1a2d868709", - "sha256:10325c3fd571e33a11eb5f0e5d265d73baef22dbb34c977f28df7e22de47b0bc", - 
"sha256:2436b71346d1eed423577fac8cd3aa9c0832ea97452444dc7f856b2f09600dba", - "sha256:4b8814d3f0cf01e8b8624125f7dcfb095893abcc04083cb4968fa1629bc81161", - "sha256:4e6c2d8320168a4f76822fbb76df3b18688ac5e068d49ac38a4ce39af0f8e1a6", - "sha256:6b2573c6367ec0052b37e375d18638a885dd7a10a5ef8dd726b391969c227f23", - "sha256:6e2070e35f2aaca3d1259093c786d4e373004b36d89a94e81943247c6ed3d4e1", - "sha256:89a2b45429bf945988a17b0404431d9d8fdc9e04fb2450b56fa01f6f9477101d", - "sha256:8cf81f53ac5ad900ca443a8252c4e0bc1256f1c2cb2d8459df2ba1ac014dfa36", - "sha256:9680ab256ab31bdafd74f6cf55eb570e5629b5604d50fd69dd1bd2a8124f0611", - "sha256:a8020cc6145c6934192189058743a55189750df6dff894396edb8b35a380cc48", - "sha256:b3bef3f2a2ab3c201784d12ec6b5c9e61c920c15b6854d8d2f62fd019e3df846", - "sha256:b724a96eeb88842bd2371b1ffe2da73b6295063ba5c029aa34139d25b8315a3f", - "sha256:c446555cbbc4f5e809f9c15ac1b6200024032d9859f5ac5a2ca7669d09e4c91c", - "sha256:d9004e2cc90bb2862cdc1d062fac5163d3def55b200081d4520d3e90b4c7197b", - "sha256:ef3102b70aa59ab3fed69df30465c1b7587d681e963dfff5146de233c75df7ba", - "sha256:f12f39c1e5001e1c00df5873e3eee6f0232b7723a60b7ef438b1e23f1341df0e" + "sha256:05c5139d620e8d02f7ce0921796d55736fa19fa15e2ec00a388db2eb1ae1e9a1", + "sha256:085232718f28bddd265da480874c37db5c7354cb08f23f4a68a8639b16276a89", + "sha256:18a4a14015eee30d9cd514db8cdefbf594b1d5c234762d27abe512d62a333bc3", + "sha256:205a73adb29c37e42475645519e612e843a985475da993d10b4d5daa6afec36a", + "sha256:3c001d3feec7f3140f1fb78dfc52ca28122db8240826882d175a208a89d2731b", + "sha256:437f30e300725e1d1b3744dbfbc66a523a4744792b58f3dbe1e9140c8f4dfba5", + "sha256:5366fcd6eae4243add3c8c92142045850f1db8e464bcf0b75313e1596b2e3671", + "sha256:54c64e86a087841869901fd34462bb6bec01cd4652800fdf5d92fe7b0596c82f", + "sha256:6763729fcfee2a08e069aa1982c9a8c1abf55b9cdf2fb9640eda1d85bdece19a", + "sha256:68813b720b88e4951e84399b9a8a7b532d45a07a96ea8f539636242f862e32e0", + "sha256:7f41b97d84ac66bdf13cb4d9f4dad3e159525ba1e3f421e670c787ce536eb70a", + "sha256:831b92fe63ce18dd628f71104da7e60596658b75e2fa16b83aefa3eb10c115e2", + "sha256:881f3d85269500e0c7d72b140a6ebb5c14a089f8140fb9da7ce01f12a245858e", + "sha256:8852be06c0749fef0d9c58f532bbcb0570968c59e41cf56b90f5c92593c6e108", + "sha256:8b5bc61be7fc8565140b746288b370a4bfdb4edb9d680b66bb914e7690485db1", + "sha256:8d3282138f3a8646941089aae142684910ebe40776266448eab5f4bb609fc63f", + "sha256:9a78558b5ae848386edbb843c761e5fed5a8480be9af16274a5a78838529edeb", + "sha256:b42bbba9f5421865377c7960bd4f3dd881003b322a6bf46ed2302b89224d102b", + "sha256:c360cb76ad1ddbd5d2d3e730b42f2ff6e4be08ea6f4a6eefacca175d27467e8f", + "sha256:cdc3363c2911d7cfc6c9f55308c51c2841a7aecbf0bf5e791499d220ce89d880", + "sha256:e1f54736272830a1e895cedf7a4ee67737e31e966d380c82a81ef22515d043a3", + "sha256:e42c644a70d5c54f53a4b114dbd88b4eb83f42a9ca998f07bd5682f3f404efcc", + "sha256:f1bda4d144f5204e077ca4571453ebb2015e5748d5e0043386c92c2bbf7f52eb", + "sha256:f3ac2355217114a683f3f72a9c40a5890914a59c4a2df62e4083c66ff65c9cf9" ], "index": "pypi", - "version": "==4.5.2.54" + "version": "==4.5.3.56" }, "pycodestyle": { "hashes": [ @@ -113,11 +129,11 @@ }, "requests": { "hashes": [ - "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", - "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" + "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24", + "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 
- "version": "==2.25.1" + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", + "version": "==2.26.0" }, "requests-toolbelt": { "hashes": [ @@ -137,11 +153,11 @@ }, "urllib3": { "hashes": [ - "sha256:753a0374df26658f99d826cfe40394a686d05985786d946fbe4165b5148f5a7c", - "sha256:a7acd0977125325f516bda9735fa7142b909a8d01e8b2e4c8108d0984e6e0098" + "sha256:39fb8672126159acb139a7718dd10806104dec1e2f0f6c88aab05d17df10c8d4", + "sha256:f57b4c16c62fa2760b7e3d97c35b255512fb6b59a259730f36ba32ce9f8e342f" ], "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", - "version": "==1.26.5" + "version": "==1.26.6" } }, "develop": {} diff --git a/model_training/local_prediction/README.md b/model_training/local_prediction/README.md index 43e13b1..9f0ac24 100644 --- a/model_training/local_prediction/README.md +++ b/model_training/local_prediction/README.md @@ -11,6 +11,7 @@ Tested on ## Getting Started 1. Install `pipenv` if not done so already. + ```bash $ pip install --user pipenv ... @@ -18,6 +19,7 @@ Tested on Successfully installed appdirs-1.4.4 distlib-0.3.2 pipenv-2021.5.29 virtualenv-20.4.7 virtualenv-clone-0.5.4 ``` 1. Install dependencies, then launch the subshell. + ```bash $ pipenv install ... @@ -27,15 +29,16 @@ Tested on $ pipenv shell Launching subshell in virtual environment... ``` -1. Running the Python script now will give predictions on images in `.\test_images\` based on weights and config given by the repo, located at [`..\..\lambda_backend\predict_microservice\weights`](https://github.com/nandium/RouteMaker/tree/main/lambda_backend/predict_microservice/weights). Images will appear with the predicted bounding boxes, and the labels will be saved as `.txt` files saved in the `.\test_images\` folder. +1. Running the Python script now will give predictions on images in `./test_images/` based on weights and config given by the repo, located at [`../../lambda_backend/predict_microservice/weights`](https://github.com/nandium/RouteMaker/tree/main/lambda_backend/predict_microservice/weights). Images will appear with the predicted bounding boxes, and the labels will be saved as `.txt` files saved in the `./test_images/` folder. + ```bash $ python yolo_object_detection.py ``` -1. To obtain predictions on new images, simply place them in `.\test_images\` and run the script. -1. Run LabelImg and open the `.\test_images\` directory. You can now adjust the bounding boxes and subsequently save the corresponding `.txt` files. The images can then be used as training data for the model. +1. To obtain predictions on new images, simply place them in `./test_images/` and run the script. +1. Run LabelImg and open the `./test_images/` directory. You can now adjust the bounding boxes and subsequently save the corresponding `.txt` files. The images can then be used as training data for the model. 1. Some images may face an issue where a `ZeroDivisionError` occurs when saving using LabelImg. Be sure to check for this by saving after a single edit to avoid losing all your work. - ```bash - ... + + ```Python ZeroDivisionError: float division by zero ``` If this occurs, change the image format from: @@ -46,9 +49,20 @@ Tested on 1. Zip up all training data - images and labels (`.txt`) - into a zipfile `images.zip`. 1. Create a folder `/yolov4_tiny` in your Google drive, and save `images.zip` inside it. -1. Open up `../Train_yolov4_tiny.ipynb` in Google Colab, then click Runtime > Run All (Ctrl+F9). +1. 
Open up [`../Train_yolov4_tiny.ipynb`](https://github.com/nandium/RouteMaker/blob/main/model_training/Train_yolov4_tiny.ipynb) in Google Colab, then click Runtime > Run All (Ctrl+F9). 1. The model will be trained with the hyperparameters as set in the document. You can update them as you see fit, following guidance from the [darknet documentation](https://github.com/AlexeyAB/darknet#when-should-i-stop-training). Be sure to commit changes made if so. 1. The key performance indicator will be the mAP (higher better). + ```bash mean average precision (mAP@0.50) = 0.823387, or 82.34 % ``` + +## Training Locally + +1. Make sure requirements are set up as on the [darknet repo](https://github.com/AlexeyAB/darknet#requirements-for-windows-linux-and-macos). NVIDIA GPU is required. +1. Follow the steps in [`../Train_yolov4_tiny.ipynb`](https://github.com/nandium/RouteMaker/blob/main/model_training/Train_yolov4_tiny.ipynb) to configure environment variables and hyperparameters, and run the scripts. + +## Saving Configurations + +1. Once the _improved_ model has been trained, the relevant config and weight files should be committed to this repo. These are located at [`/lambda_backend/predict_microservice/weights`](https://github.com/nandium/RouteMaker/tree/main/lambda_backend/predict_microservice/weights). +1. The training/validation split used to train should also be saved. This is located in this folder in `./train_val_split/`. \ No newline at end of file diff --git a/model_training/local_prediction/base_inference.py b/model_training/local_prediction/base_inference.py new file mode 120000 index 0000000..e866e57 --- /dev/null +++ b/model_training/local_prediction/base_inference.py @@ -0,0 +1 @@ +../../lambda_backend/predict_microservice/base_inference.py \ No newline at end of file diff --git a/model_training/local_prediction/local_inference.py b/model_training/local_prediction/local_inference.py new file mode 100644 index 0000000..b041d97 --- /dev/null +++ b/model_training/local_prediction/local_inference.py @@ -0,0 +1,133 @@ +"""YOLO Object Detection - Local Inference + +This script allows the user to obtain the output bounding boxes +for trained models, and save them to .txt files, +and/or visualize them using opencv2. +""" + +from base_inference import BaseInference, CONFIDENCE_THRESHOLD, NMS_IOU_THRESHOLD + +import cv2 +import numpy as np +import glob +import random +import argparse + +import os + +DEFAULT_WEIGHTS = os.path.join(os.pardir, os.pardir, "lambda_backend", "predict_microservice", "weights", "yolov4-tiny-obj.weights") +DEFAULT_CONFIG = os.path.join(os.pardir, os.pardir, "lambda_backend", "predict_microservice", "weights", "yolov4-tiny-obj.cfg") +DEFAULT_IMAGES = os.path.join(os.curdir, "test_images") +# Can read classes from a file if more are ever added +DEFAULT_CLASSES = ["hold"] + +IMSHOW_FONT = cv2.FONT_HERSHEY_PLAIN + +class LocalInference(BaseInference): + """ + Inference class for local predictions + + ...
+ + Attributes + ---------- + images_path : str + Path to the test images folder + will_save : bool + To save predicted boxes for each image to .txt files + will_show : bool + To show predicted boxes for each image using opencv2 + + Methods + ------- + run() + Obtains predicted boxes for visualization and/or saving to file + """ + + def __init__(self, weight_path, config_path, classes, score_threshold, nms_thresh, images_path, will_save, will_show, is_random): + super().__init__(weight_path, config_path, classes, score_threshold, nms_thresh) + + self.images = glob.glob(os.path.join(images_path, "*.jpg")) + + self.will_save = will_save + self.will_show = will_show + self.is_random = is_random + + def run(self): + + self.colors = np.random.uniform(0, 255, size=(len(self.classes), 3)) + + if self.is_random: + random.shuffle(self.images) + + for img_path in self.images: + # Loading image + img = cv2.imread(img_path) + # img = cv2.resize(img, None, fx=0.6, fy=0.6) + + class_ids, box_dims, box_confidences, box_dims_norm, indexes = super().run(img) + if self.will_save: + self._save_labelfile(img_path, class_ids, box_dims_norm, indexes) + if self.will_show: + self._show(img, class_ids, box_dims, indexes) + cv2.destroyAllWindows() + + def _show(self, img, class_ids, box_dims, indexes): + for i in indexes: + x, y, w, h = box_dims[i] + label = str(self.classes[class_ids[i]]) + color = self.colors[class_ids[i]] + cv2.rectangle(img, (x, y), (x + w, y + h), color, 2) + cv2.putText(img, label, (x, y + 30), IMSHOW_FONT, 1, color, 2) + + cv2.imshow("Image", img) + key = cv2.waitKey(0) + + def _save_labelfile(self, img_path, class_ids, box_dims_norm, indexes): + # Get filename for labelfile + labelfile = os.path.splitext(img_path)[0] + with open(labelfile + ".txt", "w+") as f: + for i in indexes: + class_id = class_ids[i] + # Normalised format for yolo labeling + nx, ny, nw, nh = box_dims_norm[i] + f.write(f'{class_id} {nx} {ny} {nw} {nh}\n') + +def add_bool_arg(parser, name, default=True, msg=""): + group = parser.add_mutually_exclusive_group(required=False) + group.add_argument('--' + name, dest=name, action='store_true', help=msg) + group.add_argument('--no-' + name, dest=name, action='store_false') + parser.set_defaults(**{name:default}) + +def setup_parser(): + parser = argparse.ArgumentParser() + parser.add_argument("-w", "--weights", help="path to learned weights from model", default=DEFAULT_WEIGHTS) + parser.add_argument("-c", "--config", help="path to config of yolo", default=DEFAULT_CONFIG) + parser.add_argument("-i", "--images", help="path to test images", default=DEFAULT_IMAGES) + parser.add_argument("-s", "--score", help="score threshold", default=CONFIDENCE_THRESHOLD, type=float) + parser.add_argument("-n", "--nms", help="nms threshold", default=NMS_IOU_THRESHOLD, type=float) + add_bool_arg(parser, 'save', msg="save to labelfiles") + add_bool_arg(parser, 'show', msg="visualise using opencv2") + add_bool_arg(parser, 'random', msg="randomise image visualisation order") + return parser + +def main(): + # Parsing arguments + parser = setup_parser() + args = parser.parse_args() + + inference = LocalInference( + weight_path = args.weights, + config_path = args.config, + classes = DEFAULT_CLASSES, + score_threshold = args.score, + nms_thresh = args.nms, + images_path = args.images, + will_save = args.save, + will_show = args.show, + is_random = args.random + ) + inference.run() + +if __name__ == "__main__": + main() diff --git a/model_training/local_prediction/yolo_object_detection.py 
b/model_training/local_prediction/yolo_object_detection.py deleted file mode 100644 index bf47514..0000000 --- a/model_training/local_prediction/yolo_object_detection.py +++ /dev/null @@ -1,108 +0,0 @@ -import cv2 -import numpy as np -import glob -import random -import argparse - -from os.path import join -import os - -def normalise(x, y, w, h, W, H): - x += w/2 - x /= W - y += h/2 - y /= H - w /= W - h /= H - return x, y, w, h - -DEF_WEIGHTS = join(os.pardir, os.pardir, "lambda_backend", "predict_microservice", "weights", "yolov4-tiny-obj.weights") -DEF_CONFIG = join(os.pardir, os.pardir, "lambda_backend", "predict_microservice", "weights", "yolov4-tiny-obj.cfg") - -# Parsing arguments -parser = argparse.ArgumentParser() -parser.add_argument("-w", "--weights", help="learned weights from model", default=DEF_WEIGHTS) -parser.add_argument("-c", "--config", help="config of yolo", default=DEF_CONFIG) -args = parser.parse_args() - -# Load Yolo -net = cv2.dnn.readNet( - args.weights, - args.config -) - -# Name custom object -# Refer to colab notebook for index to class mappings -classes = ["hold"] - -# Images path -images_path = glob.glob(join(os.curdir, "test_images", "*.jpg")) - -layer_names = net.getLayerNames() -output_layers = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()] -colors = np.random.uniform(0, 255, size=(len(classes), 3)) - -# Insert here the path of your images -random.shuffle(images_path) -# loop through all the images -for img_path in images_path: - # Loading image - img = cv2.imread(img_path) - # img = cv2.resize(img, None, fx=0.6, fy=0.6) - height, width, channels = img.shape - # Get filename for labelfile - labelfile = os.path.splitext(img_path)[0] - - # Detecting objects - blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False) - - net.setInput(blob) - outs = net.forward(output_layers) - - # Showing informations on the screen - class_ids = [] - confidences = [] - boxes = [] - - for out in outs: - for detection in out: - - scores = detection[5:] - class_id = np.argmax(scores) - confidence = scores[class_id] - if confidence > 0.3: - # Object detected - center_x = int(detection[0] * width) - center_y = int(detection[1] * height) - w = int(detection[2] * width) - h = int(detection[3] * height) - - # Rectangle coordinates - x = int(center_x - w / 2) - y = int(center_y - h / 2) - - boxes.append([x, y, w, h]) - confidences.append(float(confidence)) - class_ids.append(class_id) - - indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4) - font = cv2.FONT_HERSHEY_PLAIN - f = open(labelfile + ".txt", "w+") - for i in indexes: - i = int(i) - x, y, w, h = boxes[i] - label = str(classes[class_ids[i]]) - color = colors[class_ids[i]] - cv2.rectangle(img, (x, y), (x + w, y + h), color, 2) - cv2.putText(img, label, (x, y + 30), font, 1, color, 2) - - # Normalise for yolo labeling format - x, y, w, h = normalise(x, y, w, h, width, height) - class_id = class_ids[i] - f.write(f'{class_id} {x} {y} {w} {h}\n') - f.close() - - cv2.imshow("Image", img) - key = cv2.waitKey(0) - -cv2.destroyAllWindows()
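As a quick usage sketch for the new local prediction entry point (the flags mirror the `argparse` setup in `local_inference.py` above; the paths and threshold values are illustrative only):

```bash
# From model_training/local_prediction/: write YOLO-format .txt label files for images
# in ./test_images, skipping the OpenCV preview windows
python local_inference.py --no-show

# Preview predictions only (no label files written), in a fixed order, with a lower score threshold
python local_inference.py --score 0.25 --no-save --no-random

# Point at other weights, config, or image folders (illustrative paths)
python local_inference.py -w path/to/model.weights -c path/to/model.cfg -i path/to/images
```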