From 8c09a5139728b85a4f16c746232d792d3bf3d15c Mon Sep 17 00:00:00 2001 From: Elia Innocenti Date: Wed, 17 Jul 2024 09:36:34 +0200 Subject: [PATCH] Refactor --- .gitignore | 4 - inference/test.py | 35 +++- inference/tmp.py | 171 ------------------ ...odel_fp16.tflite => model_fp16_p6k.tflite} | Bin models/{model.tflite => model_p6k.tflite} | Bin scripts/augment_dataset.py | 46 +++++ scripts/check_annotations.py | 1 + scripts/create_annotations.py | 1 + scripts/get_data.py | 1 + scripts/plot_bounding_boxes_p6k.py | 1 + scripts/prepare_dataset.py | 1 + 11 files changed, 78 insertions(+), 183 deletions(-) delete mode 100644 inference/tmp.py rename models/{model_fp16.tflite => model_fp16_p6k.tflite} (100%) rename models/{model.tflite => model_p6k.tflite} (100%) diff --git a/.gitignore b/.gitignore index aa1a1ac..256ece0 100644 --- a/.gitignore +++ b/.gitignore @@ -9,9 +9,6 @@ __pycache__/ **/__pycache__/ -# models -#models/ - # git # TODO: unignore when ready .gitattributes @@ -22,4 +19,3 @@ CONTRIBUTING.md # tmp files scripts/tmp.py -inference/tmp.py \ No newline at end of file diff --git a/inference/test.py b/inference/test.py index 8fa7113..a6ce651 100644 --- a/inference/test.py +++ b/inference/test.py @@ -1,3 +1,7 @@ +""" +TODO: add file and function descriptions +""" + import numpy as np import tensorflow as tf import cv2 @@ -5,6 +9,8 @@ from scripts.plot_bounding_boxes_p6k import plot_bounding_box as plot_bounding_box +# FIXME: fix bounding boxes visualization +# TODO: study parameters and results base_path = "../../../Data/" rparis6k_path = os.path.join(base_path, 'datasets', 'rparis6k') @@ -12,7 +18,7 @@ download_path = "/Users/eliainnocenti/Downloads" # Load the TFLite model -interpreter = tf.lite.Interpreter(model_path="../models/model.tflite") +interpreter = tf.lite.Interpreter(model_path="../models/model_p6k.tflite") interpreter.allocate_tensors() # Get input and output details @@ -21,6 +27,12 @@ def prepare_image(image_path, input_shape): + """ + + :param image_path: + :param input_shape: + :return: + """ img = cv2.imread(image_path) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = cv2.resize(img, (input_shape[1], input_shape[2])) @@ -30,6 +42,11 @@ def prepare_image(image_path, input_shape): def detect_objects(image_path): + """ + + :param image_path: + :return: + """ input_shape = input_details[0]['shape'] img = prepare_image(image_path, input_shape) @@ -43,7 +60,7 @@ def detect_objects(image_path): return boxes, class_scores -# Example usage with test set images +# Example usage with test set images # monument # confidence #image_path = os.path.join(rparis6k_path, 'images', 'paris_defense_000041.jpg') # defense 1 # 0.6 #image_path = os.path.join(rparis6k_path, 'images', 'paris_eiffel_000170.jpg') # eiffel 2 # 0.3 #image_path = os.path.join(rparis6k_path, 'images', 'paris_invalides_000090.jpg') # invalides 3 # 0.2 @@ -56,15 +73,17 @@ def detect_objects(image_path): #image_path = os.path.join(rparis6k_path, 'images', 'paris_sacrecoeur_000279.jpg') # sacrecoeur 10 # 0.3 - 0.4 #image_path = os.path.join(rparis6k_path, 'images', 'paris_triomphe_000348.jpg') # triomphe 11 # 0.4 - 0.5 -# Examples usage with personal images -image_path = os.path.join(download_path, 'eiffel.jpg') -#image_path = os.path.join(download_path, 'louvre.jpg') -#image_path = os.path.join(download_path, 'pantheon.jpg') +# Examples usage with personal images # monument # confidence +#image_path = os.path.join(download_path, 'eiffel.jpg') # eiffel # 0. +#image_path = os.path.join(download_path, 'louvre.jpg') # louvre # 0. +#image_path = os.path.join(download_path, 'pantheon.jpg') # pantheon # 0. +#image_path = os.path.join(download_path, 'sacrecoeur.jpg') # sacrecoeur # 0.5 +#image_path = os.path.join(download_path, 'moulinrouge.jpg') # moulinrouge # 0.2 boxes, class_scores = detect_objects(image_path) # Post-processing -confidence_threshold = 0.2 +confidence_threshold = 0.2 # 0.5 max_boxes = 10 # Get the class with highest score for each box @@ -88,7 +107,7 @@ def detect_objects(image_path): # Print results and draw boxes for i in top_indices: # Convert normalized coordinates to pixel coordinates - # FIXME: which format do I have? [xmin, ymin, xmax, ymax] or [xmin, ymin, width, height]? + # FIXME: which format do I have in output? [xmin, ymin, xmax, ymax] or [xmin, ymin, width, height]? ''' xmin, ymin, xmax, ymax = filtered_boxes[i] diff --git a/inference/tmp.py b/inference/tmp.py deleted file mode 100644 index e65e739..0000000 --- a/inference/tmp.py +++ /dev/null @@ -1,171 +0,0 @@ -""" -TODO: add file and function descriptions -""" - -import os.path -import random - -import tensorflow as tf -import matplotlib.pyplot as plt -import matplotlib.patches as patches -import numpy as np - -base_path = "../../../Data/" - -# Load the TFLite model -interpreter = tf.lite.Interpreter(model_path='../models/model.tflite') -interpreter.allocate_tensors() - -# Get input and output details -input_details = interpreter.get_input_details() -output_details = interpreter.get_output_details() - - -def load_image_into_numpy_array(path): - """ - - :param path: - :return: - """ - image = tf.io.read_file(path) - image = tf.image.decode_jpeg(image, channels=3) - input_shape = input_details[0]['shape'] - image = tf.image.resize(image, (input_shape[1], input_shape[2])) - image = tf.cast(image, tf.float32) / 255.0 # Normalize pixel values - return tf.expand_dims(image, 0).numpy() # Add batch dimension - - -def run_inference(image_np): - """ - - :param image_np: - :return: - """ - interpreter.set_tensor(input_details[0]['index'], image_np) - interpreter.invoke() - - # Get the results - boxes = interpreter.get_tensor(output_details[0]['index']) - classes_scores = interpreter.get_tensor(output_details[1]['index']) - - return boxes, classes_scores - - -def visualize_detections(image_path, boxes, classes_scores, threshold=0.5): - """ - - :param image_path: - :param boxes: - :param classes_scores: - :param threshold: - :return: - """ - image = plt.imread(image_path) - fig, ax = plt.subplots(1) - ax.imshow(image) - - height, width, _ = image.shape - for box, class_score in zip(boxes[0], classes_scores[0]): - score = np.max(class_score) - - '''debug''' - print(f"image_path: {image_path}") - print(f"box: {box}") - print(f"class_score: {class_score}") - print(f"score: {score}") - - if score > threshold: - ymin, xmin, ymax, xmax = box - rect = patches.Rectangle((xmin*width, ymin*height), (xmax-xmin)*width, (ymax-ymin)*height, - linewidth=1, edgecolor='r', facecolor='none') - ax.add_patch(rect) - class_id = np.argmax(class_score) - plt.text(xmin*width, ymin*height, f'{class_id}: {score:.2f}', color='red') - - plt.show() - - -def train_images(): - """ - - :return: - """ - train_path = '../data/rparis6k/sets/train/train.txt' - - if not os.path.exists(train_path): - print(f"Error: Train file not found: {train_path}") - return - - with open(train_path, 'r') as f: - train_images = [line.strip() for line in f] - - random.shuffle(train_images) # TODO: check - - for image_name in train_images[:5]: - image_path = os.path.join(base_path, 'datasets', 'rparis6k', 'images', image_name) - image_np = load_image_into_numpy_array(image_path) - boxes, classes_scores = run_inference(image_np) - - '''debug''' - print(f"Image: {image_name}") - print(boxes) - print(classes_scores) - - visualize_detections(image_path, boxes, classes_scores) - - -def validation_images(): - """ - - :return: - """ - validation_path = '../data/rparis6k/sets/validation/val.txt' - - if not os.path.exists(validation_path): - print(f"Error: Validation file not found: {validation_path}") - return - - with open(validation_path, 'r') as f: - validation_images = [line.strip() for line in f] - - for image_name in validation_images[:5]: - image_path = os.path.join(base_path, 'datasets', 'rparis6k', 'images', image_name) - image_np = load_image_into_numpy_array(image_path) - boxes, classes_scores = run_inference(image_np) - visualize_detections(image_path, boxes, classes_scores) - - -def test_images(): - """ - - :return: - """ - test_path = '../data/rparis6k/sets/test/test.txt' - - if not os.path.exists(test_path): - print(f"Error: Test file not found: {test_path}") - return - - with open(test_path, 'r') as f: - test_images = [line.strip() for line in f] - - for image_name in test_images[:1]: # TODO: check - image_path = os.path.join(base_path, 'datasets', 'rparis6k', 'images', image_name) - image_np = load_image_into_numpy_array(image_path) - boxes, classes_scores = run_inference(image_np) - visualize_detections(image_path, boxes, classes_scores) - - -def main(): - """ - - :return: - """ - - train_images() - #validation_images() - #test_images() - - -if __name__ == '__main__': - main() diff --git a/models/model_fp16.tflite b/models/model_fp16_p6k.tflite similarity index 100% rename from models/model_fp16.tflite rename to models/model_fp16_p6k.tflite diff --git a/models/model.tflite b/models/model_p6k.tflite similarity index 100% rename from models/model.tflite rename to models/model_p6k.tflite diff --git a/scripts/augment_dataset.py b/scripts/augment_dataset.py index 737510a..3869aab 100644 --- a/scripts/augment_dataset.py +++ b/scripts/augment_dataset.py @@ -1,3 +1,7 @@ +""" +TODO: add file and function descriptions +""" + import albumentations as A import cv2 import json @@ -7,6 +11,11 @@ def get_transform(set='train'): + """ + + :param set: + :return: + """ bboxes_params = A.BboxParams(format='coco', min_visibility=0.3, label_fields=['class_labels']) # TODO: check min_visibility if set == 'train': @@ -40,6 +49,13 @@ def get_transform(set='train'): def clip_bbox(bbox, image_width, image_height): + """ + + :param bbox: + :param image_width: + :param image_height: + :return: + """ x_min, y_min, width, height = bbox x_min = max(0, min(x_min, image_width - 1)) # TODO: check -1 @@ -51,12 +67,29 @@ def clip_bbox(bbox, image_width, image_height): def validate_bbox(bbox, image_width, image_height): + """ + + :param bbox: + :param image_width: + :param image_height: + :return: + """ x, y, w, h = bbox return 0 <= x < image_width and 0 <= y < image_height and x + w <= image_width and y + h <= image_height def apply_augmentation(image_path, bboxes, class_labels, output_path, output_filename, transform): + """ + + :param image_path: + :param bboxes: + :param class_labels: + :param output_path: + :param output_filename: + :param transform: + :return: + """ # Read the image image = cv2.imread(image_path) image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) @@ -77,6 +110,16 @@ def apply_augmentation(image_path, bboxes, class_labels, output_path, output_fil def augment_dataset(input_path, output_path, transform, n_images, n_annotations, num_augmentations=5): + """ + + :param input_path: + :param output_path: + :param transform: + :param n_images: + :param n_annotations: + :param num_augmentations: + :return: + """ # Load the original COCO JSON file with open(os.path.join(input_path, 'labels.json'), 'r') as f: coco_data = json.load(f) @@ -163,7 +206,10 @@ def augment_dataset(input_path, output_path, transform, n_images, n_annotations, def main(): + """ + :return: + """ train_dataset_path = 'path/to/dataset/train/' validation_dataset_path = 'path/to/dataset/validation/' test_dataset_path = 'path/to/dataset/test/' diff --git a/scripts/check_annotations.py b/scripts/check_annotations.py index 3b2ec40..8ce2909 100644 --- a/scripts/check_annotations.py +++ b/scripts/check_annotations.py @@ -1,4 +1,5 @@ """ +TODO: update file and function descriptions This script checks the consistency of image annotations for datasets such as rparis6k, ensuring that each image has corresponding objects correctly annotated. diff --git a/scripts/create_annotations.py b/scripts/create_annotations.py index 3754cec..7e104e5 100644 --- a/scripts/create_annotations.py +++ b/scripts/create_annotations.py @@ -1,4 +1,5 @@ """ +TODO: add file and function descriptions This script creates annotations for the Oxford 5k and Paris 6k datasets in XML or JSON format. Classes: diff --git a/scripts/get_data.py b/scripts/get_data.py index 5039b58..72a2a30 100644 --- a/scripts/get_data.py +++ b/scripts/get_data.py @@ -1,4 +1,5 @@ """ +TODO: add file and function descriptions This script downloads image datasets required for the Revisited Oxford and Paris retrieval benchmarks. License: diff --git a/scripts/plot_bounding_boxes_p6k.py b/scripts/plot_bounding_boxes_p6k.py index d1d5d10..7aa40a7 100644 --- a/scripts/plot_bounding_boxes_p6k.py +++ b/scripts/plot_bounding_boxes_p6k.py @@ -1,4 +1,5 @@ """ +TODO: add file and function descriptions This script provides functions to visualize bounding box annotations on images for the Revisited Paris (rparis6k) dataset. License: diff --git a/scripts/prepare_dataset.py b/scripts/prepare_dataset.py index b8ef57b..e5a7762 100644 --- a/scripts/prepare_dataset.py +++ b/scripts/prepare_dataset.py @@ -1,4 +1,5 @@ """ +TODO: add file and function descriptions This script handles the loading, splitting, and preparation of image datasets for training, validation, and testing. Functions: