Commit

Refactor
eliainnocenti committed Jul 12, 2024
1 parent 44168b9 commit ad37c86
Showing 6 changed files with 68 additions and 10 deletions.
60 changes: 59 additions & 1 deletion README.md
@@ -1 +1,59 @@
# Oxford5k-Paris6k-ObjectDetection

This project aims to create an object detection model for monument recognition using the Oxford5k and Paris6k datasets. The model is built using MediaPipe Model Maker for transfer learning, starting from a pre-trained model.

## Project Overview

The main objective of this project is to adapt the Oxford5k and Paris6k datasets, originally designed for image retrieval, to object detection tasks. This involves converting the annotations from their original format (stored in `.pkl` files) to standard object detection formats such as Pascal VOC and COCO.
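
As a rough illustration of this conversion, here is a minimal sketch that assumes the revisited datasets' `gnd_*.pkl` layout (an `imlist`/`qimlist` plus a `gnd` list whose entries carry a query bounding box `bbx`); the class label and image size used below are placeholders, not values taken from this repository:

```python
import pickle
import xml.etree.ElementTree as ET

def load_gnd(pkl_path):
    # The revisited Oxford/Paris ground truth is a pickled dict with
    # 'imlist', 'qimlist', and 'gnd' (one entry per query, with a 'bbx' box).
    with open(pkl_path, 'rb') as f:
        return pickle.load(f)

def bbox_to_voc_xml(filename, bbox, label, width, height):
    # Build a minimal Pascal VOC annotation with a single object.
    ann = ET.Element('annotation')
    ET.SubElement(ann, 'filename').text = filename
    size = ET.SubElement(ann, 'size')
    for tag, val in (('width', width), ('height', height), ('depth', 3)):
        ET.SubElement(size, tag).text = str(val)
    obj = ET.SubElement(ann, 'object')
    ET.SubElement(obj, 'name').text = label
    ET.SubElement(obj, 'difficult').text = '0'
    box = ET.SubElement(obj, 'bndbox')
    for tag, val in zip(('xmin', 'ymin', 'xmax', 'ymax'), bbox):
        ET.SubElement(box, tag).text = str(int(round(val)))
    return ET.ElementTree(ann)

gnd = load_gnd('gnd_rparis6k.pkl')
for i, qname in enumerate(gnd['qimlist']):
    bbox = gnd['gnd'][i]['bbx']  # [xmin, ymin, xmax, ymax] of the query region
    tree = bbox_to_voc_xml(f'{qname}.jpg', bbox, 'monument', 1024, 768)  # placeholder label/size
    tree.write(f'{qname}.xml')
```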

## Key Features

- Adaptation of Oxford5k and Paris6k datasets for object detection
- Custom scripts for data preprocessing and annotation conversion
- Transfer learning using MediaPipe Model Maker
- Support for both Pascal VOC and COCO annotation formats

## Getting Started

1. Clone the repository
2. Install the required dependencies
3. Run the data preparation scripts in the `scripts/` directory
4. Use the Jupyter notebooks in the `training/` directory for model training

## Data Preparation

The `scripts/` directory contains the Python scripts for data preparation; a short usage sketch follows the list:

- `get_data.py`: downloads the original datasets
- `create_annotations.py`: converts original annotations to Pascal VOC and COCO formats
- `prepare_dataset.py`: prepares the dataset for training
- `check_annotations.py`: verifies the correctness of the converted annotations
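
A rough end-to-end usage sketch is shown below. It assumes the scripts are invoked from inside `scripts/` and that `get_data.py` exposes a `main()` entry point; the `create_annotations.main` and `prepare_dataset.prepare_dataset` signatures mirror the ones visible in this commit's diff:

```python
# Sketch of the preparation pipeline, run from inside scripts/.
import get_data
import create_annotations
import prepare_dataset

get_data.main()  # assumed entry point: downloads the original datasets

# Signatures as they appear in this commit:
create_annotations.main(datasets=['rparis6k'], type='xml', levels=1)
prepare_dataset.prepare_dataset('rparis6k', type='xml', levels=1)
```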

## Training

The `training/` directory contains Jupyter notebooks for model training; a minimal training sketch follows the list:

- `mediapipe_object_detector_model_customization_template.ipynb`: template for MediaPipe Model Maker
- `mp_training_paris6k.ipynb`: specific training notebook for the Paris6k dataset
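
For orientation, a minimal Model Maker run looks roughly like the sketch below; the dataset paths and the chosen backbone are assumptions, not the notebook's exact settings:

```python
from mediapipe_model_maker import object_detector

# Load the converted annotations (paths are placeholders).
train_data = object_detector.Dataset.from_pascal_voc_folder('data/rparis6k/train')
val_data = object_detector.Dataset.from_pascal_voc_folder('data/rparis6k/validation')

options = object_detector.ObjectDetectorOptions(
    supported_model=object_detector.SupportedModels.MOBILENET_MULTI_AVG,  # assumed backbone
    hparams=object_detector.HParams(export_dir='exported_model'),
)

# Transfer-learn from the pre-trained backbone, then export to TFLite.
model = object_detector.ObjectDetector.create(
    train_data=train_data, validation_data=val_data, options=options)
loss, coco_metrics = model.evaluate(val_data, batch_size=4)
model.export_model()  # writes model.tflite into export_dir
```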

## Inference

Use the scripts in the `inference/` directory to run object detection on new images; a minimal sketch of the TFLite inference loop is shown below.
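
This is only a sketch: the model path matches `inference/test.py` in this commit, while the input preprocessing and the output tensor order are assumptions to verify against the actual exported model:

```python
import numpy as np
import tensorflow as tf
from PIL import Image

# Load the exported TFLite detector (path as in inference/test.py).
interpreter = tf.lite.Interpreter(model_path='../models/model.tflite')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Resize the image to the model's expected input shape [1, H, W, 3].
_, height, width, _ = input_details[0]['shape']
image = Image.open('new_image.jpg').convert('RGB').resize((width, height))
input_data = np.expand_dims(np.asarray(image, dtype=np.uint8), axis=0)
if input_details[0]['dtype'] == np.float32:  # float models expect normalized input
    input_data = (input_data.astype(np.float32) - 127.5) / 127.5

interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

# Typical detection outputs are boxes/scores/classes; the order may differ per model.
boxes = interpreter.get_tensor(output_details[0]['index'])
scores = interpreter.get_tensor(output_details[1]['index'])
print(boxes.shape, scores.shape)
```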

## License

This project is licensed under the [LICENSE NAME] - see the [LICENSE.txt](LICENSE.txt) file for details.

## Acknowledgments

- Original Oxford5k and Paris6k dataset creators
- MediaPipe team for their Model Maker tool

## References

- [Oxford5k Dataset](http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/)
- [Paris6k Dataset](http://www.robots.ox.ac.uk/~vgg/data/parisbuildings/)

## Authors

- [Elia Innocenti](https://github.com/eliainnocenti)
4 changes: 2 additions & 2 deletions inference/test.py
@@ -11,7 +11,7 @@
base_path = "../../../Data/"

# Load the TFLite model
-interpreter = tf.lite.Interpreter(model_path='../models/model2.tflite')
+interpreter = tf.lite.Interpreter(model_path='../models/model.tflite')
interpreter.allocate_tensors()

# Get input and output details
@@ -145,7 +145,7 @@ def main():
:return:
"""

-#train_images()
+train_images()
#validation_images()
#test_images()

2 changes: 1 addition & 1 deletion scripts/README.md
@@ -15,7 +15,7 @@ python get_data.py
python prepare_dataset.py
```

-<!-- TODO: implement scheme for the data processing -->
+<!-- TODO: implement the scheme for the data processing -->

```mermaid
graph TD;
5 changes: 3 additions & 2 deletions scripts/create_annotations.py
@@ -36,6 +36,7 @@
Creates a list of classes from the dataset.
10. get_id_by_name(categories, name):
Gets the ID of a category by its name.
11. process_data(folder_name, data, image_folder, output_folder, monuments_list, type='xml', levels=2):
Processes the dataset to create annotations in XML or JSON format.
@@ -503,7 +504,7 @@ def _process_data_json(data, image_folder, output_folder, monuments_list, levels
_objects[monument] = []
_objects[monument].append(Object(f"{monument}", "Unspecified", "0", str(level), bbox))

-# merge bbox for the same monument # FIXME: choose another way to merge
+# merge bbox for the same monument # FIXME: choose another way to merge (?)
for monument in _objects.keys():
xmin_avg, ymin_avg, xmax_avg, ymax_avg = 0, 0, 0, 0
difficulty = 0
@@ -585,7 +586,7 @@ def process_data(folder_name, data, image_folder, output_folder, monuments_list,
print("Annotations created successfully")


-def main(datasets=None, type='xml', levels=2):
+def main(datasets=None, type='xml', levels=1):
"""
Main function to create annotations for the specified datasets.
5 changes: 2 additions & 3 deletions scripts/prepare_dataset.py
@@ -90,7 +90,6 @@ def split_train_val_test(dataset_name, train_percent=0.7, val_percent=0.2, test_
with open(labels_file, 'r') as file:
labels_json = json.load(file)
# insert in images only the images that have at least one annotation
-# TODO: check
images = [image['file_name'] for image in labels_json['images']]
for image in labels_json['images']:
if image['id'] not in [annotation['image_id'] for annotation in labels_json['annotations']]:
@@ -190,7 +189,7 @@ def split_annotations(dataset_name, type='json'):
return


-def prepare_dataset(dataset_name, type='xml', levels=3):
+def prepare_dataset(dataset_name, type='xml', levels=1):
"""
Prepares the dataset by creating annotations and splitting it into training, validation, and test sets.
@@ -239,7 +238,7 @@ def main():
:return: None
"""
datasets = [
-#'roxford5k',
+#'roxford5k', # TODO: uncomment
'rparis6k'
]

2 changes: 1 addition & 1 deletion training/mp_training_paris6k.ipynb

Large diffs are not rendered by default.
