From 515ac64597c3b921876f812f7c4b1567405f7a23 Mon Sep 17 00:00:00 2001
From: Eslam Ahmed <eslam.ahemd@avidbeam.com>
Date: Sun, 11 Feb 2024 12:30:14 +0000
Subject: [PATCH] feat: support resnet 10 blob to roi converter and added proc
 files for resnet 10 detector and resnet 18 classifiers

---
 .../public/car_color_proc_file.json           |  37 +++++
 .../model_proc/public/car_make_proc_file.json |  44 +++++
 .../model_proc/public/car_type_proc_file.json |  31 ++++
 .../gstreamer/model_proc/public/resnet10.json |  21 +++
 .../to_roi/blob_to_roi_converter.cpp          |   3 +
 .../converters/to_roi/resnet_10.cpp           | 151 ++++++++++++++++++
 .../converters/to_roi/resnet_10.h             |  56 +++++++
 7 files changed, 343 insertions(+)
 create mode 100644 samples/gstreamer/model_proc/public/car_color_proc_file.json
 create mode 100644 samples/gstreamer/model_proc/public/car_make_proc_file.json
 create mode 100644 samples/gstreamer/model_proc/public/car_type_proc_file.json
 create mode 100644 samples/gstreamer/model_proc/public/resnet10.json
 create mode 100644 src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.cpp
 create mode 100644 src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.h

diff --git a/samples/gstreamer/model_proc/public/car_color_proc_file.json b/samples/gstreamer/model_proc/public/car_color_proc_file.json
new file mode 100644
index 00000000..735b7905
--- /dev/null
+++ b/samples/gstreamer/model_proc/public/car_color_proc_file.json
@@ -0,0 +1,37 @@
+{
+    "json_schema_version": "2.2.0",
+    "input_preproc": [
+        {
+            "precision": "FP32",
+            "params": {
+                "color_space": "BGR",
+                "resize": "aspect-ratio",
+                "mean": [
+                103.939,
+                116.779,
+                123.68
+                ]
+            }
+        }
+    ],
+    "output_postproc": [
+        {
+            "converter": "label",
+            "method": "max",
+            "labels": [
+                "black",
+                "blue",
+                "brown",
+                "gold",
+                "green",
+                "grey",
+                "maroon",
+                "orange",   
+                "red",
+                "silver", 
+                "white",
+                "yellow"
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/samples/gstreamer/model_proc/public/car_make_proc_file.json b/samples/gstreamer/model_proc/public/car_make_proc_file.json
new file mode 100644
index 00000000..f22f1920
--- /dev/null
+++ b/samples/gstreamer/model_proc/public/car_make_proc_file.json
@@ -0,0 +1,44 @@
+{
+    "json_schema_version": "2.2.0",
+    "input_preproc": [
+        {
+            "precision": "FP32",
+            "params": {
+                "color_space": "BGR",
+                "resize": "aspect-ratio",
+                "mean": [
+                103.939,
+                116.779,
+                123.68
+                ]
+            }
+        }
+    ],
+    "output_postproc": [
+        {
+            "converter": "label",
+            "method": "max",
+            "labels": [
+                "acura",
+                "audi",
+                "bmw",
+                "chevrolet",
+                "chrysler",
+                "dodge",
+                "ford",
+                "gmc",
+                "honda",
+                "hyundai",
+                "infiniti",
+                "jeep",
+                "kia",
+                "lexus",
+                "mercedes",
+                "nissan",
+                "subaru",
+                "toyota",
+                "volkswagen"
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/samples/gstreamer/model_proc/public/car_type_proc_file.json b/samples/gstreamer/model_proc/public/car_type_proc_file.json
new file mode 100644
index 00000000..64a49e93
--- /dev/null
+++ b/samples/gstreamer/model_proc/public/car_type_proc_file.json
@@ -0,0 +1,31 @@
+{
+    "json_schema_version": "2.2.0",
+    "input_preproc": [
+        {
+            "precision": "FP32",
+            "params": {
+                "color_space": "BGR",
+                "resize": "aspect-ratio",
+                "mean": [
+                103.939,
+                116.779,
+                123.68
+                ]
+            }
+        }
+    ],
+    "output_postproc": [
+        {
+            "converter": "label",
+            "method": "max",
+            "labels": [
+                "coupe",
+                "largevehicle",
+                "sedan",
+                "suv",
+                "truck",
+                "van"
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/samples/gstreamer/model_proc/public/resnet10.json b/samples/gstreamer/model_proc/public/resnet10.json
new file mode 100644
index 00000000..29b17588
--- /dev/null
+++ b/samples/gstreamer/model_proc/public/resnet10.json
@@ -0,0 +1,21 @@
+{
+  "json_schema_version": "2.2.0",
+  "input_preproc": [
+    {
+      "params": {
+        "resize": "aspect-ratio"
+      }
+    }
+  ],
+  "output_postproc": [
+    {
+      "converter": "resnet_10",
+      "labels": [
+        "Car",
+        "Bicycle",
+        "Person",
+        "Roadsign"
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/blob_to_roi_converter.cpp b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/blob_to_roi_converter.cpp
index 6e21a92d..a6f22151 100644
--- a/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/blob_to_roi_converter.cpp
+++ b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/blob_to_roi_converter.cpp
@@ -14,6 +14,7 @@
 #include "yolo_v2.h"
 #include "yolo_v3.h"
 #include "yolo_v5.h"
+#include "resnet_10.h"
 
 #include "inference_backend/logger.h"
 
@@ -40,6 +41,8 @@ BlobToMetaConverter::Ptr BlobToROIConverter::create(BlobToMetaConverter::Initial
 
     if (converter_name == DetectionOutputConverter::getName())
         return BlobToMetaConverter::Ptr(new DetectionOutputConverter(std::move(initializer), confidence_threshold));
+    else if (converter_name == Resnet10Converter::getName())
+        return BlobToMetaConverter::Ptr(new Resnet10Converter(std::move(initializer), confidence_threshold));
     else if (converter_name == BoxesLabelsConverter::getName())
         return BlobToMetaConverter::Ptr(new BoxesLabelsConverter(std::move(initializer), confidence_threshold));
     else if (converter_name == BoxesScoresConverter::getName())
diff --git a/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.cpp b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.cpp
new file mode 100644
index 00000000..6a2a0280
--- /dev/null
+++ b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.cpp
@@ -0,0 +1,151 @@
+/*******************************************************************************
+ * Copyright (C) 2021-2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ ******************************************************************************/
+
+#include "resnet_10.h"
+
+#include "inference_backend/image_inference.h"
+#include "inference_backend/logger.h"
+#include "safe_arithmetic.hpp"
+
+#include <gst/gst.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+#include <iostream>
+
+
+// Helper macros; every argument is parenthesized so expression arguments expand with the intended precedence.
+#define CLIP(a,min,max) (MAX(MIN((a), (max)), (min)))
+#define DIVIDE_AND_ROUND_UP(a, b) (((a) + (b) - 1) / (b))
+constexpr int kNUM_CONFIGURED_CLASSES = 4; // upper bound on the number of classes parsed from the coverage layer
+
+
+using namespace post_processing;
+
+// Decodes one frame of detector output: each grid cell whose coverage value reaches confidence_threshold
+// becomes a DetectedObject (box in model-input pixels) appended to `objects`.
+// Throws std::runtime_error when the layer grids are unusable or a class id reaches kNUM_CONFIGURED_CLASSES.
+void Resnet10Converter::parseOutputBlob(const InferDimsCHW& covLayerDims, const InferDimsCHW& bboxLayerDims, const float *outputCovBuf, const float *outputBboxBuf,
+                                      int numClassesToParse, std::vector<DetectedObject> &objects) const {
+  const int gridW = covLayerDims.w;
+  const int gridH = covLayerDims.h;
+  const int gridSize = gridW * gridH;
+  // The bbox planes are indexed with the coverage grid below, so both layers must share one non-empty grid;
+  // rejecting a zero-sized grid also keeps the stride computation from dividing by zero.
+  if (gridSize <= 0 || bboxLayerDims.w != covLayerDims.w || bboxLayerDims.h != covLayerDims.h)
+    throw std::runtime_error("Resnet10 coverage and bbox layers must share the same non-empty grid size");
+
+  // Normalization constant: cell centers are divided by it and decoded box edges are multiplied by it.
+  const float bboxNormX = 35.0;
+  const float bboxNormY = 35.0;
+  const size_t input_width = getModelInputImageInfo().width;
+  const size_t input_height = getModelInputImageInfo().height;
+  const int strideX = DIVIDE_AND_ROUND_UP(input_width, bboxLayerDims.w);
+  const int strideY = DIVIDE_AND_ROUND_UP(input_height, bboxLayerDims.h);
+
+  // Normalized grid-cell centers, held in std::vector because variable-length arrays are not standard C++.
+  std::vector<float> gcCentersX(gridW);
+  std::vector<float> gcCentersY(gridH);
+  for (int i = 0; i < gridW; i++)
+    gcCentersX[i] = (float)(i * strideX + 0.5) / bboxNormX;
+  for (int i = 0; i < gridH; i++)
+    gcCentersY[i] = (float)(i * strideY + 0.5) / bboxNormY;
+
+  for (int c = 0; c < numClassesToParse; c++) {
+    // Validate the class id before deriving any pointer from it.
+    if(c >= kNUM_CONFIGURED_CLASSES) throw std::runtime_error("class id " + std::to_string(c) + " is out of bound");
+
+    // Each class owns four consecutive planes of the bbox buffer: x1, y1, x2, y2.
+    const float *outputX1 = outputBboxBuf + (c * 4 * bboxLayerDims.h * bboxLayerDims.w);
+    const float *outputY1 = outputX1 + gridSize;
+    const float *outputX2 = outputY1 + gridSize;
+    const float *outputY2 = outputX2 + gridSize;
+    const float *classCov = outputCovBuf + c * gridSize;
+
+    for (int row = 0; row < gridH; row++) {
+      for (int col = 0; col < gridW; col++) {
+        const int i = col + row * gridW;
+        const float confidence = classCov[i];
+        if (!(confidence >= confidence_threshold)) // negated >= so NaN values are skipped too
+          continue;
+
+        // Box edges are the cell center minus/plus the predicted value, rescaled by bboxNorm.
+        const float rectX1f = (outputX1[i] - gcCentersX[col]) * -bboxNormX;
+        const float rectY1f = (outputY1[i] - gcCentersY[row]) * -bboxNormY;
+        const float rectX2f = (outputX2[i] + gcCentersX[col]) * bboxNormX;
+        const float rectY2f = (outputY2[i] + gcCentersY[row]) * bboxNormY;
+
+        // Clamp to the model input; names differ from the grid indices to avoid shadowing them.
+        const float boxX = CLIP(rectX1f, 0, input_width - 1);
+        const float boxY = CLIP(rectY1f, 0, input_height - 1);
+        const float boxW = CLIP(rectX2f, 0, input_width - 1) - boxX + 1;
+        const float boxH = CLIP(rectY2f, 0, input_height - 1) - boxY + 1;
+
+        objects.push_back(DetectedObject(boxX, boxY, boxW, boxH, confidence, c,
+                                         BlobToMetaConverter::getLabelByLabelId(c), 1.0f / input_width,
+                                         1.0f / input_height, false));
+      }
+    }
+  }
+}
+
+// Converts the "conv2d_cov/Sigmoid" and "conv2d_bbox" output blobs into per-frame detections for the whole batch.
+// Any failure is rethrown nested inside std::runtime_error("Failed to do Resnet10 post-processing.").
+TensorsTable Resnet10Converter::convert(const OutputBlobs &output_blobs) const {
+    ITT_TASK(__FUNCTION__);
+    try {
+        // Per-call locals: layer dimensions must not leak between converter instances or threads.
+        InferDimsCHW covLayerDims = {0, 0, 0};
+        InferDimsCHW bboxLayerDims = {0, 0, 0};
+        const auto &model_input_image_info = getModelInputImageInfo();
+        const size_t batch_size = model_input_image_info.batch_size;
+        DetectedObjectsTable objects_table(batch_size);
+
+        for (size_t batch_number = 0; batch_number < batch_size; ++batch_number) {
+            auto &objects = objects_table[batch_number];
+            const float *outputCovBuf = nullptr;
+            const float *outputBboxBuf = nullptr;
+            for (const auto &blob_iter : output_blobs) {
+                const InferenceBackend::OutputBlob::Ptr &blob = blob_iter.second;
+                if (not blob)
+                    throw std::invalid_argument("Output blob is nullptr.");
+
+                const bool is_bbox = blob_iter.first == "conv2d_bbox";
+                const bool is_cov = blob_iter.first == "conv2d_cov/Sigmoid";
+                if (!is_bbox && !is_cov)
+                    continue;
+
+                InferDimsCHW &layerDims = is_bbox ? bboxLayerDims : covLayerDims;
+                if (!layerDims.c) {
+                    const auto &dims = blob->GetDims(); // NCHW
+                    if (dims.size() < 4)
+                        throw std::invalid_argument("Resnet10 output blobs must be 4-dimensional (NCHW).");
+                    layerDims.set(dims[1], dims[2], dims[3]);
+                }
+                // Offset to this frame's slice, assuming the blob stores the batch items contiguously.
+                const size_t unbatched_size = blob->GetSize() / batch_size;
+                const float *frame_data = reinterpret_cast<const float *>(blob->GetData()) + unbatched_size * batch_number;
+                if (is_bbox)
+                    outputBboxBuf = frame_data;
+                else
+                    outputCovBuf = frame_data;
+            }
+
+            if (!outputCovBuf || !outputBboxBuf)
+                throw std::runtime_error("Resnet10 output layers 'conv2d_cov/Sigmoid' and 'conv2d_bbox' are both required.");
+
+            const int numClassesToParse = MIN(covLayerDims.c, kNUM_CONFIGURED_CLASSES);
+            parseOutputBlob(covLayerDims, bboxLayerDims, outputCovBuf, outputBboxBuf, numClassesToParse, objects);
+        }
+
+        return storeObjects(objects_table);
+    } catch (const std::exception &e) {
+        std::throw_with_nested(std::runtime_error("Failed to do Resnet10 post-processing."));
+    }
+    return TensorsTable{};
+}
diff --git a/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.h b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.h
new file mode 100644
index 00000000..9a682006
--- /dev/null
+++ b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.h
@@ -0,0 +1,56 @@
+/*******************************************************************************
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ ******************************************************************************/
+
+#pragma once
+
+#include "blob_to_roi_converter.h"
+#include <opencv2/opencv.hpp>
+#include "inference_backend/image_inference.h"
+
+#include <gst/gst.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace post_processing {
+    
+// Dimensions of one output layer in channel / height / width order.
+struct InferDimsCHW {
+    unsigned int c, h, w;
+    // Overwrites all three dimensions at once.
+    void set(unsigned int channels, unsigned int height, unsigned int width) {
+        c = channels;
+        h = height;
+        w = width;
+    }
+};
+
+// ROI converter for the Resnet10 detector; model-proc files select it through the name returned by getName().
+class Resnet10Converter : public BlobToROIConverter {
+  public:
+    Resnet10Converter(BlobToMetaConverter::Initializer initializer, double confidence_threshold)
+        : BlobToROIConverter(std::move(initializer), confidence_threshold, true, 0.4) {
+    }
+    // Turns the model output blobs of a batch into detection tensors.
+    TensorsTable convert(const OutputBlobs &output_blobs) const override;
+
+    // Name matched against the requested converter in BlobToROIConverter::create.
+    static std::string getName() {
+        return "resnet_10";
+    }
+
+    static std::string getDepricatedName() {
+        return "tensor_to_bbox_resnet_10";
+    }
+
+  protected:
+    // FIXME: move roi_scale to coordinates restorer or attacher
+    void parseOutputBlob(const InferDimsCHW& covLayerDims, const InferDimsCHW& bboxLayerDims, const float *outputCovBuf, const float *outputBboxBuf,
+                         int numClassesToParse, std::vector<DetectedObject> &objects) const;
+};
+} // namespace post_processing