From 515ac64597c3b921876f812f7c4b1567405f7a23 Mon Sep 17 00:00:00 2001
From: Eslam Ahmed
Date: Sun, 11 Feb 2024 12:30:14 +0000
Subject: [PATCH] feat: support resnet 10 blob to roi converter and added proc files for resnet 10 detector and resnet 18 classifiers

---
 .../public/car_color_proc_file.json           |  37 +++
 .../model_proc/public/car_make_proc_file.json |  44 ++++
 .../model_proc/public/car_type_proc_file.json |  31 +++
 .../gstreamer/model_proc/public/resnet10.json |  21 ++
 .../to_roi/blob_to_roi_converter.cpp          |   3 +
 .../converters/to_roi/resnet_10.cpp           | 199 ++++++++++++++++++
 .../converters/to_roi/resnet_10.h             |  69 ++++++
 7 files changed, 404 insertions(+)
 create mode 100644 samples/gstreamer/model_proc/public/car_color_proc_file.json
 create mode 100644 samples/gstreamer/model_proc/public/car_make_proc_file.json
 create mode 100644 samples/gstreamer/model_proc/public/car_type_proc_file.json
 create mode 100644 samples/gstreamer/model_proc/public/resnet10.json
 create mode 100644 src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.cpp
 create mode 100644 src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.h

diff --git a/samples/gstreamer/model_proc/public/car_color_proc_file.json b/samples/gstreamer/model_proc/public/car_color_proc_file.json
new file mode 100644
index 00000000..735b7905
--- /dev/null
+++ b/samples/gstreamer/model_proc/public/car_color_proc_file.json
@@ -0,0 +1,37 @@
+{
+    "json_schema_version": "2.2.0",
+    "input_preproc": [
+        {
+            "precision": "FP32",
+            "params": {
+                "color_space": "BGR",
+                "resize": "aspect-ratio",
+                "mean": [
+                    103.939,
+                    116.779,
+                    123.68
+                ]
+            }
+        }
+    ],
+    "output_postproc": [
+        {
+            "converter": "label",
+            "method": "max",
+            "labels": [
+                "black",
+                "blue",
+                "brown",
+                "gold",
+                "green",
+                "grey",
+                "maroon",
+                "orange",
+                "red",
+                "silver",
+                "white",
+                "yellow"
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/samples/gstreamer/model_proc/public/car_make_proc_file.json b/samples/gstreamer/model_proc/public/car_make_proc_file.json
new file mode 100644
index 00000000..f22f1920
--- /dev/null
+++ b/samples/gstreamer/model_proc/public/car_make_proc_file.json
@@ -0,0 +1,44 @@
+{
+    "json_schema_version": "2.2.0",
+    "input_preproc": [
+        {
+            "precision": "FP32",
+            "params": {
+                "color_space": "BGR",
+                "resize": "aspect-ratio",
+                "mean": [
+                    103.939,
+                    116.779,
+                    123.68
+                ]
+            }
+        }
+    ],
+    "output_postproc": [
+        {
+            "converter": "label",
+            "method": "max",
+            "labels": [
+                "acura",
+                "audi",
+                "bmw",
+                "chevrolet",
+                "chrysler",
+                "dodge",
+                "ford",
+                "gmc",
+                "honda",
+                "hyundai",
+                "infiniti",
+                "jeep",
+                "kia",
+                "lexus",
+                "mercedes",
+                "nissan",
+                "subaru",
+                "toyota",
+                "volkswagen"
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/samples/gstreamer/model_proc/public/car_type_proc_file.json b/samples/gstreamer/model_proc/public/car_type_proc_file.json
new file mode 100644
index 00000000..64a49e93
--- /dev/null
+++ b/samples/gstreamer/model_proc/public/car_type_proc_file.json
@@ -0,0 +1,31 @@
+{
+    "json_schema_version": "2.2.0",
+    "input_preproc": [
+        {
+            "precision": "FP32",
+            "params": {
+                "color_space": "BGR",
+                "resize": "aspect-ratio",
+                "mean": [
+                    103.939,
+                    116.779,
+                    123.68
+                ]
+            }
+        }
+    ],
+    "output_postproc": [
+        {
+            "converter": "label",
+            "method": "max",
+            "labels": [
+                "coupe",
+                "largevehicle",
+                "sedan",
+                "suv",
+                "truck",
+                "van"
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/samples/gstreamer/model_proc/public/resnet10.json b/samples/gstreamer/model_proc/public/resnet10.json
new file mode 100644
index 00000000..29b17588
--- /dev/null
+++ b/samples/gstreamer/model_proc/public/resnet10.json
@@ -0,0 +1,21 @@
+{
+    "json_schema_version": "2.2.0",
+    "input_preproc": [
+        {
+            "params": {
+                "resize": "aspect-ratio"
+            }
+        }
+    ],
+    "output_postproc": [
+        {
+            "converter": "resnet_10",
+            "labels": [
+                "Car",
+                "Bicycle",
+                "Person",
+                "Roadsign"
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/blob_to_roi_converter.cpp b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/blob_to_roi_converter.cpp
index 6e21a92d..a6f22151 100644
--- a/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/blob_to_roi_converter.cpp
+++ b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/blob_to_roi_converter.cpp
@@ -14,6 +14,7 @@
 #include "yolo_v2.h"
 #include "yolo_v3.h"
 #include "yolo_v5.h"
+#include "resnet_10.h"
 
 #include "inference_backend/logger.h"
 
@@ -40,6 +41,8 @@ BlobToMetaConverter::Ptr BlobToROIConverter::create(BlobToMetaConverter::Initial
 
     if (converter_name == DetectionOutputConverter::getName())
         return BlobToMetaConverter::Ptr(new DetectionOutputConverter(std::move(initializer), confidence_threshold));
+    else if (converter_name == Resnet10Converter::getName())
+        return BlobToMetaConverter::Ptr(new Resnet10Converter(std::move(initializer), confidence_threshold));
     else if (converter_name == BoxesLabelsConverter::getName())
         return BlobToMetaConverter::Ptr(new BoxesLabelsConverter(std::move(initializer), confidence_threshold));
     else if (converter_name == BoxesScoresConverter::getName())
diff --git a/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.cpp b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.cpp
new file mode 100644
index 00000000..6a2a0280
--- /dev/null
+++ b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.cpp
@@ -0,0 +1,199 @@
+/*******************************************************************************
+ * Copyright (C) 2021-2022 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ ******************************************************************************/
+
+#include "resnet_10.h"
+
+#include "inference_backend/image_inference.h"
+#include "inference_backend/logger.h"
+#include "safe_arithmetic.hpp"
+
+#include <gst/gst.h>
+
+#include <algorithm>
+#include <exception>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+using namespace post_processing;
+
+namespace {
+
+// Upper bound on the number of classes this converter decodes.
+constexpr int kNUM_CONFIGURED_CLASSES = 4;
+
+// Grid-cell centres are divided by this value and decoded box offsets are multiplied by it.
+constexpr float kBboxNorm = 35.0f;
+
+// Number of bbox planes (x1, y1, x2, y2) stored per class.
+constexpr size_t kBboxPlanesPerClass = 4;
+
+// Clamps value into [low, high].
+inline float clip(float value, float low, float high) {
+    const float capped = (value < high) ? value : high;
+    return (capped > low) ? capped : low;
+}
+
+// Integer division rounded up; the divisor must be non-zero.
+inline size_t divideAndRoundUp(size_t dividend, size_t divisor) {
+    return (dividend + divisor - 1) / divisor;
+}
+
+} // namespace
+
+/**
+ * Decodes one batch element of the coverage/bbox blob pair into detections.
+ *
+ * For every parsed class and every grid cell whose coverage value reaches
+ * confidence_threshold, the class's four bbox planes (x1, y1, x2, y2) are
+ * combined with the cell centre into a box in model-input pixels, clipped to
+ * the input image and appended to objects.
+ *
+ * @param covLayerDims      CHW dimensions of the coverage blob.
+ * @param bboxLayerDims     CHW dimensions of the bbox blob.
+ * @param outputCovBuf      coverage values of this batch element.
+ * @param outputBboxBuf     bbox values of this batch element.
+ * @param numClassesToParse number of leading classes to decode.
+ * @param objects           receives the decoded detections.
+ * @throws std::runtime_error    if numClassesToParse exceeds kNUM_CONFIGURED_CLASSES.
+ * @throws std::invalid_argument if the layer or model-input geometry is unusable.
+ */
+void Resnet10Converter::parseOutputBlob(const InferDimsCHW &covLayerDims, const InferDimsCHW &bboxLayerDims,
+                                        const float *outputCovBuf, const float *outputBboxBuf,
+                                        int numClassesToParse, std::vector<DetectedObject> &objects) const {
+    if (numClassesToParse > kNUM_CONFIGURED_CLASSES)
+        throw std::runtime_error("class id " + std::to_string(kNUM_CONFIGURED_CLASSES) + " is out of bound");
+
+    const size_t input_width = getModelInputImageInfo().width;
+    const size_t input_height = getModelInputImageInfo().height;
+    if (input_width == 0 || input_height == 0)
+        throw std::invalid_argument("Model input image has zero width or height.");
+
+    // Both blobs are indexed with the same grid below, so their spatial extents must agree.
+    if (covLayerDims.w == 0 || covLayerDims.h == 0 || bboxLayerDims.w != covLayerDims.w ||
+        bboxLayerDims.h != covLayerDims.h)
+        throw std::invalid_argument("Coverage and bbox layers must share a non-empty grid.");
+    if (numClassesToParse > 0 && bboxLayerDims.c < kBboxPlanesPerClass * static_cast<size_t>(numClassesToParse))
+        throw std::invalid_argument("Bbox layer has too few channels for the parsed classes.");
+
+    const int gridW = static_cast<int>(covLayerDims.w);
+    const int gridH = static_cast<int>(covLayerDims.h);
+    const int gridSize = gridW * gridH;
+    const int strideX = static_cast<int>(divideAndRoundUp(input_width, bboxLayerDims.w));
+    const int strideY = static_cast<int>(divideAndRoundUp(input_height, bboxLayerDims.h));
+
+    // Sized at run time, hence std::vector (variable-length arrays are not standard C++).
+    std::vector<float> gcCentersX(gridW);
+    std::vector<float> gcCentersY(gridH);
+    for (int i = 0; i < gridW; i++)
+        gcCentersX[i] = static_cast<float>(i * strideX + 0.5) / kBboxNorm;
+    for (int i = 0; i < gridH; i++)
+        gcCentersY[i] = static_cast<float>(i * strideY + 0.5) / kBboxNorm;
+
+    const float maxX = static_cast<float>(input_width - 1);
+    const float maxY = static_cast<float>(input_height - 1);
+
+    for (int c = 0; c < numClassesToParse; c++) {
+        const float *classCov = outputCovBuf + static_cast<size_t>(c) * gridSize;
+        const float *outputX1 = outputBboxBuf + static_cast<size_t>(c) * kBboxPlanesPerClass * gridSize;
+        const float *outputY1 = outputX1 + gridSize;
+        const float *outputX2 = outputY1 + gridSize;
+        const float *outputY2 = outputX2 + gridSize;
+
+        for (int h = 0; h < gridH; h++) {
+            for (int w = 0; w < gridW; w++) {
+                const int i = w + h * gridW;
+                const float confidence = classCov[i];
+                if (!(confidence >= confidence_threshold))
+                    continue;
+
+                const float rectX1f = (outputX1[i] - gcCentersX[w]) * -kBboxNorm;
+                const float rectY1f = (outputY1[i] - gcCentersY[h]) * -kBboxNorm;
+                const float rectX2f = (outputX2[i] + gcCentersX[w]) * kBboxNorm;
+                const float rectY2f = (outputY2[i] + gcCentersY[h]) * kBboxNorm;
+
+                const float x = clip(rectX1f, 0.0f, maxX);
+                const float y = clip(rectY1f, 0.0f, maxY);
+                // Named boxW/boxH so they do not shadow the grid indices w and h.
+                const float boxW = clip(rectX2f, 0.0f, maxX) - x + 1;
+                const float boxH = clip(rectY2f, 0.0f, maxY) - y + 1;
+
+                objects.push_back(DetectedObject(x, y, boxW, boxH, confidence, c,
+                                                 BlobToMetaConverter::getLabelByLabelId(c), 1.0f / input_width,
+                                                 1.0f / input_height, false));
+            }
+        }
+    }
+}
+
+/**
+ * Converts the raw output blobs of a batch into detection tensors.
+ *
+ * Locates the "conv2d_bbox" and "conv2d_cov/Sigmoid" blobs, decodes each batch
+ * element with parseOutputBlob() and passes the result to storeObjects().
+ *
+ * @param output_blobs output blobs keyed by layer name.
+ * @return the value returned by storeObjects() for the decoded detections.
+ * @throws std::runtime_error with the original exception nested, on any failure.
+ */
+TensorsTable Resnet10Converter::convert(const OutputBlobs &output_blobs) const {
+    ITT_TASK(__FUNCTION__);
+    try {
+        const auto &model_input_image_info = getModelInputImageInfo();
+        const size_t batch_size = model_input_image_info.batch_size;
+
+        DetectedObjectsTable objects_table(batch_size);
+
+        // Kept per call: function-local statics would be shared by every converter instance
+        // (and thread), so a second model with a different grid would reuse stale dimensions.
+        InferDimsCHW covLayerDims = {0, 0, 0};
+        InferDimsCHW bboxLayerDims = {0, 0, 0};
+
+        for (size_t batch_number = 0; batch_number < batch_size; ++batch_number) {
+            auto &objects = objects_table[batch_number];
+            const float *outputCovBuf = nullptr;
+            const float *outputBboxBuf = nullptr;
+
+            for (const auto &blob_iter : output_blobs) {
+                const InferenceBackend::OutputBlob::Ptr &blob = blob_iter.second;
+                if (not blob)
+                    throw std::invalid_argument("Output blob is nullptr.");
+
+                const bool is_bbox = blob_iter.first == "conv2d_bbox";
+                const bool is_cov = blob_iter.first == "conv2d_cov/Sigmoid";
+                if (!is_bbox && !is_cov)
+                    continue;
+
+                InferDimsCHW &layerDims = is_bbox ? bboxLayerDims : covLayerDims;
+                if (!layerDims.c) {
+                    const auto &dims = blob->GetDims(); // NCHW
+                    if (dims.size() < 4)
+                        throw std::invalid_argument("Output blob is expected to have NCHW dimensions.");
+                    layerDims.set(dims[1], dims[2], dims[3]);
+                }
+
+                const size_t unbatched_size = blob->GetSize() / batch_size;
+                const float *data = reinterpret_cast<const float *>(blob->GetData()) + unbatched_size * batch_number;
+                if (is_bbox)
+                    outputBboxBuf = data;
+                else
+                    outputCovBuf = data;
+            }
+
+            if (!outputCovBuf || !outputBboxBuf)
+                throw std::runtime_error("Failed to do Resnet10 post-processing.");
+
+            const int numClassesToParse =
+                static_cast<int>(std::min<unsigned int>(covLayerDims.c, kNUM_CONFIGURED_CLASSES));
+            parseOutputBlob(covLayerDims, bboxLayerDims, outputCovBuf, outputBboxBuf, numClassesToParse, objects);
+        }
+
+        return storeObjects(objects_table);
+    } catch (const std::exception &) {
+        std::throw_with_nested(std::runtime_error("Failed to do Resnet10 post-processing."));
+    }
+    return TensorsTable{};
+}
diff --git a/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.h b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.h
new file mode 100644
index 00000000..9a682006
--- /dev/null
+++ b/src/monolithic/gst/inference_elements/common/post_processor/converters/to_roi/resnet_10.h
@@ -0,0 +1,69 @@
+/*******************************************************************************
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ ******************************************************************************/
+
+#pragma once
+
+#include "blob_to_roi_converter.h"
+#include <utility>
+#include "inference_backend/image_inference.h"
+
+#include <gst/gst.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace post_processing {
+
+/// Channel, height and width of one output layer (batch dimension excluded).
+struct InferDimsCHW
+{
+    unsigned int c, h, w;
+    /// Assigns all three extents at once.
+    void set(unsigned int c, unsigned int h, unsigned w)
+    {
+        this->c = c;
+        this->h = h;
+        this->w = w;
+    }
+};
+
+/**
+ * Blob-to-ROI converter registered under the name "resnet_10".
+ *
+ * Decodes a coverage blob and a bbox blob into detected objects; see convert().
+ */
+class Resnet10Converter : public BlobToROIConverter {
+
+  protected:
+    // FIXME: move roi_scale to coordinates restorer or attacher
+    /// Decodes one batch element of the coverage/bbox blobs and appends the detections to objects.
+    void parseOutputBlob(const InferDimsCHW &covLayerDims, const InferDimsCHW &bboxLayerDims,
+                         const float *outputCovBuf, const float *outputBboxBuf, int numClassesToParse,
+                         std::vector<DetectedObject> &objects) const;
+
+  public:
+    /// Forwards to BlobToROIConverter with the extra arguments `true, 0.4`
+    /// (presumably NMS enabled with an IoU threshold of 0.4 -- confirm against the base class).
+    Resnet10Converter(BlobToMetaConverter::Initializer initializer, double confidence_threshold)
+        : BlobToROIConverter(std::move(initializer), confidence_threshold, true, 0.4) {
+    }
+
+    /// Converts the model's output blobs into detection tensors.
+    TensorsTable convert(const OutputBlobs &output_blobs) const override;
+
+    /// Converter name used in model-proc files.
+    static std::string getName() {
+        return "resnet_10";
+    }
+
+    /// Deprecated alias of the converter name.
+    static std::string getDepricatedName() {
+        return "tensor_to_bbox_resnet_10";
+    }
+};
+} // namespace post_processing