diff --git a/demos/common/cpp/pipelines/include/pipelines/metadata.h b/demos/common/cpp/pipelines/include/pipelines/metadata.h index aca18ee36aa..f7a52a4c608 100644 --- a/demos/common/cpp/pipelines/include/pipelines/metadata.h +++ b/demos/common/cpp/pipelines/include/pipelines/metadata.h @@ -1,5 +1,5 @@ /* -// Copyright (C) 2018-2020 Intel Corporation +// Copyright (C) 2020-2022 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/demos/crossroad_camera_demo/cpp/crossroad_camera_demo.hpp b/demos/crossroad_camera_demo/cpp/crossroad_camera_demo.hpp index c198a88430a..aa9bd944a4b 100644 --- a/demos/crossroad_camera_demo/cpp/crossroad_camera_demo.hpp +++ b/demos/crossroad_camera_demo/cpp/crossroad_camera_demo.hpp @@ -4,39 +4,50 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once +#include -#include "gflags/gflags.h" -#include "utils/default_flags.hpp" +#include + +#include DEFINE_INPUT_FLAGS DEFINE_OUTPUT_FLAGS static const char help_message[] = "Print a usage message."; -static const char person_vehicle_bike_detection_model_message[] = "Required. Path to the Person/Vehicle/Bike Detection Crossroad model (.xml) file."; -static const char person_attribs_model_message[] = "Optional. Path to the Person Attributes Recognition Crossroad model (.xml) file."; -static const char person_reid_model_message[] = "Optional. Path to the Person Reidentification Retail model (.xml) file."; -static const char target_device_message[] = "Optional. Specify the target device for Person/Vehicle/Bike Detection. " - "The list of available devices is shown below. Default value is CPU. " - "Use \"-d HETERO:\" format to specify HETERO plugin. " - "The application looks for a suitable plugin for the specified device."; -static const char target_device_message_person_attribs[] = "Optional. Specify the target device for Person Attributes Recognition. " - "The list of available devices is shown below. Default value is CPU. " - "Use \"-d HETERO:\" format to specify HETERO plugin. " - "The application looks for a suitable plugin for the specified device."; -static const char target_device_message_person_reid[] = "Optional. Specify the target device for Person Reidentification Retail. " - "The list of available devices is shown below. Default value is CPU. " - "Use \"-d HETERO:\" format to specify HETERO plugin. " - "The application looks for a suitable plugin for the specified device."; -static const char threshold_output_message[] = "Optional. Probability threshold for person/vehicle/bike crossroad detections."; -static const char threshold_output_message_person_reid[] = "Optional. Cosine similarity threshold between two vectors for person reidentification."; +static const char person_vehicle_bike_detection_model_message[] = + "Required. Path to the Person/Vehicle/Bike Detection Crossroad model (.xml) file."; +static const char person_attribs_model_message[] = + "Optional. Path to the Person Attributes Recognition Crossroad model (.xml) file."; +static const char person_reid_model_message[] = + "Optional. Path to the Person Reidentification Retail model (.xml) file."; +static const char target_device_message[] = + "Optional. Specify the target device for Person/Vehicle/Bike Detection. " + "The list of available devices is shown below. Default value is CPU. " + "Use \"-d HETERO:\" format to specify HETERO plugin. 
" + "The application looks for a suitable plugin for the specified device."; +static const char target_device_message_person_attribs[] = + "Optional. Specify the target device for Person Attributes Recognition. " + "The list of available devices is shown below. Default value is CPU. " + "Use \"-d HETERO:\" format to specify HETERO plugin. " + "The application looks for a suitable plugin for the specified device."; +static const char target_device_message_person_reid[] = + "Optional. Specify the target device for Person Reidentification Retail. " + "The list of available devices is shown below. Default value is CPU. " + "Use \"-d HETERO:\" format to specify HETERO plugin. " + "The application looks for a suitable plugin for the specified device."; +static const char threshold_output_message[] = + "Optional. Probability threshold for person/vehicle/bike crossroad detections."; +static const char threshold_output_message_person_reid[] = + "Optional. Cosine similarity threshold between two vectors for person reidentification."; static const char raw_output_message[] = "Optional. Output Inference results as raw values."; static const char no_show_message[] = "Optional. Don't show output."; -static const char input_resizable_message[] = "Optional. Enables resizable input with support of ROI crop & auto resize."; +static const char input_resizable_message[] = + "Optional. Enables resizable input with support of ROI crop & auto resize."; static const char utilization_monitors_message[] = "Optional. List of monitors to show initially."; -static const char person_label_message[] = "Optional. The integer index of the objects' category corresponding to persons " - "(as it is returned from the detection network, may vary from one network to another). " - "The default value is 1."; - +static const char person_label_message[] = + "Optional. The integer index of the objects' category corresponding to persons " + "(as it is returned from the detection network, may vary from one network to another). 
" + "The default value is 1."; DEFINE_bool(h, false, help_message); DEFINE_string(m, "", person_vehicle_bike_detection_model_message); @@ -53,10 +64,9 @@ DEFINE_bool(auto_resize, false, input_resizable_message); DEFINE_string(u, "", utilization_monitors_message); DEFINE_int32(person_label, 1, person_label_message); - /** -* @brief This function show a help message -*/ + * @brief This function show a help message + */ static void showUsage() { std::cout << std::endl; std::cout << "crossroad_camera_demo [OPTION]" << std::endl; @@ -67,7 +77,7 @@ static void showUsage() { std::cout << " -loop " << loop_message << std::endl; std::cout << " -o \"\" " << output_message << std::endl; std::cout << " -limit \"\" " << limit_message << std::endl; - std::cout << " -m \"\" " << person_vehicle_bike_detection_model_message<< std::endl; + std::cout << " -m \"\" " << person_vehicle_bike_detection_model_message << std::endl; std::cout << " -m_pa \"\" " << person_attribs_model_message << std::endl; std::cout << " -m_reid \"\" " << person_reid_model_message << std::endl; std::cout << " -d \"\" " << target_device_message << std::endl; diff --git a/demos/crossroad_camera_demo/cpp/detection_base.hpp b/demos/crossroad_camera_demo/cpp/detection_base.hpp index e85c8634fd1..19d98cc7dcf 100644 --- a/demos/crossroad_camera_demo/cpp/detection_base.hpp +++ b/demos/crossroad_camera_demo/cpp/detection_base.hpp @@ -2,9 +2,14 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include -#include "openvino/openvino.hpp" -#include "utils/slog.hpp" + +#include +#include + +#include +#include #pragma once @@ -17,8 +22,9 @@ struct BaseDetection { std::string m_inputName; std::string m_outputName; - BaseDetection(std::string& commandLineFlag, const std::string& detectorName) : - m_commandLineFlag(commandLineFlag), m_detectorName(detectorName) {} + BaseDetection(std::string& commandLineFlag, const std::string& detectorName) + : m_commandLineFlag(commandLineFlag), + m_detectorName(detectorName) {} ov::CompiledModel* operator->() { return &m_compiled_model; @@ -53,7 +59,7 @@ struct BaseDetection { } virtual void wait() { - if (!enabled()|| !m_infer_request) + if (!enabled() || !m_infer_request) return; m_infer_request.wait(); @@ -62,7 +68,7 @@ struct BaseDetection { mutable bool m_enablingChecked = false; mutable bool m_enabled = false; - bool enabled() const { + bool enabled() const { if (!m_enablingChecked) { m_enabled = !m_commandLineFlag.empty(); if (!m_enabled) { @@ -81,7 +87,10 @@ struct Load { void into(ov::Core& core, const std::string& deviceName) const { if (m_detector.enabled()) { m_detector.m_compiled_model = core.compile_model(m_detector.read(core), deviceName); - logCompiledModelInfo(m_detector.m_compiled_model, m_detector.m_commandLineFlag, deviceName, m_detector.m_detectorName); + logCompiledModelInfo(m_detector.m_compiled_model, + m_detector.m_commandLineFlag, + deviceName, + m_detector.m_detectorName); } } }; diff --git a/demos/crossroad_camera_demo/cpp/detection_person.hpp b/demos/crossroad_camera_demo/cpp/detection_person.hpp index 0663ddbc1a4..16e12a76b1f 100644 --- a/demos/crossroad_camera_demo/cpp/detection_person.hpp +++ b/demos/crossroad_camera_demo/cpp/detection_person.hpp @@ -2,14 +2,17 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include +#include -#include "openvino/openvino.hpp" +#include +#include + +#include -#include "gflags/gflags.h" -#include "utils/slog.hpp" -#include "detection_base.hpp" #include "crossroad_camera_demo.hpp" +#include "detection_base.hpp" struct PersonDetection : 
BaseDetection { size_t maxProposalCount; @@ -86,25 +89,25 @@ struct PersonDetection : BaseDetection { throw std::logic_error("Incorrect output dimensions for SSD"); } - const ov::Layout tensor_layout{ "NHWC" }; + const ov::Layout tensor_layout{"NHWC"}; ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model); if (FLAGS_auto_resize) { - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_spatial_dynamic_shape(). - set_layout(tensor_layout); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). - convert_layout("NCHW"). - resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input() + .tensor() + .set_element_type(ov::element::u8) + .set_spatial_dynamic_shape() + .set_layout(tensor_layout); + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .convert_layout("NCHW") + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); ppp.input().model().set_layout("NCHW"); ppp.output().tensor().set_element_type(ov::element::f32); } else { - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_layout({ "NCHW" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NCHW"}); } model = ppp.build(); @@ -125,7 +128,7 @@ struct PersonDetection : BaseDetection { const float* detections = m_infer_request.get_output_tensor().data(); // pretty much regular SSD post-processing for (size_t i = 0; i < maxProposalCount; i++) { - float image_id = detections[i * objectSize + 0]; // in case of batch + float image_id = detections[i * objectSize + 0]; // in case of batch if (image_id < 0) { // end of detections break; @@ -145,11 +148,10 @@ struct PersonDetection : BaseDetection { } if (FLAGS_r) { - slog::debug << - "[" << i << "," << r.label << "] element, prob = " << r.confidence << - " (" << r.location.x << "," << r.location.y << - ")-(" << r.location.width << "," << r.location.height << ")" << - ((r.confidence > FLAGS_t) ? " WILL BE RENDERED!" : "") << slog::endl; + slog::debug << "[" << i << "," << r.label << "] element, prob = " << r.confidence << " (" + << r.location.x << "," << r.location.y << ")-(" << r.location.width << "," + << r.location.height << ")" << ((r.confidence > FLAGS_t) ? " WILL BE RENDERED!" 
: "") + << slog::endl; } results.push_back(r); } diff --git a/demos/crossroad_camera_demo/cpp/detection_person_attr.hpp b/demos/crossroad_camera_demo/cpp/detection_person_attr.hpp index e2117762a3f..59ebb48bbcc 100644 --- a/demos/crossroad_camera_demo/cpp/detection_person_attr.hpp +++ b/demos/crossroad_camera_demo/cpp/detection_person_attr.hpp @@ -2,12 +2,16 @@ // SPDX-License-Identifier: Apache-2.0 // +#include +#include #include +#include -#include "openvino/openvino.hpp" +#include +#include + +#include -#include "gflags/gflags.h" -#include "utils/slog.hpp" #include "detection_base.hpp" struct PersonAttribsDetection : BaseDetection { @@ -16,7 +20,6 @@ struct PersonAttribsDetection : BaseDetection { std::string outputNameForBottomColorPoint; bool hasTopBottomColor; - PersonAttribsDetection() : BaseDetection(FLAGS_m_pa, "Person Attributes Recognition"), hasTopBottomColor(false) {} struct AttributesAndColorPoints { @@ -37,8 +40,13 @@ struct PersonAttribsDetection : BaseDetection { image.convertTo(image32f, CV_32F); image32f = image32f.reshape(1, image32f.rows * image32f.cols); clusterCount = std::min(clusterCount, image32f.rows); - cv::kmeans(image32f, clusterCount, labels, cv::TermCriteria(cv::TermCriteria::EPS+cv::TermCriteria::MAX_ITER, 10, 1.0), - 10, cv::KMEANS_RANDOM_CENTERS, centers); + cv::kmeans(image32f, + clusterCount, + labels, + cv::TermCriteria(cv::TermCriteria::EPS + cv::TermCriteria::MAX_ITER, 10, 1.0), + 10, + cv::KMEANS_RANDOM_CENTERS, + centers); centers.convertTo(centers, CV_8U); centers = centers.reshape(3, clusterCount); std::vector freq(clusterCount); @@ -53,12 +61,16 @@ struct PersonAttribsDetection : BaseDetection { } AttributesAndColorPoints GetPersonAttributes() { - static const char* const attributeStringsFor7Attributes[] = { - "is male", "has_bag", "has hat", "has longsleeves", "has longpants", "has longhair", "has coat_jacket" - }; - static const char* const attributeStringsFor8Attributes[] = { - "is male", "has_bag", "has_backpack" , "has hat", "has longsleeves", "has longpants", "has longhair", "has coat_jacket" - }; + static const char* const attributeStringsFor7Attributes[] = + {"is male", "has_bag", "has hat", "has longsleeves", "has longpants", "has longhair", "has coat_jacket"}; + static const char* const attributeStringsFor8Attributes[] = {"is male", + "has_bag", + "has_backpack", + "has hat", + "has longsleeves", + "has longpants", + "has longhair", + "has coat_jacket"}; ov::Tensor attribsTensor = m_infer_request.get_tensor(outputNameForAttributes); size_t numOfAttrChannels = attribsTensor.get_shape()[1]; @@ -69,12 +81,12 @@ struct PersonAttribsDetection : BaseDetection { } else if (numOfAttrChannels == arraySize(attributeStringsFor8Attributes)) { attributeStrings = attributeStringsFor8Attributes; } else { - throw std::logic_error("Output size (" + std::to_string(numOfAttrChannels) + ") of the " + throw std::logic_error("Output size (" + std::to_string(numOfAttrChannels) + + ") of the " "Person Attributes Recognition network is not equal to expected " - "number of attributes (" - + std::to_string(arraySize(attributeStringsFor7Attributes)) - + " or " - + std::to_string(arraySize(attributeStringsFor7Attributes)) + ")"); + "number of attributes (" + + std::to_string(arraySize(attributeStringsFor7Attributes)) + " or " + + std::to_string(arraySize(attributeStringsFor7Attributes)) + ")"); } AttributesAndColorPoints returnValue; @@ -92,11 +104,13 @@ struct PersonAttribsDetection : BaseDetection { size_t numOfTCPointChannels = 
topColorPointTensor.get_shape()[1]; size_t numOfBCPointChannels = bottomColorPointTensor.get_shape()[1]; if (numOfTCPointChannels != 2) { - throw std::logic_error("Output size (" + std::to_string(numOfTCPointChannels) + ") of the " + throw std::logic_error("Output size (" + std::to_string(numOfTCPointChannels) + + ") of the " "Person Attributes Recognition network is not equal to point coordinates(2)"); } if (numOfBCPointChannels != 2) { - throw std::logic_error("Output size (" + std::to_string(numOfBCPointChannels) + ") of the " + throw std::logic_error("Output size (" + std::to_string(numOfBCPointChannels) + + ") of the " "Person Attributes Recognition network is not equal to point coordinates (2)"); } @@ -118,8 +132,9 @@ struct PersonAttribsDetection : BaseDetection { } bool CheckOutputNameExist(const ov::OutputVector& outputs, const std::string name) { - if (std::find_if(outputs.begin(), outputs.end(), - [&](const ov::Output& output) {return output.get_any_name() == name; }) == outputs.end()) { + if (std::find_if(outputs.begin(), outputs.end(), [&](const ov::Output& output) { + return output.get_any_name() == name; + }) == outputs.end()) { return false; } return true; @@ -144,19 +159,15 @@ struct PersonAttribsDetection : BaseDetection { ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model); if (FLAGS_auto_resize) { - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_spatial_dynamic_shape(). - set_layout({ "NHWC" }); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). - convert_layout("NCHW"). - resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input().tensor().set_element_type(ov::element::u8).set_spatial_dynamic_shape().set_layout({"NHWC"}); + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .convert_layout("NCHW") + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); ppp.input().model().set_layout("NCHW"); } else { - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout({ "NCHW" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NCHW"}); } model = ppp.build(); @@ -189,8 +200,9 @@ struct PersonAttribsDetection : BaseDetection { } hasTopBottomColor = true; } else { - throw std::logic_error("Person Attribs Network expects either a network having one output (person attributes), " - "or a network having three outputs (person attributes, top color point, bottom color point)"); + throw std::logic_error( + "Person Attribs Network expects either a network having one output (person attributes), " + "or a network having three outputs (person attributes, top color point, bottom color point)"); } m_enabled = true; diff --git a/demos/crossroad_camera_demo/cpp/detection_person_reid.hpp b/demos/crossroad_camera_demo/cpp/detection_person_reid.hpp index 84347f06250..d779bbddf45 100644 --- a/demos/crossroad_camera_demo/cpp/detection_person_reid.hpp +++ b/demos/crossroad_camera_demo/cpp/detection_person_reid.hpp @@ -2,16 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // +#include #include +#include -#include "openvino/openvino.hpp" +#include +#include + +#include -#include "gflags/gflags.h" -#include "utils/slog.hpp" #include "detection_base.hpp" struct PersonReIdentification : BaseDetection { - std::vector> globalReIdVec; // contains vectors characterising all detected persons + std::vector> globalReIdVec; // contains vectors characterising all detected persons PersonReIdentification() : BaseDetection(FLAGS_m_reid, "Person Re-Identification Retail") {} @@ -48,8 +51,8 @@ struct PersonReIdentification : BaseDetection { float cosineSimilarity(const std::vector& vecA, const std::vector& vecB) { if (vecA.size() != vecB.size()) { throw std::logic_error("cosine similarity can't be called for the vectors of different lengths: " - "vecA size = " + std::to_string(vecA.size()) + - "vecB size = " + std::to_string(vecB.size())); + "vecA size = " + + std::to_string(vecA.size()) + "vecB size = " + std::to_string(vecB.size())); } T mul, denomA, denomB, A, B; @@ -88,19 +91,15 @@ struct PersonReIdentification : BaseDetection { ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model); if (FLAGS_auto_resize) { - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_spatial_dynamic_shape(). - set_layout({ "NHWC" }); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). - convert_layout("NCHW"). - resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); + ppp.input().tensor().set_element_type(ov::element::u8).set_spatial_dynamic_shape().set_layout({"NHWC"}); + ppp.input() + .preprocess() + .convert_element_type(ov::element::f32) + .convert_layout("NCHW") + .resize(ov::preprocess::ResizeAlgorithm::RESIZE_LINEAR); ppp.input().model().set_layout("NCHW"); } else { - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout({ "NCHW" }); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout({"NCHW"}); } model = ppp.build(); diff --git a/demos/crossroad_camera_demo/cpp/main.cpp b/demos/crossroad_camera_demo/cpp/main.cpp index 5f81cb21821..8d9ef16a338 100644 --- a/demos/crossroad_camera_demo/cpp/main.cpp +++ b/demos/crossroad_camera_demo/cpp/main.cpp @@ -2,29 +2,37 @@ // SPDX-License-Identifier: Apache-2.0 // -#include -#include +#include + #include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include "openvino/openvino.hpp" +#include +#include +#include +#include +#include -#include "gflags/gflags.h" -#include "monitors/presenter.h" -#include "utils/images_capture.h" -#include "utils/ocv_common.hpp" -#include "utils/performance_metrics.hpp" -#include "utils/slog.hpp" +#include +#include +#include +#include +#include +#include +#include "crossroad_camera_demo.hpp" #include "detection_base.hpp" #include "detection_person.hpp" #include "detection_person_attr.hpp" #include "detection_person_reid.hpp" -#include "crossroad_camera_demo.hpp" bool ParseAndCheckCommandLine(int argc, char* argv[]) { // Parsing and validation of input args @@ -74,10 +82,10 @@ int main(int argc, char* argv[]) { Load(personReId).into(core, FLAGS_d_reid); // 3. Do inference - cv::Rect cropRoi; // cropped image coordinates + cv::Rect cropRoi; // cropped image coordinates ov::Tensor frameTensor; ov::Tensor roiTensor; - cv::Mat person; // Mat object containing person data cropped by openCV + cv::Mat person; // Mat object containing person data cropped by openCV // Start inference & calc performance typedef std::chrono::duration> ms; @@ -111,7 +119,7 @@ int main(int argc, char* argv[]) { // Process the results down to the pipeline ms personAttribsNetworkTime(0), personReIdNetworktime(0); - int personAttribsInferred = 0, personReIdInferred = 0; + int personAttribsInferred = 0, personReIdInferred = 0; for (PersonDetection::Result& result : personDetection.results) { if (result.label == FLAGS_person_label) { // person @@ -120,8 +128,9 @@ int main(int argc, char* argv[]) { cropRoi.y = (result.location.y < 0) ? 
0 : result.location.y; cropRoi.width = std::min(result.location.width, frame.cols - cropRoi.x); cropRoi.height = std::min(result.location.height, frame.rows - cropRoi.y); - ov::Coordinate p00({ 0, (size_t)cropRoi.y, (size_t)cropRoi.x, 0 }); - ov::Coordinate p01({ 1, (size_t)(cropRoi.y + cropRoi.height), (size_t)(cropRoi.x + cropRoi.width), 3 }); + ov::Coordinate p00({0, (size_t)cropRoi.y, (size_t)cropRoi.x, 0}); + ov::Coordinate p01( + {1, (size_t)(cropRoi.y + cropRoi.height), (size_t)(cropRoi.x + cropRoi.width), 3}); roiTensor = ov::Tensor(frameTensor, p00, p01); } else { // To crop ROI manually and allocate required memory (cv::Mat) again @@ -158,7 +167,6 @@ int main(int argc, char* argv[]) { bottom_color_p.x = static_cast(resPersAttrAndColor.bottom_color_point.x) * person.cols; bottom_color_p.y = static_cast(resPersAttrAndColor.bottom_color_point.y) * person.rows; - cv::Rect person_rect(0, 0, person.cols, person.rows); // Define area around top color's location @@ -174,7 +182,7 @@ int main(int argc, char* argv[]) { cv::Rect bc_rect; bc_rect.x = bottom_color_p.x - person.cols / 6; bc_rect.y = bottom_color_p.y - person.rows / 10; - bc_rect.height = 2 * person.rows / 8; + bc_rect.height = 2 * person.rows / 8; bc_rect.width = 2 * person.cols / 6; bc_rect = bc_rect & person_rect; @@ -216,10 +224,14 @@ int main(int argc, char* argv[]) { // Process outputs if (!resPersAttrAndColor.attributes_strings.empty()) { cv::Rect image_area(0, 0, frame.cols, frame.rows); - cv::Rect tc_label(result.location.x + result.location.width, result.location.y, - result.location.width / 4, result.location.height / 2); - cv::Rect bc_label(result.location.x + result.location.width, result.location.y + result.location.height / 2, - result.location.width / 4, result.location.height / 2); + cv::Rect tc_label(result.location.x + result.location.width, + result.location.y, + result.location.width / 4, + result.location.height / 2); + cv::Rect bc_label(result.location.x + result.location.width, + result.location.y + result.location.height / 2, + result.location.width / 4, + result.location.height / 2); if (shouldHandleTopBottomColors) { frame(tc_label & image_area) = resPersAttrAndColor.top_color; @@ -229,17 +241,19 @@ int main(int argc, char* argv[]) { for (size_t i = 0; i < resPersAttrAndColor.attributes_strings.size(); ++i) { cv::Scalar color; if (resPersAttrAndColor.attributes_indicators[i]) { - color = cv::Scalar(0, 200, 0); // has attribute + color = cv::Scalar(0, 200, 0); // has attribute } else { - color = cv::Scalar(0, 0, 255); // doesn't have attribute + color = cv::Scalar(0, 0, 255); // doesn't have attribute } - putHighlightedText(frame, - resPersAttrAndColor.attributes_strings[i], - cv::Point2f(static_cast(result.location.x + 5 * result.location.width / 4), - static_cast(result.location.y + 15 + 15 * i)), - cv::FONT_HERSHEY_COMPLEX_SMALL, - 0.5, - color, 1); + putHighlightedText( + frame, + resPersAttrAndColor.attributes_strings[i], + cv::Point2f(static_cast(result.location.x + 5 * result.location.width / 4), + static_cast(result.location.y + 15 + 15 * i)), + cv::FONT_HERSHEY_COMPLEX_SMALL, + 0.5, + color, + 1); } if (FLAGS_r) { @@ -250,17 +264,20 @@ int main(int argc, char* argv[]) { slog::debug << "Person Attributes results: " << output_attribute_string << slog::endl; if (shouldHandleTopBottomColors) { slog::debug << "Person top color: " << resPersAttrAndColor.top_color << slog::endl; - slog::debug << "Person bottom color: " << resPersAttrAndColor.bottom_color << slog::endl; + slog::debug << "Person bottom 
color: " << resPersAttrAndColor.bottom_color + << slog::endl; } } } if (!resPersReid.empty()) { putHighlightedText(frame, - resPersReid, - cv::Point2f(static_cast(result.location.x), static_cast(result.location.y + 30)), - cv::FONT_HERSHEY_COMPLEX_SMALL, - 0.55, - cv::Scalar(250, 10, 10), 1); + resPersReid, + cv::Point2f(static_cast(result.location.x), + static_cast(result.location.y + 30)), + cv::FONT_HERSHEY_COMPLEX_SMALL, + 0.55, + cv::Scalar(250, 10, 10), + 1); if (FLAGS_r) { slog::debug << "Person Re-Identification results: " << resPersReid << slog::endl; @@ -275,10 +292,16 @@ int main(int argc, char* argv[]) { // Execution statistics std::ostringstream out; - out << "Detection time : " << std::fixed << std::setprecision(2) << detection.count() - << " ms (" << 1000.f / detection.count() << " fps)"; + out << "Detection time : " << std::fixed << std::setprecision(2) << detection.count() << " ms (" + << 1000.f / detection.count() << " fps)"; - putHighlightedText(frame, out.str(), cv::Point2f(0, 20), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, { 200, 10, 10 }, 2); + putHighlightedText(frame, + out.str(), + cv::Point2f(0, 20), + cv::FONT_HERSHEY_COMPLEX_SMALL, + 0.8, + {200, 10, 10}, + 2); if (personDetection.results.size()) { if (personAttribs.enabled() && personAttribsInferred) { @@ -286,7 +309,13 @@ int main(int argc, char* argv[]) { out.str(""); out << "Attributes Recognition time: " << std::fixed << std::setprecision(2) << average_time << " ms (" << 1000.f / average_time << " fps)"; - putHighlightedText(frame, out.str(), cv::Point2f(0, 40), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, { 200, 10, 10 }, 2); + putHighlightedText(frame, + out.str(), + cv::Point2f(0, 40), + cv::FONT_HERSHEY_COMPLEX_SMALL, + 0.8, + {200, 10, 10}, + 2); if (FLAGS_r) { slog::debug << out.str() << slog::endl; } @@ -294,9 +323,15 @@ int main(int argc, char* argv[]) { if (personReId.enabled() && personReIdInferred) { float average_time = static_cast(personReIdNetworktime.count() / personReIdInferred); out.str(""); - out << "Re-Identification time: " << std::fixed << std::setprecision(2) << average_time - << " ms (" << 1000.f / average_time << " fps)"; - putHighlightedText(frame, out.str(), cv::Point2f(0, 60), cv::FONT_HERSHEY_COMPLEX_SMALL, 0.8, { 200, 10, 10 }, 2); + out << "Re-Identification time: " << std::fixed << std::setprecision(2) << average_time << " ms (" + << 1000.f / average_time << " fps)"; + putHighlightedText(frame, + out.str(), + cv::Point2f(0, 60), + cv::FONT_HERSHEY_COMPLEX_SMALL, + 0.8, + {200, 10, 10}, + 2); if (FLAGS_r) { slog::debug << out.str() << slog::endl; } @@ -306,7 +341,7 @@ int main(int argc, char* argv[]) { if (!FLAGS_no_show) { cv::imshow("Detection results", frame); const int key = cv::waitKey(1); - if (27 == key) // Esc + if (27 == key) // Esc break; presenter.handleKey(key); } @@ -319,12 +354,10 @@ int main(int argc, char* argv[]) { slog::info << "Metrics report:" << slog::endl; metrics.logTotal(); slog::info << presenter.reportMeans() << slog::endl; - } - catch (const std::exception& error) { + } catch (const std::exception& error) { slog ::err << error.what() << slog::endl; return 1; - } - catch (...) { + } catch (...) { slog::err << "Unknown/internal exception happened." 
<< slog::endl; return 1; } diff --git a/demos/face_detection_mtcnn_demo/cpp_gapi/main.cpp b/demos/face_detection_mtcnn_demo/cpp_gapi/main.cpp index da116b9e95a..fce9ad0f6a8 100644 --- a/demos/face_detection_mtcnn_demo/cpp_gapi/main.cpp +++ b/demos/face_detection_mtcnn_demo/cpp_gapi/main.cpp @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -45,7 +46,6 @@ #include "custom_kernels.hpp" #include "face_detection_mtcnn_demo.hpp" -#include "gflags/gflags.h" #include "utils.hpp" const int MAX_PYRAMID_LEVELS = 13; diff --git a/demos/gaze_estimation_demo/cpp/gaze_estimation_demo.hpp b/demos/gaze_estimation_demo/cpp/gaze_estimation_demo.hpp index 484631904ae..063e668b6a5 100644 --- a/demos/gaze_estimation_demo/cpp/gaze_estimation_demo.hpp +++ b/demos/gaze_estimation_demo/cpp/gaze_estimation_demo.hpp @@ -5,41 +5,56 @@ /////////////////////////////////////////////////////////////////////////////////////////////////// #pragma once +#include #include #include -#include -#include "gflags/gflags.h" -#include "utils/default_flags.hpp" +#include + +#include DEFINE_INPUT_FLAGS DEFINE_OUTPUT_FLAGS static const char help_message[] = "Print a usage message."; static const char camera_resolution_message[] = "Optional. Set camera resolution in format WxH."; -static const char gaze_estimation_model_message[] = "Required. Path to an .xml file with a trained Gaze Estimation model."; -static const char face_detection_model_message[] = "Required. Path to an .xml file with a trained Face Detection model."; -static const char head_pose_model_message[] = "Required. Path to an .xml file with a trained Head Pose Estimation model."; -static const char facial_landmarks_model_message[] = "Required. Path to an .xml file with a trained Facial Landmarks Estimation model."; -static const char eye_state_model_message[] = "Required. Path to an .xml file with a trained Open/Closed Eye Estimation model."; -static const char target_device_message[] = "Optional. Target device for Gaze Estimation network (the list of available devices is shown below). " - "Use \"-d HETERO:\" format to specify HETERO plugin. " - "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; -static const char target_device_message_fd[] = "Optional. Target device for Face Detection network (the list of available devices is shown below). " - "Use \"-d HETERO:\" format to specify HETERO plugin. " - "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; -static const char target_device_message_hp[] = "Optional. Target device for Head Pose Estimation network (the list of available devices is shown below). " - "Use \"-d HETERO:\" format to specify HETERO plugin. " - "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; -static const char target_device_message_lm[] = "Optional. Target device for Facial Landmarks Estimation network " - "(the list of available devices is shown below). Use \"-d HETERO:\" format to specify HETERO plugin. " - "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; -static const char target_device_message_es[] = "Optional. Target device for Open/Closed Eye network " - "(the list of available devices is shown below). Use \"-d HETERO:\" format to specify HETERO plugin. " - "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; -static const char thresh_output_message[] = "Optional. Probability threshold for Face Detector. 
The default value is 0.5."; +static const char gaze_estimation_model_message[] = + "Required. Path to an .xml file with a trained Gaze Estimation model."; +static const char face_detection_model_message[] = + "Required. Path to an .xml file with a trained Face Detection model."; +static const char head_pose_model_message[] = + "Required. Path to an .xml file with a trained Head Pose Estimation model."; +static const char facial_landmarks_model_message[] = + "Required. Path to an .xml file with a trained Facial Landmarks Estimation model."; +static const char eye_state_model_message[] = + "Required. Path to an .xml file with a trained Open/Closed Eye Estimation model."; +static const char target_device_message[] = + "Optional. Target device for Gaze Estimation network (the list of available devices is shown below). " + "Use \"-d HETERO:\" format to specify HETERO plugin. " + "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; +static const char target_device_message_fd[] = + "Optional. Target device for Face Detection network (the list of available devices is shown below). " + "Use \"-d HETERO:\" format to specify HETERO plugin. " + "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; +static const char target_device_message_hp[] = + "Optional. Target device for Head Pose Estimation network (the list of available devices is shown below). " + "Use \"-d HETERO:\" format to specify HETERO plugin. " + "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; +static const char target_device_message_lm[] = + "Optional. Target device for Facial Landmarks Estimation network " + "(the list of available devices is shown below). Use \"-d HETERO:\" format to " + "specify HETERO plugin. " + "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; +static const char target_device_message_es[] = + "Optional. Target device for Open/Closed Eye network " + "(the list of available devices is shown below). Use \"-d HETERO:\" format to " + "specify HETERO plugin. " + "The demo will look for a suitable plugin for a specified device. Default value is \"CPU\"."; +static const char thresh_output_message[] = + "Optional. Probability threshold for Face Detector. The default value is 0.5."; static const char raw_output_message[] = "Optional. Output inference results as raw values."; -static const char fd_reshape_message[] = "Optional. Reshape Face Detector network so that its input resolution has the same aspect ratio as the input frame."; +static const char fd_reshape_message[] = "Optional. Reshape Face Detector network so that its input resolution has the " + "same aspect ratio as the input frame."; static const char no_show_message[] = "Optional. Don't show output."; static const char utilization_monitors_message[] = "Optional. 
List of monitors to show initially."; @@ -62,8 +77,8 @@ DEFINE_bool(no_show, false, no_show_message); DEFINE_string(u, "", utilization_monitors_message); /** -* \brief This function shows a help message -*/ + * \brief This function shows a help message + */ static void showUsage() { std::cout << std::endl; diff --git a/demos/gaze_estimation_demo/cpp/include/base_estimator.hpp b/demos/gaze_estimation_demo/cpp/include/base_estimator.hpp index 1e6e30a05a1..a8dae2e2bd5 100644 --- a/demos/gaze_estimation_demo/cpp/include/base_estimator.hpp +++ b/demos/gaze_estimation_demo/cpp/include/base_estimator.hpp @@ -9,8 +9,7 @@ namespace gaze_estimation { class BaseEstimator { public: - void virtual estimate(const cv::Mat& image, - FaceInferenceResults& outputResults) = 0; + void virtual estimate(const cv::Mat& image, FaceInferenceResults& outputResults) = 0; virtual ~BaseEstimator() = default; }; } // namespace gaze_estimation diff --git a/demos/gaze_estimation_demo/cpp/include/eye_state_estimator.hpp b/demos/gaze_estimation_demo/cpp/include/eye_state_estimator.hpp index b6ff86f474c..bc13ad43993 100644 --- a/demos/gaze_estimation_demo/cpp/include/eye_state_estimator.hpp +++ b/demos/gaze_estimation_demo/cpp/include/eye_state_estimator.hpp @@ -4,20 +4,23 @@ #pragma once -#include #include -#include "base_estimator.hpp" +#include -#include "face_inference_results.hpp" +#include "base_estimator.hpp" #include "ie_wrapper.hpp" +namespace ov { +class Core; +} // namespace ov + namespace gaze_estimation { -class EyeStateEstimator: public BaseEstimator { +struct FaceInferenceResults; + +class EyeStateEstimator : public BaseEstimator { public: - EyeStateEstimator(ov::Core& core, - const std::string& modelPath, - const std::string& deviceName); + EyeStateEstimator(ov::Core& core, const std::string& modelPath, const std::string& deviceName); void estimate(const cv::Mat& image, FaceInferenceResults& outputResults) override; ~EyeStateEstimator() override; diff --git a/demos/gaze_estimation_demo/cpp/include/face_detector.hpp b/demos/gaze_estimation_demo/cpp/include/face_detector.hpp index c6afd745a79..a170ae2c584 100644 --- a/demos/gaze_estimation_demo/cpp/include/face_detector.hpp +++ b/demos/gaze_estimation_demo/cpp/include/face_detector.hpp @@ -4,16 +4,18 @@ #pragma once +#include #include #include -#include -#include +#include +#include -#include "face_inference_results.hpp" #include "ie_wrapper.hpp" namespace gaze_estimation { +struct FaceInferenceResults; + class FaceDetector { public: FaceDetector(ov::Core& core, diff --git a/demos/gaze_estimation_demo/cpp/include/face_inference_results.hpp b/demos/gaze_estimation_demo/cpp/include/face_inference_results.hpp index c91e74ae345..4882cef8bf1 100644 --- a/demos/gaze_estimation_demo/cpp/include/face_inference_results.hpp +++ b/demos/gaze_estimation_demo/cpp/include/face_inference_results.hpp @@ -5,8 +5,12 @@ #pragma once #include -#include -#include "utils/slog.hpp" + +#include + +namespace slog { +class LogStream; +} // namespace slog namespace gaze_estimation { struct FaceInferenceResults { diff --git a/demos/gaze_estimation_demo/cpp/include/gaze_estimator.hpp b/demos/gaze_estimation_demo/cpp/include/gaze_estimator.hpp index 5f64b87f0ad..1dc599c5651 100644 --- a/demos/gaze_estimation_demo/cpp/include/gaze_estimator.hpp +++ b/demos/gaze_estimation_demo/cpp/include/gaze_estimator.hpp @@ -4,23 +4,25 @@ #pragma once -#include #include -#include "face_inference_results.hpp" #include "base_estimator.hpp" - #include "ie_wrapper.hpp" +namespace cv { +class Mat; +} // 
namespace cv +namespace ov { +class Core; +} // namespace ov + namespace gaze_estimation { -class GazeEstimator: public BaseEstimator { +struct FaceInferenceResults; + +class GazeEstimator : public BaseEstimator { public: - GazeEstimator(ov::Core& core, - const std::string& modelPath, - const std::string& deviceName, - bool doRollAlign = true); - void estimate(const cv::Mat& image, - FaceInferenceResults& outputResults) override; + GazeEstimator(ov::Core& core, const std::string& modelPath, const std::string& deviceName, bool doRollAlign = true); + void estimate(const cv::Mat& image, FaceInferenceResults& outputResults) override; ~GazeEstimator() override; const std::string modelType = "Gaze Estimation"; diff --git a/demos/gaze_estimation_demo/cpp/include/head_pose_estimator.hpp b/demos/gaze_estimation_demo/cpp/include/head_pose_estimator.hpp index 5a574e9c042..cd610ca6e16 100644 --- a/demos/gaze_estimation_demo/cpp/include/head_pose_estimator.hpp +++ b/demos/gaze_estimation_demo/cpp/include/head_pose_estimator.hpp @@ -4,22 +4,25 @@ #pragma once -#include #include -#include "face_inference_results.hpp" #include "base_estimator.hpp" - #include "ie_wrapper.hpp" +namespace cv { +class Mat; +} // namespace cv +namespace ov { +class Core; +} // namespace ov + namespace gaze_estimation { -class HeadPoseEstimator: public BaseEstimator { +struct FaceInferenceResults; + +class HeadPoseEstimator : public BaseEstimator { public: - HeadPoseEstimator(ov::Core& core, - const std::string& modelPath, - const std::string& deviceName); - void estimate(const cv::Mat& image, - FaceInferenceResults& outputResults) override; + HeadPoseEstimator(ov::Core& core, const std::string& modelPath, const std::string& deviceName); + void estimate(const cv::Mat& image, FaceInferenceResults& outputResults) override; ~HeadPoseEstimator() override; const std::string modelType = "Head Pose Estimation"; diff --git a/demos/gaze_estimation_demo/cpp/include/ie_wrapper.hpp b/demos/gaze_estimation_demo/cpp/include/ie_wrapper.hpp index 8a6aba9c634..4db411d972d 100644 --- a/demos/gaze_estimation_demo/cpp/include/ie_wrapper.hpp +++ b/demos/gaze_estimation_demo/cpp/include/ie_wrapper.hpp @@ -4,14 +4,16 @@ #pragma once -#include -#include #include +#include +#include #include -#include "utils/common.hpp" -#include "utils/ocv_common.hpp" -#include "utils/slog.hpp" +#include + +namespace cv { +class Mat; +} // namespace cv namespace gaze_estimation { class IEWrapper { diff --git a/demos/gaze_estimation_demo/cpp/include/landmarks_estimator.hpp b/demos/gaze_estimation_demo/cpp/include/landmarks_estimator.hpp index 99335bb8654..3b38024af1b 100644 --- a/demos/gaze_estimation_demo/cpp/include/landmarks_estimator.hpp +++ b/demos/gaze_estimation_demo/cpp/include/landmarks_estimator.hpp @@ -6,20 +6,21 @@ #include #include +#include -#include "face_inference_results.hpp" -#include "base_estimator.hpp" +#include +#include +#include "base_estimator.hpp" #include "ie_wrapper.hpp" namespace gaze_estimation { -class LandmarksEstimator: public BaseEstimator { +struct FaceInferenceResults; + +class LandmarksEstimator : public BaseEstimator { public: - LandmarksEstimator(ov::Core& core, - const std::string& modelPath, - const std::string& deviceName); - void estimate(const cv::Mat& image, - FaceInferenceResults& outputResults) override; + LandmarksEstimator(ov::Core& core, const std::string& modelPath, const std::string& deviceName); + void estimate(const cv::Mat& image, FaceInferenceResults& outputResults) override; ~LandmarksEstimator() override; const 
std::string modelType = "Facial Landmarks Estimation"; @@ -33,8 +34,13 @@ class LandmarksEstimator: public BaseEstimator { std::vector split(std::vector& data, const ov::Shape& shape); std::vector getMaxPreds(std::vector heatMaps); int sign(float number); - cv::Mat affineTransform(cv::Point2f center, cv::Point2f scale, - float rot, size_t dst_w, size_t dst_h, cv::Point2f shift, bool inv); + cv::Mat affineTransform(cv::Point2f center, + cv::Point2f scale, + float rot, + size_t dst_w, + size_t dst_h, + cv::Point2f shift, + bool inv); cv::Point2f rotatePoint(cv::Point2f pt, float angle_rad); cv::Point2f get3rdPoint(cv::Point2f a, cv::Point2f b); }; diff --git a/demos/gaze_estimation_demo/cpp/include/results_marker.hpp b/demos/gaze_estimation_demo/cpp/include/results_marker.hpp index a22731f0328..903056d7732 100644 --- a/demos/gaze_estimation_demo/cpp/include/results_marker.hpp +++ b/demos/gaze_estimation_demo/cpp/include/results_marker.hpp @@ -4,12 +4,13 @@ #pragma once -#include -#include - -#include "face_inference_results.hpp" +namespace cv { +class Mat; +} // namespace cv namespace gaze_estimation { +struct FaceInferenceResults; + class ResultsMarker { public: ResultsMarker(bool showFaceBoundingBox, diff --git a/demos/gaze_estimation_demo/cpp/include/utils.hpp b/demos/gaze_estimation_demo/cpp/include/utils.hpp index ce61049238b..f3bbbff7d53 100644 --- a/demos/gaze_estimation_demo/cpp/include/utils.hpp +++ b/demos/gaze_estimation_demo/cpp/include/utils.hpp @@ -1,11 +1,10 @@ -// Copyright (C) 2018 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once #include -#include "utils/ocv_common.hpp" namespace gaze_estimation { void gazeVectorToGazeAngles(const cv::Point3f& gazeVector, cv::Point2f& gazeAngles); diff --git a/demos/gaze_estimation_demo/cpp/main.cpp b/demos/gaze_estimation_demo/cpp/main.cpp index 08fa8cf551f..cb97222e129 100644 --- a/demos/gaze_estimation_demo/cpp/main.cpp +++ b/demos/gaze_estimation_demo/cpp/main.cpp @@ -2,49 +2,39 @@ // SPDX-License-Identifier: Apache-2.0 // -/** -* \brief The entry point for the gaze_estimation_demo application -* \file gaze_estimation_demo/main.cpp -* \example gaze_estimation_demo/main.cpp -*/ -#include -#include -#include -#include +#include + +#include +#include +#include #include -#include +#include #include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "openvino/openvino.hpp" +#include #include +#include +#include +#include +#include + #include #include +#include #include #include #include #include -#include "face_inference_results.hpp" -#include "face_detector.hpp" #include "base_estimator.hpp" -#include "head_pose_estimator.hpp" -#include "landmarks_estimator.hpp" #include "eye_state_estimator.hpp" +#include "face_detector.hpp" +#include "face_inference_results.hpp" +#include "gaze_estimation_demo.hpp" #include "gaze_estimator.hpp" +#include "head_pose_estimator.hpp" +#include "landmarks_estimator.hpp" #include "results_marker.hpp" -#include "utils.hpp" - -#include "gaze_estimation_demo.hpp" using namespace gaze_estimation; @@ -73,7 +63,6 @@ bool ParseAndCheckCommandLine(int argc, char* argv[]) { return true; } - int main(int argc, char* argv[]) { try { PerformanceMetrics metrics; @@ -103,8 +92,12 @@ int main(int argc, char* argv[]) { int delay = 1; std::string windowName = "Gaze estimation demo"; - std::unique_ptr cap = openImagesCapture( - FLAGS_i, FLAGS_loop, read_type::efficient, 0, std::numeric_limits::max(), 
stringToSize(FLAGS_res)); + std::unique_ptr cap = openImagesCapture(FLAGS_i, + FLAGS_loop, + read_type::efficient, + 0, + std::numeric_limits::max(), + stringToSize(FLAGS_res)); auto startTime = std::chrono::steady_clock::now(); cv::Mat frame = cap->read(); @@ -131,7 +124,7 @@ int main(int argc, char* argv[]) { } presenter.drawGraphs(frame); - metrics.update(startTime, frame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + metrics.update(startTime, frame, {10, 22}, cv::FONT_HERSHEY_COMPLEX, 0.65); if (FLAGS_r) { for (auto& inferenceResult : inferenceResults) { @@ -162,12 +155,10 @@ int main(int argc, char* argv[]) { slog::info << "Metrics report:" << slog::endl; metrics.logTotal(); slog::info << presenter.reportMeans() << slog::endl; - } - catch (const std::exception& error) { + } catch (const std::exception& error) { slog::err << error.what() << slog::endl; return 1; - } - catch (...) { + } catch (...) { slog::err << "Unknown/internal exception happened." << slog::endl; return 1; } diff --git a/demos/gaze_estimation_demo/cpp/src/eye_state_estimator.cpp b/demos/gaze_estimation_demo/cpp/src/eye_state_estimator.cpp index 6dece43af22..2be04cf6a8f 100644 --- a/demos/gaze_estimation_demo/cpp/src/eye_state_estimator.cpp +++ b/demos/gaze_estimation_demo/cpp/src/eye_state_estimator.cpp @@ -2,24 +2,30 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "eye_state_estimator.hpp" + +#include #include #include -#include "eye_state_estimator.hpp" +#include + +#include "face_inference_results.hpp" + +namespace ov { +class Core; +} // namespace ov namespace gaze_estimation { -EyeStateEstimator::EyeStateEstimator( - ov::Core& ie, const std::string& modelPath, const std::string& deviceName) : - ieWrapper(ie, modelPath, modelType, deviceName) -{ +EyeStateEstimator::EyeStateEstimator(ov::Core& ie, const std::string& modelPath, const std::string& deviceName) + : ieWrapper(ie, modelPath, modelType, deviceName) { inputTensorName = ieWrapper.expectSingleInput(); ieWrapper.expectImageInput(inputTensorName); outputTensorName = ieWrapper.expectSingleOutput(); } -cv::Rect EyeStateEstimator::createEyeBoundingBox( - const cv::Point2i& p1, const cv::Point2i& p2, float scale) const { +cv::Rect EyeStateEstimator::createEyeBoundingBox(const cv::Point2i& p1, const cv::Point2i& p2, float scale) const { cv::Rect result; float size = static_cast(cv::norm(p1 - p2)); @@ -34,8 +40,7 @@ cv::Rect EyeStateEstimator::createEyeBoundingBox( return result; } -void EyeStateEstimator::rotateImageAroundCenter( - const cv::Mat& srcImage, cv::Mat& dstImage, float angle) const { +void EyeStateEstimator::rotateImageAroundCenter(const cv::Mat& srcImage, cv::Mat& dstImage, float angle) const { auto w = srcImage.cols; auto h = srcImage.rows; @@ -47,8 +52,7 @@ void EyeStateEstimator::rotateImageAroundCenter( cv::warpAffine(srcImage, dstImage, rotMatrix, size, 1, cv::BORDER_REPLICATE); } -void EyeStateEstimator::estimate( - const cv::Mat& image, FaceInferenceResults& outputResults) { +void EyeStateEstimator::estimate(const cv::Mat& image, FaceInferenceResults& outputResults) { auto roll = outputResults.headPoseAngles.z; std::vector eyeLandmarks = outputResults.getEyeLandmarks(); @@ -89,6 +93,5 @@ void EyeStateEstimator::estimate( } } -EyeStateEstimator::~EyeStateEstimator() { -} +EyeStateEstimator::~EyeStateEstimator() {} } // namespace gaze_estimation diff --git a/demos/gaze_estimation_demo/cpp/src/face_detector.cpp b/demos/gaze_estimation_demo/cpp/src/face_detector.cpp index 20749db4381..5868d55c73a 100644 --- 
a/demos/gaze_estimation_demo/cpp/src/face_detector.cpp +++ b/demos/gaze_estimation_demo/cpp/src/face_detector.cpp @@ -2,24 +2,31 @@ // SPDX-License-Identifier: Apache-2.0 // -#include +#include "face_detector.hpp" + +#include +#include +#include +#include #include #include -#include -#include "openvino/openvino.hpp" +#include -#include "face_detector.hpp" +#include + +#include "face_inference_results.hpp" namespace gaze_estimation { -FaceDetector::FaceDetector( - ov::Core& core, const std::string& modelPath, const std::string& deviceName, - double detectionConfidenceThreshold, bool enableReshape) : - ieWrapper(core, modelPath, modelType, deviceName), - detectionThreshold(detectionConfidenceThreshold), - enableReshape(enableReshape) -{ +FaceDetector::FaceDetector(ov::Core& core, + const std::string& modelPath, + const std::string& deviceName, + double detectionConfidenceThreshold, + bool enableReshape) + : ieWrapper(core, modelPath, modelType, deviceName), + detectionThreshold(detectionConfidenceThreshold), + enableReshape(enableReshape) { const auto& inputInfo = ieWrapper.getInputTensorDimsInfo(); inputTensorName = ieWrapper.expectSingleInput(); @@ -31,7 +38,8 @@ FaceDetector::FaceDetector( outputTensorName = ieWrapper.expectSingleOutput(); const auto& outputTensorDims = outputInfo.at(outputTensorName); - if (outputTensorDims.size() != 4 || outputTensorDims[0] != 1 || outputTensorDims[1] != 1 || outputTensorDims[3] != 7) { + if (outputTensorDims.size() != 4 || outputTensorDims[0] != 1 || outputTensorDims[1] != 1 || + outputTensorDims[3] != 7) { throw std::runtime_error(modelPath + ": expected \"" + outputTensorName + "\" to have shape 1x1xNx7"); } @@ -67,8 +75,8 @@ std::vector FaceDetector::detect(const cv::Mat& image) { double networkAspectRatio = std::round(100. 
* inputTensorDims[3] / inputTensorDims[2]) / 100.; double aspectRatioThreshold = 0.01; - if (std::fabs(imageAspectRatio - networkAspectRatio) > aspectRatioThreshold) { - slog::debug << "Face Detection network is reshaped" << slog::endl; + if (std::fabs(imageAspectRatio - networkAspectRatio) > aspectRatioThreshold) { + slog::debug << "Face Detection network is reshaped" << slog::endl; // Fix height and change width to make networkAspectRatio equal to imageAspectRatio inputTensorDims[3] = static_cast(inputTensorDims[2] * imageAspectRatio); @@ -97,8 +105,7 @@ std::vector FaceDetector::detect(const cv::Mat& image) { auto y = rawDetectionResults[detectionID * 7 + 4] * imageSize.height; auto height = rawDetectionResults[detectionID * 7 + 6] * imageSize.height - y; - cv::Rect faceRect(static_cast(x), static_cast(y), - static_cast(width), static_cast(height)); + cv::Rect faceRect(static_cast(x), static_cast(y), static_cast(width), static_cast(height)); adjustBoundingBox(faceRect); auto rectIntersection = faceRect & imageRect; @@ -115,7 +122,6 @@ std::vector FaceDetector::detect(const cv::Mat& image) { return detectionResult; } -FaceDetector::~FaceDetector() { -} +FaceDetector::~FaceDetector() {} } // namespace gaze_estimation diff --git a/demos/gaze_estimation_demo/cpp/src/face_inference_results.cpp b/demos/gaze_estimation_demo/cpp/src/face_inference_results.cpp index 457412f433c..7c5fdf9f0ce 100644 --- a/demos/gaze_estimation_demo/cpp/src/face_inference_results.cpp +++ b/demos/gaze_estimation_demo/cpp/src/face_inference_results.cpp @@ -2,9 +2,13 @@ // SPDX-License-Identifier: Apache-2.0 // -#include #include "face_inference_results.hpp" +#include +#include + +#include + namespace gaze_estimation { slog::LogStream& operator<<(slog::LogStream& os, const FaceInferenceResults& faceInferenceResults) { @@ -35,14 +39,12 @@ std::vector FaceInferenceResults::getEyeLandmarks() { result[1] = faceLandmarks[1]; result[2] = faceLandmarks[2]; result[3] = faceLandmarks[3]; - } - else if (faceLandmarks.size() == 98) { + } else if (faceLandmarks.size() == 98) { result[0] = faceLandmarks[60]; result[1] = faceLandmarks[64]; result[2] = faceLandmarks[68]; result[3] = faceLandmarks[72]; - } - else { + } else { throw std::runtime_error("the network must output 35 or 98 points"); } return result; diff --git a/demos/gaze_estimation_demo/cpp/src/gaze_estimator.cpp b/demos/gaze_estimation_demo/cpp/src/gaze_estimator.cpp index 76ac4e73c62..c9fba70e628 100644 --- a/demos/gaze_estimation_demo/cpp/src/gaze_estimator.cpp +++ b/demos/gaze_estimation_demo/cpp/src/gaze_estimator.cpp @@ -2,11 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "gaze_estimator.hpp" + +#include +#include +#include +#include +#include #include #include -#include -#include "gaze_estimator.hpp" +#include +#include +#include + +#include "face_inference_results.hpp" namespace gaze_estimation { @@ -14,20 +24,23 @@ const char TENSOR_HEAD_POSE_ANGLES[] = "head_pose_angles"; const char TENSOR_LEFT_EYE_IMAGE[] = "left_eye_image"; const char TENSOR_RIGHT_EYE_IMAGE[] = "right_eye_image"; -GazeEstimator::GazeEstimator( - ov::Core& ie, const std::string& modelPath, const std::string& deviceName, bool doRollAlign) : - ieWrapper(ie, modelPath, modelType, deviceName), rollAlign(doRollAlign) -{ +GazeEstimator::GazeEstimator(ov::Core& ie, + const std::string& modelPath, + const std::string& deviceName, + bool doRollAlign) + : ieWrapper(ie, modelPath, modelType, deviceName), + rollAlign(doRollAlign) { const auto& inputInfo = ieWrapper.getInputTensorDimsInfo(); 
- for (const auto& TensorName: {TENSOR_HEAD_POSE_ANGLES, TENSOR_LEFT_EYE_IMAGE, TENSOR_RIGHT_EYE_IMAGE}) { + for (const auto& TensorName : {TENSOR_HEAD_POSE_ANGLES, TENSOR_LEFT_EYE_IMAGE, TENSOR_RIGHT_EYE_IMAGE}) { if (inputInfo.find(TensorName) == inputInfo.end()) throw std::runtime_error(modelPath + ": expected to have input named \"" + TensorName + "\""); } auto expectAngles = [&modelPath](const std::string& TensorName, const ov::Shape dims) { - bool is1Dim = !dims.empty() - && std::all_of(dims.begin(), dims.end() - 1, [](unsigned long n) { return n == 1; }); + bool is1Dim = !dims.empty() && std::all_of(dims.begin(), dims.end() - 1, [](unsigned long n) { + return n == 1; + }); if (!is1Dim || dims.back() != 3) { throw std::runtime_error(modelPath + ": expected \"" + TensorName + "\" to have dimensions [1x...]3"); @@ -36,7 +49,7 @@ GazeEstimator::GazeEstimator( expectAngles(TENSOR_HEAD_POSE_ANGLES, inputInfo.at(TENSOR_HEAD_POSE_ANGLES)); - for (const auto& TensorName: { TENSOR_LEFT_EYE_IMAGE, TENSOR_RIGHT_EYE_IMAGE}) { + for (const auto& TensorName : {TENSOR_LEFT_EYE_IMAGE, TENSOR_RIGHT_EYE_IMAGE}) { ieWrapper.expectImageInput(TensorName); } @@ -111,6 +124,5 @@ void GazeEstimator::estimate(const cv::Mat& image, FaceInferenceResults& outputR outputResults.gazeVector = gazeVector; } -GazeEstimator::~GazeEstimator() { -} +GazeEstimator::~GazeEstimator() {} } // namespace gaze_estimation diff --git a/demos/gaze_estimation_demo/cpp/src/head_pose_estimator.cpp b/demos/gaze_estimation_demo/cpp/src/head_pose_estimator.cpp index f477d5f81f6..907a81d85af 100644 --- a/demos/gaze_estimation_demo/cpp/src/head_pose_estimator.cpp +++ b/demos/gaze_estimation_demo/cpp/src/head_pose_estimator.cpp @@ -2,10 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "head_pose_estimator.hpp" + +#include +#include +#include #include +#include #include -#include "head_pose_estimator.hpp" +#include +#include + +#include "face_inference_results.hpp" namespace gaze_estimation { @@ -15,27 +24,24 @@ const std::pair OUTPUTS[] = { {"angle_r_fc", &cv::Point3f::z}, }; -HeadPoseEstimator::HeadPoseEstimator( - ov::Core& ie, const std::string& modelPath, const std::string& deviceName) : - ieWrapper(ie, modelPath, modelType, deviceName) -{ +HeadPoseEstimator::HeadPoseEstimator(ov::Core& ie, const std::string& modelPath, const std::string& deviceName) + : ieWrapper(ie, modelPath, modelType, deviceName) { inputTensorName = ieWrapper.expectSingleInput(); ieWrapper.expectImageInput(inputTensorName); const auto& outputInfo = ieWrapper.getOutputTensorDimsInfo(); - for (const auto& output: OUTPUTS) { + for (const auto& output : OUTPUTS) { auto it = outputInfo.find(output.first); if (it == outputInfo.end()) - throw std::runtime_error( - modelPath + ": expected to have output named \"" + output.first + "\""); + throw std::runtime_error(modelPath + ": expected to have output named \"" + output.first + "\""); - bool correctDims = std::all_of(it->second.begin(), it->second.end(), - [](unsigned long n) { return n == 1; }); + bool correctDims = std::all_of(it->second.begin(), it->second.end(), [](unsigned long n) { + return n == 1; + }); if (!correctDims) - throw std::runtime_error( - modelPath + ": expected \"" + output.first + "\" to have total size 1"); + throw std::runtime_error(modelPath + ": expected \"" + output.first + "\" to have total size 1"); } } @@ -48,12 +54,11 @@ void HeadPoseEstimator::estimate(const cv::Mat& image, FaceInferenceResults& out std::vector outputValue; - for (const auto &output: OUTPUTS) { + for (const 
auto& output : OUTPUTS) { ieWrapper.getOutputTensor(output.first, outputValue); outputResults.headPoseAngles.*output.second = outputValue[0]; } } -HeadPoseEstimator::~HeadPoseEstimator() { -} +HeadPoseEstimator::~HeadPoseEstimator() {} } // namespace gaze_estimation diff --git a/demos/gaze_estimation_demo/cpp/src/ie_wrapper.cpp b/demos/gaze_estimation_demo/cpp/src/ie_wrapper.cpp index 0eb4f21ec1c..870bfad5e0c 100644 --- a/demos/gaze_estimation_demo/cpp/src/ie_wrapper.cpp +++ b/demos/gaze_estimation_demo/cpp/src/ie_wrapper.cpp @@ -2,22 +2,35 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "ie_wrapper.hpp" + +#include +#include #include +#include +#include #include +#include #include -#include "openvino/openvino.hpp" +#include +#include +#include #include - -#include "ie_wrapper.hpp" +#include +#include namespace gaze_estimation { -IEWrapper::IEWrapper( - ov::Core& core, const std::string& modelPath, const std::string& modelType, const std::string& deviceName) : - modelPath(modelPath), modelType(modelType), deviceName(deviceName), core(core) -{ +IEWrapper::IEWrapper(ov::Core& core, + const std::string& modelPath, + const std::string& modelType, + const std::string& deviceName) + : modelPath(modelPath), + modelType(modelType), + deviceName(deviceName), + core(core) { slog::info << "Reading model: " << modelPath << slog::endl; model = core.read_model(modelPath); logBasicModelInfo(model); @@ -33,19 +46,12 @@ void IEWrapper::setExecPart() { ov::Shape layerDims = inputs[i].get_shape(); input_tensors_dims_info[layerName] = layerDims; if (layerDims.size() == 4) { - ppp.input(layerName).tensor(). - set_element_type(ov::element::u8). - set_layout({ "NCHW" }); - } - else if (layerDims.size() == 2) { - ppp.input(layerName).tensor(). - set_element_type(ov::element::f32). - set_layout({ "NC" }); - } - else { + ppp.input(layerName).tensor().set_element_type(ov::element::u8).set_layout({"NCHW"}); + } else if (layerDims.size() == 2) { + ppp.input(layerName).tensor().set_element_type(ov::element::f32).set_layout({"NC"}); + } else { throw std::runtime_error("Unknown type of input layer layout. 
Expected either 4 or 2 dimensional inputs"); } - } // set map of output tensor name -- tensor dimension pairs ov::OutputVector outputs = model->outputs(); diff --git a/demos/gaze_estimation_demo/cpp/src/landmarks_estimator.cpp b/demos/gaze_estimation_demo/cpp/src/landmarks_estimator.cpp index b5d3e79f8f4..4ea617db1c0 100644 --- a/demos/gaze_estimation_demo/cpp/src/landmarks_estimator.cpp +++ b/demos/gaze_estimation_demo/cpp/src/landmarks_estimator.cpp @@ -2,16 +2,27 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "landmarks_estimator.hpp" + +#include +#include +#include +#include +#include +#include +#include #include #include -#include "landmarks_estimator.hpp" +#include + +#include "face_inference_results.hpp" namespace gaze_estimation { -LandmarksEstimator::LandmarksEstimator( - ov::Core& ie, const std::string& modelPath, const std::string& deviceName) : - ieWrapper(ie, modelPath, modelType, deviceName), numberLandmarks(0) { +LandmarksEstimator::LandmarksEstimator(ov::Core& ie, const std::string& modelPath, const std::string& deviceName) + : ieWrapper(ie, modelPath, modelType, deviceName), + numberLandmarks(0) { inputTensorName = ieWrapper.expectSingleInput(); ieWrapper.expectImageInput(inputTensorName); @@ -35,7 +46,7 @@ void LandmarksEstimator::estimate(const cv::Mat& image, FaceInferenceResults& ou const auto& outputInfo = ieWrapper.getOutputTensorDimsInfo(); const auto& outputTensorDims = outputInfo.at(outputTensorName); if (outputTensorDims.size() == 2) { - outputResults.faceLandmarks=simplePostprocess(faceBoundingBox, faceCrop); + outputResults.faceLandmarks = simplePostprocess(faceBoundingBox, faceCrop); } else { outputResults.faceLandmarks = heatMapPostprocess(faceBoundingBox, faceCrop); @@ -69,8 +80,8 @@ std::vector LandmarksEstimator::heatMapPostprocess(cv::Rect faceBou for (size_t landmarkId = 0; landmarkId < numberLandmarks; landmarkId++) { const cv::Mat& heatMap = heatMaps[landmarkId]; - int px = int(preds[landmarkId].x); - int py = int(preds[landmarkId].y); + int px = static_cast(preds[landmarkId].x); + int py = static_cast(preds[landmarkId].y); if (1 < px && px < heatMap.cols - 1 && 1 < py && py < heatMap.rows - 1) { float diffFirst = heatMap.at(py, px + 1) - heatMap.at(py, px - 1); float diffSecond = heatMap.at(py + 1, px) - heatMap.at(py - 1, px); @@ -79,7 +90,7 @@ std::vector LandmarksEstimator::heatMapPostprocess(cv::Rect faceBou } } - //transform preds + // transform preds cv::Mat trans = affineTransform(center, scale, 0, heatMapsDims[2], heatMapsDims[3], cv::Point2f(0., 0.), true); std::vector landmarks; for (size_t landmarkId = 0; landmarkId < numberLandmarks; landmarkId++) { @@ -120,12 +131,17 @@ std::vector LandmarksEstimator::getMaxPreds(std::vector he const float* heatMapData = heatMap.ptr(); std::vector indices(reshapedSize); std::iota(std::begin(indices), std::end(indices), 0); - std::partial_sort(std::begin(indices), std::begin(indices) + numberLandmarks, std::end(indices), - [heatMapData](int l, int r) { return heatMapData[l] > heatMapData[r]; }); + std::partial_sort(std::begin(indices), + std::begin(indices) + numberLandmarks, + std::end(indices), + [heatMapData](int l, int r) { + return heatMapData[l] > heatMapData[r]; + }); size_t idx = indices[0]; float maxVal = heatMapData[idx]; if (maxVal > 0) { - preds.push_back(cv::Point2f(static_cast(idx % heatMaps[0].cols), static_cast(idx / heatMaps[0].cols))); + preds.push_back( + cv::Point2f(static_cast(idx % heatMaps[0].cols), static_cast(idx / heatMaps[0].cols))); } else { 
preds.push_back(cv::Point2f(-1, -1)); } @@ -136,16 +152,19 @@ std::vector LandmarksEstimator::getMaxPreds(std::vector he int LandmarksEstimator::sign(float number) { if (number > 0) { return 1; - } - else if (number < 0) { + } else if (number < 0) { return -1; } return 0; } -cv::Mat LandmarksEstimator::affineTransform( - cv::Point2f center, cv::Point2f scale, float rot, size_t dst_w, size_t dst_h, cv::Point2f shift, bool inv) -{ +cv::Mat LandmarksEstimator::affineTransform(cv::Point2f center, + cv::Point2f scale, + float rot, + size_t dst_w, + size_t dst_h, + cv::Point2f shift, + bool inv) { cv::Point2f scale_tmp = scale; const float pi = acos(-1.0f); float rot_rad = pi * rot / 180; @@ -171,14 +190,13 @@ cv::Mat LandmarksEstimator::affineTransform( cv::Point2f LandmarksEstimator::rotatePoint(cv::Point2f pt, float angle_rad) { float new_x = pt.x * cos(angle_rad) - pt.y * sin(angle_rad); float new_y = pt.x * sin(angle_rad) + pt.y * cos(angle_rad); - return cv::Point2f(new_x, new_y); + return cv::Point2f(new_x, new_y); } cv::Point2f LandmarksEstimator::get3rdPoint(cv::Point2f a, cv::Point2f b) { cv::Point2f direction = a - b; - return cv::Point2f(b.x - direction.y, b.y + direction.x); + return cv::Point2f(b.x - direction.y, b.y + direction.x); } -LandmarksEstimator::~LandmarksEstimator() { -} +LandmarksEstimator::~LandmarksEstimator() {} } // namespace gaze_estimation diff --git a/demos/gaze_estimation_demo/cpp/src/results_marker.cpp b/demos/gaze_estimation_demo/cpp/src/results_marker.cpp index 46f111e492e..433c0ccbed4 100644 --- a/demos/gaze_estimation_demo/cpp/src/results_marker.cpp +++ b/demos/gaze_estimation_demo/cpp/src/results_marker.cpp @@ -1,46 +1,52 @@ -// Copyright (C) 2018 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include -#include #define _USE_MATH_DEFINES + +#include "results_marker.hpp" + +#include #include +#include -#include -#include -#include +#include +#include + +#include -#include "results_marker.hpp" #include "face_inference_results.hpp" #include "utils.hpp" namespace gaze_estimation { -ResultsMarker::ResultsMarker( - bool showFaceBoundingBox, bool showHeadPoseAxes, bool showLandmarks, bool showGaze, bool showEyeState) : - showFaceBoundingBox(showFaceBoundingBox), - showHeadPoseAxes(showHeadPoseAxes), - showLandmarks(showLandmarks), - showGaze(showGaze), - showEyeState(showEyeState) -{ -} +ResultsMarker::ResultsMarker(bool showFaceBoundingBox, + bool showHeadPoseAxes, + bool showLandmarks, + bool showGaze, + bool showEyeState) + : showFaceBoundingBox(showFaceBoundingBox), + showHeadPoseAxes(showHeadPoseAxes), + showLandmarks(showLandmarks), + showGaze(showGaze), + showEyeState(showEyeState) {} void ResultsMarker::mark(cv::Mat& image, const FaceInferenceResults& faceInferenceResults) const { auto faceBoundingBox = faceInferenceResults.faceBoundingBox; auto faceBoundingBoxWidth = faceBoundingBox.width; auto faceBoundingBoxHeight = faceBoundingBox.height; - auto scale = 0.002 * faceBoundingBoxWidth; + auto scale = 0.002 * faceBoundingBoxWidth; cv::Point tl = faceBoundingBox.tl(); if (showFaceBoundingBox) { cv::rectangle(image, faceInferenceResults.faceBoundingBox, cv::Scalar::all(255), 1); - putHighlightedText(image, - cv::format("Detector confidence: %0.2f", - static_cast(faceInferenceResults.faceDetectionConfidence)), - cv::Point(static_cast(tl.x), - static_cast(tl.y - 5. 
* faceBoundingBoxWidth / 200.)), - cv::FONT_HERSHEY_COMPLEX, scale, cv::Scalar(200, 10, 10), 1); + putHighlightedText( + image, + cv::format("Detector confidence: %0.2f", static_cast(faceInferenceResults.faceDetectionConfidence)), + cv::Point(static_cast(tl.x), static_cast(tl.y - 5. * faceBoundingBoxWidth / 200.)), + cv::FONT_HERSHEY_COMPLEX, + scale, + cv::Scalar(200, 10, 10), + 1); } if (showHeadPoseAxes) { @@ -61,26 +67,36 @@ void ResultsMarker::mark(cv::Mat& image, const FaceInferenceResults& faceInferen auto yCenter = faceBoundingBox.y + faceBoundingBoxHeight / 2; // center to right - cv::line(image, cv::Point(xCenter, yCenter), + cv::line(image, + cv::Point(xCenter, yCenter), cv::Point(static_cast(xCenter + axisLength * (cosR * cosY + sinY * sinP * sinR)), static_cast(yCenter + axisLength * cosP * sinR)), - cv::Scalar(0, 0, 255), 2); + cv::Scalar(0, 0, 255), + 2); // center to top - cv::line(image, cv::Point(xCenter, yCenter), + cv::line(image, + cv::Point(xCenter, yCenter), cv::Point(static_cast(xCenter + axisLength * (cosR * sinY * sinP + cosY * sinR)), static_cast(yCenter - axisLength * cosP * cosR)), - cv::Scalar(0, 255, 0), 2); + cv::Scalar(0, 255, 0), + 2); // center to forward - cv::line(image, cv::Point(xCenter, yCenter), + cv::line(image, + cv::Point(xCenter, yCenter), cv::Point(static_cast(xCenter + axisLength * sinY * cosP), static_cast(yCenter + axisLength * sinP)), - cv::Scalar(255, 0, 255), 2); + cv::Scalar(255, 0, 255), + 2); - putHighlightedText(image, + putHighlightedText( + image, cv::format("head pose: (y=%0.0f, p=%0.0f, r=%0.0f)", std::round(yaw), std::round(pitch), std::round(roll)), cv::Point(static_cast(faceBoundingBox.tl().x), - static_cast(faceBoundingBox.br().y + 5. * faceBoundingBoxWidth / 100.)), - cv::FONT_HERSHEY_PLAIN, scale * 2, cv::Scalar(200, 10, 10), 1); + static_cast(faceBoundingBox.br().y + 5. * faceBoundingBoxWidth / 100.)), + cv::FONT_HERSHEY_PLAIN, + scale * 2, + cv::Scalar(200, 10, 10), + 1); } if (showLandmarks) { @@ -105,25 +121,32 @@ void ResultsMarker::mark(cv::Mat& image, const FaceInferenceResults& faceInferen if (faceInferenceResults.leftEyeState) cv::arrowedLine(image, - faceInferenceResults.leftEyeMidpoint, - faceInferenceResults.leftEyeMidpoint + gazeArrow, cv::Scalar(255, 0, 0), 2); + faceInferenceResults.leftEyeMidpoint, + faceInferenceResults.leftEyeMidpoint + gazeArrow, + cv::Scalar(255, 0, 0), + 2); if (faceInferenceResults.rightEyeState) cv::arrowedLine(image, - faceInferenceResults.rightEyeMidpoint, - faceInferenceResults.rightEyeMidpoint + gazeArrow, cv::Scalar(255, 0, 0), 2); + faceInferenceResults.rightEyeMidpoint, + faceInferenceResults.rightEyeMidpoint + gazeArrow, + cv::Scalar(255, 0, 0), + 2); cv::Point2f gazeAngles; if (faceInferenceResults.leftEyeState && faceInferenceResults.rightEyeState) { gazeVectorToGazeAngles(faceInferenceResults.gazeVector, gazeAngles); putHighlightedText(image, - cv::format("gaze angles: (h=%0.0f, v=%0.0f)", - static_cast(std::round(gazeAngles.x)), - static_cast(std::round(gazeAngles.y))), - cv::Point(static_cast(faceBoundingBox.tl().x), - static_cast(faceBoundingBox.br().y + 12. * faceBoundingBoxWidth / 100.)), - cv::FONT_HERSHEY_PLAIN, scale * 2, cv::Scalar(200, 10, 10), 1); + cv::format("gaze angles: (h=%0.0f, v=%0.0f)", + static_cast(std::round(gazeAngles.x)), + static_cast(std::round(gazeAngles.y))), + cv::Point(static_cast(faceBoundingBox.tl().x), + static_cast(faceBoundingBox.br().y + 12. 
* faceBoundingBoxWidth / 100.)), + cv::FONT_HERSHEY_PLAIN, + scale * 2, + cv::Scalar(200, 10, 10), + 1); } } if (showEyeState) { diff --git a/demos/gaze_estimation_demo/cpp/src/utils.cpp b/demos/gaze_estimation_demo/cpp/src/utils.cpp index 90583d3bb0c..af724c97173 100644 --- a/demos/gaze_estimation_demo/cpp/src/utils.cpp +++ b/demos/gaze_estimation_demo/cpp/src/utils.cpp @@ -1,21 +1,15 @@ -// Copyright (C) 2018 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include -#include - #define _USE_MATH_DEFINES -#include - -#include -#include -#include -#include -#include #include "utils.hpp" +#include + +#include + namespace gaze_estimation { void gazeVectorToGazeAngles(const cv::Point3f& gazeVector, cv::Point2f& gazeAngles) { auto r = cv::norm(gazeVector); diff --git a/demos/gaze_estimation_demo/cpp_gapi/src/results_marker.cpp b/demos/gaze_estimation_demo/cpp_gapi/src/results_marker.cpp index a9c23bb950f..18774d09155 100644 --- a/demos/gaze_estimation_demo/cpp_gapi/src/results_marker.cpp +++ b/demos/gaze_estimation_demo/cpp_gapi/src/results_marker.cpp @@ -6,8 +6,8 @@ #include "results_marker.hpp" -#include #include +#include #include #include diff --git a/demos/gesture_recognition_demo/cpp_gapi/main.cpp b/demos/gesture_recognition_demo/cpp_gapi/main.cpp index 9a81f46c0bf..38d020e9032 100644 --- a/demos/gesture_recognition_demo/cpp_gapi/main.cpp +++ b/demos/gesture_recognition_demo/cpp_gapi/main.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -40,7 +41,6 @@ #include "custom_kernels.hpp" #include "gesture_recognition_demo_gapi.hpp" -#include "gflags/gflags.h" #include "stream_source.hpp" #include "tracker.hpp" #include "utils.hpp" diff --git a/demos/gesture_recognition_demo/cpp_gapi/src/utils.cpp b/demos/gesture_recognition_demo/cpp_gapi/src/utils.cpp index 128c8958115..5262f18ff1a 100644 --- a/demos/gesture_recognition_demo/cpp_gapi/src/utils.cpp +++ b/demos/gesture_recognition_demo/cpp_gapi/src/utils.cpp @@ -16,8 +16,6 @@ #include #include -#define _USE_MATH_DEFINES - cv::Scalar getNetShape(const std::string& path) { const auto network = InferenceEngine::Core{}.ReadNetwork(path); const auto layerData = network.getInputsInfo().begin()->second; diff --git a/demos/gesture_recognition_demo/cpp_gapi/src/visualizer.cpp b/demos/gesture_recognition_demo/cpp_gapi/src/visualizer.cpp index 4133c02571d..46d314c4fb8 100644 --- a/demos/gesture_recognition_demo/cpp_gapi/src/visualizer.cpp +++ b/demos/gesture_recognition_demo/cpp_gapi/src/visualizer.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#define _USE_MATH_DEFINES - #include "visualizer.hpp" #include diff --git a/demos/image_processing_demo/cpp/visualizer.cpp b/demos/image_processing_demo/cpp/visualizer.cpp index b0b61cdd615..00d5226b62c 100644 --- a/demos/image_processing_demo/cpp/visualizer.cpp +++ b/demos/image_processing_demo/cpp/visualizer.cpp @@ -1,5 +1,5 @@ /* -// Copyright (C) 2021 Intel Corporation +// Copyright (C) 2021-2022 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
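
Note on the hunks above: several of them reorder or drop "#define _USE_MATH_DEFINES" (gaze_estimation utils.cpp and results_marker.cpp keep it ahead of the includes, the gesture_recognition sources remove it). The define only has an effect when it appears before the first inclusion of <cmath>/<math.h>, because MSVC gates M_PI and related constants behind it. The snippet below is an illustrative sketch, not code from these demos: toGazeAngles and its angle convention are assumptions, chosen only to show the header-ordering pattern together with a gazeVectorToGazeAngles-style conversion of a 3-D gaze vector into degrees.

    // Minimal sketch: _USE_MATH_DEFINES must precede the math headers,
    // otherwise M_PI stays undefined when building with MSVC.
    #define _USE_MATH_DEFINES
    #include <cmath>

    #include <opencv2/core.hpp>

    // Hypothetical helper, similar in spirit to a gazeVectorToGazeAngles() function:
    // horizontal angle from the x/z components, vertical angle from the normalized y component.
    static cv::Point2f toGazeAngles(const cv::Point3f& gaze) {
        const double r = cv::norm(gaze);  // length of the gaze vector
        const float horizontal = static_cast<float>(std::atan2(gaze.x, -gaze.z) * 180.0 / M_PI);
        const float vertical = static_cast<float>(std::asin(gaze.y / r) * 180.0 / M_PI);
        return {horizontal, vertical};
    }

An alternative to relying on header order is defining the macro on the compiler command line (for example /D_USE_MATH_DEFINES with MSVC, or a target_compile_definitions entry in CMake), which is less fragile when third-party headers may pull in <cmath> first.
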
diff --git a/demos/image_processing_demo/cpp/visualizer.hpp b/demos/image_processing_demo/cpp/visualizer.hpp index 6cb7bdb0c3b..e2b38924884 100644 --- a/demos/image_processing_demo/cpp/visualizer.hpp +++ b/demos/image_processing_demo/cpp/visualizer.hpp @@ -1,5 +1,5 @@ /* -// Copyright (C) 2021 Intel Corporation +// Copyright (C) 2021-2022 Intel Corporation // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/demos/interactive_face_detection_demo/cpp/detectors.cpp b/demos/interactive_face_detection_demo/cpp/detectors.cpp index 0ef6731e108..e797f7ba113 100644 --- a/demos/interactive_face_detection_demo/cpp/detectors.cpp +++ b/demos/interactive_face_detection_demo/cpp/detectors.cpp @@ -3,28 +3,44 @@ // #include "detectors.hpp" + +#include + +#include +#include +#include + +#include #include +#include namespace { constexpr size_t ndetections = 200; } // namespace -BaseDetection::BaseDetection(const std::string &pathToModel, bool doRawOutputMessages) - : pathToModel(pathToModel), doRawOutputMessages(doRawOutputMessages) { -} +BaseDetection::BaseDetection(const std::string& pathToModel, bool doRawOutputMessages) + : pathToModel(pathToModel), + doRawOutputMessages(doRawOutputMessages) {} -bool BaseDetection::enabled() const { - return bool(request); +bool BaseDetection::enabled() const { + return static_cast(request); } -FaceDetection::FaceDetection(const std::string &pathToModel, - double detectionThreshold, bool doRawOutputMessages, - float bb_enlarge_coefficient, float bb_dx_coefficient, float bb_dy_coefficient) +FaceDetection::FaceDetection(const std::string& pathToModel, + double detectionThreshold, + bool doRawOutputMessages, + float bb_enlarge_coefficient, + float bb_dx_coefficient, + float bb_dy_coefficient) : BaseDetection(pathToModel, doRawOutputMessages), detectionThreshold(detectionThreshold), - objectSize(0), width(0), height(0), - model_input_width(0), model_input_height(0), - bb_enlarge_coefficient(bb_enlarge_coefficient), bb_dx_coefficient(bb_dx_coefficient), + objectSize(0), + width(0), + height(0), + model_input_width(0), + model_input_height(0), + bb_enlarge_coefficient(bb_enlarge_coefficient), + bb_dx_coefficient(bb_dx_coefficient), bb_dy_coefficient(bb_dy_coefficient) {} void FaceDetection::submitRequest(const cv::Mat& frame) { @@ -54,15 +70,17 @@ std::shared_ptr FaceDetection::read(const ov::Core& core) { throw std::logic_error("Face Detection model output layer should have 7 as a last dimension"); } if (outShape[2] != ndetections) { - throw std::logic_error("Face Detection model output must contain " + std::to_string(ndetections) + " detections"); + throw std::logic_error("Face Detection model output must contain " + std::to_string(ndetections) + + " detections"); } } else { - for (const auto& out: outputs) { + for (const auto& out : outputs) { const auto& outShape = out.get_shape(); if (outShape.size() == 2 && outShape.back() == 5) { output = out.get_any_name(); if (outShape[0] != ndetections) { - throw std::logic_error("Face Detection model output must contain " + std::to_string(ndetections) + " detections"); + throw std::logic_error("Face Detection model output must contain " + std::to_string(ndetections) + + " detections"); } objectSize = outShape.back(); } else if (outShape.size() == 1 && out.get_element_type() == ov::element::i32) { @@ -70,15 +88,14 @@ std::shared_ptr FaceDetection::read(const ov::Core& core) { } } if (output.empty() || labels_output.empty()) { - throw 
std::logic_error("Face Detection model must contain either single DetectionOutput or " - "'boxes' [nx5] and 'labels' [n] at least, where 'n' is a number of detected objects."); + throw std::logic_error( + "Face Detection model must contain either single DetectionOutput or " + "'boxes' [nx5] and 'labels' [n] at least, where 'n' is a number of detected objects."); } } ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_layout("NHWC"); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); ppp.input().preprocess().convert_layout("NCHW"); ppp.output(output).tensor().set_element_type(ov::element::f32); model = ppp.build(); @@ -89,9 +106,9 @@ std::shared_ptr FaceDetection::read(const ov::Core& core) { std::vector FaceDetection::fetchResults() { std::vector results; request.wait(); - float *detections = request.get_tensor(output).data(); + float* detections = request.get_tensor(output).data(); if (!labels_output.empty()) { - const int32_t *labels = request.get_tensor(labels_output).data(); + const int32_t* labels = request.get_tensor(labels_output).data(); for (size_t i = 0; i < ndetections; i++) { Result r; r.label = labels[i]; @@ -103,8 +120,10 @@ std::vector FaceDetection::fetchResults() { r.location.x = static_cast(detections[i * objectSize] / model_input_width * width); r.location.y = static_cast(detections[i * objectSize + 1] / model_input_height * height); - r.location.width = static_cast(detections[i * objectSize + 2] / model_input_width * width - r.location.x); - r.location.height = static_cast(detections[i * objectSize + 3] / model_input_height * height - r.location.y); + r.location.width = + static_cast(detections[i * objectSize + 2] / model_input_width * width - r.location.x); + r.location.height = + static_cast(detections[i * objectSize + 3] / model_input_height * height - r.location.y); // Make square and enlarge face bounding box for more robust operation of face analytics networks int bb_width = r.location.width; @@ -125,10 +144,10 @@ std::vector FaceDetection::fetchResults() { r.location.height = bb_new_height; if (doRawOutputMessages) { - slog::debug << "[" << i << "," << r.label << "] element, prob = " << r.confidence << - " (" << r.location.x << "," << r.location.y << ")-(" << r.location.width << "," - << r.location.height << ")" - << ((r.confidence > detectionThreshold) ? " WILL BE RENDERED!" : "") << slog::endl; + slog::debug << "[" << i << "," << r.label << "] element, prob = " << r.confidence << " (" + << r.location.x << "," << r.location.y << ")-(" << r.location.width << "," + << r.location.height << ")" + << ((r.confidence > detectionThreshold) ? " WILL BE RENDERED!" : "") << slog::endl; } if (r.confidence > detectionThreshold) { results.push_back(r); @@ -173,10 +192,9 @@ std::vector FaceDetection::fetchResults() { r.location.height = bb_new_height; if (doRawOutputMessages) { - slog::debug << "[" << i << "," << r.label << "] element, prob = " << r.confidence << - " (" << r.location.x << "," << r.location.y << ")-(" << r.location.width << "," - << r.location.height << ")" - << ((r.confidence > detectionThreshold) ? " WILL BE RENDERED!" : "") << slog::endl; + slog::debug << "[" << i << "," << r.label << "] element, prob = " << r.confidence << " (" << r.location.x + << "," << r.location.y << ")-(" << r.location.width << "," << r.location.height << ")" + << ((r.confidence > detectionThreshold) ? " WILL BE RENDERED!" 
: "") << slog::endl; } if (r.confidence > detectionThreshold) { results.push_back(r); @@ -187,13 +205,13 @@ std::vector FaceDetection::fetchResults() { AntispoofingClassifier::AntispoofingClassifier(const std::string& pathToModel, bool doRawOutputMessages) : BaseDetection(pathToModel, doRawOutputMessages), - enquedFaces(0) { -} + enquedFaces(0) {} void AntispoofingClassifier::submitRequest() { if (!enquedFaces) return; - request.set_input_tensor(ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); + request.set_input_tensor( + ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); request.start_async(); enquedFaces = 0; } @@ -204,7 +222,8 @@ void AntispoofingClassifier::enqueue(const cv::Mat& face) { } ov::Tensor batch = request.get_input_tensor(); batch.set_shape(inShape); - resize2tensor(face, ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); + resize2tensor(face, + ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); enquedFaces++; } @@ -224,9 +243,7 @@ std::shared_ptr AntispoofingClassifier::read(const ov::Core& core) { logBasicModelInfo(model); ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_layout("NHWC"); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); ppp.input().preprocess().convert_layout("NCHW"); ppp.output().tensor().set_element_type(ov::element::f32); model = ppp.build(); @@ -236,27 +253,27 @@ std::shared_ptr AntispoofingClassifier::read(const ov::Core& core) { return model; } -AgeGenderDetection::AgeGenderDetection(const std::string &pathToModel, - bool doRawOutputMessages) +AgeGenderDetection::AgeGenderDetection(const std::string& pathToModel, bool doRawOutputMessages) : BaseDetection(pathToModel, doRawOutputMessages), - enquedFaces(0) { -} + enquedFaces(0) {} void AgeGenderDetection::submitRequest() { if (!enquedFaces) return; - request.set_input_tensor(ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); + request.set_input_tensor( + ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); request.start_async(); enquedFaces = 0; } -void AgeGenderDetection::enqueue(const cv::Mat &face) { +void AgeGenderDetection::enqueue(const cv::Mat& face) { if (!enabled()) { return; } ov::Tensor batch = request.get_input_tensor(); batch.set_shape(inShape); - resize2tensor(face, ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); + resize2tensor(face, + ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); enquedFaces++; } @@ -279,12 +296,8 @@ std::shared_ptr AgeGenderDetection::read(const ov::Core& core) { outputGender = "prob"; ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). - set_layout("NHWC"); - ppp.input().preprocess(). - convert_element_type(ov::element::f32). 
- convert_layout("NCHW"); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); + ppp.input().preprocess().convert_element_type(ov::element::f32).convert_layout("NCHW"); ppp.output(outputAge).tensor().set_element_type(ov::element::f32); ppp.output(outputGender).tensor().set_element_type(ov::element::f32); model = ppp.build(); @@ -294,27 +307,30 @@ std::shared_ptr AgeGenderDetection::read(const ov::Core& core) { return model; } - -HeadPoseDetection::HeadPoseDetection(const std::string &pathToModel, - bool doRawOutputMessages) +HeadPoseDetection::HeadPoseDetection(const std::string& pathToModel, bool doRawOutputMessages) : BaseDetection(pathToModel, doRawOutputMessages), - outputAngleR("angle_r_fc"), outputAngleP("angle_p_fc"), outputAngleY("angle_y_fc"), enquedFaces(0) { -} + outputAngleR("angle_r_fc"), + outputAngleP("angle_p_fc"), + outputAngleY("angle_y_fc"), + enquedFaces(0) {} -void HeadPoseDetection::submitRequest() { - if (!enquedFaces) return; - request.set_input_tensor(ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); +void HeadPoseDetection::submitRequest() { + if (!enquedFaces) + return; + request.set_input_tensor( + ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); request.start_async(); enquedFaces = 0; } -void HeadPoseDetection::enqueue(const cv::Mat &face) { +void HeadPoseDetection::enqueue(const cv::Mat& face) { if (!enabled()) { return; } ov::Tensor batch = request.get_input_tensor(); batch.set_shape(inShape); - resize2tensor(face, ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); + resize2tensor(face, + ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); enquedFaces++; } @@ -324,9 +340,8 @@ HeadPoseDetection::Results HeadPoseDetection::operator[](int idx) { request.get_tensor(outputAngleP).data()[idx], request.get_tensor(outputAngleY).data()[idx]}; if (doRawOutputMessages) { - slog::debug << "[" << idx << "] element, yaw = " << r.angle_y << - ", pitch = " << r.angle_p << - ", roll = " << r.angle_r << slog::endl; + slog::debug << "[" << idx << "] element, yaw = " << r.angle_y << ", pitch = " << r.angle_p + << ", roll = " << r.angle_r << slog::endl; } return r; @@ -338,9 +353,7 @@ std::shared_ptr HeadPoseDetection::read(const ov::Core& core) { logBasicModelInfo(model); ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout("NHWC"); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); ppp.input().preprocess().convert_layout("NCHW"); ppp.output(outputAngleR).tensor().set_element_type(ov::element::f32); ppp.output(outputAngleP).tensor().set_element_type(ov::element::f32); @@ -352,26 +365,27 @@ std::shared_ptr HeadPoseDetection::read(const ov::Core& core) { return model; } -EmotionsDetection::EmotionsDetection(const std::string &pathToModel, - bool doRawOutputMessages) - : BaseDetection(pathToModel, doRawOutputMessages), - enquedFaces(0) { -} +EmotionsDetection::EmotionsDetection(const std::string& pathToModel, bool doRawOutputMessages) + : BaseDetection(pathToModel, doRawOutputMessages), + enquedFaces(0) {} void EmotionsDetection::submitRequest() { - if (!enquedFaces) return; - request.set_input_tensor(ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); + if (!enquedFaces) + return; + request.set_input_tensor( + ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); request.start_async(); enquedFaces = 0; } -void EmotionsDetection::enqueue(const cv::Mat &face) { +void EmotionsDetection::enqueue(const cv::Mat& face) { if (!enabled()) { return; } ov::Tensor batch = request.get_input_tensor(); batch.set_shape(inShape); - resize2tensor(face, ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); + resize2tensor(face, + ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); enquedFaces++; } @@ -418,9 +432,7 @@ std::shared_ptr EmotionsDetection::read(const ov::Core& core) { logBasicModelInfo(model); ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout("NHWC"); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); ppp.input().preprocess().convert_layout("NCHW"); ppp.output().tensor().set_element_type(ov::element::f32); model = ppp.build(); @@ -430,26 +442,27 @@ std::shared_ptr EmotionsDetection::read(const ov::Core& core) { return model; } - -FacialLandmarksDetection::FacialLandmarksDetection(const std::string &pathToModel, - bool doRawOutputMessages) - : BaseDetection(pathToModel, doRawOutputMessages), enquedFaces(0) { -} +FacialLandmarksDetection::FacialLandmarksDetection(const std::string& pathToModel, bool doRawOutputMessages) + : BaseDetection(pathToModel, doRawOutputMessages), + enquedFaces(0) {} void FacialLandmarksDetection::submitRequest() { - if (!enquedFaces) return; - request.set_input_tensor(ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); + if (!enquedFaces) + return; + request.set_input_tensor( + ov::Tensor{request.get_input_tensor(), {0, 0, 0, 0}, {enquedFaces, inShape[1], inShape[2], inShape[3]}}); request.start_async(); enquedFaces = 0; } -void FacialLandmarksDetection::enqueue(const cv::Mat &face) { +void FacialLandmarksDetection::enqueue(const cv::Mat& face) { if (!enabled()) { return; } ov::Tensor batch = request.get_input_tensor(); batch.set_shape(inShape); - resize2tensor(face, ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); + resize2tensor(face, + ov::Tensor{batch, {enquedFaces, 0, 0, 0}, {enquedFaces + 1, inShape[1], inShape[2], inShape[3]}}); enquedFaces++; } @@ -459,7 +472,7 @@ std::vector FacialLandmarksDetection::operator[](int idx) { request.wait(); const ov::Tensor& tensor = request.get_output_tensor(); size_t n_lm = tensor.get_shape().at(1); - const float *normed_coordinates = request.get_output_tensor().data(); + const float* normed_coordinates = request.get_output_tensor().data(); if (doRawOutputMessages) { slog::debug << "[" << idx << "] element, normed facial landmarks coordinates (x, y):" << slog::endl; @@ -472,7 +485,7 @@ std::vector FacialLandmarksDetection::operator[](int idx) { float normed_y = normed_coordinates[2 * i_lm + 1]; if (doRawOutputMessages) { - slog::debug <<'\t' << normed_x << ", " << normed_y << slog::endl; + slog::debug << '\t' << normed_x << ", " << normed_y << slog::endl; } normedLandmarks.push_back(normed_x); @@ -493,9 +506,7 @@ std::shared_ptr FacialLandmarksDetection::read(const ov::Core& core) " the last dimension"); } ov::preprocess::PrePostProcessor ppp(model); - ppp.input().tensor(). - set_element_type(ov::element::u8). 
- set_layout("NHWC"); + ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC"); ppp.input().preprocess().convert_layout("NCHW"); ppp.output().tensor().set_element_type(ov::element::f32); model = ppp.build(); @@ -505,11 +516,9 @@ std::shared_ptr FacialLandmarksDetection::read(const ov::Core& core) return model; } +Load::Load(BaseDetection& detector) : detector(detector) {} -Load::Load(BaseDetection& detector) : detector(detector) { -} - -void Load::into(ov::Core& core, const std::string & deviceName) const { +void Load::into(ov::Core& core, const std::string& deviceName) const { if (!detector.pathToModel.empty()) { ov::CompiledModel cml = core.compile_model(detector.read(core), deviceName); logCompiledModelInfo(cml, detector.pathToModel, deviceName); @@ -517,10 +526,7 @@ void Load::into(ov::Core& core, const std::string & deviceName) const { } } - -CallStat::CallStat(): - _number_of_calls(0), _total_duration(0.0), _last_call_duration(0.0), _smoothed_duration(-1.0) { -} +CallStat::CallStat() : _number_of_calls(0), _total_duration(0.0), _last_call_duration(0.0), _smoothed_duration(-1.0) {} double CallStat::getSmoothedDuration() { // Additional check is needed for the first frame while duration of the first diff --git a/demos/interactive_face_detection_demo/cpp/detectors.hpp b/demos/interactive_face_detection_demo/cpp/detectors.hpp index 1fde4ce64ad..2c9c59c5593 100644 --- a/demos/interactive_face_detection_demo/cpp/detectors.hpp +++ b/demos/interactive_face_detection_demo/cpp/detectors.hpp @@ -2,12 +2,19 @@ // SPDX-License-Identifier: Apache-2.0 // -# pragma once +#pragma once -#include +#include +#include +#include +#include +#include +#include +#include + +#include #include -#include struct BaseDetection { ov::InferRequest request; @@ -15,7 +22,7 @@ struct BaseDetection { ov::Shape inShape; const bool doRawOutputMessages; - BaseDetection(const std::string &pathToModel, bool doRawOutputMessages); + BaseDetection(const std::string& pathToModel, bool doRawOutputMessages); virtual ~BaseDetection() = default; virtual std::shared_ptr read(const ov::Core& core) = 0; bool enabled() const; @@ -40,13 +47,15 @@ struct FaceDetection : BaseDetection { float bb_dx_coefficient; float bb_dy_coefficient; - FaceDetection(const std::string &pathToModel, - double detectionThreshold, bool doRawOutputMessages, - float bb_enlarge_coefficient, float bb_dx_coefficient, + FaceDetection(const std::string& pathToModel, + double detectionThreshold, + bool doRawOutputMessages, + float bb_enlarge_coefficient, + float bb_dx_coefficient, float bb_dy_coefficient); std::shared_ptr read(const ov::Core& core) override; - void submitRequest(const cv::Mat &frame); + void submitRequest(const cv::Mat& frame); std::vector fetchResults(); }; @@ -60,13 +69,12 @@ struct AgeGenderDetection : BaseDetection { std::string outputGender; size_t enquedFaces; - AgeGenderDetection(const std::string &pathToModel, - bool doRawOutputMessages); + AgeGenderDetection(const std::string& pathToModel, bool doRawOutputMessages); std::shared_ptr read(const ov::Core& core) override; void submitRequest(); - void enqueue(const cv::Mat &face); + void enqueue(const cv::Mat& face); Result operator[](int idx); }; @@ -83,26 +91,24 @@ struct HeadPoseDetection : BaseDetection { size_t enquedFaces; cv::Mat cameraMatrix; - HeadPoseDetection(const std::string &pathToModel, - bool doRawOutputMessages); + HeadPoseDetection(const std::string& pathToModel, bool doRawOutputMessages); std::shared_ptr read(const ov::Core& core) override; void 
submitRequest(); - void enqueue(const cv::Mat &face); + void enqueue(const cv::Mat& face); Results operator[](int idx); }; struct EmotionsDetection : BaseDetection { size_t enquedFaces; - EmotionsDetection(const std::string &pathToModel, - bool doRawOutputMessages); + EmotionsDetection(const std::string& pathToModel, bool doRawOutputMessages); std::shared_ptr read(const ov::Core& core) override; void submitRequest(); - void enqueue(const cv::Mat &face); + void enqueue(const cv::Mat& face); std::map operator[](int idx); const std::vector emotionsVec = {"neutral", "happy", "sad", "surprise", "anger"}; @@ -113,21 +119,19 @@ struct FacialLandmarksDetection : BaseDetection { std::vector> landmarks_results; std::vector faces_bounding_boxes; - FacialLandmarksDetection(const std::string &pathToModel, - bool doRawOutputMessages); + FacialLandmarksDetection(const std::string& pathToModel, bool doRawOutputMessages); std::shared_ptr read(const ov::Core& core) override; void submitRequest(); - void enqueue(const cv::Mat &face); + void enqueue(const cv::Mat& face); std::vector operator[](int idx); }; struct AntispoofingClassifier : BaseDetection { size_t enquedFaces; - AntispoofingClassifier(const std::string &pathToModel, - bool doRawOutputMessages); + AntispoofingClassifier(const std::string& pathToModel, bool doRawOutputMessages); std::shared_ptr read(const ov::Core& core) override; void submitRequest(); @@ -141,7 +145,7 @@ struct Load { explicit Load(BaseDetection& detector); - void into(ov::Core& core, const std::string & deviceName) const; + void into(ov::Core& core, const std::string& deviceName) const; }; class CallStat { diff --git a/demos/interactive_face_detection_demo/cpp/face.cpp b/demos/interactive_face_detection_demo/cpp/face.cpp index 3d172daf395..30ee152b4f6 100644 --- a/demos/interactive_face_detection_demo/cpp/face.cpp +++ b/demos/interactive_face_detection_demo/cpp/face.cpp @@ -1,21 +1,33 @@ -// Copyright (C) 2018-2019 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -#include +#include "face.hpp" + +#include +#include +#include #include +#include #include -#include #include -#include "face.hpp" - -Face::Face(size_t id, cv::Rect& location): - _location(location), _intensity_mean(0.f), _id(id), _age(-1), - _maleScore(0), _femaleScore(0), _headPose({0.f, 0.f, 0.f}), _realFaceConfidence(0), - _isAgeGenderEnabled(false), _isEmotionsEnabled(false), - _isHeadPoseEnabled(false), _isLandmarksEnabled(false), _isAntispoofingEnabled(false) { -} +#include + +Face::Face(size_t id, cv::Rect& location) + : _location(location), + _intensity_mean(0.f), + _id(id), + _age(-1), + _maleScore(0), + _femaleScore(0), + _headPose({0.f, 0.f, 0.f}), + _realFaceConfidence(0), + _isAgeGenderEnabled(false), + _isEmotionsEnabled(false), + _isHeadPoseEnabled(false), + _isLandmarksEnabled(false), + _isAntispoofingEnabled(false) {} void Face::updateAge(float value) { _age = (_age == -1) ? 
value : 0.95f * _age + 0.05f * value; @@ -71,9 +83,11 @@ std::map Face::getEmotions() { } std::pair Face::getMainEmotion() { - auto x = std::max_element(_emotions.begin(), _emotions.end(), - [](const std::pair& p1, const std::pair& p2) { - return p1.second < p2.second; }); + auto x = std::max_element(_emotions.begin(), + _emotions.end(), + [](const std::pair& p1, const std::pair& p2) { + return p1.second < p2.second; + }); return std::make_pair(x->first, x->second); } diff --git a/demos/interactive_face_detection_demo/cpp/face.hpp b/demos/interactive_face_detection_demo/cpp/face.hpp index d54b7b2adb5..e2e4ea1aebf 100644 --- a/demos/interactive_face_detection_demo/cpp/face.hpp +++ b/demos/interactive_face_detection_demo/cpp/face.hpp @@ -1,15 +1,18 @@ -// Copyright (C) 2018-2019 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // -# pragma once -#include +#pragma once +#include + +#include #include #include +#include #include -#include #include -#include + +#include #include "detectors.hpp" diff --git a/demos/interactive_face_detection_demo/cpp/main.cpp b/demos/interactive_face_detection_demo/cpp/main.cpp index d23ffdc8ab7..b365444021d 100644 --- a/demos/interactive_face_detection_demo/cpp/main.cpp +++ b/demos/interactive_face_detection_demo/cpp/main.cpp @@ -2,12 +2,32 @@ // SPDX-License-Identifier: Apache-2.0 // +#include + +#include +#include +#include #include #include +#include +#include +#include +#include +#include +#include #include -#include +#include +#include +#include +#include + #include +#include +#include +#include +#include +#include #include "detectors.hpp" #include "face.hpp" @@ -20,17 +40,18 @@ DEFINE_bool(h, false, h_msg); constexpr char m_msg[] = "path to an .xml file with a trained Face Detection model"; DEFINE_string(m, "", m_msg); -constexpr char i_msg[] = "an input to process. The input must be a single image, a folder of images, video file or camera id. Default is 0"; +constexpr char i_msg[] = + "an input to process. The input must be a single image, a folder of images, video file or camera id. Default is 0"; DEFINE_string(i, "0", i_msg); -constexpr char bb_enlarge_coef_msg[] = "coefficient to enlarge/reduce the size of the bounding box around the detected face. Default is 1.2"; +constexpr char bb_enlarge_coef_msg[] = + "coefficient to enlarge/reduce the size of the bounding box around the detected face. Default is 1.2"; DEFINE_double(bb_enlarge_coef, 1.2, bb_enlarge_coef_msg); -constexpr char d_msg[] = - "specify a device to infer on (the list of available devices is shown below). " - "Use '-d HETERO:' format to specify HETERO plugin. " - "Use '-d MULTI:' format to specify MULTI plugin. " - "Default is CPU"; +constexpr char d_msg[] = "specify a device to infer on (the list of available devices is shown below). " + "Use '-d HETERO:' format to specify HETERO plugin. " + "Use '-d MULTI:' format to specify MULTI plugin. " + "Default is CPU"; DEFINE_string(d, "CPU", d_msg); constexpr char dx_coef_msg[] = "coefficient to shift the bounding box around the detected face along the Ox axis"; @@ -82,13 +103,13 @@ constexpr char t_msg[] = "probability threshold for detections. Default is 0.5"; DEFINE_double(t, 0.5, t_msg); constexpr char u_msg[] = "resource utilization graphs. Default is cdm. 
" - "c - average CPU load, d - load distribution over cores, m - memory usage, h - hide"; + "c - average CPU load, d - load distribution over cores, m - memory usage, h - hide"; DEFINE_string(u, "cdm", u_msg); -void parse(int argc, char *argv[]) { +void parse(int argc, char* argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, false); if (FLAGS_h || 1 == argc) { - std::cout << "\t[ -h] " << h_msg + std::cout << "\t[ -h] " << h_msg << "\n\t[--help] print help on all arguments" << "\n\t -m " << m_msg << "\n\t[ -i ] " << i_msg @@ -118,16 +139,18 @@ void parse(int argc, char *argv[]) { showAvailableDevices(); std::cout << ov::get_openvino_version() << std::endl; exit(0); - } if (FLAGS_i.empty()) { + } + if (FLAGS_i.empty()) { throw std::invalid_argument{"-i can't be empty"}; - } if (FLAGS_m.empty()) { + } + if (FLAGS_m.empty()) { throw std::invalid_argument{"-m can't be empty"}; } slog::info << ov::get_openvino_version() << slog::endl; } -} // namespace +} // namespace -int main(int argc, char *argv[]) { +int main(int argc, char* argv[]) { std::set_terminate(catcher); parse(argc, argv); PerformanceMetrics metrics; @@ -135,8 +158,12 @@ int main(int argc, char *argv[]) { // --------------------------- 1. Loading Inference Engine ----------------------------- ov::Core core; - FaceDetection faceDetector(FLAGS_m, FLAGS_t, FLAGS_r, - static_cast(FLAGS_bb_enlarge_coef), static_cast(FLAGS_dx_coef), static_cast(FLAGS_dy_coef)); + FaceDetection faceDetector(FLAGS_m, + FLAGS_t, + FLAGS_r, + static_cast(FLAGS_bb_enlarge_coef), + static_cast(FLAGS_dx_coef), + static_cast(FLAGS_dy_coef)); AgeGenderDetection ageGenderDetector(FLAGS_mag, FLAGS_r); HeadPoseDetection headPoseDetector(FLAGS_mhp, FLAGS_r); EmotionsDetection emotionsDetector(FLAGS_mem, FLAGS_r); @@ -202,7 +229,7 @@ int main(int argc, char *argv[]) { } // Filling inputs of face analytics networks - for (auto &&face : prev_detection_results) { + for (auto&& face : prev_detection_results) { cv::Rect clippedRect = face.location & cv::Rect({0, 0}, prevFrame.size()); const cv::Mat& crop = prevFrame(clippedRect); ageGenderDetector.enqueue(crop); @@ -212,7 +239,8 @@ int main(int argc, char *argv[]) { antispoofingClassifier.enqueue(crop); } - // Running Age/Gender Recognition, Head Pose Estimation, Emotions Recognition, Facial Landmarks Estimation and Antispoofing Classifier networks simultaneously + // Running Age/Gender Recognition, Head Pose Estimation, Emotions Recognition, Facial Landmarks Estimation and + // Antispoofing Classifier networks simultaneously ageGenderDetector.submitRequest(); headPoseDetector.submitRequest(); emotionsDetector.submitRequest(); @@ -289,7 +317,7 @@ int main(int argc, char *argv[]) { visualizer.draw(prevFrame, faces); presenter.drawGraphs(prevFrame); - metrics.update(startTimePrevFrame, prevFrame, { 10, 22 }, cv::FONT_HERSHEY_COMPLEX, 0.65); + metrics.update(startTimePrevFrame, prevFrame, {10, 22}, cv::FONT_HERSHEY_COMPLEX, 0.65); timer.finish("total"); diff --git a/demos/interactive_face_detection_demo/cpp/visualizer.cpp b/demos/interactive_face_detection_demo/cpp/visualizer.cpp index 3093ea366c9..95e08bfae12 100644 --- a/demos/interactive_face_detection_demo/cpp/visualizer.cpp +++ b/demos/interactive_face_detection_demo/cpp/visualizer.cpp @@ -1,36 +1,57 @@ -// Copyright (C) 2018-2019 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include "visualizer.hpp" + +#include +#include +#include +#include #include +#include #include -#include +#include #include -#include 
-#include -#include -#include "visualizer.hpp" +#include + +#include +#include // EmotionBarVisualizer -EmotionBarVisualizer::EmotionBarVisualizer(std::vector const& emotionNames, cv::Size size, cv::Size padding, - double opacity, double textScale, int textThickness): - emotionNames(emotionNames), size(size), padding(padding), - opacity(opacity), textScale(textScale), textThickness(textThickness) -{ - auto itMax = std::max_element(emotionNames.begin(), emotionNames.end(), [] (std::string const& lhs, std::string const& rhs) { - return lhs.length() < rhs.length(); - }); +EmotionBarVisualizer::EmotionBarVisualizer(std::vector const& emotionNames, + cv::Size size, + cv::Size padding, + double opacity, + double textScale, + int textThickness) + : emotionNames(emotionNames), + size(size), + padding(padding), + opacity(opacity), + textScale(textScale), + textThickness(textThickness) { + auto itMax = + std::max_element(emotionNames.begin(), emotionNames.end(), [](std::string const& lhs, std::string const& rhs) { + return lhs.length() < rhs.length(); + }); textSize = cv::getTextSize(*itMax, cv::FONT_HERSHEY_COMPLEX_SMALL, textScale, textThickness, &textBaseline); - ystep = (emotionNames.size() < 2) ? 0 : (size.height - 2 * padding.height - textSize.height) / (emotionNames.size() - 1); + ystep = (emotionNames.size() < 2) + ? 0 + : (size.height - 2 * padding.height - textSize.height) / (emotionNames.size() - 1); } cv::Size EmotionBarVisualizer::getSize() { return size; } -void EmotionBarVisualizer::draw(cv::Mat& img, std::map emotions, cv::Point org, cv::Scalar fgcolor, cv::Scalar bgcolor) { +void EmotionBarVisualizer::draw(cv::Mat& img, + std::map emotions, + cv::Point org, + cv::Scalar fgcolor, + cv::Scalar bgcolor) { cv::Mat tmp = img(cv::Rect(org.x, org.y, size.width, size.height)); cv::addWeighted(tmp, 1.f - opacity, bgcolor, opacity, 0, tmp); @@ -38,7 +59,10 @@ void EmotionBarVisualizer::draw(cv::Mat& img, std::map emoti cv::Point torg(org.x + padding.width, org.y + n * ystep + textSize.height + padding.height); int textWidth = textSize.width + 10; - cv::Rect r(torg.x + textWidth, torg.y - textSize.height, size.width - 2 * padding.width - textWidth, textSize.height + textBaseline / 2); + cv::Rect r(torg.x + textWidth, + torg.y - textSize.height, + size.width - 2 * padding.width - textWidth, + textSize.height + textBaseline / 2); cv::putText(img, text, torg, cv::FONT_HERSHEY_COMPLEX_SMALL, textScale, fgcolor, textThickness); cv::rectangle(img, r, fgcolor, 1); @@ -46,15 +70,16 @@ void EmotionBarVisualizer::draw(cv::Mat& img, std::map emoti cv::rectangle(img, r, fgcolor, cv::FILLED); }; - for (size_t i = 0; i< emotionNames.size(); i++) { + for (size_t i = 0; i < emotionNames.size(); i++) { drawEmotion(i, emotionNames[i], emotions[emotionNames[i]]); } } // PhotoFrameVisualizer -PhotoFrameVisualizer::PhotoFrameVisualizer(int bbThickness, int photoFrameThickness, float photoFrameLength): - bbThickness(bbThickness), photoFrameThickness(photoFrameThickness), photoFrameLength(photoFrameLength) { -} +PhotoFrameVisualizer::PhotoFrameVisualizer(int bbThickness, int photoFrameThickness, float photoFrameLength) + : bbThickness(bbThickness), + photoFrameThickness(photoFrameThickness), + photoFrameLength(photoFrameLength) {} void PhotoFrameVisualizer::draw(cv::Mat& img, cv::Rect& bb, cv::Scalar color) { cv::rectangle(img, bb, color, bbThickness); @@ -74,12 +99,20 @@ void PhotoFrameVisualizer::draw(cv::Mat& img, cv::Rect& bb, cv::Scalar color) { } // HeadPoseVisualizer 
-HeadPoseVisualizer::HeadPoseVisualizer(float scale, cv::Scalar xAxisColor, cv::Scalar yAxisColor, cv::Scalar zAxisColor, int axisThickness): - xAxisColor(xAxisColor), yAxisColor(yAxisColor), zAxisColor(zAxisColor), axisThickness(axisThickness), scale(scale) { -} +HeadPoseVisualizer::HeadPoseVisualizer(float scale, + cv::Scalar xAxisColor, + cv::Scalar yAxisColor, + cv::Scalar zAxisColor, + int axisThickness) + : xAxisColor(xAxisColor), + yAxisColor(yAxisColor), + zAxisColor(zAxisColor), + axisThickness(axisThickness), + scale(scale) {} void HeadPoseVisualizer::buildCameraMatrix(cv::Mat& cameraMatrix, int cx, int cy, float focalLength) { - if (!cameraMatrix.empty()) return; + if (!cameraMatrix.empty()) + return; cameraMatrix = cv::Mat::zeros(3, 3, CV_32F); cameraMatrix.at(0) = focalLength; cameraMatrix.at(2) = static_cast(cx); @@ -89,28 +122,45 @@ void HeadPoseVisualizer::buildCameraMatrix(cv::Mat& cameraMatrix, int cx, int cy } void HeadPoseVisualizer::draw(cv::Mat& frame, cv::Point3f cpoint, HeadPoseDetection::Results headPose) { - double yaw = headPose.angle_y; + double yaw = headPose.angle_y; double pitch = headPose.angle_p; - double roll = headPose.angle_r; + double roll = headPose.angle_r; pitch *= CV_PI / 180.0; - yaw *= CV_PI / 180.0; - roll *= CV_PI / 180.0; - - cv::Matx33f Rx(1, 0, 0, - 0, static_cast(cos(pitch)), static_cast(-sin(pitch)), - 0, static_cast(sin(pitch)), static_cast(cos(pitch))); - - cv::Matx33f Ry(static_cast(cos(yaw)), 0, static_cast(-sin(yaw)), - 0, 1, 0, - static_cast(sin(yaw)), 0, static_cast(cos(yaw))); - - cv::Matx33f Rz(static_cast(cos(roll)), static_cast(-sin(roll)), 0, - static_cast(sin(roll)), static_cast(cos(roll)), 0, - 0, 0, 1); - - - auto r = cv::Mat(Rz*Ry*Rx); + yaw *= CV_PI / 180.0; + roll *= CV_PI / 180.0; + + cv::Matx33f Rx(1, + 0, + 0, + 0, + static_cast(cos(pitch)), + static_cast(-sin(pitch)), + 0, + static_cast(sin(pitch)), + static_cast(cos(pitch))); + + cv::Matx33f Ry(static_cast(cos(yaw)), + 0, + static_cast(-sin(yaw)), + 0, + 1, + 0, + static_cast(sin(yaw)), + 0, + static_cast(cos(yaw))); + + cv::Matx33f Rz(static_cast(cos(roll)), + static_cast(-sin(roll)), + 0, + static_cast(sin(roll)), + static_cast(cos(roll)), + 0, + 0, + 0, + 1); + + auto r = cv::Mat(Rz * Ry * Rx); cv::Mat cameraMatrix; buildCameraMatrix(cameraMatrix, frame.cols / 2, frame.rows / 2, 950.0); @@ -160,11 +210,20 @@ void HeadPoseVisualizer::draw(cv::Mat& frame, cv::Point3f cpoint, HeadPoseDetect } // Visualizer -Visualizer::Visualizer(cv::Size const& imgSize, int leftPadding, int rightPadding, int topPadding, int bottomPadding): - emotionVisualizer(nullptr), photoFrameVisualizer(std::make_shared()), - headPoseVisualizer(std::make_shared()), - nxcells(0), nycells(0), xstep(0), ystep(0), imgSize(imgSize), leftPadding(leftPadding), - rightPadding(rightPadding), topPadding(topPadding), bottomPadding(bottomPadding), frameCounter(0) {} +Visualizer::Visualizer(cv::Size const& imgSize, int leftPadding, int rightPadding, int topPadding, int bottomPadding) + : emotionVisualizer(nullptr), + photoFrameVisualizer(std::make_shared()), + headPoseVisualizer(std::make_shared()), + nxcells(0), + nycells(0), + xstep(0), + ystep(0), + imgSize(imgSize), + leftPadding(leftPadding), + rightPadding(rightPadding), + topPadding(topPadding), + bottomPadding(bottomPadding), + frameCounter(0) {} void Visualizer::enableEmotionBar(std::vector const& emotionNames) { emotionVisualizer = std::make_shared(emotionNames); @@ -188,10 +247,8 @@ void Visualizer::enableEmotionBar(std::vector const& emotionNames) 
} void Visualizer::drawFace(cv::Mat& img, Face::Ptr f, bool drawEmotionBar) { - auto genderColor = (f->isAgeGenderEnabled()) ? - ((f->isMale()) ? cv::Scalar(255, 0, 0) : - cv::Scalar(147, 20, 255)) : - cv::Scalar(192, 192, 192); + auto genderColor = (f->isAgeGenderEnabled()) ? ((f->isMale()) ? cv::Scalar(255, 0, 0) : cv::Scalar(147, 20, 255)) + : cv::Scalar(192, 192, 192); std::ostringstream out; if (f->isAgeGenderEnabled()) { @@ -227,7 +284,11 @@ void Visualizer::drawFace(cv::Mat& img, Face::Ptr f, bool drawEmotionBar) { int x_lm = f->_location.x + static_cast(f->_location.width * normed_x); int y_lm = f->_location.y + static_cast(f->_location.height * normed_y); - cv::circle(img, cv::Point(x_lm, y_lm), 1 + static_cast(0.012 * f->_location.width), cv::Scalar(0, 255, 255), -1); + cv::circle(img, + cv::Point(x_lm, y_lm), + 1 + static_cast(0.012 * f->_location.width), + cv::Scalar(0, 255, 255), + -1); } } @@ -235,7 +296,8 @@ void Visualizer::drawFace(cv::Mat& img, Face::Ptr f, bool drawEmotionBar) { if (drawEmotionBar) { DrawParams& dp = drawParams[f->getId()]; - cv::Point org(dp.cell.x * xstep + leftPadding, imgSize.height - dp.cell.y * ystep - emotionBarSize.height - bottomPadding); + cv::Point org(dp.cell.x * xstep + leftPadding, + imgSize.height - dp.cell.y * ystep - emotionBarSize.height - bottomPadding); emotionVisualizer->draw(img, f->getEmotions(), org, cv::Scalar(255, 255, 255), genderColor); @@ -315,7 +377,7 @@ void Visualizer::draw(cv::Mat img, std::list faces) { if (!newFaces.empty()) { auto it = drawParams.begin(); auto endIt = drawParams.end(); - for (; it != endIt; ) { + for (; it != endIt;) { if (it->second.frameIdx != frameCounter) { it = drawParams.erase(it); } else { diff --git a/demos/interactive_face_detection_demo/cpp/visualizer.hpp b/demos/interactive_face_detection_demo/cpp/visualizer.hpp index 5f82557bae7..98b484f167e 100644 --- a/demos/interactive_face_detection_demo/cpp/visualizer.hpp +++ b/demos/interactive_face_detection_demo/cpp/visualizer.hpp @@ -1,30 +1,43 @@ -// Copyright (C) 2018-2019 Intel Corporation +// Copyright (C) 2018-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once +#include + +#include +#include #include #include -#include #include -#include -#include +#include + +#include "detectors.hpp" #include "face.hpp" -// -------------------------Generic routines for visualization of detection results------------------------------------------------- +// Generic routines for visualization of detection results // Drawing a bar of emotions class EmotionBarVisualizer { public: using Ptr = std::shared_ptr; - explicit EmotionBarVisualizer(std::vector const& emotionNames, cv::Size size = cv::Size(300, 140), cv::Size padding = cv::Size(10, 10), - double opacity = 0.6, double textScale = 1, int textThickness = 1); - - void draw(cv::Mat& img, std::map emotions, cv::Point org, cv::Scalar fgcolor, cv::Scalar bgcolor); + explicit EmotionBarVisualizer(std::vector const& emotionNames, + cv::Size size = cv::Size(300, 140), + cv::Size padding = cv::Size(10, 10), + double opacity = 0.6, + double textScale = 1, + int textThickness = 1); + + void draw(cv::Mat& img, + std::map emotions, + cv::Point org, + cv::Scalar fgcolor, + cv::Scalar bgcolor); cv::Size getSize(); + private: std::vector emotionNames; cv::Size size; @@ -58,10 +71,10 @@ class HeadPoseVisualizer { using Ptr = std::shared_ptr; explicit HeadPoseVisualizer(float scale = 50, - cv::Scalar xAxisColor = cv::Scalar(0, 0, 255), - cv::Scalar yAxisColor = cv::Scalar(0, 255, 0), - cv::Scalar 
zAxisColor = cv::Scalar(255, 0, 0), - int axisThickness = 2); + cv::Scalar xAxisColor = cv::Scalar(0, 0, 255), + cv::Scalar yAxisColor = cv::Scalar(0, 255, 0), + cv::Scalar zAxisColor = cv::Scalar(255, 0, 0), + int axisThickness = 2); void draw(cv::Mat& frame, cv::Point3f cpoint, HeadPoseDetection::Results headPose); @@ -79,12 +92,7 @@ class HeadPoseVisualizer { // Drawing detected faces on the frame class Visualizer { public: - enum AnchorType { - TL = 0, - TR, - BL, - BR - }; + enum AnchorType { TL = 0, TR, BL, BR }; struct DrawParams { cv::Point cell; @@ -93,7 +101,11 @@ class Visualizer { size_t frameIdx; }; - explicit Visualizer(cv::Size const& imgSize, int leftPadding = 10, int rightPadding = 10, int topPadding = 75, int bottomPadding = 10); + explicit Visualizer(cv::Size const& imgSize, + int leftPadding = 10, + int rightPadding = 10, + int topPadding = 75, + int bottomPadding = 10); void enableEmotionBar(std::vector const& emotionNames); void draw(cv::Mat img, std::list faces); diff --git a/demos/mask_rcnn_demo/cpp/main.cpp b/demos/mask_rcnn_demo/cpp/main.cpp index f05797670f0..973cb37026d 100644 --- a/demos/mask_rcnn_demo/cpp/main.cpp +++ b/demos/mask_rcnn_demo/cpp/main.cpp @@ -7,22 +7,30 @@ * @file mask_rcnn_demo/main.cpp * @example mask_rcnn_demo/main.cpp */ + +#include + #include +#include #include #include -#include -#include #include +#include +#include #include +#include #include -#include "openvino/openvino.hpp" +#include +#include +#include +#include +#include -#include "gflags/gflags.h" -#include "utils/common.hpp" -#include "utils/ocv_common.hpp" -#include "utils/performance_metrics.hpp" -#include "utils/slog.hpp" +#include +#include +#include +#include #include "mask_rcnn_demo.h" @@ -140,7 +148,7 @@ int main(int argc, char* argv[]) { std::string boxes_tensor_name; std::string masks_tensor_name; - ov::Layout masks_tensor_layout = {"NCHW"}; // expected by mask processing logic + ov::Layout masks_tensor_layout = {"NCHW"}; // expected by mask processing logic for (ov::Output output : outputs) { ov::Shape shape = output.get_shape(); @@ -157,27 +165,20 @@ int main(int argc, char* argv[]) { std::vector images; if (modelBatchSize > imagePaths.size()) { - slog::warn << "Model batch size is greater than number of images (" << imagePaths.size() << - "), some input files will be duplicated" << slog::endl; - } - else if (modelBatchSize < imagePaths.size()) { + slog::warn << "Model batch size is greater than number of images (" << imagePaths.size() + << "), some input files will be duplicated" << slog::endl; + } else if (modelBatchSize < imagePaths.size()) { modelBatchSize = imagePaths.size(); - slog::warn << "Model batch size is less than number of images (" << imagePaths.size() << - "), model will be reshaped" << slog::endl; + slog::warn << "Model batch size is less than number of images (" << imagePaths.size() + << "), model will be reshaped" << slog::endl; } ov::preprocess::PrePostProcessor ppp(model); - ppp.input(image_tensor_name) - .tensor() - .set_element_type(ov::element::u8) - .set_layout(desired_tensor_layout); - ppp.input(info_tensor_name) - .tensor() - .set_layout(info_tensor_layout); + ppp.input(image_tensor_name).tensor().set_element_type(ov::element::u8).set_layout(desired_tensor_layout); + ppp.input(info_tensor_name).tensor().set_layout(info_tensor_layout); - ppp.input(image_tensor_name).model() - .set_layout(image_tensor_layout); + ppp.input(image_tensor_name).model().set_layout(image_tensor_layout); model = ppp.build(); slog::info << "Preprocessor configuration: " 
<< slog::endl; @@ -219,11 +220,9 @@ int main(int argc, char* argv[]) { if (shape.size() == 4) { for (size_t batchId = 0; batchId < modelBatchSize; ++batchId) { - cv::Size size = { - int(image_tensor_width), - int(image_tensor_height) - }; - unsigned char* data = tensor.data() + batchId * image_tensor_width * image_tensor_height * 3; + cv::Size size = {static_cast(image_tensor_width), static_cast(image_tensor_height)}; + unsigned char* data = + tensor.data() + batchId * image_tensor_width * image_tensor_height * 3; cv::Mat image_resized(size, CV_8UC3, data); cv::resize(images[batchId], image_resized, size); } @@ -231,8 +230,8 @@ int main(int argc, char* argv[]) { if (shape.size() == 2) { float* data = tensor.data(); - data[0] = static_cast(image_tensor_height); // height - data[1] = static_cast(image_tensor_width); // width + data[0] = static_cast(image_tensor_height); // height + data[1] = static_cast(image_tensor_width); // width data[2] = 1; } } @@ -321,8 +320,10 @@ int main(int argc, char* argv[]) { cv::Mat resized_mask_mat(box_height, box_width, CV_32FC1); cv::resize(mask_mat, resized_mask_mat, cv::Size(box_width, box_height)); - cv::Mat uchar_resized_mask(box_height, box_width, CV_8UC3, - cv::Scalar(color.blue(), color.green(), color.red())); + cv::Mat uchar_resized_mask(box_height, + box_width, + CV_8UC3, + cv::Scalar(color.blue(), color.green(), color.red())); roi_input_img.copyTo(uchar_resized_mask, resized_mask_mat <= MASK_THRESHOLD); cv::addWeighted(uchar_resized_mask, alpha, roi_input_img, 1.0 - alpha, 0.0f, roi_input_img); @@ -334,13 +335,14 @@ int main(int argc, char* argv[]) { for (size_t i = 0; i < output_images.size(); i++) { std::string imgName = "out" + std::to_string(i) + ".png"; - if(!cv::imwrite(imgName, output_images[i])) + if (!cv::imwrite(imgName, output_images[i])) throw std::runtime_error("Can't write image to file: " + imgName); slog::info << "Image " << imgName << " created!" << slog::endl; } slog::info << "Metrics report:" << slog::endl; - slog::info << "\tLatency: " << std::fixed << std::setprecision(1) << metrics.getTotal().latency << " ms" << slog::endl; + slog::info << "\tLatency: " << std::fixed << std::setprecision(1) << metrics.getTotal().latency << " ms" + << slog::endl; return 0; } diff --git a/demos/mask_rcnn_demo/cpp/mask_rcnn_demo.h b/demos/mask_rcnn_demo/cpp/mask_rcnn_demo.h index 9d17c78ba4d..2d58d7173b0 100644 --- a/demos/mask_rcnn_demo/cpp/mask_rcnn_demo.h +++ b/demos/mask_rcnn_demo/cpp/mask_rcnn_demo.h @@ -5,15 +5,18 @@ #pragma once #include -#include "gflags/gflags.h" + +#include static const char help_message[] = "Print a usage message."; static const char image_message[] = "Required. Path to a .bmp image."; static const char model_message[] = "Required. Path to an .xml file with a trained model."; -static const char target_device_message[] = "Optional. Specify the target device to infer on (the list of available devices is shown below). " - "Use \"-d HETERO:\" format to specify HETERO plugin. " - "The demo will look for a suitable plugin for a specified device (CPU by default)"; -static const char detection_output_layer_name_message[] = "Optional. The name of detection output layer. Default value is \"reshape_do_2d\""; +static const char target_device_message[] = + "Optional. Specify the target device to infer on (the list of available devices is shown below). " + "Use \"-d HETERO:\" format to specify HETERO plugin. 
" + "The demo will look for a suitable plugin for a specified device (CPU by default)"; +static const char detection_output_layer_name_message[] = + "Optional. The name of detection output layer. Default value is \"reshape_do_2d\""; static const char masks_layer_name_message[] = "Optional. The name of masks layer. Default value is \"masks\""; DEFINE_bool(h, false, help_message); @@ -24,8 +27,8 @@ DEFINE_string(detection_output_name, "reshape_do_2d", detection_output_layer_nam DEFINE_string(masks_name, "masks", masks_layer_name_message); /** -* @brief This function show a help message -*/ + * @brief This function show a help message + */ static void showUsage() { std::cout << std::endl; std::cout << "mask_rcnn_demo [OPTION]" << std::endl; diff --git a/demos/mri_reconstruction_demo/cpp/main.cpp b/demos/mri_reconstruction_demo/cpp/main.cpp index 3038093a188..2c017b29889 100644 --- a/demos/mri_reconstruction_demo/cpp/main.cpp +++ b/demos/mri_reconstruction_demo/cpp/main.cpp @@ -1,8 +1,25 @@ // Copyright (C) 2021-2022 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include #include -#include + #include #include #include @@ -10,7 +27,9 @@ #include "mri_reconstruction_demo.hpp" #include "npy_reader.hpp" -bool ParseAndCheckCommandLine(int argc, char *argv[]); +constexpr char const* kWinName = "MRI reconstruction with OpenVINO"; + +bool ParseAndCheckCommandLine(int argc, char* argv[]); static cv::Mat tensorToMat(const ov::Tensor& tensor); @@ -22,8 +41,6 @@ struct MRIData { static constexpr double stats[]{2.20295299e-1, 1.11048916e+3}; }; -static const std::string kWinName = "MRI reconstruction with OpenVINO"; - void callback(int pos, void* userdata); cv::Mat kspaceToImage(const cv::Mat& kspace); @@ -77,7 +94,7 @@ int main(int argc, char** argv) { const int width = mri.data.size[2]; mri.data /= sqrt(height * width); - mri.reconstructed.create({ numSlices, height, width }, CV_8U); + mri.reconstructed.create({numSlices, height, width}, CV_8U); slog::info << "Compute..." << slog::endl; @@ -103,7 +120,8 @@ int main(int argc, char** argv) { metrics.update(startTime); slog::info << "Metrics report:" << slog::endl; - slog::info << "\tLatency: " << std::fixed << std::setprecision(1) << metrics.getTotal().latency << " ms" << slog::endl; + slog::info << "\tLatency: " << std::fixed << std::setprecision(1) << metrics.getTotal().latency << " ms" + << slog::endl; // Visualization loop. if (!FLAGS_no_show) { @@ -121,10 +139,12 @@ cv::Mat tensorToMat(const ov::Tensor& tensor) { // NOTE: OpenVINO runtime sizes are reversed. 
ov::Shape tensorShape = tensor.get_shape(); std::vector size; - std::transform(tensorShape.begin(), tensorShape.end(), std::back_inserter(size), [](size_t dim) -> int { return int(dim); }); + std::transform(tensorShape.begin(), tensorShape.end(), std::back_inserter(size), [](size_t dim) -> int { + return static_cast(dim); + }); ov::element::Type precision = tensor.get_element_type(); CV_Assert(precision == ov::element::f32); - return cv::Mat(size, CV_32F, (void*)tensor.data()); + return cv::Mat(size, CV_32F, static_cast(tensor.data())); } void callback(int sliceId, void* userdata) { @@ -145,10 +165,18 @@ void callback(int sliceId, void* userdata) { cv::hconcat(std::vector({img, masked, rec}), render); cv::copyMakeBorder(render, render, kBorderSize, 0, 0, 0, cv::BORDER_CONSTANT, 255); cv::putText(render, "Original", cv::Point(0, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, 0); - cv::putText(render, cv::format("Sampled (PSNR %.1f)", cv::PSNR(img, masked, 255)), - cv::Point(width, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, 0); - cv::putText(render, cv::format("Reconstructed (PSNR %.1f)", cv::PSNR(img, rec, 255)), - cv::Point(width*2, 15), cv::FONT_HERSHEY_SIMPLEX, 0.5, 0); + cv::putText(render, + cv::format("Sampled (PSNR %.1f)", cv::PSNR(img, masked, 255)), + cv::Point(width, 15), + cv::FONT_HERSHEY_SIMPLEX, + 0.5, + 0); + cv::putText(render, + cv::format("Reconstructed (PSNR %.1f)", cv::PSNR(img, rec, 255)), + cv::Point(width * 2, 15), + cv::FONT_HERSHEY_SIMPLEX, + 0.5, + 0); cv::imshow(kWinName, render); cv::waitKey(1); @@ -169,7 +197,7 @@ cv::Mat kspaceToImage(const cv::Mat& kspace) { return img; } -bool ParseAndCheckCommandLine(int argc, char *argv[]) { +bool ParseAndCheckCommandLine(int argc, char* argv[]) { // ---------------------------Parsing and validation of input args-------------------------------------- gflags::ParseCommandLineNonHelpFlags(&argc, &argv, true); if (FLAGS_h) { diff --git a/demos/mri_reconstruction_demo/cpp/mri_reconstruction_demo.hpp b/demos/mri_reconstruction_demo/cpp/mri_reconstruction_demo.hpp index 0581a0d3942..5d35ddd6500 100644 --- a/demos/mri_reconstruction_demo/cpp/mri_reconstruction_demo.hpp +++ b/demos/mri_reconstruction_demo/cpp/mri_reconstruction_demo.hpp @@ -4,10 +4,11 @@ #pragma once +#include #include #include + #include -#include static const char help_message[] = "Print a usage message."; static const char input_message[] = "Required. Path to input .npy file with MRI scan data."; @@ -25,8 +26,8 @@ DEFINE_string(p, "", pattern_message); DEFINE_bool(no_show, false, no_show_message); /** -* @brief This function show a help message -*/ + * @brief This function show a help message + */ static void showUsage() { std::cout << std::endl; std::cout << "mri_reconstruction_demo [OPTION]" << std::endl; diff --git a/demos/mri_reconstruction_demo/cpp/npy_reader.hpp b/demos/mri_reconstruction_demo/cpp/npy_reader.hpp index 57c791ce225..893936f1944 100644 --- a/demos/mri_reconstruction_demo/cpp/npy_reader.hpp +++ b/demos/mri_reconstruction_demo/cpp/npy_reader.hpp @@ -4,6 +4,8 @@ #pragma once #include +#include +#include #include @@ -40,8 +42,7 @@ static std::vector getShape(const std::string& header) { return std::vector(1, 1); // Remove all commas. - shapeStr.erase(std::remove(shapeStr.begin(), shapeStr.end(), ','), - shapeStr.end()); + shapeStr.erase(std::remove(shapeStr.begin(), shapeStr.end(), ','), shapeStr.end()); std::istringstream ss(shapeStr); int value; @@ -65,7 +66,7 @@ cv::Mat blobFromNPY(const std::string& path) { ifs.ignore(1); // Skip minor version byte. 
unsigned short headerSize; - ifs.read((char*)&headerSize, sizeof(headerSize)); + ifs.read(reinterpret_cast(&headerSize), sizeof(headerSize)); std::string header(headerSize, '*'); ifs.read(&header[0], header.size()); @@ -85,8 +86,8 @@ cv::Mat blobFromNPY(const std::string& path) { else throw std::logic_error("Unexpected numpy data type: " + dataType); - ifs.read((char*)blob.data, blob.total() * blob.elemSize()); - CV_Assert((size_t)ifs.gcount() == blob.total() * blob.elemSize()); + ifs.read(reinterpret_cast(blob.data), blob.total() * blob.elemSize()); + CV_Assert(static_cast(ifs.gcount()) == blob.total() * blob.elemSize()); return blob; } diff --git a/demos/noise_suppression_demo/cpp/main.cpp b/demos/noise_suppression_demo/cpp/main.cpp index 1dd7969cd15..a1d415edc99 100644 --- a/demos/noise_suppression_demo/cpp/main.cpp +++ b/demos/noise_suppression_demo/cpp/main.cpp @@ -2,10 +2,29 @@ // SPDX-License-Identifier: Apache-2.0 // +#include +#include + +#include +#include +#include +#include #include +#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include +#include + #include #include @@ -25,42 +44,42 @@ DEFINE_string(d, "CPU", d_msg); constexpr char o_msg[] = "path to an output WAV file. Default is noise_suppression_demo_out.wav"; DEFINE_string(o, "noise_suppression_demo_out.wav", o_msg); -void parse(int argc, char *argv[]) { +void parse(int argc, char* argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, false); if (FLAGS_h || 1 == argc) { - std::cout << "\t[-h] " << h_msg - << "\n\t[--help] print help on all arguments" - << "\n\t -m " << m_msg - << "\n\t -i " << i_msg - << "\n\t[-d ] " << d_msg - << "\n\t[-o ] " << o_msg << '\n'; + std::cout << "\t[-h] " << h_msg << "\n\t[--help] print help on all arguments" + << "\n\t -m " << m_msg << "\n\t -i " << i_msg + << "\n\t[-d ] " << d_msg << "\n\t[-o ] " << o_msg << '\n'; showAvailableDevices(); slog::info << ov::get_openvino_version() << slog::endl; exit(0); - } if (FLAGS_m.empty()) { + } + if (FLAGS_m.empty()) { throw std::invalid_argument{"-m can't be empty"}; - } if (FLAGS_i.empty()) { + } + if (FLAGS_i.empty()) { throw std::invalid_argument{"-i can't be empty"}; - } if (FLAGS_o.empty()) { + } + if (FLAGS_o.empty()) { throw std::invalid_argument{"-o can't be empty"}; } slog::info << ov::get_openvino_version() << slog::endl; } struct RiffWaveHeader { - unsigned int riff_tag; // "RIFF" string - int riff_length; // Total length - unsigned int wave_tag; // "WAVE" + unsigned int riff_tag; // "RIFF" string + int riff_length; // Total length + unsigned int wave_tag; // "WAVE" unsigned int fmt_tag; // "fmt " string (note space after 't') - int fmt_length; // Remaining length - short data_format; // Data format tag, 1 = PCM - short num_of_channels; // Number of channels in file - int sampling_freq; // Sampling frequency - int bytes_per_sec; // Average bytes/sec - short block_align; // Block align + int fmt_length; // Remaining length + short data_format; // Data format tag, 1 = PCM + short num_of_channels; // Number of channels in file + int sampling_freq; // Sampling frequency + int bytes_per_sec; // Average bytes/sec + short block_align; // Block align short bits_per_sample; - unsigned int data_tag; // "data" string - int data_length; // Raw data length + unsigned int data_tag; // "data" string + int data_length; // Raw data length }; const unsigned int fourcc(const char c[4]) { @@ -68,14 +87,17 @@ const unsigned int fourcc(const char c[4]) { } void read_wav(const std::string& 
file_name, RiffWaveHeader& wave_header, std::vector& wave) { - std::ifstream inp_wave(file_name, std::ios::in|std::ios::binary); - if(!inp_wave.is_open()) + std::ifstream inp_wave(file_name, std::ios::in | std::ios::binary); + if (!inp_wave.is_open()) throw std::runtime_error("fail to open " + file_name); - inp_wave.read((char*)&wave_header, sizeof(RiffWaveHeader)); + inp_wave.read(reinterpret_cast(&wave_header), sizeof(RiffWaveHeader)); std::string error_msg = ""; - #define CHECK_IF(cond) if(cond){ error_msg = error_msg + #cond + ", "; } +#define CHECK_IF(cond) \ + if (cond) { \ + error_msg = error_msg + #cond + ", "; \ + } // make sure it is actually a RIFF file with WAVE CHECK_IF(wave_header.riff_tag != fourcc("RIFF")); @@ -89,7 +111,7 @@ void read_wav(const std::string& file_name, RiffWaveHeader& wave_header, std::ve CHECK_IF(wave_header.bits_per_sample != 16); // make sure that data chunk follows file header CHECK_IF(wave_header.data_tag != fourcc("data")); - #undef CHECK_IF +#undef CHECK_IF if (!error_msg.empty()) { throw std::runtime_error(error_msg + "for '" + file_name + "' file."); @@ -98,16 +120,16 @@ void read_wav(const std::string& file_name, RiffWaveHeader& wave_header, std::ve size_t wave_size = wave_header.data_length / sizeof(int16_t); wave.resize(wave_size); - inp_wave.read((char*)&(wave.front()), wave_size * sizeof(int16_t)); + inp_wave.read(reinterpret_cast(&(wave.front())), wave_size * sizeof(int16_t)); } void write_wav(const std::string& file_name, const RiffWaveHeader& wave_header, const std::vector& wave) { - std::ofstream out_wave(file_name, std::ios::out|std::ios::binary); - if(!out_wave.is_open()) + std::ofstream out_wave(file_name, std::ios::out | std::ios::binary); + if (!out_wave.is_open()) throw std::runtime_error("fail to open " + file_name); - out_wave.write((char*)&wave_header, sizeof(RiffWaveHeader)); - out_wave.write((char*)&(wave.front()), wave.size() * sizeof(int16_t)); + out_wave.write(reinterpret_cast(const_cast<::RiffWaveHeader*>(&wave_header)), sizeof(RiffWaveHeader)); + out_wave.write(reinterpret_cast(const_cast(&wave.front())), wave.size() * sizeof(int16_t)); } } // namespace @@ -134,9 +156,10 @@ int main(int argc, char* argv[]) { out_state_name.replace(0, 3, "out"); // find corresponding output state - if (outputs.end() == std::find_if(outputs.begin(), outputs.end(), [&out_state_name](const ov::Output& output) { - return output.get_any_name() == out_state_name; - })) + if (outputs.end() == + std::find_if(outputs.begin(), outputs.end(), [&out_state_name](const ov::Output& output) { + return output.get_any_name() == out_state_name; + })) throw std::runtime_error("model output state name does not correspond input state name"); state_names.emplace_back(inp_state_name, out_state_name); @@ -150,7 +173,8 @@ int main(int argc, char* argv[]) { if (state_size == 0) throw std::runtime_error("no expected model state inputs found"); - slog::info << "State_param_num = " << state_size << " (" << std::setprecision(4) << state_size * sizeof(float) * 1e-6f << "Mb)" << slog::endl; + slog::info << "State_param_num = " << state_size << " (" << std::setprecision(4) + << state_size * sizeof(float) * 1e-6f << "Mb)" << slog::endl; ov::CompiledModel compiled_model = core.compile_model(model, FLAGS_d, {}); logCompiledModelInfo(compiled_model, FLAGS_m, FLAGS_d); @@ -165,7 +189,7 @@ int main(int argc, char* argv[]) { // try to get delay output and freq output for model int delay = 0; - int freq_model = 16000; // default sampling rate for model + int freq_model = 16000; // 
default sampling rate for model
     infer_request.infer();
     for (size_t i = 0; i < outputs.size(); i++) {
         std::string out_name = outputs[i].get_any_name();
@@ -186,7 +210,8 @@ int main(int argc, char* argv[]) {
     int freq_data = wave_header.sampling_freq;
 
     if (freq_data != freq_model) {
-        slog::err << "Wav file " << FLAGS_i << " sampling rate " << freq_data << " does not match model sampling rate " << freq_model << slog::endl;
+        slog::err << "Wav file " << FLAGS_i << " sampling rate " << freq_data << " does not match model sampling rate "
+                  << freq_model << slog::endl;
         throw std::runtime_error("data sampling rate does not match model sampling rate");
     }
@@ -204,16 +229,16 @@ int main(int argc, char* argv[]) {
 
     // convert sint16_t to float
     float scale = 1.0f / std::numeric_limits<int16_t>::max();
-    for(size_t i = 0; i < inp_wave_s16.size(); ++i) {
-        inp_wave_fp32[i] = (float)inp_wave_s16[i] * scale;
+    for (size_t i = 0; i < inp_wave_s16.size(); ++i) {
+        inp_wave_fp32[i] = static_cast<float>(inp_wave_s16[i]) * scale;
     }
 
     auto start_time = std::chrono::steady_clock::now();
-    for(size_t i = 0; i < iter; ++i) {
+    for (size_t i = 0; i < iter; ++i) {
         ov::Tensor input_tensor(ov::element::f32, inp_shape, &inp_wave_fp32[i * patch_size]);
         infer_request.set_tensor(input_name, input_tensor);
 
-        for (auto& state_name: state_names) {
+        for (auto& state_name : state_names) {
             const std::string& inp_state_name = state_name.first;
             const std::string& out_state_name = state_name.second;
             ov::Tensor state_tensor;
@@ -237,18 +262,19 @@ int main(int argc, char* argv[]) {
             float* dst = &out_wave_fp32[i * patch_size];
             std::memcpy(dst, src, patch_size * sizeof(float));
         }
-    } // for iter
+    }  // for iter
 
     using ms = std::chrono::duration<double, std::ratio<1, 1000>>;
     double total_latency = std::chrono::duration_cast<ms>(std::chrono::steady_clock::now() - start_time).count();
     slog::info << "Metrics report:" << slog::endl;
     slog::info << "\tLatency: " << std::fixed << std::setprecision(1) << total_latency << " ms" << slog::endl;
-    slog::info << "\tSample length: " << std::fixed << std::setprecision(1) << patch_size * iter / (freq_data * 1e-3) << " ms" << slog::endl;
+    slog::info << "\tSample length: " << std::fixed << std::setprecision(1) << patch_size * iter / (freq_data * 1e-3)
+               << " ms" << slog::endl;
     slog::info << "\tSampling freq: " << freq_data << " Hz" << slog::endl;
 
     // convert fp32 to int16_t and save to wav
-    for(size_t i = 0; i < out_wave_s16.size(); ++i) {
-        float v = out_wave_fp32[i+delay];
+    for (size_t i = 0; i < out_wave_s16.size(); ++i) {
+        float v = out_wave_fp32[i + delay];
         v = clamp(v, -1.0f, +1.0f);
         out_wave_s16[i] = (int16_t)(v * std::numeric_limits<int16_t>::max());
     }
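
Side note (not part of the patch): the pre/post-processing above scales 16-bit PCM samples by 1 / 32767 to get floats in roughly [-1, 1] for the model, then clamps and rescales before writing the output WAV. A minimal standalone sketch of that conversion follows; the function names pcm_to_float and float_to_pcm are illustrative assumptions, not identifiers from the demo.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

// int16 PCM -> float in roughly [-1, 1], using the same 1/32767 scale as the demo.
static std::vector<float> pcm_to_float(const std::vector<int16_t>& samples) {
    const float scale = 1.0f / std::numeric_limits<int16_t>::max();
    std::vector<float> out(samples.size());
    for (size_t i = 0; i < samples.size(); ++i)
        out[i] = static_cast<float>(samples[i]) * scale;
    return out;
}

// float -> int16 PCM, clamping to [-1, 1] first, as the demo does before saving.
static std::vector<int16_t> float_to_pcm(const std::vector<float>& samples) {
    std::vector<int16_t> out(samples.size());
    for (size_t i = 0; i < samples.size(); ++i) {
        float v = std::min(std::max(samples[i], -1.0f), 1.0f);
        out[i] = static_cast<int16_t>(v * std::numeric_limits<int16_t>::max());
    }
    return out;
}

int main() {
    // Round-trip a few samples to show the scaling; -32768 clamps to -32767 on the way back.
    std::vector<int16_t> pcm = {0, 16384, -16384, 32767, -32768};
    std::vector<float> f = pcm_to_float(pcm);
    std::vector<int16_t> back = float_to_pcm(f);
    for (size_t i = 0; i < pcm.size(); ++i)
        std::printf("%6d -> %+.5f -> %6d\n", pcm[i], f[i], back[i]);
    return 0;
}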