-
Notifications
You must be signed in to change notification settings - Fork 6
/
real_dnn.cpp
214 lines (185 loc) · 7.42 KB
/
real_dnn.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
// This example is derived from the ssd_mobilenet_object_detection opencv demo
// and adapted to be used with Intel RealSense Cameras
// Please see https://github.com/opencv/opencv/blob/master/LICENSE
#include <opencv2/dnn.hpp>
#include <librealsense2/rs.hpp>
#include "cv-helpers.hpp"
#include <algorithm>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>
// #define YOLO
// Fixed network input size expected by the MobileNet-SSD model (300x300).
const size_t inWidth = 300;
const size_t inHeight = 300;
// Width-to-height aspect ratio of the network input; used below to
// center-crop the camera frame so it matches the model's aspect ratio.
const float WHRatio = inWidth / (float)inHeight;
// Input normalization used by blobFromImage: (pixel - meanVal) * inScaleFactor.
// 0.007843 ~= 1/127.5, so [0,255] pixels map roughly to [-1,1].
const float inScaleFactor = 0.007843f;
const float meanVal = 127.5;
// PASCAL VOC class labels in the index order emitted by the Caffe model;
// index 0 is the background class.
const char* classNames[] = {"background",
"aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair",
"cow", "diningtable", "dog", "horse",
"motorbike", "person", "pottedplant",
"sheep", "sofa", "train", "tvmonitor"};
/// Returns the smallest element value of a matrix.
///
/// @param mat  Input matrix (single-channel expected by cv::minMaxIdx).
/// @return     The minimum value, narrowed to float.
///
/// NOTE: cv::minMaxIdx reports results as double; the narrowing to the
/// float return type is made explicit here (the previous implicit
/// conversion triggered compiler warnings). The old hand-rolled scan
/// (which was dead, commented-out code, and incorrectly initialized
/// min to 0) has been removed in favor of the library call.
float cvMin(cv::Mat mat)
{
    double minVal = 0.0, maxVal = 0.0;
    cv::minMaxIdx(mat, &minVal, &maxVal);
    return static_cast<float>(minVal);
}
/// Returns the largest element value of a matrix.
///
/// @param mat  Input matrix (single-channel expected by cv::minMaxIdx).
/// @return     The maximum value as reported by cv::minMaxIdx.
///
/// The old hand-rolled scan (dead, commented-out code that incorrectly
/// initialized max to 0, which would be wrong for all-negative input)
/// and the stray "return min" comment have been removed; the library
/// call handles all cases.
double cvMax(cv::Mat mat)
{
    double minVal = 0.0, maxVal = 0.0;
    cv::minMaxIdx(mat, &minVal, &maxVal);
    return maxVal;
}
// Entry point: streams aligned color+depth frames from an Intel RealSense
// camera, runs a DNN object detector (MobileNet-SSD by default, YOLO when
// the YOLO macro is defined) on each color frame, and overlays each
// detection's class label plus the mean depth (meters) of its bounding box.
// Uses a function-try-block so RealSense/OpenCV failures are reported by
// the catch clauses at the bottom.
int main(int argc, char** argv) try
{
using namespace cv;
using namespace cv::dnn;
using namespace rs2;
// Model files are loaded from the current working directory.
String caffeCfg = "MobileNetSSD_deploy.prototxt.txt";
String caffeModel = "MobileNetSSD_deploy.caffemodel";
String yoloCfg = "yolov3-tiny.cfg";
String yoloModel = "yolov3-tiny.weights";
#ifndef YOLO
Net net = readNetFromCaffe(caffeCfg, caffeModel);
std::cout<<"Using Caffe model..."<<std::endl;
#else
Net net = readNetFromDarknet(yoloCfg, yoloModel);
std::cout<<"Using YOLO model..."<<std::endl;
#endif
// Start streaming from Intel RealSense Camera
pipeline pipe;
auto config = pipe.start();
auto profile = config.get_stream(RS2_STREAM_COLOR)
.as<video_stream_profile>();
// Align depth frames to the color stream so pixel (x,y) corresponds in both.
rs2::align align_to(RS2_STREAM_COLOR);
// Compute the largest centered crop of the camera frame that matches the
// network's aspect ratio (WHRatio), so resizing to 300x300 does not distort.
Size cropSize;
if (profile.width() / (float)profile.height() > WHRatio)
{
// Frame is wider than the network input: crop width, keep full height.
cropSize = Size(static_cast<int>(profile.height() * WHRatio),
profile.height());
}
else
{
// Frame is taller (or equal): crop height, keep full width.
cropSize = Size(profile.width(),
static_cast<int>(profile.width() / WHRatio));
}
Rect crop(Point((profile.width() - cropSize.width) / 2,
(profile.height() - cropSize.height) / 2),
cropSize);
const auto window_name = "Display Image";
namedWindow(window_name, WINDOW_AUTOSIZE);
// Frame counter and start time for the FPS report printed on exit.
int frame = 0;
auto start = std::chrono::high_resolution_clock::now();
// Loop until the display window is closed by the user.
while (cvGetWindowHandle(window_name))
{
//Increment frame count
frame += 1;
// Wait for the next set of frames
auto data = pipe.wait_for_frames();
// Make sure the frames are spatially aligned
data = align_to.process(data);
auto color_frame = data.get_color_frame();
auto depth_frame = data.get_depth_frame();
// If we only received new depth frame,
// but the color did not update, continue
static int last_frame_number = 0;
if (color_frame.get_frame_number() == last_frame_number) continue;
last_frame_number = color_frame.get_frame_number();
// Convert RealSense frame to OpenCV matrix:
auto color_mat = frame_to_mat(color_frame);
auto depth_mat = depth_frame_to_meters(pipe, depth_frame);
// Normalize and resize the full (uncropped) image into the network input
// blob; 'false' keeps the BGR channel order.
Mat inputBlob = blobFromImage(color_mat, inScaleFactor,
Size(inWidth, inHeight), meanVal, false); //Convert Mat to batch of images
net.setInput(inputBlob, "data"); //set the network input
Mat detection = net.forward("detection_out"); //compute output
// Reinterpret the 4-D output as a 2-D matrix: one row per detection,
// columns are [image_id, class_id, confidence, x1, y1, x2, y2].
Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());
// Crop both color and depth frames
color_mat = color_mat(crop);
depth_mat = depth_mat(crop);
// Detections below this confidence are discarded.
float confidenceThreshold = 0.8f;
for(int i = 0; i < detectionMat.rows; i++)
{
float confidence = detectionMat.at<float>(i, 2);
if(confidence > confidenceThreshold)
{
size_t objectClass = (size_t)(detectionMat.at<float>(i, 1));
// Box coordinates are normalized [0,1]; scale to cropped-image pixels.
int xLeftBottom = static_cast<int>(detectionMat.at<float>(i, 3) * color_mat.cols);
int yLeftBottom = static_cast<int>(detectionMat.at<float>(i, 4) * color_mat.rows);
int xRightTop = static_cast<int>(detectionMat.at<float>(i, 5) * color_mat.cols);
int yRightTop = static_cast<int>(detectionMat.at<float>(i, 6) * color_mat.rows);
Rect object((int)xLeftBottom, (int)yLeftBottom,
(int)(xRightTop - xLeftBottom),
(int)(yRightTop - yLeftBottom));
// Clip the box to the image bounds before indexing the depth matrix.
object = object & Rect(0, 0, depth_mat.cols, depth_mat.rows);
// Calculate mean depth inside the detection region
// This is a very naive way to estimate objects depth
// but it is intended to demonstrate how one might
// use depth data in general
Scalar m = mean(depth_mat(object));
// float m = cvMin(depth_mat(object));
// double m = cvMax(depth_mat(object));
std::ostringstream ss;
ss << classNames[objectClass] << " ";
// ss << std::setprecision(2) << m[0] << " meters away";
ss << std::setprecision(2) << m[0] << " meters away";
String conf(ss.str());
// Green box around the detection.
rectangle(color_mat, object, Scalar(0, 255, 0));
int baseLine = 0;
Size labelSize = getTextSize(ss.str(), FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
// Center the label horizontally inside the detection box.
auto center = (object.br() + object.tl())*0.5;
center.x = center.x - labelSize.width / 2;
// White filled background so the black label text stays readable.
rectangle(color_mat, Rect(Point(center.x, center.y - labelSize.height),
Size(labelSize.width, labelSize.height + baseLine)),
Scalar(255, 255, 255), CV_FILLED);
putText(color_mat, ss.str(), center,
FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0,0,0));
}
}
imshow(window_name, color_mat);
// Any key press: report average FPS since startup and exit the loop.
if (waitKey(1) >= 0)
{
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
std::cout<<"FPS "<<frame / elapsed.count() <<std::endl;
break;
}
}
return EXIT_SUCCESS;
}
// RealSense-specific failures carry the failing API call and its arguments.
catch (const rs2::error & e)
{
std::cerr << "RealSense error calling " << e.get_failed_function() << "(" << e.get_failed_args() << "):\n " << e.what() << std::endl;
return EXIT_FAILURE;
}
// Anything else (e.g. OpenCV exceptions, missing model files).
catch (const std::exception& e)
{
std::cerr << e.what() << std::endl;
return EXIT_FAILURE;
}