forked from stephanecharette/DarkHelp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvideo_object_counter.cpp
238 lines (202 loc) · 7.47 KB
/
video_object_counter.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
/* DarkHelp - C++ helper class for Darknet's C API.
* Copyright 2019-2024 Stephane Charette <[email protected]>
* MIT license applies. See "license.txt" for details.
*/
#include "DarkHelp.hpp"
/* A possible input video to use for this sample app is this one:
*
* https://youtu.be/F191OV3TqFw
*
* The output, given a network trained to detect pigs, should look like this:
*
* https://youtu.be/2biQpVRFhbk
*/
// Set this to "1" if you want to also create an output video.
// Otherwise, the output is only shown on screen.
#define SAVE_OUTPUT_VIDEO 0
int main(int argc, char * argv[])
{
int rc = 0;
try
{
if (argc != 5)
{
std::cout
<< "Usage:" << std::endl
<< argv[0] << " <filename.cfg> <filename.names> <filename.weights> <video>" << std::endl;
throw std::invalid_argument("wrong number of arguments");
}
// Load the neural network. The order of the 3 files does not matter, DarkHelp should figure out which file is which.
DarkHelp::NN nn(argv[1], argv[2], argv[3], true, DarkHelp::EDriver::kOpenCV);
DarkHelp::PositionTracker tracker;
tracker.maximum_number_of_frames_per_object = 10;
const cv::Scalar black (0x00, 0x00, 0x00);
const cv::Scalar red (0x00, 0x00, 0xff);
const cv::Scalar green (0x00, 0xff, 0x00);
const cv::Scalar blue (0xff, 0x00, 0x00);
int key = -1;
/* Use OpenCV to read the video frames. Note the technique used below is not necessarily how I'd process webcams
* since a long pause is inserted at the time the frame is displayed. If you're using a webcam, see the other
* example apps where the filenames contain the word "webcam".
*/
const std::string video_filename = argv[4];
cv::VideoCapture cap(video_filename);
if (not cap.isOpened())
{
throw std::runtime_error("failed to open the video file " + video_filename);
}
// get the first frame so we know exactly what size we're dealing with
cv::Mat frame;
cap >> frame;
cap.set(cv::CAP_PROP_POS_FRAMES, 0.0);
const double fps = cap.get(cv::CAP_PROP_FPS);
const int width = frame.cols;
const int height = frame.rows;
const int vertical_boundary_line = width / 2;
int64_t object_counter = 0;
#if SAVE_OUTPUT_VIDEO > 0
cv::VideoWriter output("output.mp4", cv::VideoWriter::fourcc('m', 'p', '4', 'v'), fps, frame.size());
#endif
// determine exactly how much time we should be displaying each frame (so we know how long to wait)
const size_t duration_in_nanoseconds = std::round(1000000000.0 / fps);
std::cout << video_filename << ": " << fps << " FPS, meaning we must display each frame for " << duration_in_nanoseconds << " nanoseconds" << std::endl;
const std::chrono::high_resolution_clock::duration duration_of_each_frame = std::chrono::nanoseconds(duration_in_nanoseconds);
std::chrono::high_resolution_clock::time_point next_frame_timestamp = std::chrono::high_resolution_clock::now() + duration_of_each_frame;
while (cap.isOpened())
{
cap >> frame;
if (frame.empty())
{
// video has ended, no more frames available
break;
}
auto results = nn.predict(frame);
tracker.add(results);
for (const auto & pred : results)
{
// say we only want to track objects with a class id #0, and ignore everything else
if (pred.best_class != 0)
{
continue;
}
const auto & obj = tracker.get(pred.object_id);
int current_x = -1;
int previous_x = -1;
// determine the average "X" positon for this object over the last few frames
// so we can determine the direction in which it is moving
float frame_counter = 0.0f;
float average_x = 0.0f;
for (auto iter = obj.fids_and_rects.crbegin(); frame_counter < 5.0f and iter != obj.fids_and_rects.crend(); iter ++)
{
frame_counter ++;
const int x = iter->second.x;
average_x += x;
if (current_x == -1)
{
current_x = x;
}
else if (previous_x == -1)
{
previous_x = x;
// now that we know both the current and previous X coordinate,
// see if the object has crossed the line so we can adjust the object counter
if (previous_x < vertical_boundary_line and current_x >= vertical_boundary_line)
{
object_counter ++;
}
else if (previous_x >= vertical_boundary_line and current_x < vertical_boundary_line)
{
object_counter --;
}
}
}
average_x /= frame_counter;
// compare the most recent tracker object rectangle to the average to see if we're moving left->right, right->left, or sitting still
auto colour = black;
if (obj.fids_and_rects.crbegin()->second.x - average_x >= 3)
{
colour = green; // left-to-right
}
else if (obj.fids_and_rects.crbegin()->second.x - average_x <= -3)
{
colour = red; // right-to-left
}
// draw some circles over the past few locations where this object has been tracked
frame_counter = 0.0f;
for (auto iter = obj.fids_and_rects.rbegin(); frame_counter < 5.0f and iter != obj.fids_and_rects.rend(); iter ++)
{
frame_counter ++;
const auto r = iter->second;
cv::Point p(r.x + r.width / 2, r.y + r.height / 2);
cv::circle(frame, p, 10, colour, 3, cv::LINE_AA);
}
cv::putText(frame, std::to_string(obj.oid), obj.center(), cv::FONT_HERSHEY_SIMPLEX, 1.25, black, 2, cv::LINE_AA);
}
// draw the vertical line and display the total count at the top of the frame
cv::line(frame, cv::Point(vertical_boundary_line, 0), cv::Point(vertical_boundary_line, height), blue, 1);
cv::putText(frame, std::to_string(object_counter), cv::Point(vertical_boundary_line, 35), cv::FONT_HERSHEY_SIMPLEX, 1.25, blue, 2, cv::LINE_AA);
#if SAVE_OUTPUT_VIDEO > 0
output.write(frame);
#endif
// show the frame and sleep for exactly the right amount of time so the video is displayed "realtime"
const auto now = std::chrono::high_resolution_clock::now();
auto milliseconds_to_wait = std::chrono::duration_cast<std::chrono::milliseconds>(next_frame_timestamp - now).count();
#if 0
// if you're running this on a slow computer, you may need to force
// the timeout to be non-negative, otherwise you'll never see anything
if (milliseconds_to_wait < 5)
{
milliseconds_to_wait = 5;
}
#endif
if (milliseconds_to_wait > 0) // beware, length of time remaining can be negative if we're falling behind!
{
cv::imshow("counting objects", frame);
while (true)
{
// press spacebar to pause or unpause, ESC to exit, any other key to continue
key = cv::waitKey(milliseconds_to_wait);
if (key == -1)
{
// regular timeout happened (video is not paused, otherwise we wouldn't timeout)
break;
}
else if (key == 27)
{
// we've been asked to exit completely
cap.release();
break;
}
else if (key == 32 and milliseconds_to_wait < 0)
{
// we've been asked to unpause the video
// since we've been paused for a while, reset the timestamp when the next frame is expected to be shown
next_frame_timestamp = std::chrono::high_resolution_clock::now();
break;
}
else if (key == 32 and milliseconds_to_wait >= 0)
{
// we've been asked to pause the video
milliseconds_to_wait = -1;
continue;
}
}
}
next_frame_timestamp += duration_of_each_frame;
}
#if SAVE_OUTPUT_VIDEO > 0
output.release();
#endif
if (key != 27)
{
// pause to leave the last frame up on the screen
cv::waitKey();
}
}
catch (const std::exception & e)
{
std::cout << e.what() << std::endl;
rc = 1;
}
return rc;
}