Skip to content

Commit ad1bc53

Browse files
authored Jan 17, 2021
Add files via upload
0 parents  commit ad1bc53

File tree

8 files changed

+386
-0
lines changed

8 files changed

+386
-0
lines changed
 

Diff for: ‎bus.jpg

476 KB
Loading

Diff for: ‎coco.names

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
person
2+
bicycle
3+
car
4+
motorbike
5+
aeroplane
6+
bus
7+
train
8+
truck
9+
boat
10+
traffic light
11+
fire hydrant
12+
stop sign
13+
parking meter
14+
bench
15+
bird
16+
cat
17+
dog
18+
horse
19+
sheep
20+
cow
21+
elephant
22+
bear
23+
zebra
24+
giraffe
25+
backpack
26+
umbrella
27+
handbag
28+
tie
29+
suitcase
30+
frisbee
31+
skis
32+
snowboard
33+
sports ball
34+
kite
35+
baseball bat
36+
baseball glove
37+
skateboard
38+
surfboard
39+
tennis racket
40+
bottle
41+
wine glass
42+
cup
43+
fork
44+
knife
45+
spoon
46+
bowl
47+
banana
48+
apple
49+
sandwich
50+
orange
51+
broccoli
52+
carrot
53+
hot dog
54+
pizza
55+
donut
56+
cake
57+
chair
58+
sofa
59+
pottedplant
60+
bed
61+
diningtable
62+
toilet
63+
tvmonitor
64+
laptop
65+
mouse
66+
remote
67+
keyboard
68+
cell phone
69+
microwave
70+
oven
71+
toaster
72+
sink
73+
refrigerator
74+
book
75+
clock
76+
vase
77+
scissors
78+
teddy bear
79+
hair drier
80+
toothbrush

Diff for: ‎dog.jpg

160 KB
Loading

Diff for: ‎main_yolo.cpp

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#include "yolo.h"
2+
3+
// Builds a detector from `config`.
//
// Copies the three thresholds, stores the network name, reads the class
// labels (one per line) from `classesFile`, and loads "<netname>.onnx" with
// readNet().
//
// @param config  thresholds plus the network name (the ONNX file stem).
// @throws cv::Exception (via CV_Error) when the class-name file cannot be
//         opened or yields no labels.
YOLO::YOLO(Net_config config)
{
    cout << "Net use " << config.netname << endl;
    this->confThreshold = config.confThreshold;
    this->nmsThreshold = config.nmsThreshold;
    this->objThreshold = config.objThreshold;

    // Portable, bounded replacement for the MSVC-only strcpy_s: copy at most
    // sizeof(netname) - 1 characters and terminate explicitly.
    const size_t copied = config.netname.copy(this->netname, sizeof(this->netname) - 1);
    this->netname[copied] = '\0';

    ifstream ifs(this->classesFile.c_str());
    if (!ifs.is_open())
    {
        CV_Error(cv::Error::StsError, "Cannot open class names file: " + this->classesFile);
    }
    string line;
    while (getline(ifs, line))
    {
        // Drop the '\r' left behind when a CRLF file is read as-is.
        if (!line.empty() && line[line.size() - 1] == '\r') line.erase(line.size() - 1);
        this->classes.push_back(line);
    }
    // Trailing blank lines are not classes; counting them would change the
    // per-anchor channel count (classes + 5) that detect() relies on.
    while (!this->classes.empty() && this->classes.back().empty()) this->classes.pop_back();
    if (this->classes.empty())
    {
        CV_Error(cv::Error::StsError, "No class names found in: " + this->classesFile);
    }

    // Use the full configured name so a long name is never truncated by the
    // fixed-size `netname` buffer.
    const string modelFile = config.netname + ".onnx";
    this->net = readNet(modelFile);
}
19+
20+
void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame) // Draw the predicted bounding box
21+
{
22+
//Draw a rectangle displaying the bounding box
23+
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);
24+
25+
//Get the label for the class name and its confidence
26+
string label = format("%.2f", conf);
27+
label = this->classes[classId] + ":" + label;
28+
29+
//Display the label at the top of the bounding box
30+
int baseLine;
31+
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
32+
top = max(top, labelSize.height);
33+
//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
34+
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
35+
}
36+
37+
// Applies the logistic function 1 / (1 + e^-v) in place to the first
// `length` values of `out`'s buffer, which is read as an array of float.
// NOTE(review): the caller must guarantee the buffer really holds at least
// `length` contiguous floats; nothing is checked here.
void YOLO::sigmoid(Mat* out, int length)
{
    float* values = (float*)(out->data);
    int idx = 0;
    while (idx < length)
    {
        float& v = values[idx];
        v = 1.0 / (1 + expf(-v));
        ++idx;
    }
}
46+
47+
// Runs the network on `frame` and draws the detections that survive
// thresholding and non-maximum suppression onto `frame`.
//
// Each of the first three network outputs is read as a float buffer laid out
// as [anchor][channel][grid_y][grid_x], with channels
// (cx, cy, w, h, objectness, class scores...). All values are passed through
// the sigmoid before decoding.
//
// @param frame  input image; modified in place with boxes and labels.
// @throws cv::Exception (via CV_Assert) when `frame` is empty or the network
//         outputs do not have the expected count, type or size.
void YOLO::detect(Mat& frame)
{
    // An empty image cannot be preprocessed; reject it up front.
    CV_Assert(!frame.empty());

    Mat blob;
    blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
    this->net.setInput(blob);
    vector<Mat> outs;
    this->net.forward(outs, this->net.getUnconnectedOutLayersNames());

    const int num_scales = 3;   // rows of `stride` / `anchors`
    const int num_anchors = 3;  // (w, h) pairs per row of `anchors`
    CV_Assert(outs.size() >= static_cast<size_t>(num_scales));

    // Generate proposals.
    vector<int> classIds;
    vector<float> confidences;
    vector<Rect> boxes;
    const float ratioh = (float)frame.rows / this->inpHeight;
    const float ratiow = (float)frame.cols / this->inpWidth;
    const int num_classes = static_cast<int>(this->classes.size());
    const int nout = num_classes + 5;

    for (int n = 0; n < num_scales; n++)  // scale
    {
        const int num_grid_x = (int)(this->inpWidth / this->stride[n]);
        const int num_grid_y = (int)(this->inpHeight / this->stride[n]);
        const int area = num_grid_x * num_grid_y;
        const int length = num_anchors * nout * area;

        // The raw-pointer decoding below is only valid for a contiguous float
        // buffer holding at least `length` values.
        CV_Assert(outs[n].depth() == CV_32F);
        CV_Assert(outs[n].isContinuous());
        CV_Assert(outs[n].total() >= static_cast<size_t>(length));

        this->sigmoid(&outs[n], length);
        for (int q = 0; q < num_anchors; q++)  // anchor
        {
            const float anchor_w = this->anchors[n][q * 2];
            const float anchor_h = this->anchors[n][q * 2 + 1];
            const float* pdata = (float*)outs[n].data + q * nout * area;
            for (int i = 0; i < num_grid_y; i++)
            {
                for (int j = 0; j < num_grid_x; j++)
                {
                    const int cell = i * num_grid_x + j;
                    const float box_score = pdata[4 * area + cell];
                    if (!(box_score > this->objThreshold)) continue;

                    // Find the best-scoring class for this cell.
                    float max_class_score = 0;
                    int max_class_id = 0;
                    for (int c = 0; c < num_classes; c++)
                    {
                        const float class_score = pdata[(c + 5) * area + cell];
                        if (class_score > max_class_score)
                        {
                            max_class_score = class_score;
                            max_class_id = c;
                        }
                    }
                    if (!(max_class_score > this->confThreshold)) continue;

                    // Decode centre and size in network-input pixels.
                    const float cx = (pdata[cell] * 2.f - 0.5f + j) * this->stride[n];
                    const float cy = (pdata[area + cell] * 2.f - 0.5f + i) * this->stride[n];
                    const float w = powf(pdata[2 * area + cell] * 2.f, 2.f) * anchor_w;
                    const float h = powf(pdata[3 * area + cell] * 2.f, 2.f) * anchor_h;

                    // Map the coordinates back onto the original image.
                    const int left = (cx - 0.5 * w) * ratiow;
                    const int top = (cy - 0.5 * h) * ratioh;

                    classIds.push_back(max_class_id);
                    confidences.push_back(max_class_score);
                    boxes.push_back(Rect(left, top, (int)(w * ratiow), (int)(h * ratioh)));
                }
            }
        }
    }

    // Perform non maximum suppression to eliminate redundant overlapping
    // boxes with lower confidences.
    vector<int> indices;
    NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    for (size_t k = 0; k < indices.size(); ++k)
    {
        const int idx = indices[k];
        const Rect& box = boxes[idx];
        this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
                       box.x + box.width, box.y + box.height, frame);
    }
}
123+
124+
int main()
125+
{
126+
YOLO yolo_model(yolo_nets[0]);
127+
string imgpath = "bus.jpg";
128+
Mat srcimg = imread(imgpath);
129+
yolo_model.detect(srcimg);
130+
131+
static const string kWinName = "Deep learning object detection in OpenCV";
132+
namedWindow(kWinName, WINDOW_NORMAL);
133+
imshow(kWinName, srcimg);
134+
waitKey(0);
135+
destroyAllWindows();
136+
}

Diff for: ‎main_yolov5.py

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import cv2
2+
import argparse
3+
import numpy as np
4+
5+
class yolov5():
    """YOLOv5 detector running an ONNX model through OpenCV's dnn module.

    `detect` runs the network and decodes the raw outputs into rows of
    (cx, cy, w, h, objectness, class scores...) in network-input pixels;
    `postprocess` thresholds those rows, applies NMS and draws the result.
    """

    # Network input size in pixels, used for preprocessing and for mapping
    # boxes back onto the source image.
    inpWidth = 640
    inpHeight = 640

    def __init__(self, yolo_type, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5):
        """Load class names from 'coco.names' and the model '<yolo_type>.onnx'.

        :param yolo_type: model file stem, e.g. 'yolov5s'.
        :param confThreshold: minimum class score; also the NMS score threshold.
        :param nmsThreshold: NMS overlap threshold.
        :param objThreshold: minimum objectness score.
        """
        with open('coco.names', 'rt') as f:
            self.classes = f.read().rstrip('\n').split('\n')
        self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))]
        num_classes = len(self.classes)
        anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        self.nl = len(anchors)           # number of output scales
        self.na = len(anchors[0]) // 2   # anchors per scale
        self.no = num_classes + 5        # values per anchor
        self.grid = [np.zeros(1)] * self.nl
        self.stride = np.array([8., 16., 32.])
        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, 1, -1, 1, 1, 2)

        self.net = cv2.dnn.readNet(yolo_type + '.onnx')
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold

    def _make_grid(self, nx=20, ny=20):
        """Return a float32 array of shape (1, 1, ny, nx, 2) whose entry
        [0, 0, y, x] is (x, y)."""
        # With the default 'xy' indexing, meshgrid(arange(nx), arange(ny))
        # yields (ny, nx) arrays; swapping the arguments only happens to work
        # when nx == ny.
        xv, yv = np.meshgrid(np.arange(nx), np.arange(ny))
        return np.stack((xv, yv), 2).reshape((1, 1, ny, nx, 2)).astype(np.float32)

    def postprocess(self, frame, outs):
        """Threshold decoded detections, apply NMS and draw them on `frame`.

        :param frame: source image (H x W x C); drawn on and returned.
        :param outs: decoded detections as returned by `detect`.
        :return: the annotated frame.
        """
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        ratioh, ratiow = frameHeight / self.inpHeight, frameWidth / self.inpWidth
        # Scan through all the bounding boxes output from the network and keep only the
        # ones with high confidence scores. Assign the box's class label as the class with the highest score.
        classIds = []
        confidences = []
        boxes = []
        for out in outs:
            for detection in out:
                scores = detection[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > self.confThreshold and detection[4] > self.objThreshold:
                    center_x = int(detection[0] * ratiow)
                    center_y = int(detection[1] * ratioh)
                    width = int(detection[2] * ratiow)
                    height = int(detection[3] * ratioh)
                    left = int(center_x - width / 2)
                    top = int(center_y - height / 2)
                    classIds.append(classId)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])

        # Perform non maximum suppression to eliminate redundant overlapping boxes with
        # lower confidences.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
        # Depending on the OpenCV version the indices come back as an (N, 1)
        # array, a flat array, or an empty tuple; flatten to handle all three.
        for i in np.array(indices).reshape(-1):
            i = int(i)
            left, top, width, height = boxes[i]
            frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
        return frame

    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        """Draw one bounding box and its '<class>:<score>' label on `frame`."""
        # Draw a bounding box.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)

        label = '%.2f' % conf
        label = '%s:%s' % (self.classes[classId], label)

        # Display the label just above the top of the bounding box. Measure it
        # with the same font settings used for drawing and clamp the baseline
        # so the text stays inside the image for boxes near the top edge.
        fontFace, fontScale, thickness = cv2.FONT_HERSHEY_SIMPLEX, 1, 2
        labelSize, baseLine = cv2.getTextSize(label, fontFace, fontScale, thickness)
        text_y = max(top - 10, labelSize[1])
        cv2.putText(frame, label, (left, text_y), fontFace, fontScale, (0, 255, 0), thickness=thickness)
        return frame

    def detect(self, srcimg):
        """Run the network on `srcimg` and decode its raw outputs.

        :param srcimg: BGR image as loaded by cv2.imread.
        :return: array of shape (bs, N, no) with rows
                 (cx, cy, w, h, objectness, class scores...) in network-input pixels.
        """
        blob = cv2.dnn.blobFromImage(srcimg, 1 / 255.0, (self.inpWidth, self.inpHeight), [0, 0, 0], swapRB=True, crop=False)
        # Sets the input to the network
        self.net.setInput(blob)

        # Runs the forward pass to get output of the output layers
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())

        z = []  # inference output
        for i in range(self.nl):
            bs, _, ny, nx = outs[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            # Work on a local array: `outs` may be an immutable tuple.
            out = outs[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
            if self.grid[i].shape[2:4] != out.shape[2:4]:
                self.grid[i] = self._make_grid(nx, ny)

            y = 1 / (1 + np.exp(-out))  # sigmoid
            # Strictly, only x, y, w, h need the sigmoid. Applying it to every
            # channel barely changes the result: sigmoid is monotonically
            # increasing, so the ordering of the class scores (and hence NMS)
            # is unaffected.
            # However, inspecting the raw class scores at a breakpoint shows
            # they are all negative, so the sigmoid is needed to bring them
            # into the [0, 1] range.
            y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * int(self.stride[i])
            y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
            z.append(y.reshape(bs, -1, self.no))
        z = np.concatenate(z, axis=1)
        return z
102+
103+
if __name__ == "__main__":
    # Command-line demo: run one model on one image and show the result.
    parser = argparse.ArgumentParser()
    parser.add_argument("--imgpath", type=str, default='bus.jpg', help="image path")
    parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x'])
    args = parser.parse_args()

    # cv2.imread returns None instead of raising when the file cannot be
    # read; check before loading the model so a bad path fails fast.
    srcimg = cv2.imread(args.imgpath)
    if srcimg is None:
        parser.error('cannot read image: %s' % args.imgpath)

    yolonet = yolov5(args.net_type)
    dets = yolonet.detect(srcimg)
    srcimg = yolonet.postprocess(srcimg, dets)

    winName = 'Deep learning object detection in OpenCV'
    cv2.namedWindow(winName, 0)
    cv2.imshow(winName, srcimg)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

Diff for: ‎person.jpg

111 KB
Loading

Diff for: ‎yolo.h

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#include <fstream>
2+
#include <sstream>
3+
#include <iostream>
4+
#include <opencv2/dnn.hpp>
5+
#include <opencv2/imgproc.hpp>
6+
#include <opencv2/highgui.hpp>
7+
8+
using namespace cv;
9+
using namespace dnn;
10+
using namespace std;
11+
12+
/// Settings used to construct a YOLO detector.
/// Field order matters: instances are brace-initialized positionally.
struct Net_config
{
    /// Confidence threshold.
    float confThreshold;
    /// Non-maximum suppression threshold.
    float nmsThreshold;
    /// Object confidence threshold.
    float objThreshold;
    /// Network name; the model is loaded from "<netname>.onnx".
    std::string netname;
};
19+
20+
class YOLO
21+
{
22+
public:
23+
YOLO(Net_config config);
24+
void detect(Mat& frame);
25+
private:
26+
const float anchors[3][6] = {{10.0, 13.0, 16.0, 30.0, 33.0, 23.0}, {30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0}};
27+
const float stride[3] = { 8.0, 16.0, 32.0 };
28+
const string classesFile = "coco.names";
29+
const int inpWidth = 640;
30+
const int inpHeight = 640;
31+
float confThreshold;
32+
float nmsThreshold;
33+
float objThreshold;
34+
35+
char netname[20];
36+
vector<string> classes;
37+
Net net;
38+
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
39+
void sigmoid(Mat* out, int length);
40+
};
41+
42+
/// Logistic function 1 / (1 + e^-x) of a single value, returned as float.
static inline float sigmoid_x(float x)
{
    const auto denominator = 1.f + exp(-x);
    return static_cast<float>(1.f / denominator);
}
46+
47+
// Preset configurations, one per YOLOv5 model variant.
// Each entry is {confThreshold, nmsThreshold, objThreshold, netname}.
// `static` gives the array internal linkage: this definition lives in a
// header, and without it every translation unit including the header would
// emit the same external symbol and fail to link.
static Net_config yolo_nets[4] = {
    {0.5f, 0.5f, 0.5f, "yolov5s"},
    {0.5f, 0.5f, 0.5f, "yolov5m"},
    {0.5f, 0.5f, 0.5f, "yolov5l"},
    {0.5f, 0.5f, 0.5f, "yolov5x"}
};

Diff for: ‎zidane.jpg

165 KB
Loading

0 commit comments

Comments
 (0)
Please sign in to comment.