hpc203 · Jan 17, 2021
diff --git a/Diff for: ‎bus.jpg
476 KB b/Diff for: ‎bus.jpg
476 KB
diff --git a/Diff for: ‎coco.names
+80 b/Diff for: ‎coco.names
+80
diff --git a/Diff for: ‎dog.jpg
160 KB b/Diff for: ‎dog.jpg
160 KB
diff --git a/Diff for: ‎main_yolo.cpp
+136 b/Diff for: ‎main_yolo.cpp
+136
diff --git a/Diff for: ‎main_yolov5.py
+118 b/Diff for: ‎main_yolov5.py
+118
diff --git a/Diff for: ‎person.jpg
111 KB b/Diff for: ‎person.jpg
111 KB
diff --git a/Diff for: ‎yolo.h
+52 b/Diff for: ‎yolo.h
+52
diff --git a/Diff for: ‎zidane.jpg
165 KB b/Diff for: ‎zidane.jpg
165 KB
@@ -0,0 +1,80 @@
+person
+bicycle
+car
+motorbike
+aeroplane
+bus
+train
+truck
+boat
+traffic light
+fire hydrant
+stop sign
+parking meter
+bench
+bird
+cat
+dog
+horse
+sheep
+cow
+elephant
+bear
+zebra
+giraffe
+backpack
+umbrella
+handbag
+tie
+suitcase
+frisbee
+skis
+snowboard
+sports ball
+kite
+baseball bat
+baseball glove
+skateboard
+surfboard
+tennis racket
+bottle
+wine glass
+cup
+fork
+knife
+spoon
+bowl
+banana
+apple
+sandwich
+orange
+broccoli
+carrot
+hot dog
+pizza
+donut
+cake
+chair
+sofa
+pottedplant
+bed
+diningtable
+toilet
+tvmonitor
+laptop
+mouse
+remote
+keyboard
+cell phone
+microwave
+oven
+toaster
+sink
+refrigerator
+book
+clock
+vase
+scissors
+teddy bear
+hair drier
+toothbrush
@@ -0,0 +1,136 @@
+#include "yolo.h"
+
+// Builds a detector from `config`: stores the three thresholds, loads the
+// class names from `classesFile` (one name per line) and reads the ONNX
+// model named "<config.netname>.onnx".
+YOLO::YOLO(Net_config config)
+{
+	cout << "Net use " << config.netname << endl;
+	this->confThreshold = config.confThreshold;
+	this->nmsThreshold = config.nmsThreshold;
+	this->objThreshold = config.objThreshold;
+
+	// Portable, bounded copy into the fixed-size member buffer; the array
+	// overload of strcpy_s is MSVC-only. Longer names are truncated here, but
+	// the model path below is built from the full string.
+	const size_t copied = config.netname.copy(this->netname, sizeof(this->netname) - 1);
+	this->netname[copied] = '\0';
+
+	ifstream ifs(this->classesFile.c_str());
+	if (!ifs.is_open())
+	{
+		// Without class names the decoder cannot know the per-anchor layout,
+		// so make the failure visible instead of silently continuing.
+		cerr << "Cannot open class names file: " << this->classesFile << endl;
+	}
+	string line;
+	while (getline(ifs, line))
+	{
+		// Tolerate CRLF line endings so labels do not carry a stray '\r'.
+		if (!line.empty() && line.back() == '\r')
+		{
+			line.pop_back();
+		}
+		this->classes.push_back(line);
+	}
+
+	const string modelFile = config.netname + ".onnx";
+	this->net = readNet(modelFile);
+}
+
+// Draws one detection on `frame`: a red rectangle from (left, top) to
+// (right, bottom) and a green "<class>:<confidence>" label at its top-left.
+// If `classId` is outside the loaded class list only the confidence is shown.
+void YOLO::drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
+{
+	// Bounding box.
+	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);
+
+	// Label text: confidence with two decimals, prefixed by the class name.
+	string label = format("%.2f", conf);
+	if (classId >= 0 && classId < static_cast<int>(this->classes.size()))
+	{
+		label = this->classes[classId] + ":" + label;
+	}
+
+	// Measure the label with the same font settings used to draw it, so that
+	// clamping `top` really keeps the text inside the image.
+	const int fontFace = FONT_HERSHEY_SIMPLEX;
+	const double fontScale = 0.75;
+	const int thickness = 1;
+	int baseLine = 0;
+	const Size labelSize = getTextSize(label, fontFace, fontScale, thickness, &baseLine);
+	top = max(top, labelSize.height);
+	putText(frame, label, Point(left, top), fontFace, fontScale, Scalar(0, 255, 0), thickness);
+}
+
+// Applies the logistic function 1 / (1 + e^-x) in place to the first `length`
+// float elements of `*out`.
+// The buffer is accessed through a raw float pointer, so the matrix must hold
+// 32-bit floats in one continuous block with at least `length` elements;
+// these preconditions are checked with CV_Assert instead of being assumed.
+void YOLO::sigmoid(Mat* out, int length)
+{
+	CV_Assert(out != nullptr);
+	CV_Assert(length >= 0);
+	if (length == 0)
+	{
+		return;
+	}
+	CV_Assert(out->depth() == CV_32F && out->isContinuous());
+	CV_Assert(static_cast<size_t>(length) <= out->total() * static_cast<size_t>(out->channels()));
+
+	float* pdata = out->ptr<float>();
+	for (int i = 0; i < length; i++)
+	{
+		pdata[i] = 1.f / (1.f + expf(-pdata[i]));
+	}
+}
+
+// Runs the network on `frame`, decodes the three output scales into boxes,
+// applies non-maximum suppression and draws the surviving detections onto
+// `frame` in place.
+//
+// Each output is read as a planar float buffer laid out as
+// [anchor][channel][grid_y][grid_x], where the channels are
+// cx, cy, w, h, objectness followed by one score per class.
+// An empty `frame` is reported on stderr and left untouched.
+void YOLO::detect(Mat& frame)
+{
+	if (frame.empty())
+	{
+		cerr << "YOLO::detect: empty input frame" << endl;
+		return;
+	}
+
+	Mat blob;
+	blobFromImage(frame, blob, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
+	this->net.setInput(blob);
+	vector<Mat> outs;
+	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
+
+	const int num_scales = 3;    // one output per stride
+	const int num_anchors = 3;   // anchors per scale
+	CV_Assert(outs.size() >= static_cast<size_t>(num_scales));
+
+	// Generate proposals.
+	vector<int> classIds;
+	vector<float> confidences;
+	vector<Rect> boxes;
+	const float ratioh = (float)frame.rows / this->inpHeight;
+	const float ratiow = (float)frame.cols / this->inpWidth;
+	const int num_classes = static_cast<int>(this->classes.size());
+	const int nout = num_classes + 5;
+	for (int n = 0; n < num_scales; n++)   // scale
+	{
+		const int num_grid_x = (int)(this->inpWidth / this->stride[n]);
+		const int num_grid_y = (int)(this->inpHeight / this->stride[n]);
+		const int area = num_grid_x * num_grid_y;
+		const int count = num_anchors * nout * area;
+
+		// The decoding below indexes the raw buffer directly; refuse outputs
+		// that are not float, not continuous or too small for this layout
+		// (e.g. a class list that does not match the model).
+		CV_Assert(outs[n].depth() == CV_32F && outs[n].isContinuous());
+		CV_Assert(outs[n].total() >= static_cast<size_t>(count));
+
+		this->sigmoid(&outs[n], count);
+		for (int q = 0; q < num_anchors; q++)    // anchor index
+		{
+			const float anchor_w = this->anchors[n][q * 2];
+			const float anchor_h = this->anchors[n][q * 2 + 1];
+			const float* pdata = (const float*)outs[n].data + q * nout * area;
+			for (int i = 0; i < num_grid_y; i++)
+			{
+				for (int j = 0; j < num_grid_x; j++)
+				{
+					const int cell = i * num_grid_x + j;
+					const float box_score = pdata[4 * area + cell];
+					if (!(box_score > this->objThreshold))
+					{
+						continue;
+					}
+
+					// Pick the class with the highest score for this cell.
+					float max_class_score = 0;
+					int max_class_id = 0;
+					for (int c = 0; c < num_classes; c++)
+					{
+						const float class_score = pdata[(c + 5) * area + cell];
+						if (class_score > max_class_score)
+						{
+							max_class_score = class_score;
+							max_class_id = c;
+						}
+					}
+					if (!(max_class_score > this->confThreshold))
+					{
+						continue;
+					}
+
+					// Decode centre and size in network-input pixels.
+					const float cx = (pdata[cell] * 2.f - 0.5f + j) * this->stride[n];
+					const float cy = (pdata[area + cell] * 2.f - 0.5f + i) * this->stride[n];
+					const float sw = pdata[2 * area + cell] * 2.f;
+					const float sh = pdata[3 * area + cell] * 2.f;
+					const float w = sw * sw * anchor_w;
+					const float h = sh * sh * anchor_h;
+
+					// Map the coordinates back onto the original image.
+					const int left = static_cast<int>((cx - 0.5 * w) * ratiow);
+					const int top = static_cast<int>((cy - 0.5 * h) * ratioh);
+
+					classIds.push_back(max_class_id);
+					confidences.push_back(max_class_score);
+					boxes.push_back(Rect(left, top, (int)(w * ratiow), (int)(h * ratioh)));
+				}
+			}
+		}
+	}
+
+	// Perform non maximum suppression to eliminate redundant overlapping boxes
+	// with lower confidences.
+	vector<int> indices;
+	NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
+	for (size_t k = 0; k < indices.size(); ++k)
+	{
+		const int idx = indices[k];
+		const Rect& box = boxes[idx];
+		this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
+			box.x + box.width, box.y + box.height, frame);
+	}
+}
+
+// Demo entry point: loads the first configured network, runs detection on
+// "bus.jpg" and shows the annotated image until a key is pressed.
+// Returns 1 if the image cannot be read, 0 otherwise.
+int main()
+{
+	YOLO yolo_model(yolo_nets[0]);
+	const string imgpath = "bus.jpg";
+	Mat srcimg = imread(imgpath);
+	if (srcimg.empty())
+	{
+		// imread signals failure with an empty matrix rather than an error.
+		cerr << "Cannot read image: " << imgpath << endl;
+		return 1;
+	}
+	yolo_model.detect(srcimg);
+
+	static const string kWinName = "Deep learning object detection in OpenCV";
+	namedWindow(kWinName, WINDOW_NORMAL);
+	imshow(kWinName, srcimg);
+	waitKey(0);
+	destroyAllWindows();
+	return 0;
+}
@@ -0,0 +1,118 @@
+import cv2
+import argparse
+import numpy as np
+
+class yolov5():
+    def __init__(self, yolo_type, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5):
+        with open('coco.names', 'rt') as f:
+            self.classes = f.read().rstrip('\n').split('\n')
+        self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))]
+        num_classes = len(self.classes)
+        anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
+        self.nl = len(anchors)
+        self.na = len(anchors[0]) // 2
+        self.no = num_classes + 5
+        self.grid = [np.zeros(1)] * self.nl
+        self.stride = np.array([8., 16., 32.])
+        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, 1, -1, 1, 1, 2)
+
+        self.net = cv2.dnn.readNet(yolo_type + '.onnx')
+        self.confThreshold = confThreshold
+        self.nmsThreshold = nmsThreshold
+        self.objThreshold = objThreshold
+
+    def _make_grid(self, nx=20, ny=20):
+        xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
+        return np.stack((xv, yv), 2).reshape((1, 1, ny, nx, 2)).astype(np.float32)
+
+    def postprocess(self, frame, outs):
+        frameHeight = frame.shape[0]
+        frameWidth = frame.shape[1]
+        ratioh, ratiow = frameHeight / 640, frameWidth / 640
+        # Scan through all the bounding boxes output from the network and keep only the
+        # ones with high confidence scores. Assign the box's class label as the class with the highest score.
+        classIds = []
+        confidences = []
+        boxes = []
+        for out in outs:
+            for detection in out:
+                scores = detection[5:]
+                classId = np.argmax(scores)
+                confidence = scores[classId]
+                if confidence > self.confThreshold and detection[4] > self.objThreshold:
+                    center_x = int(detection[0] * ratiow)
+                    center_y = int(detection[1] * ratioh)
+                    width = int(detection[2] * ratiow)
+                    height = int(detection[3] * ratioh)
+                    left = int(center_x - width / 2)
+                    top = int(center_y - height / 2)
+                    classIds.append(classId)
+                    confidences.append(float(confidence))
+                    boxes.append([left, top, width, height])
+
+        # Perform non maximum suppression to eliminate redundant overlapping boxes with
+        # lower confidences.
+        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
+        for i in indices:
+            i = i[0]
+            box = boxes[i]
+            left = box[0]
+            top = box[1]
+            width = box[2]
+            height = box[3]
+            frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
+        return frame
+    def drawPred(self, frame, classId, conf, left, top, right, bottom):
+        # Draw a bounding box.
+        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)
+
+        label = '%.2f' % conf
+        label = '%s:%s' % (self.classes[classId], label)
+
+        # Display the label at the top of the bounding box
+        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+        top = max(top, labelSize[1])
+        # cv.rectangle(frame, (left, top - round(1.5 * labelSize[1])), (left + round(1.5 * labelSize[0]), top + baseLine), (255,255,255), cv.FILLED)
+        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
+        return frame
+    def detect(self, srcimg):
+        blob = cv2.dnn.blobFromImage(srcimg, 1 / 255.0, (640, 640), [0, 0, 0], swapRB=True, crop=False)
+        # Sets the input to the network
+        self.net.setInput(blob)
+
+        # Runs the forward pass to get output of the output layers
+        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())
+
+        z = []  # inference output
+        for i in range(self.nl):
+            bs, _, ny, nx = outs[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
+            # outs[i] = outs[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+            outs[i] = outs[i].reshape(bs, self.na, self.no, ny, nx).transpose(0, 1, 3, 4, 2)
+            if self.grid[i].shape[2:4] != outs[i].shape[2:4]:
+                self.grid[i] = self._make_grid(nx, ny)
+
+            y = 1 / (1 + np.exp(-outs[i]))  ### sigmoid
+            ###其实只需要对x,y,w,h做sigmoid变换的， 不过全做sigmoid变换对结果影响不大，因为sigmoid是单调递增函数，那么就不影响类别置信度的排序关系，因此不影响后面的NMS
+            ###不过设断点查看类别置信度，都是负数，看来有必要做sigmoid变换把概率值强行拉回到0到1的区间内
+            y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * int(self.stride[i])
+            y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
+            z.append(y.reshape(bs, -1, self.no))
+        z = np.concatenate(z, axis=1)
+        return z
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--imgpath", type=str, default='bus.jpg', help="image path")
+    parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x'])
+    args = parser.parse_args()
+
+    yolonet = yolov5(args.net_type)
+    srcimg = cv2.imread(args.imgpath)
+    dets = yolonet.detect(srcimg)
+    srcimg = yolonet.postprocess(srcimg, dets)
+
+    winName = 'Deep learning object detection in OpenCV'
+    cv2.namedWindow(winName, 0)
+    cv2.imshow(winName, srcimg)
+    cv2.waitKey(0)
+    cv2.destroyAllWindows()
@@ -0,0 +1,52 @@
+#include <fstream>
+#include <sstream>
+#include <iostream>
+#include <opencv2/dnn.hpp>
+#include <opencv2/imgproc.hpp>
+#include <opencv2/highgui.hpp>
+
+using namespace cv;
+using namespace dnn;
+using namespace std;
+
+// Settings passed to the YOLO constructor. Kept as a plain aggregate so it
+// can be brace-initialised in field order.
+struct Net_config
+{
+	// Minimum best-class score; also the score threshold given to NMS.
+	float confThreshold;
+	// Overlap threshold given to non-maximum suppression.
+	float nmsThreshold;
+	// Minimum objectness score for a grid cell to be considered.
+	float objThreshold;
+	// Model name; ".onnx" is appended to form the file that is loaded.
+	string netname;
+};
+
+// Object detector around an OpenCV dnn network.
+// Construct it from a Net_config, then call detect() to annotate an image in
+// place.
+class YOLO
+{
+public:
+	YOLO(Net_config config);
+	// Runs detection on `frame` and draws the results onto it.
+	void detect(Mat& frame);
+
+private:
+	// Anchor sizes as (width, height) pairs, three pairs per output scale.
+	const float anchors[3][6] = {
+		{10.0, 13.0, 16.0, 30.0, 33.0, 23.0},
+		{30.0, 61.0, 62.0, 45.0, 59.0, 119.0},
+		{116.0, 90.0, 156.0, 198.0, 373.0, 326.0}
+	};
+	// Stride of each output scale relative to the network input.
+	const float stride[3] = { 8.0, 16.0, 32.0 };
+	// Text file holding one class name per line.
+	const string classesFile = "coco.names";
+	// Network input size in pixels.
+	const int inpWidth = 640;
+	const int inpHeight = 640;
+
+	// Thresholds copied from the Net_config given to the constructor.
+	float confThreshold;
+	float nmsThreshold;
+	float objThreshold;
+
+	char netname[20];        // model name as a C string
+	vector<string> classes;  // class names read from classesFile
+	Net net;                 // loaded network
+
+	// Draws one box and its label on `frame`.
+	void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
+	// Applies the logistic function in place to the first `length` floats of `*out`.
+	void sigmoid(Mat* out, int length);
+};
+
+// Logistic function 1 / (1 + e^-x) for a single value.
+static inline float sigmoid_x(float x)
+{
+	const auto denominator = 1.f + exp(-x);
+	return static_cast<float>(1.f / denominator);
+}
+
+// Ready-made configurations for the four supported model sizes, in the order
+// s, m, l, x. Fields are confThreshold, nmsThreshold, objThreshold, netname.
+// Declared `static` because this definition lives in a header: internal
+// linkage avoids multiple-definition link errors when the header is included
+// from more than one translation unit.
+static Net_config yolo_nets[4] = {
+	{0.5f, 0.5f, 0.5f, "yolov5s"},
+	{0.5f, 0.5f, 0.5f, "yolov5m"},
+	{0.5f, 0.5f, 0.5f, "yolov5l"},
+	{0.5f, 0.5f, 0.5f, "yolov5x"}
+};