@@ -11,42 +11,53 @@ void TRTFaceFusionFaceRestoration::detect(cv::Mat &face_swap_image, std::vector<
11
11
12
12
cv::Mat crop_image;
13
13
cv::Mat affine_matrix;
14
- std::tie (crop_image,affine_matrix) = face_utils::warp_face_by_face_landmark_5 (face_swap_image,target_landmarks_5,face_utils::FFHQ_512);
14
+ // 记录时间
15
+ auto start_warp = std::chrono::high_resolution_clock::now ();
16
+ std::tie (crop_image,affine_matrix) = face_utils::warp_face_by_face_landmark_5 (face_swap_image,target_landmarks_5,
17
+ face_utils::FFHQ_512);
15
18
16
19
std::vector<float > crop_size = {512 ,512 };
17
20
cv::Mat box_mask = face_utils::create_static_box_mask (crop_size);
18
21
std::vector<cv::Mat> crop_mask_list;
19
22
crop_mask_list.emplace_back (box_mask);
20
23
21
- cv::cvtColor (crop_image,crop_image,cv::COLOR_BGR2RGB);
22
- crop_image.convertTo (crop_image,CV_32FC3,1 .f / 255 .f );
23
- crop_image.convertTo (crop_image,CV_32FC3,2 .0f ,-1 .f );
24
+ cv::Mat crop_image_rgb;
25
+ launch_bgr2rgb (crop_image,crop_image_rgb);
26
+ crop_image_rgb.convertTo (crop_image_rgb,CV_32FC3,1 .f / 255 .f );
27
+ crop_image_rgb.convertTo (crop_image_rgb,CV_32FC3,2 .0f ,-1 .f );
24
28
25
29
std::vector<float > input_vector;
26
- trtcv::utils::transform::create_tensor (crop_image ,input_vector,input_node_dims,trtcv::utils::transform::CHW);
30
+ trtcv::utils::transform::create_tensor (crop_image_rgb ,input_vector,input_node_dims,trtcv::utils::transform::CHW);
27
31
28
- // 拷贝
32
+ auto end_warp = std::chrono::high_resolution_clock::now ();
33
+ std::chrono::duration<double , std::milli> fp_ms_warp = end_warp - start_warp;
34
+ std::cout << " FaceRestoration preprocess time: " << fp_ms_warp.count () << " ms" << std::endl;
29
35
36
+
37
+ // 记录时间
38
+ auto start = std::chrono::high_resolution_clock::now ();
30
39
// 先不用拷贝了 处理完成再拷贝出来 类似于整个后处理放在GPU上完成
31
40
cudaMemcpyAsync (buffers[0 ],input_vector.data (),1 * 3 * 512 * 512 * sizeof (float ),cudaMemcpyHostToDevice,stream);
32
-
33
41
// 同步
34
42
cudaStreamSynchronize (stream);
35
-
36
43
// 推理
37
44
bool status = trt_context->enqueueV3 (stream);
45
+
38
46
if (!status) {
39
47
std::cerr << " Failed to inference" << std::endl;
40
48
return ;
41
49
}
42
-
43
-
44
50
// 同步
45
51
cudaStreamSynchronize (stream);
52
+ auto end = std::chrono::high_resolution_clock::now ();
53
+ std::chrono::duration<double , std::milli> fp_ms = end - start;
54
+ std::cout << " FaceRestoration Inference time: " << fp_ms.count () << " ms" << std::endl;
46
55
std::vector<unsigned char > transposed_data (1 * 3 * 512 * 512 );
47
56
48
57
// std::vector<float> transposed_data(1 * 3 * 512 * 512);
49
58
59
+ // 记录时间
60
+ auto start_postprocess = std::chrono::high_resolution_clock::now ();
50
61
// 这里buffer1就是输出了
51
62
launch_face_restoration_postprocess (
52
63
static_cast <float *>(buffers[1 ]),
@@ -64,47 +75,31 @@ void TRTFaceFusionFaceRestoration::detect(cv::Mat &face_swap_image, std::vector<
64
75
std::vector<float > output_vector (1 * 3 * 512 * 512 );
65
76
// cudaMemcpyAsync(output_vector.data(),buffers[1],1 * 3 * 512 * 512 * sizeof(float),cudaMemcpyDeviceToHost,stream);
66
77
cudaStreamSynchronize (stream);
67
- //
68
78
// 后处理
69
79
int channel = 3 ;
70
80
int height = 512 ;
71
81
int width = 512 ;
72
- // std::vector<float> output(channel * height * width);
73
- // output.assign(output_vector.begin(),output_vector.end());
74
- //
75
- // std::transform(output.begin(),output.end(),output.begin(),
76
- // [](double x){return std::max(-1.0,std::max(-1.0,std::min(1.0,x)));});
77
- //
78
- // std::transform(output.begin(),output.end(),output.begin(),
79
- // [](double x){return (x + 1.f) /2.f;});
80
- //
81
- // // CHW2HWC
82
- // for (int c = 0; c < channel; ++c){
83
- // for (int h = 0 ; h < height; ++h){
84
- // for (int w = 0; w < width ; ++w){
85
- // int src_index = c * (height * width) + h * width + w;
86
- // int dst_index = h * (width * channel) + w * channel + c;
87
- // transposed_data[dst_index] = output[src_index];
88
- // }
89
- // }
90
- // }
91
- //
92
- // std::transform(transposed_data.begin(),transposed_data.end(),transposed_data.begin(),
93
- // [](float x){return std::round(x * 255.f);});
94
- //
95
- // std::transform(transposed_data.begin(), transposed_data.end(), transposed_data.begin(),
96
- // [](float x) { return static_cast<uint8_t>(x); });
97
82
98
83
99
84
cv::Mat mat (height, width, CV_32FC3, transposed_data_float.data ());
100
- // cv::imwrite("/home/lite.ai.toolkit/mid_process.jpg",mat);
101
85
cv::cvtColor (mat, mat, cv::COLOR_RGB2BGR);
86
+ // 到这里为止基本不耗时
102
87
103
88
104
89
auto crop_mask = crop_mask_list[0 ];
105
- cv::Mat paste_frame = face_utils::paste_back (ori_image,mat,crop_mask,affine_matrix);
106
-
90
+ // 这里的paste_back 40ms左右
91
+ cv::Mat paste_frame = launch_paste_back (ori_image,mat,crop_mask,affine_matrix);
92
+ // cv::Mat paste_frame = face_utils::paste_back(ori_image,mat,crop_mask,affine_matrix);
107
93
cv::Mat dst_image = face_utils::blend_frame (ori_image,paste_frame);
94
+ auto end_postprocess = std::chrono::high_resolution_clock::now ();
95
+ std::chrono::duration<double , std::milli> fp_ms_postprocess = end_postprocess - start_postprocess;
96
+ std::cout << " FaceRestoration postprocess time: " << fp_ms_postprocess.count () << " ms" << std::endl;
108
97
98
+ // 记录时间
99
+ auto start_save = std::chrono::high_resolution_clock::now ();
109
100
cv::imwrite (face_enchaner_path,dst_image);
101
+ auto end_save = std::chrono::high_resolution_clock::now ();
102
+ std::chrono::duration<double , std::milli> fp_ms_save = end_save - start_save;
103
+ std::cout << " FaceRestoration save time: " << fp_ms_save.count () << " ms" << std::endl;
104
+
110
105
}
0 commit comments