-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvideo.py
185 lines (147 loc) · 6.02 KB
/
video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import easyocr
import cv2
import logging
from matplotlib import pyplot as plt
import os
# Configure logging
# `import logging` alone does not load the `logging.config` submodule, so it
# is imported explicitly before `dictConfig` is used below.
import logging.config

# Root logger: DEBUG level, timestamped "time - level - message" lines.
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')
# `disable_existing_loggers` turns off every non-root logger that already
# exists when this runs -- presumably to silence the third-party libraries
# imported above now that the root level is DEBUG (confirm the intent).
logging.config.dictConfig({
    'version': 1,
    'disable_existing_loggers': True,
})
# Import custom modules
import bbox
import subtitle
import search_sub
# Define text extractor class
class TextExtractor:
    """Static helpers around EasyOCR text recognition and bounding-box display.

    Annotations that name third-party types are written as strings so they
    are not evaluated when the class is defined.
    NOTE(review): ``cv2.Mat`` is reportedly absent from some older
    opencv-python releases -- confirm the minimum supported version.
    """

    @staticmethod
    def find_image_shape(img: "cv2.Mat") -> tuple:
        """
        Returns the shape of an image.
        Args:
            img: The input image.
        Returns:
            ``img.shape`` unchanged.
            NOTE(review): for a colour image this is presumably
            (height, width, channels), not only height and width --
            confirm against callers.
        """
        return img.shape

    @staticmethod
    def load_easyocr(languages: "list[str] | tuple[str, ...]" = ("hi", "en"),
                     gpu: bool = True) -> "easyocr.Reader":
        """
        Loads the EasyOCR text recognition model.
        Args:
            languages: EasyOCR language codes to load. Defaults to
                ``("hi", "en")`` (Hindi and English), the pair that was
                previously hard-coded.
            gpu: Forwarded as the ``gpu`` argument of ``easyocr.Reader``.
                Defaults to True, the previously hard-coded value.
        Returns:
            A new ``easyocr.Reader`` instance.
        """
        # The reader receives a fresh list, so the caller's sequence (or the
        # shared default tuple) is never handed over directly.
        return easyocr.Reader(list(languages), gpu=gpu)

    @staticmethod
    def infer_from_model(image: "cv2.Mat", reader: "easyocr.Reader") -> list:
        """
        Recognizes text in an image using an EasyOCR reader.
        Args:
            image: The input image.
            reader: The EasyOCR text recognition model.
        Returns:
            Whatever ``reader.readtext`` returns for this image. The other
            helpers in this class treat each element as a
            (bounding box, text, probability) triple.
        """
        # detail=1 requests the full per-detection result; detail=0 would
        # give simple text output only. paragraph=False is passed explicitly.
        return reader.readtext(image, detail=1, paragraph=False)

    @staticmethod
    def extract_bbox_from_details(details: list) -> list:
        """
        Extracts bounding boxes from recognition details.
        Args:
            details: The recognition details; the first item of every
                element is taken as its bounding box.
        Returns:
            A list of bounding boxes, in the same order as ``details``.
        """
        return [detail[0] for detail in details]

    @staticmethod
    def show_bbox_details(details: list, img: "cv2.Mat", display: bool) -> None:
        """
        Draws every recognised bounding box and its text, then optionally
        shows the image.
        Args:
            details: The recognition details, iterated as
                (bounding box, text, probability) triples.
            img: The image handed to ``bbox.Bbox.draw_bbox_2_colors`` for
                every detection.
            display: Whether to show the image with matplotlib afterwards.
        """
        # The probability is part of each triple but is not used for drawing.
        for box, text, _prob in details:
            bbox.Bbox.draw_bbox_2_colors(box, img, text, ((0, 0, 255), (255, 0, 255)))
        # Show once, after all boxes have been handed to the drawing helper.
        if display:
            plt.imshow(img)
            plt.show()
# Define obstruction calculation function
def obstruction_from_image(img: "cv2.Mat", reader: "easyocr.Reader", subbox: list) -> float:
    """
    Calculates the obstruction reported for an image, relative to its area.
    Args:
        img: The input image; ``img.shape[0] * img.shape[1]`` is used as
            the image area.
        reader: The EasyOCR text recognition model.
        subbox: The subbox representing the area of interest; passed to
            ``bbox.Bbox.find_obstruction`` together with the detections.
    Returns:
        The value returned by ``bbox.Bbox.find_obstruction`` divided by the
        image area, or 0.0 when the image has zero area.
    """
    # Annotations are strings so cv2/easyocr attributes are not evaluated at
    # definition time.
    frame_area = img.shape[0] * img.shape[1]
    if frame_area == 0:
        # Nothing to recognise, and the ratio below would divide by zero.
        return 0.0

    detections = TextExtractor.infer_from_model(img, reader)
    # display=False: the detections are still passed to the drawing helper
    # together with `img`, but no window is shown.
    TextExtractor.show_bbox_details(detections, img, False)
    absolute_obstruction = bbox.Bbox.find_obstruction(detections, subbox)
    return absolute_obstruction / frame_area
# Define video analysis function
def analye_video(cam: "cv2.VideoCapture", subbox: list, reader: "easyocr.Reader",
                 display_image: bool, save_image: bool, every_what_frame: int,
                 list_1sub: list) -> list:
    """
    Samples frames from a video and reports those with a positive obstruction.

    Every ``every_what_frame``-th frame is passed to
    ``obstruction_from_image``; frames whose result is greater than zero are
    recorded, rendered through ``subtitle.Subtitle.show_sub`` and optionally
    saved under ``./position_issue_frames``.

    Args:
        cam: The opened video capture. It is released before this function
            returns, including when an error interrupts processing.
        subbox: The subtitle box. A shallow copy is used for each sampled
            frame; it is replaced by an all-zero box when
            ``search_sub.search_text_in_frame`` returns an empty result.
        reader: The EasyOCR text recognition model.
        display_image: Forwarded to ``subtitle.Subtitle.show_sub``.
        save_image: Whether to save each issue frame as a PNG.
        every_what_frame: Sampling interval in frames; must be >= 1.
        list_1sub: Forwarded to ``search_sub.search_text_in_frame``
            (presumably the parsed subtitle entries -- verify against caller).
    Returns:
        The 1-based frame numbers whose obstruction was greater than zero.
    Raises:
        ValueError: If ``every_what_frame`` is smaller than 1.
    """
    # Reject a non-positive interval up front instead of failing later with
    # a modulo-by-zero in the middle of the video.
    if every_what_frame < 1:
        raise ValueError(f"every_what_frame must be >= 1, got {every_what_frame}")

    logging.info("Starting video analysis")
    logging.debug(f"Initial subtitle box: {subbox}")

    output_dir = './position_issue_frames'
    try:
        newly_created = not os.path.isdir(output_dir)
        # exist_ok avoids failing if the directory appears between the check
        # and the creation.
        os.makedirs(output_dir, exist_ok=True)
        if newly_created:
            logging.info("Created directory for position issue frames")
    except OSError as e:
        print('Error: Creating directory of position_issue_frames')
        logging.error(e)

    issue_frames = []
    obs_arr = []
    frame_count = 0

    try:
        # Process video frames
        while True:
            ret, frame = cam.read()
            if not ret:
                logging.info("Video analysis complete")
                break

            frame_count += 1
            # Process only every `every_what_frame` frames
            if frame_count % every_what_frame != 0:
                continue

            logging.debug(f"Processing frame {frame_count}")
            subbox_for_i_frame = subbox.copy()

            # Look up the text for the current frame
            text = search_sub.search_text_in_frame(frame_count, list_1sub)
            # If no text is found, reset the subtitle box to zeros
            if len(text) == 0:
                logging.debug("No text found in frame, resetting subtitle box")
                subbox_for_i_frame = [[[0, 0], [0, 0], [0, 0], [0, 0]]]
                logging.debug(f"Updated subtitle box: {subbox_for_i_frame}")

            # Calculate obstruction
            obs = obstruction_from_image(frame, reader, subbox_for_i_frame)
            obs_arr.append(obs)
            time_sec = _frame_time_seconds(cam, frame_count)
            logging.debug(f"Obstruction: {obs}, Frame: {frame_count}, Time: {time_sec} sec")

            if obs > 0:
                logging.warning(f"Obstruction issue detected in frame {frame_count}, Time: {time_sec} sec")
                issue_frames.append(frame_count)
                # NOTE(review): the result is used via `.savefig`, so show_sub
                # presumably returns a matplotlib figure-like object -- confirm.
                figure = subtitle.Subtitle.show_sub(subbox_for_i_frame, frame, display_image)
                if save_image:
                    logging.debug(f"Saving issue frame {frame_count}")
                    figure.savefig(f"./position_issue_frames/frame_{frame_count}.png", bbox_inches='tight')
    finally:
        # Release camera object even if processing a frame raised
        cam.release()

    # Calculate average obstruction; skipped when no frame was sampled, which
    # would otherwise divide by zero.
    if obs_arr:
        avg_obs_frame = sum(obs_arr) / len(obs_arr)
        logging.debug(f"Average obstruction per frame: {avg_obs_frame}")
    else:
        logging.debug("No frames were sampled; average obstruction not computed")
    return issue_frames


def _frame_time_seconds(cam: "cv2.VideoCapture", frame_index: int) -> float:
    """Convert a frame number to seconds using the capture's reported FPS (NaN if the FPS is not positive)."""
    fps = cam.get(cv2.CAP_PROP_FPS)
    return frame_index / fps if fps > 0 else float("nan")


# Correctly spelled alias; the original name is kept for existing callers.
analyze_video = analye_video