-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrun_image.py
248 lines (198 loc) · 10.2 KB
/
run_image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------------------------------------------------
#%% Imports
import argparse
from time import perf_counter
import cv2
import numpy as np
import torch
from lib.make_dpt import make_dpt_from_state_dict
from lib.demo_helpers.history_keeper import HistoryKeeper
from lib.demo_helpers.loading import ask_for_path_if_missing, ask_for_model_path_if_missing
from lib.demo_helpers.ui import SliderCB, ColormapButtonsCB, ButtonBar, ScaleByKeypress
from lib.demo_helpers.visualization import DisplayWindow, histogram_equalization
from lib.demo_helpers.plane_fit import estimate_plane_of_best_fit
from lib.demo_helpers.saving import save_image, save_numpy_array, save_uint16
from lib.demo_helpers.misc import (
get_default_device_string, make_device_config, print_config_feedback, reduce_overthreading
)
# ---------------------------------------------------------------------------------------------------------------------
#%% Set up script args

# Fallback values used when an option is not supplied on the command line
default_device = get_default_device_string()
default_image_path = None
default_model_path = None
default_display_size = 800
default_base_size = None

# Build the command line interface
parser = argparse.ArgumentParser(description="Script used to run MiDaS DPT depth-estimation on a single image")
parser.add_argument(
    "-i",
    "--image_path",
    default=default_image_path,
    help="Path to image to run depth estimation on",
)
parser.add_argument(
    "-m",
    "--model_path",
    default=default_model_path,
    type=str,
    help="Path to DPT model weights",
)
parser.add_argument(
    "-s",
    "--display_size",
    default=default_display_size,
    type=int,
    help=f"Controls size of displayed results (default: {default_display_size})",
)
parser.add_argument(
    "-d",
    "--device",
    default=default_device,
    type=str,
    help="Device to use when running model (ex: 'cpu', 'cuda', 'mps')",
)
parser.add_argument(
    "-f32",
    "--use_float32",
    action="store_true",  # 'store_true' already defaults to False
    help="Use 32-bit floating point model weights. Note: this doubles VRAM usage",
)
parser.add_argument(
    "-ar",
    "--use_aspect_ratio",
    action="store_true",  # 'store_true' already defaults to False
    help="Process the image at it's original aspect ratio, if the model supports it",
)
parser.add_argument(
    "-b",
    "--base_size_px",
    default=default_base_size,
    type=int,
    help="Override base (e.g. 384, 512) model size",
)

# Unpack the parsed arguments into plain variables used by the rest of the script
args = parser.parse_args()
arg_image_path, arg_model_path = args.image_path, args.model_path
display_size_px, device_str = args.display_size, args.device
use_float32 = args.use_float32
model_base_size = args.base_size_px

# The flag asks for the original aspect ratio, so square processing is its negation
force_square_resolution = not args.use_aspect_ratio

# Caching is hard-coded off, since there is no benefit when the model only runs once
use_cache = False

# Device/dtype settings, later unpacked as keyword arguments to '.to(...)'
device_config_dict = make_device_config(device_str, use_float32)

# Look up previously used paths, so they can be offered again
history = HistoryKeeper()
_, history_imgpath = history.read("image_path")
_, history_modelpath = history.read("model_path")

# Resolve resource paths, falling back to asking when none was given as an argument
image_path = ask_for_path_if_missing(arg_image_path, "image", history_imgpath)
model_path = ask_for_model_path_if_missing(__file__, arg_model_path, history_modelpath)

# Remember the resolved paths for the next run
history.store(image_path=image_path, model_path=model_path)

# Improve cpu utilization
reduce_overthreading(device_str)
# ---------------------------------------------------------------------------------------------------------------------
#%% Load resources

# Load model & image pre-processor
print("", "Loading model weights...", " @ {}".format(model_path), sep="\n", flush=True)
model_config_dict, dpt_model, dpt_imgproc = make_dpt_from_state_dict(model_path, use_cache)
if model_base_size is not None:
    # Only override the pre-processor sizing when the user explicitly asked for it
    dpt_imgproc.set_base_size(model_base_size)

# Move model to selected device and switch it to evaluation mode
dpt_model.to(**device_config_dict)
dpt_model.eval()

# Load image. cv2.imread signals failure by returning None instead of raising,
# so check explicitly. A real exception is used (not 'assert') so that the check
# still runs when python is started with -O, which strips assert statements
orig_image_bgr = cv2.imread(image_path)
if orig_image_bgr is None:
    raise OSError(f"Error loading image: {image_path}")

# Apply model preprocessing and report the configuration in use
img_tensor = dpt_imgproc.prepare_image_bgr(orig_image_bgr, force_square_resolution)
print_config_feedback(model_path, device_config_dict, use_cache, img_tensor)

# Prepare original image for display (and get sizing info)
scaled_input_img = dpt_imgproc.scale_to_max_side_length(orig_image_bgr, display_size_px)
disp_h, disp_w = scaled_input_img.shape[0:2]
disp_wh = (int(disp_w), int(disp_h))
# ---------------------------------------------------------------------------------------------------------------------
#%% Run model

# Timing covers inference plus the display post-processing below
t1 = perf_counter()

# Run the model on the selected device
print("", "Computing inverse depth...", sep="\n", flush=True)
img_tensor = img_tensor.to(**device_config_dict)
prediction = dpt_model.inference(img_tensor)

# Perform some post-processing to prepare for display
# Each step feeds the *returned* value into the next one, so the infinity removal
# is applied before normalization regardless of whether the helper also modifies
# its input in-place (previously the cleaned result was discarded and the
# un-cleaned tensor was normalized instead)
scaled_prediction = dpt_imgproc.scale_prediction(prediction, disp_wh)
depth_norm = dpt_imgproc.remove_infinities(scaled_prediction)
# Convert to a float32 numpy array on the cpu (in case it was on GPU), dropping size-1 dims
depth_norm = dpt_imgproc.normalize_01(depth_norm).float().cpu().numpy().squeeze()
t2 = perf_counter()
print(" -> Took", round(1000*(t2-t1), 1), "ms")

# Provide memory usage feedback, if using cuda GPU
if device_str == "cuda":
    peak_vram_mb = torch.cuda.max_memory_allocated() // 1_000_000
    print(" -> Peak VRAM:", peak_vram_mb, "MB")
# ---------------------------------------------------------------------------------------------------------------------
#%% Display results

# Fit a plane to the depth data, so it can (optionally) be subtracted during display
plane_depth = estimate_plane_of_best_fit(depth_norm)

# State shared with the display loop: the last plane-removal factor that was applied
# (None means 'nothing applied yet') and the single-channel depth image being shown
prev_plane_removal_factor = None
depth_1ch = depth_norm

# Button bar: two toggles and a save button, each bound to a key
btnbar = ButtonBar()
toggle_reverse_color = btnbar.add_toggle(
    "[r] Reversed",
    "[r] Normal Order",
    keypress="r",
    default=False,
)
toggle_high_contrast = btnbar.add_toggle(
    "[h] High Contrast",
    "[h] Normal Contrast",
    keypress="h",
    default=False,
)
btn_save = btnbar.add_button("[s] Save", keypress="s")

# Colormap selector: three built-in OpenCV maps plus two custom ones
gray_cmap = ColormapButtonsCB.make_gray_colormap()
spec_cmap = ColormapButtonsCB.make_spectral_colormap()
cmap_btns = ColormapButtonsCB(
    cv2.COLORMAP_MAGMA,
    cv2.COLORMAP_VIRIDIS,
    cv2.COLORMAP_TWILIGHT,
    spec_cmap,
    gray_cmap,
)

# Sliders for plane removal and min/max display thresholds
plane_slider = SliderCB("Remove plane", 0, -1, 2, 0.01, marker_step_size=0.5)
min_slider = SliderCB("Min Threshold", 0, 0, 1, 0.01, marker_step_size=0.1)
max_slider = SliderCB("Max Threshold", 1, 0, 1, 0.01, marker_step_size=0.1)

# Keyboard-driven display resizing
display_scaler = ScaleByKeypress()

# Close any existing windows, then create the display window and attach the controls
cv2.destroyAllWindows()
window = DisplayWindow("Inverse Depth Result - q to quit")
window.set_callbacks(btnbar, cmap_btns, plane_slider, min_slider, max_slider)

# Tell the user how to operate the controls
controls_feedback = "\n".join(
    [
        "",
        "Displaying results",
        " - Click and drag bars to adjust display",
        " - Right click on bars to reset values",
        " - Use up/down arrow keys to adjust display size",
        " - Press esc or q to quit",
        "",
    ]
)
print(controls_feedback, flush=True)
# Interactive display loop. Wrapped in try/finally so the OpenCV windows are always
# closed, even if the loop is interrupted (e.g. ctrl+c) or an error is raised
try:
    while True:

        # Read controls
        plane_removal_factor = plane_slider.read()
        thresh_min = min_slider.read()
        thresh_max = max_slider.read()
        use_high_contrast = toggle_high_contrast.read()
        use_reverse_colors = toggle_reverse_color.read()

        # Re-calculate depth image only if plane removal changes
        # (always true on the first pass, since the previous factor starts as None)
        removal_factor_changed = (plane_removal_factor != prev_plane_removal_factor)
        if removal_factor_changed:
            depth_1ch = depth_norm - (plane_depth * plane_removal_factor)
            depth_1ch = dpt_imgproc.normalize_01(depth_1ch)
            prev_plane_removal_factor = plane_removal_factor

        # Make sure we actually get min < max thresholds & non-zero delta to avoid divide-by-zero
        thresh_min, thresh_max = sorted([thresh_min, thresh_max])
        thresh_delta = max(0.001, thresh_max - thresh_min)
        depth_thresholded = np.clip((depth_1ch - thresh_min) / thresh_delta, 0.0, 1.0)

        # Produce colored depth image for display
        depth_uint8 = np.uint8(np.round(255.0*depth_thresholded))
        if use_high_contrast:
            depth_uint8 = histogram_equalization(depth_uint8, thresh_min, thresh_max)
        if use_reverse_colors:
            depth_uint8 = 255 - depth_uint8
        depth_color = cmap_btns.apply_colormap(depth_uint8)

        # Generate display image: button controls / colormaps / side-by-side images / sliders
        sidebyside_display = display_scaler.resize(np.hstack((scaled_input_img, depth_color)))
        display_frame = btnbar.draw_standalone(sidebyside_display.shape[1])
        display_frame = cmap_btns.append_to_frame(display_frame)
        display_frame = np.vstack((display_frame, sidebyside_display))
        display_frame = SliderCB.append_many_to_frame(
            display_frame,
            plane_slider,
            min_slider,
            max_slider,
        )

        # Update displayed image
        window.imshow(display_frame)
        req_break, keypress = window.waitKey(20)
        if req_break:
            break

        # Handle keypresses
        display_scaler.on_keypress(keypress)
        btnbar.on_keypress(keypress)

        if btn_save.read():

            # Apply the current display modifications to the raw (un-scaled) prediction for saving
            # NOTE(review): the clone presumably protects 'prediction' from in-place changes
            # made by remove_infinities - confirm against the helper
            npy_prediction = dpt_imgproc.remove_infinities(prediction.clone())
            npy_prediction = dpt_imgproc.normalize_01(npy_prediction).float().cpu().numpy().squeeze()
            npy_prediction = npy_prediction - (plane_removal_factor * estimate_plane_of_best_fit(npy_prediction))
            npy_prediction = dpt_imgproc.normalize_01(npy_prediction)
            npy_prediction = np.clip((npy_prediction - thresh_min) / thresh_delta, 0.0, 1.0)
            if use_reverse_colors:
                npy_prediction = 1.0 - npy_prediction

            # Save data! (the numpy/uint16 saves are given the image save path)
            ok_img_save, save_img_path = save_image(depth_color, image_path)
            ok_npy_save, save_npy_path = save_numpy_array(npy_prediction, save_img_path)
            ok_uint16_save, save_uint16_path = save_uint16(npy_prediction, save_img_path)

            # Report only the files whose save reported success, so a failed image save
            # is not listed as saved just because one of the other saves worked
            save_results = (
                (ok_img_save, save_img_path),
                (ok_npy_save, save_npy_path),
                (ok_uint16_save, save_uint16_path),
            )
            saved_paths = [path for ok, path in save_results if ok]
            if saved_paths:
                print("", "SAVED:", *saved_paths, sep="\n")

finally:
    # Clean up windows
    cv2.destroyAllWindows()