-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathwarp.py
171 lines (150 loc) · 5.22 KB
/
warp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import numpy as np
import os
import sys
import tensorflow as tf
from PIL import Image
import cv2
import matplotlib.colors as cl
import matplotlib.pyplot as plt
# WARNING: .flo files store little-endian data; reading them with native-endian
# dtypes works on little-endian architectures (e.g. Intel x86) only!
# Warp an image using SciPy interpolation.
def warp_image(im, flow):
    """
    Use optical flow to warp image to the next
    :param im: image to warp, array of shape (height, width, channels)
    :param flow: optical flow, array of shape (height, width, 2) with the same
        height/width as ``im``; channel 0 is the x (column) displacement and
        channel 1 is the y (row) displacement
    :return: warped image as a uint8 array of shape (height, width, channels);
        pixels whose sample position falls outside the image are set to 1
    """
    from scipy import interpolate

    image_height, image_width = im.shape[0], im.shape[1]
    flow_height, flow_width = flow.shape[0], flow.shape[1]
    n = image_height * image_width

    (iy, ix) = np.mgrid[0:image_height, 0:image_width]
    (fy, fx) = np.mgrid[0:flow_height, 0:flow_width]
    fx = fx.astype(np.float64) + flow[:, :, 0]
    fy = fy.astype(np.float64) + flow[:, :, 1]

    # The last valid sample coordinate is size - 1, not size.  Anything past
    # it lies outside the convex hull of the source grid, where griddata
    # returns NaN, so it has to be treated as out of bounds.
    max_x = flow_width - 1
    max_y = flow_height - 1
    mask = (fx < 0) | (fx > max_x) | (fy < 0) | (fy > max_y)
    fx = np.clip(fx, 0, max_x)
    fy = np.clip(fy, 0, max_y)

    points = np.concatenate((ix.reshape(n, 1), iy.reshape(n, 1)), axis=1)
    xi = np.concatenate((fx.reshape(n, 1), fy.reshape(n, 1)), axis=1)

    warp = np.zeros((image_height, image_width, im.shape[2]), dtype=np.uint8)
    for i in range(im.shape[2]):
        values = im[:, :, i].reshape(n, 1)
        new_channel = interpolate.griddata(points, values, xi, method='cubic')
        new_channel = np.reshape(new_channel, [flow_height, flow_width])
        # Out-of-bounds samples (and any NaN the interpolator still produced,
        # e.g. from NaN flow values) get the fill value 1.
        new_channel[mask | np.isnan(new_channel)] = 1
        # Cubic interpolation can overshoot [0, 255] and carries float
        # round-off; round and clip so the uint8 cast neither wraps around
        # nor truncates e.g. 9.999999 down to 9.
        warp[:, :, i] = np.clip(np.rint(new_channel), 0, 255).astype(np.uint8)
    return warp
def get_flow(filename):
    """
    Read an optical-flow field from a .flo file.
    :param filename: path of the .flo file to read
    :return: float32 array of shape (1, 2, h, w) (the (h, w, 2) payload with
        the two flow components moved to axis 1), or None when the file does
        not start with the .flo magic number
    :raises ValueError: if the header is incomplete, the dimensions are not
        positive, or the file holds fewer than 2 * w * h flow values
    """
    with open(filename, 'rb') as f:
        # Explicit little-endian dtypes keep the reader correct on any host.
        magic = np.fromfile(f, '<f4', count=1)
        if magic.size != 1 or magic[0] != 202021.25:
            print('Magic number incorrect. Invalid .flo file')
            return None
        dims = np.fromfile(f, '<i4', count=2)
        if dims.size != 2:
            raise ValueError('Incomplete .flo header in %s' % filename)
        # Plain Python ints: 1-element arrays are not valid as `count` or as
        # %d arguments on current NumPy versions.
        w, h = int(dims[0]), int(dims[1])
        print('Reading %d x %d flo file' % (w, h))
        if w <= 0 or h <= 0:
            raise ValueError('Invalid .flo dimensions %d x %d' % (w, h))
        data = np.fromfile(f, '<f4', count=2 * w * h)
    if data.size != 2 * w * h:
        # np.resize would silently tile a short payload; fail loudly instead.
        raise ValueError('Truncated .flo file %s: expected %d values, got %d'
                         % (filename, 2 * w * h, data.size))
    # Reshape data into a 4D array (batch, rows, columns, bands), then move
    # the bands axis in front of the spatial axes.
    data2D = data.reshape(1, h, w, 2).astype(np.float32, copy=False)
    data2D = np.transpose(data2D, [0, 3, 1, 2])
    return data2D
def get_pixel_value(img, x, y):
    """
    Gather pixels from a batch of images at per-pixel integer coordinates.

    Input
    -----
    - img: tensor of shape (B, H, W, C)
    - x: integer tensor of column indices; indexed here as rank 3, (B, H, W)
    - y: integer tensor of row indices, same shape as x

    Returns
    -------
    - output: tensor of shape (B, H, W, C)
    """
    coord_shape = tf.shape(x)
    num_batches, rows, cols = coord_shape[0], coord_shape[1], coord_shape[2]

    # Per-pixel batch index so every (y, x) pair reads from its own image.
    batch_column = tf.reshape(tf.range(0, num_batches), (num_batches, 1, 1))
    batch_grid = tf.tile(batch_column, (1, rows, cols))

    # The last axis holds (batch, row, column) triples for gather_nd.
    gather_index = tf.stack([batch_grid, y, x], 3)
    return tf.gather_nd(img, gather_index)
def tf_warp(img, flow, H, W):
    """
    Warp a batch of images with a dense flow field using bilinear sampling.

    Output pixel (row, col) is sampled from ``img`` at
    (col + flow[:, 0], row + flow[:, 1]).  Corners of the interpolation cell
    that fall outside the image contribute zero, so samples far outside the
    image are 0 while samples exactly on the last row/column keep their value.

    Input
    -----
    - img: tensor of shape (B, H, W, C); it is multiplied by float32 weights,
      so it is expected to be float32
    - flow: tensor or array of shape (B, 2, H, W); channel 0 is the x
      displacement, channel 1 the y displacement
    - H: image height in pixels
    - W: image width in pixels

    Returns
    -------
    - out: tensor of shape (B, H, W, C)
    """
    # Base sampling grid of shape (1, 2, H, W): channel 0 = x, channel 1 = y.
    x, y = tf.meshgrid(tf.range(W), tf.range(H))
    x = tf.expand_dims(tf.expand_dims(x, 0), 0)
    y = tf.expand_dims(tf.expand_dims(y, 0), 0)
    grid = tf.concat([tf.cast(x, tf.float32), tf.cast(y, tf.float32)], axis=1)

    flows = grid + flow
    print(flows.shape)  # debug trace kept from the original implementation

    max_y = tf.cast(H - 1, tf.int32)
    max_x = tf.cast(W - 1, tf.int32)
    zero = tf.zeros([], dtype=tf.int32)

    # Clamp to [-1, size]: everything beyond that range has both neighbours
    # outside the image anyway, and clamping keeps the int32 cast in range.
    x = tf.clip_by_value(flows[:, 0, :, :], -1.0, tf.cast(W, tf.float32))
    y = tf.clip_by_value(flows[:, 1, :, :], -1.0, tf.cast(H, tf.float32))

    # Use floor (not truncation) so negative fractional coordinates pick the
    # cell to their left/top and the weights stay within [0, 1].
    x0_f = tf.floor(x)
    y0_f = tf.floor(y)
    x1_f = x0_f + 1.0
    y1_f = y0_f + 1.0
    x0 = tf.cast(x0_f, tf.int32)
    y0 = tf.cast(y0_f, tf.int32)
    x1 = x0 + 1
    y1 = y0 + 1

    def _inside(idx, upper):
        # 1.0 where the corner index lies inside [0, upper], else 0.0.
        ok = tf.logical_and(tf.greater_equal(idx, zero),
                            tf.less_equal(idx, upper))
        return tf.cast(ok, tf.float32)

    # Per-axis weights come from the UNCLIPPED corners.  Deriving them from
    # the clipped indices makes both corners coincide on the last row/column,
    # so every weight cancels and the border turns black.
    wx0 = (x1_f - x) * _inside(x0, max_x)
    wx1 = (x - x0_f) * _inside(x1, max_x)
    wy0 = (y1_f - y) * _inside(y0, max_y)
    wy1 = (y - y0_f) * _inside(y1, max_y)

    # clip indices to [0, W-1] / [0, H-1] to not violate img boundaries;
    # clipped corners already carry zero weight
    x0 = tf.clip_by_value(x0, zero, max_x)
    x1 = tf.clip_by_value(x1, zero, max_x)
    y0 = tf.clip_by_value(y0, zero, max_y)
    y1 = tf.clip_by_value(y1, zero, max_y)

    # get pixel value at corner coords
    Ia = get_pixel_value(img, x0, y0)
    Ib = get_pixel_value(img, x0, y1)
    Ic = get_pixel_value(img, x1, y0)
    Id = get_pixel_value(img, x1, y1)

    # combine per-axis weights and add a channel dimension for broadcasting
    wa = tf.expand_dims(wx0 * wy0, axis=3)
    wb = tf.expand_dims(wx0 * wy1, axis=3)
    wc = tf.expand_dims(wx1 * wy0, axis=3)
    wd = tf.expand_dims(wx1 * wy1, axis=3)

    # compute output
    out = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id])
    return out
if __name__ == "__main__":
    # Demo: warp frame_0048.png with the flow stored in frame_0047.flo and
    # write the result to output.jpg.
    img = Image.open('frame_0048.png')
    # Force three channels so the array matches the (B, H, W, 3) placeholder,
    # and take the spatial size from the image instead of hard-coding it.
    img = np.expand_dims(np.asarray(img.convert('RGB'), dtype=np.float32), 0)
    height, width = img.shape[1], img.shape[2]

    flow_file = 'frame_0047.flo'
    flow = get_flow(flow_file)
    if flow is None:
        raise SystemExit('Could not read optical flow from %s' % flow_file)

    with tf.Session() as sess:
        a = tf.placeholder(tf.float32, shape=[None, None, None, 3])
        flow_vec = tf.placeholder(tf.float32, shape=[None, 2, None, None])
        # Build the graph from the placeholder (not the NumPy array) so the
        # flow is actually fed at run time rather than baked in as a constant.
        output = tf_warp(a, flow_vec, height, width)
        sess.run(tf.global_variables_initializer())
        out = sess.run(output, feed_dict={a: img, flow_vec: flow})

    out = np.clip(out, 0, 255).astype('uint8')
    im = Image.fromarray(out[0])
    im.save('output.jpg')