Skip to content

Commit

Permalink
Correct computing offset of anchors.
Browse files Browse the repository at this point in the history
Check fizyr/keras-retinanet#1073 for more
information.
  • Loading branch information
hgaiser committed Jul 18, 2019
1 parent f9049e1 commit a494a81
Show file tree
Hide file tree
Showing 7 changed files with 99 additions and 65 deletions.
18 changes: 12 additions & 6 deletions keras_retinanet/backend/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,22 @@ def bbox_transform_inv(boxes, deltas, mean=None, std=None):
return pred_boxes


def shift(shape, stride, anchors):
def shift(image_shape, features_shape, stride, anchors):
""" Produce shifted anchors based on shape of the map and stride size.
Args
shape : Shape to shift the anchors over.
stride : Stride to shift the anchors with over the shape.
anchors: The anchors to apply at each location.
image_shape : Shape of the input image.
features_shape : Shape of the feature map.
stride : Stride to shift the anchors with over the image.
anchors : The anchors to apply at each location.
"""
shift_x = (keras.backend.arange(0, shape[1], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride
shift_y = (keras.backend.arange(0, shape[0], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride
# compute the offset of the anchors based on the image shape and the feature map shape
# see https://github.com/fizyr/keras-retinanet/issues/1073 for more information
offset_x = keras.backend.cast((image_shape[1] - (features_shape[1] - 1) * stride), keras.backend.floatx()) / 2.0
offset_y = keras.backend.cast((image_shape[0] - (features_shape[0] - 1) * stride), keras.backend.floatx()) / 2.0

shift_x = keras.backend.arange(0, features_shape[1], dtype=keras.backend.floatx()) * stride + offset_x
shift_y = keras.backend.arange(0, features_shape[0], dtype=keras.backend.floatx()) * stride + offset_y

shift_x, shift_y = meshgrid(shift_x, shift_y)
shift_x = keras.backend.reshape(shift_x, [-1])
Expand Down
19 changes: 10 additions & 9 deletions keras_retinanet/layers/_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,28 +58,29 @@ def __init__(self, size, stride, ratios=None, scales=None, *args, **kwargs):
super(Anchors, self).__init__(*args, **kwargs)

def call(self, inputs, **kwargs):
features = inputs
features_shape = keras.backend.shape(features)
image, features = inputs
features_shape = keras.backend.shape(features)
image_shape = keras.backend.shape(image)

# generate proposals from bbox deltas and shifted anchors
if keras.backend.image_data_format() == 'channels_first':
anchors = backend.shift(features_shape[2:4], self.stride, self.anchors)
anchors = backend.shift(image_shape[2:4], features_shape[2:4], self.stride, self.anchors)
else:
anchors = backend.shift(features_shape[1:3], self.stride, self.anchors)
anchors = backend.shift(image_shape[1:3], features_shape[1:3], self.stride, self.anchors)
anchors = keras.backend.tile(keras.backend.expand_dims(anchors, axis=0), (features_shape[0], 1, 1))

return anchors

def compute_output_shape(self, input_shape):
if None not in input_shape[1:]:
if None not in input_shape[1][1:]:
if keras.backend.image_data_format() == 'channels_first':
total = np.prod(input_shape[2:4]) * self.num_anchors
total = np.prod(input_shape[1][2:4]) * self.num_anchors
else:
total = np.prod(input_shape[1:3]) * self.num_anchors
total = np.prod(input_shape[1][1:3]) * self.num_anchors

return (input_shape[0], total, 4)
return (input_shape[1][0], total, 4)
else:
return (input_shape[0], None, 4)
return (input_shape[1][0], None, 4)

def get_config(self):
config = super(Anchors, self).get_config()
Expand Down
7 changes: 4 additions & 3 deletions keras_retinanet/models/retinanet.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,12 @@ def __build_pyramid(models, features):
return [__build_model_pyramid(n, m, features) for n, m in models]


def __build_anchors(anchor_parameters, features):
def __build_anchors(anchor_parameters, image, features):
""" Builds anchors for the shape of the features from FPN.
Args
anchor_parameters : Parameters that determine how anchors are generated.
image : The image input tensor.
features : The FPN features.
Returns
Expand All @@ -229,7 +230,7 @@ def __build_anchors(anchor_parameters, features):
ratios=anchor_parameters.ratios,
scales=anchor_parameters.scales,
name='anchors_{}'.format(i)
)(f) for i, f in enumerate(features)
)([image, f]) for i, f in enumerate(features)
]

return keras.layers.Concatenate(axis=1, name='anchors')(anchors)
Expand Down Expand Up @@ -328,7 +329,7 @@ def retinanet_bbox(

# compute the anchors
features = [model.get_layer(p_name).output for p_name in ['P3', 'P4', 'P5', 'P6', 'P7']]
anchors = __build_anchors(anchor_params, features)
anchors = __build_anchors(anchor_params, model.inputs[0], features)

# we expect the anchors, regression and classification values as first output
regression = model.outputs[0]
Expand Down
21 changes: 13 additions & 8 deletions keras_retinanet/utils/anchors.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,24 +234,29 @@ def anchors_for_shape(
ratios=anchor_params.ratios,
scales=anchor_params.scales
)
shifted_anchors = shift(image_shapes[idx], anchor_params.strides[idx], anchors)
shifted_anchors = shift(image_shape, image_shapes[idx], anchor_params.strides[idx], anchors)
all_anchors = np.append(all_anchors, shifted_anchors, axis=0)

return all_anchors


def shift(shape, stride, anchors):
""" Produce shifted anchors based on shape of the map and stride size.
def shift(image_shape, features_shape, stride, anchors):
""" Produce shifted anchors based on shape of the image, shape of the feature map and stride.
Args
shape : Shape to shift the anchors over.
stride : Stride to shift the anchors with over the shape.
anchors: The anchors to apply at each location.
image_shape : Shape of the input image.
features_shape : Shape of the feature map.
stride : Stride to shift the anchors with over the image.
anchors : The anchors to apply at each location.
"""
# compute the offset of the anchors based on the image shape and the feature map shape
# see https://github.com/fizyr/keras-retinanet/issues/1073 for more information
offset_x = (image_shape[1] - (features_shape[1] - 1) * stride) / 2.0
offset_y = (image_shape[0] - (features_shape[0] - 1) * stride) / 2.0

# create a grid of anchor centres starting at the computed offset from the top left corner
shift_x = (np.arange(0, shape[1]) + 0.5) * stride
shift_y = (np.arange(0, shape[0]) + 0.5) * stride
shift_x = np.arange(0, features_shape[1]) * stride + offset_x
shift_y = np.arange(0, features_shape[0]) * stride + offset_y

shift_x, shift_y = np.meshgrid(shift_x, shift_y)

Expand Down
61 changes: 24 additions & 37 deletions tests/backend/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ def test_bbox_transform_inv():


def test_shift():
shape = (2, 3)
stride = 8
image_shape = (20, 20)
feature_shape = (2, 2)
stride = 8

anchors = np.array([
[-8, -8, 8, 8],
Expand All @@ -75,49 +76,35 @@ def test_shift():

expected = [
# anchors for (0, 0)
[4 - 8, 4 - 8, 4 + 8, 4 + 8],
[4 - 16, 4 - 16, 4 + 16, 4 + 16],
[4 - 12, 4 - 12, 4 + 12, 4 + 12],
[4 - 12, 4 - 16, 4 + 12, 4 + 16],
[4 - 16, 4 - 12, 4 + 16, 4 + 12],
[6 - 8, 6 - 8, 6 + 8, 6 + 8],
[6 - 16, 6 - 16, 6 + 16, 6 + 16],
[6 - 12, 6 - 12, 6 + 12, 6 + 12],
[6 - 12, 6 - 16, 6 + 12, 6 + 16],
[6 - 16, 6 - 12, 6 + 16, 6 + 12],

# anchors for (0, 1)
[12 - 8, 4 - 8, 12 + 8, 4 + 8],
[12 - 16, 4 - 16, 12 + 16, 4 + 16],
[12 - 12, 4 - 12, 12 + 12, 4 + 12],
[12 - 12, 4 - 16, 12 + 12, 4 + 16],
[12 - 16, 4 - 12, 12 + 16, 4 + 12],

# anchors for (0, 2)
[20 - 8, 4 - 8, 20 + 8, 4 + 8],
[20 - 16, 4 - 16, 20 + 16, 4 + 16],
[20 - 12, 4 - 12, 20 + 12, 4 + 12],
[20 - 12, 4 - 16, 20 + 12, 4 + 16],
[20 - 16, 4 - 12, 20 + 16, 4 + 12],
[14 - 8, 6 - 8, 14 + 8, 6 + 8],
[14 - 16, 6 - 16, 14 + 16, 6 + 16],
[14 - 12, 6 - 12, 14 + 12, 6 + 12],
[14 - 12, 6 - 16, 14 + 12, 6 + 16],
[14 - 16, 6 - 12, 14 + 16, 6 + 12],

# anchors for (1, 0)
[4 - 8, 12 - 8, 4 + 8, 12 + 8],
[4 - 16, 12 - 16, 4 + 16, 12 + 16],
[4 - 12, 12 - 12, 4 + 12, 12 + 12],
[4 - 12, 12 - 16, 4 + 12, 12 + 16],
[4 - 16, 12 - 12, 4 + 16, 12 + 12],
[6 - 8, 14 - 8, 6 + 8, 14 + 8],
[6 - 16, 14 - 16, 6 + 16, 14 + 16],
[6 - 12, 14 - 12, 6 + 12, 14 + 12],
[6 - 12, 14 - 16, 6 + 12, 14 + 16],
[6 - 16, 14 - 12, 6 + 16, 14 + 12],

# anchors for (1, 1)
[12 - 8, 12 - 8, 12 + 8, 12 + 8],
[12 - 16, 12 - 16, 12 + 16, 12 + 16],
[12 - 12, 12 - 12, 12 + 12, 12 + 12],
[12 - 12, 12 - 16, 12 + 12, 12 + 16],
[12 - 16, 12 - 12, 12 + 16, 12 + 12],

# anchors for (1, 2)
[20 - 8, 12 - 8, 20 + 8, 12 + 8],
[20 - 16, 12 - 16, 20 + 16, 12 + 16],
[20 - 12, 12 - 12, 20 + 12, 12 + 12],
[20 - 12, 12 - 16, 20 + 12, 12 + 16],
[20 - 16, 12 - 12, 20 + 16, 12 + 12],
[14 - 8, 14 - 8, 14 + 8, 14 + 8],
[14 - 16, 14 - 16, 14 + 16, 14 + 16],
[14 - 12, 14 - 12, 14 + 12, 14 + 12],
[14 - 12, 14 - 16, 14 + 12, 14 + 16],
[14 - 16, 14 - 12, 14 + 16, 14 + 12],
]

result = keras_retinanet.backend.shift(shape, stride, anchors)
result = keras_retinanet.backend.shift(image_shape, feature_shape, stride, anchors)
result = keras.backend.eval(result)

np.testing.assert_array_equal(result, expected)
12 changes: 10 additions & 2 deletions tests/layers/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,16 @@ def test_simple(self):
scales=np.array([1], keras.backend.floatx()),
)

# create fake image input (only shape is used anyway)
image = np.zeros((1, 16, 16, 3), dtype=keras.backend.floatx())
image = keras.backend.variable(image)

# create fake features input (only shape is used anyway)
features = np.zeros((1, 2, 2, 1024), dtype=keras.backend.floatx())
features = keras.backend.variable(features)

# call the Anchors layer
anchors = anchors_layer.call(features)
anchors = anchors_layer.call([image, features])
anchors = keras.backend.eval(anchors)

# expected anchor values
Expand All @@ -59,12 +63,16 @@ def test_mini_batch(self):
scales=np.array([1], dtype=keras.backend.floatx()),
)

# create fake image input (only shape is used anyway)
image = np.zeros((2, 16, 16, 3), dtype=keras.backend.floatx())
image = keras.backend.variable(image)

# create fake features input with batch_size=2
features = np.zeros((2, 2, 2, 1024), dtype=keras.backend.floatx())
features = keras.backend.variable(features)

# call the Anchors layer
anchors = anchors_layer.call(features)
anchors = anchors_layer.call([image, features])
anchors = keras.backend.eval(anchors)

# expected anchor values
Expand Down
26 changes: 26 additions & 0 deletions tests/utils/test_anchors.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,29 @@ def test_anchors_for_shape_values():
strides[0] * 3 / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2,
strides[0] * 3 / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2,
], decimal=6)


def test_anchors_for_shape_odd_input():
    """ Check the anchors generated for an image size that is not a multiple of the stride.
    """
    # a 20x20 image with a stride of 8 causes rounding errors when downsampling using convolutions
    input_shape = (20, 20)
    params = AnchorParameters(
        [32],                                   # sizes
        [8],                                    # strides
        np.array([1], keras.backend.floatx()),  # ratios
        np.array([1], keras.backend.floatx()),  # scales
    )

    result = anchors_for_shape(input_shape, pyramid_levels=[3], anchor_params=params)

    # one square anchor with side 32 per location; the centres lie at 2, 10 and 18
    # on both axes and are listed row by row with x varying fastest
    expected = np.array([
        [-14, -14, 18, 18],
        [-6, -14, 26, 18],
        [2, -14, 34, 18],
        [-14, -6, 18, 26],
        [-6, -6, 26, 26],
        [2, -6, 34, 26],
        [-14, 2, 18, 34],
        [-6, 2, 26, 34],
        [2, 2, 34, 34],
    ])

    np.testing.assert_equal(result, expected)

0 comments on commit a494a81

Please sign in to comment.