diff --git a/keras_retinanet/backend/common.py b/keras_retinanet/backend/common.py index 8f8dcc6e0..115528e9e 100644 --- a/keras_retinanet/backend/common.py +++ b/keras_retinanet/backend/common.py @@ -52,16 +52,22 @@ def bbox_transform_inv(boxes, deltas, mean=None, std=None): return pred_boxes -def shift(shape, stride, anchors): +def shift(image_shape, features_shape, stride, anchors): """ Produce shifted anchors based on shape of the map and stride size. Args - shape : Shape to shift the anchors over. - stride : Stride to shift the anchors with over the shape. - anchors: The anchors to apply at each location. + image_shape : Shape of the input image. + features_shape : Shape of the feature map. + stride : Stride to shift the anchors with over the image. + anchors : The anchors to apply at each location. """ - shift_x = (keras.backend.arange(0, shape[1], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride - shift_y = (keras.backend.arange(0, shape[0], dtype=keras.backend.floatx()) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride + # compute the offset of the anchors based on the image shape and the feature map shape + # see https://github.com/fizyr/keras-retinanet/issues/1073 for more information + offset_x = keras.backend.cast((image_shape[1] - (features_shape[1] - 1) * stride), keras.backend.floatx()) / 2.0 + offset_y = keras.backend.cast((image_shape[0] - (features_shape[0] - 1) * stride), keras.backend.floatx()) / 2.0 + + shift_x = keras.backend.arange(0, features_shape[1], dtype=keras.backend.floatx()) * stride + offset_x + shift_y = keras.backend.arange(0, features_shape[0], dtype=keras.backend.floatx()) * stride + offset_y shift_x, shift_y = meshgrid(shift_x, shift_y) shift_x = keras.backend.reshape(shift_x, [-1]) diff --git a/keras_retinanet/layers/_misc.py b/keras_retinanet/layers/_misc.py index 72db2571c..700f2b7b2 100644 --- a/keras_retinanet/layers/_misc.py +++ 
b/keras_retinanet/layers/_misc.py @@ -58,28 +58,29 @@ def __init__(self, size, stride, ratios=None, scales=None, *args, **kwargs): super(Anchors, self).__init__(*args, **kwargs) def call(self, inputs, **kwargs): - features = inputs - features_shape = keras.backend.shape(features) + image, features = inputs + features_shape = keras.backend.shape(features) + image_shape = keras.backend.shape(image) # generate proposals from bbox deltas and shifted anchors if keras.backend.image_data_format() == 'channels_first': - anchors = backend.shift(features_shape[2:4], self.stride, self.anchors) + anchors = backend.shift(image_shape[2:4], features_shape[2:4], self.stride, self.anchors) else: - anchors = backend.shift(features_shape[1:3], self.stride, self.anchors) + anchors = backend.shift(image_shape[1:3], features_shape[1:3], self.stride, self.anchors) anchors = keras.backend.tile(keras.backend.expand_dims(anchors, axis=0), (features_shape[0], 1, 1)) return anchors def compute_output_shape(self, input_shape): - if None not in input_shape[1:]: + if None not in input_shape[1][1:]: if keras.backend.image_data_format() == 'channels_first': - total = np.prod(input_shape[2:4]) * self.num_anchors + total = np.prod(input_shape[1][2:4]) * self.num_anchors else: - total = np.prod(input_shape[1:3]) * self.num_anchors + total = np.prod(input_shape[1][1:3]) * self.num_anchors - return (input_shape[0], total, 4) + return (input_shape[1][0], total, 4) else: - return (input_shape[0], None, 4) + return (input_shape[1][0], None, 4) def get_config(self): config = super(Anchors, self).get_config() diff --git a/keras_retinanet/models/retinanet.py b/keras_retinanet/models/retinanet.py index ba75d4505..554a46183 100644 --- a/keras_retinanet/models/retinanet.py +++ b/keras_retinanet/models/retinanet.py @@ -207,11 +207,12 @@ def __build_pyramid(models, features): return [__build_model_pyramid(n, m, features) for n, m in models] -def __build_anchors(anchor_parameters, features): +def 
__build_anchors(anchor_parameters, image, features): """ Builds anchors for the shape of the features from FPN. Args - anchor_parameters : Parameteres that determine how anchors are generated. + anchor_parameters : Parameters that determine how anchors are generated. + image : The image input tensor. features : The FPN features. Returns @@ -229,7 +230,7 @@ def __build_anchors(anchor_parameters, features): ratios=anchor_parameters.ratios, scales=anchor_parameters.scales, name='anchors_{}'.format(i) - )(f) for i, f in enumerate(features) + )([image, f]) for i, f in enumerate(features) ] return keras.layers.Concatenate(axis=1, name='anchors')(anchors) @@ -328,7 +329,7 @@ def retinanet_bbox( # compute the anchors features = [model.get_layer(p_name).output for p_name in ['P3', 'P4', 'P5', 'P6', 'P7']] - anchors = __build_anchors(anchor_params, features) + anchors = __build_anchors(anchor_params, model.inputs[0], features) # we expect the anchors, regression and classification values as first output regression = model.outputs[0] diff --git a/keras_retinanet/utils/anchors.py b/keras_retinanet/utils/anchors.py index 08007c02b..43af0c314 100644 --- a/keras_retinanet/utils/anchors.py +++ b/keras_retinanet/utils/anchors.py @@ -234,24 +234,29 @@ def anchors_for_shape( ratios=anchor_params.ratios, scales=anchor_params.scales ) - shifted_anchors = shift(image_shapes[idx], anchor_params.strides[idx], anchors) + shifted_anchors = shift(image_shape, image_shapes[idx], anchor_params.strides[idx], anchors) all_anchors = np.append(all_anchors, shifted_anchors, axis=0) return all_anchors -def shift(shape, stride, anchors): - """ Produce shifted anchors based on shape of the map and stride size. Args - shape : Shape to shift the anchors over. - stride : Stride to shift the anchors with over the shape. - anchors: The anchors to apply at each location. + image_shape : Shape of the input image. 
+ features_shape : Shape of the feature map. + stride : Stride to shift the anchors with over the image. + anchors : The anchors to apply at each location. """ + # compute the offset of the anchors based on the image shape and the feature map shape + # see https://github.com/fizyr/keras-retinanet/issues/1073 for more information + offset_x = (image_shape[1] - (features_shape[1] - 1) * stride) / 2.0 + offset_y = (image_shape[0] - (features_shape[0] - 1) * stride) / 2.0 - # create a grid starting from half stride from the top left corner + # create a grid of anchor centers spaced by stride, starting at the offset computed above - shift_x = (np.arange(0, shape[1]) + 0.5) * stride - shift_y = (np.arange(0, shape[0]) + 0.5) * stride + shift_x = np.arange(0, features_shape[1]) * stride + offset_x + shift_y = np.arange(0, features_shape[0]) * stride + offset_y shift_x, shift_y = np.meshgrid(shift_x, shift_y)