Add upsample_cfg support in FPN (#2787)
* Add upsample_cfg support in FPN

* small fix

* Add multiple extra conv sources

* small logical fix

* Add neck tests for fpn

* Add neck tests for fpn

* fixed several typos

* resolved issues

* Removed extra_convs_source option

* added necks to apis.rst

* change according to comments

* reconfigured configs
Johnson-Wang authored May 24, 2020
1 parent e903b5c commit 50ffa24
Showing 12 changed files with 263 additions and 27 deletions.
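The config diffs below all follow the same pattern: the boolean `add_extra_convs` flag (together with the now-deprecated `extra_convs_on_inputs`) is replaced by a single string naming the source of the extra pyramid levels, and `FPN` additionally accepts an `upsample_cfg` dict that is forwarded to `F.interpolate` in the top-down path. A minimal migration sketch (the surrounding neck config is illustrative, not taken from any one file):

    # Old style: a bool flag, with the source controlled by `extra_convs_on_inputs`
    neck = dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs=True,  # extra levels taken from the backbone by default
        num_outs=5)

    # New style: the source is named explicitly; `upsample_cfg` is optional
    neck = dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_input',  # or 'on_lateral' / 'on_output'
        upsample_cfg=dict(mode='nearest'),  # default; e.g. dict(scale_factor=2, mode='nearest')
        num_outs=5)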
2 changes: 1 addition & 1 deletion configs/_base_/models/retinanet_r50_fpn.py
@@ -16,7 +16,7 @@
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         start_level=1,
-        add_extra_convs=True,
+        add_extra_convs='on_input',
         num_outs=5),
     bbox_head=dict(
         type='RetinaHead',
3 changes: 1 addition & 2 deletions configs/atss/atss_r50_fpn_1x_coco.py
@@ -19,8 +19,7 @@
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         start_level=1,
-        add_extra_convs=True,
-        extra_convs_on_inputs=False,
+        add_extra_convs='on_output',
         num_outs=5),
     bbox_head=dict(
         type='ATSSHead',
3 changes: 1 addition & 2 deletions configs/fcos/fcos_r50_caffe_fpn_4x4_1x_coco.py
@@ -20,8 +20,7 @@
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         start_level=1,
-        add_extra_convs=True,
-        extra_convs_on_inputs=False,  # use P5
+        add_extra_convs='on_output',  # use P5
         num_outs=5,
         relu_before_extra_convs=True),
     bbox_head=dict(
3 changes: 1 addition & 2 deletions configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py
@@ -20,8 +20,7 @@
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         start_level=1,
-        add_extra_convs=True,
-        extra_convs_on_inputs=False,  # use P5
+        add_extra_convs='on_output',  # use P5
         num_outs=5,
         relu_before_extra_convs=True),
     bbox_head=dict(
2 changes: 1 addition & 1 deletion configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py
@@ -21,7 +21,7 @@
         out_channels=256,
         start_level=1,
         num_outs=5,
-        add_extra_convs=True),
+        add_extra_convs='on_input'),
     bbox_head=dict(
         type='FoveaHead',
         num_classes=80,
@@ -16,7 +16,7 @@
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         start_level=1,
-        add_extra_convs=True,
+        add_extra_convs='on_input',
         num_outs=5),
     bbox_head=dict(
         type='GARetinaHead',
2 changes: 1 addition & 1 deletion configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py
@@ -7,7 +7,7 @@
             in_channels=[256, 512, 1024, 2048],
             out_channels=256,
             start_level=1,
-            add_extra_convs=True,
+            add_extra_convs='on_input',
             num_outs=5),
         dict(
             type='BFP',
2 changes: 1 addition & 1 deletion configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py
@@ -19,7 +19,7 @@
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         start_level=1,
-        add_extra_convs=True,
+        add_extra_convs='on_input',
         num_outs=5),
     bbox_head=dict(
         type='RepPointsHead',
2 changes: 1 addition & 1 deletion demo/mmdet_inference_colab.ipynb
@@ -534,4 +534,4 @@
 ]
 }
 ]
-}
+}
5 changes: 5 additions & 0 deletions docs/api.rst
@@ -75,6 +75,11 @@ backbones
 .. automodule:: mmdet.models.backbones
     :members:
 
+necks
+^^^^^^^^^^^^
+.. automodule:: mmdet.models.necks
+    :members:
+
 dense_heads
 ^^^^^^^^^^^^
 .. automodule:: mmdet.models.dense_heads
63 changes: 48 additions & 15 deletions mmdet/models/necks/fpn.py
@@ -22,10 +22,19 @@ class FPN(nn.Module):
             build the feature pyramid. Default: 0.
         end_level (int): Index of the end input backbone level (exclusive) to
             build the feature pyramid. Default: -1, which means the last level.
-        add_extra_convs (bool): Whether to add conv layers on top of the
-            original feature maps. Default: False.
-        extra_convs_on_inputs (bool): Whether to apply extra conv on
-            the original feature from the backbone. Default: False.
+        add_extra_convs (bool | str): If bool, it decides whether to add conv
+            layers on top of the original feature maps. Default: False.
+            If True, its actual mode is specified by `extra_convs_on_inputs`.
+            If str, it specifies the source feature map of the extra convs.
+            Only the following options are allowed:
+
+            - 'on_input': Last feat map of neck inputs (i.e. backbone feature).
+            - 'on_lateral': Last feature map after lateral convs.
+            - 'on_output': The last output feature map after fpn convs.
+        extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs
+            on the original feature from the backbone. If True, it is
+            equivalent to `add_extra_convs='on_input'`; if False, it is
+            equivalent to `add_extra_convs='on_output'`. Default: True.
         relu_before_extra_convs (bool): Whether to apply relu before the extra
             conv. Default: False.
         no_norm_on_lateral (bool): Whether to apply norm on lateral.
@@ -34,6 +43,8 @@ class FPN(nn.Module):
         norm_cfg (dict): Config dict for normalization layer. Default: None.
         act_cfg (str): Config dict for activation layer in ConvModule.
             Default: None.
+        upsample_cfg (dict): Config dict for the interpolate layer.
+            Default: `dict(mode='nearest')`.
 
     Example:
         >>> import torch
@@ -63,7 +74,8 @@ def __init__(self,
                  no_norm_on_lateral=False,
                  conv_cfg=None,
                  norm_cfg=None,
-                 act_cfg=None):
+                 act_cfg=None,
+                 upsample_cfg=dict(mode='nearest')):
         super(FPN, self).__init__()
         assert isinstance(in_channels, list)
         self.in_channels = in_channels
@@ -73,6 +85,7 @@ def __init__(self,
         self.relu_before_extra_convs = relu_before_extra_convs
         self.no_norm_on_lateral = no_norm_on_lateral
         self.fp16_enabled = False
+        self.upsample_cfg = upsample_cfg.copy()
 
         if end_level == -1:
             self.backbone_end_level = self.num_ins
@@ -85,7 +98,17 @@ def __init__(self,
         self.start_level = start_level
         self.end_level = end_level
         self.add_extra_convs = add_extra_convs
-        self.extra_convs_on_inputs = extra_convs_on_inputs
+        assert isinstance(add_extra_convs, (str, bool))
+        if isinstance(add_extra_convs, str):
+            # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'
+            assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')
+        elif add_extra_convs:  # True
+            if extra_convs_on_inputs:
+                # For compatibility with previous release
+                # TODO: deprecate `extra_convs_on_inputs`
+                self.add_extra_convs = 'on_input'
+            else:
+                self.add_extra_convs = 'on_output'
 
         self.lateral_convs = nn.ModuleList()
         self.fpn_convs = nn.ModuleList()
@@ -114,9 +137,9 @@ def __init__(self,
 
         # add extra conv layers (e.g., RetinaNet)
         extra_levels = num_outs - self.backbone_end_level + self.start_level
-        if add_extra_convs and extra_levels >= 1:
+        if self.add_extra_convs and extra_levels >= 1:
             for i in range(extra_levels):
-                if i == 0 and self.extra_convs_on_inputs:
+                if i == 0 and self.add_extra_convs == 'on_input':
                     in_channels = self.in_channels[self.backbone_end_level - 1]
                 else:
                     in_channels = out_channels
@@ -151,9 +174,15 @@ def forward(self, inputs):
         # build top-down path
         used_backbone_levels = len(laterals)
         for i in range(used_backbone_levels - 1, 0, -1):
-            prev_shape = laterals[i - 1].shape[2:]
-            laterals[i - 1] += F.interpolate(
-                laterals[i], size=prev_shape, mode='nearest')
+            # In some cases, fixing a `scale_factor` (e.g. 2) is preferred, but
+            # it cannot co-exist with `size` in `F.interpolate`.
+            if 'scale_factor' in self.upsample_cfg:
+                laterals[i - 1] += F.interpolate(laterals[i],
+                                                 **self.upsample_cfg)
+            else:
+                prev_shape = laterals[i - 1].shape[2:]
+                laterals[i - 1] += F.interpolate(
+                    laterals[i], size=prev_shape, **self.upsample_cfg)
 
         # build outputs
         # part 1: from original levels
@@ -169,11 +198,15 @@ def forward(self, inputs):
                     outs.append(F.max_pool2d(outs[-1], 1, stride=2))
             # add conv layers on top of original feature maps (RetinaNet)
             else:
-                if self.extra_convs_on_inputs:
-                    orig = inputs[self.backbone_end_level - 1]
-                    outs.append(self.fpn_convs[used_backbone_levels](orig))
+                if self.add_extra_convs == 'on_input':
+                    extra_source = inputs[self.backbone_end_level - 1]
+                elif self.add_extra_convs == 'on_lateral':
+                    extra_source = laterals[-1]
+                elif self.add_extra_convs == 'on_output':
+                    extra_source = outs[-1]
                 else:
-                    outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))
+                    raise NotImplementedError
+                outs.append(self.fpn_convs[used_backbone_levels](extra_source))
                 for i in range(used_backbone_levels + 1, self.num_outs):
                     if self.relu_before_extra_convs:
                         outs.append(self.fpn_convs[i](F.relu(outs[-1])))
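To see the new arguments end to end, here is a usage sketch in the spirit of the class docstring example above; the channel counts and feature-map sizes are illustrative, and power-of-two sizes are chosen so that a fixed `scale_factor` upsample lines up with the lateral shapes:

    import torch
    from mmdet.models.necks import FPN

    # Four backbone levels; channels and spatial sizes are made up for the demo.
    in_channels = [2, 3, 5, 7]
    scales = [64, 32, 16, 8]
    inputs = [torch.rand(1, c, s, s) for c, s in zip(in_channels, scales)]

    # Extra pyramid levels are built from the last FPN output ('on_output'),
    # and the top-down path upsamples with a fixed scale factor instead of `size`.
    fpn = FPN(
        in_channels=in_channels,
        out_channels=11,
        num_outs=6,
        add_extra_convs='on_output',
        relu_before_extra_convs=True,
        upsample_cfg=dict(scale_factor=2, mode='nearest'))

    outputs = fpn(inputs)
    for i, out in enumerate(outputs):
        print(f'outputs[{i}].shape = {out.shape}')
    # Spatial sizes: 64, 32, 16, 8 for the original levels, then 4 and 2 for
    # the two extra levels produced by stride-2 convs on the last output.

Passing the old-style `add_extra_convs=True` still works after this change: `__init__` maps it to 'on_input' or 'on_output' depending on `extra_convs_on_inputs`.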